author     Rusty Russell <rusty@rustcorp.com.au>  2008-12-31 07:35:57 -0500
committer  Rusty Russell <rusty@rustcorp.com.au>  2008-12-31 07:35:57 -0500
commit     2ca1a615835d9f4990f42102ab1f2ef434e7e89c (patch)
tree       726cf3d5f29a6c66c44e4bd68e7ebed2fd83d059 /fs
parent     e12f0102ac81d660c9f801d0a0e10ccf4537a9de (diff)
parent     6a94cb73064c952255336cc57731904174b2c58f (diff)

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

Conflicts:
	arch/x86/kernel/io_apic.c
Diffstat (limited to 'fs')
-rw-r--r--  fs/aio.c | 100
-rw-r--r--  fs/bio-integrity.c | 2
-rw-r--r--  fs/bio.c | 320
-rw-r--r--  fs/buffer.c | 19
-rw-r--r--  fs/exec.c | 8
-rw-r--r--  fs/ext4/super.c | 8
-rw-r--r--  fs/inode.c | 209
-rw-r--r--  fs/jfs/inode.c | 8
-rw-r--r--  fs/lockd/clntlock.c | 23
-rw-r--r--  fs/lockd/host.c | 10
-rw-r--r--  fs/lockd/svc.c | 6
-rw-r--r--  fs/nfs/callback.c | 36
-rw-r--r--  fs/nfs/client.c | 95
-rw-r--r--  fs/nfs/delegation.c | 260
-rw-r--r--  fs/nfs/delegation.h | 33
-rw-r--r--  fs/nfs/dir.c | 24
-rw-r--r--  fs/nfs/inode.c | 13
-rw-r--r--  fs/nfs/internal.h | 14
-rw-r--r--  fs/nfs/mount_clnt.c | 34
-rw-r--r--  fs/nfs/nfs4_fs.h | 32
-rw-r--r--  fs/nfs/nfs4proc.c | 431
-rw-r--r--  fs/nfs/nfs4renewd.c | 22
-rw-r--r--  fs/nfs/nfs4state.c | 415
-rw-r--r--  fs/nfs/nfs4xdr.c | 1235
-rw-r--r--  fs/nfs/nfsroot.c | 27
-rw-r--r--  fs/nfs/read.c | 6
-rw-r--r--  fs/nfs/super.c | 44
-rw-r--r--  fs/nfs_common/nfsacl.c | 4
-rw-r--r--  fs/nfsd/nfs4callback.c | 9
-rw-r--r--  fs/nfsd/nfs4state.c | 12
-rw-r--r--  fs/proc/stat.c | 16
-rw-r--r--  fs/xfs/Makefile | 6
-rw-r--r--  fs/xfs/linux-2.6/sv.h | 22
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 66
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.h | 3
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 87
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 30
-rw-r--r--  fs/xfs/linux-2.6/xfs_cred.h | 8
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.c | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 189
-rw-r--r--  fs/xfs/linux-2.6/xfs_fs_subr.c | 23
-rw-r--r--  fs/xfs/linux-2.6/xfs_globals.c | 8
-rw-r--r--  fs/xfs/linux-2.6/xfs_globals.h | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 223
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.h | 82
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c | 849
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.h | 214
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 122
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.h | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 13
-rw-r--r--  fs/xfs/linux-2.6/xfs_lrw.c | 50
-rw-r--r--  fs/xfs/linux-2.6/xfs_stats.c | 6
-rw-r--r--  fs/xfs/linux-2.6/xfs_stats.h | 65
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 884
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.h | 15
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 762
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 55
-rw-r--r--  fs/xfs/linux-2.6/xfs_sysctl.c | 11
-rw-r--r--  fs/xfs/linux-2.6/xfs_sysctl.h | 3
-rw-r--r--  fs/xfs/linux-2.6/xfs_vfs.h | 77
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.c | 145
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.h | 72
-rw-r--r--  fs/xfs/quota/xfs_dquot.c | 39
-rw-r--r--  fs/xfs/quota/xfs_dquot.h | 4
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.c | 45
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 57
-rw-r--r--  fs/xfs/quota/xfs_qm.h | 3
-rw-r--r--  fs/xfs/quota/xfs_qm_bhv.c | 5
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c | 151
-rw-r--r--  fs/xfs/support/debug.c | 39
-rw-r--r--  fs/xfs/support/debug.h | 2
-rw-r--r--  fs/xfs/support/ktrace.c | 9
-rw-r--r--  fs/xfs/xfs.h | 2
-rw-r--r--  fs/xfs/xfs_acl.c | 2
-rw-r--r--  fs/xfs/xfs_ag.h | 15
-rw-r--r--  fs/xfs/xfs_alloc.c | 264
-rw-r--r--  fs/xfs/xfs_alloc.h | 27
-rw-r--r--  fs/xfs/xfs_alloc_btree.c | 2387
-rw-r--r--  fs/xfs/xfs_alloc_btree.h | 107
-rw-r--r--  fs/xfs/xfs_arch.h | 39
-rw-r--r--  fs/xfs/xfs_bit.h | 3
-rw-r--r--  fs/xfs/xfs_bmap.c | 410
-rw-r--r--  fs/xfs/xfs_bmap.h | 72
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 2617
-rw-r--r--  fs/xfs/xfs_bmap_btree.h | 171
-rw-r--r--  fs/xfs/xfs_btree.c | 3596
-rw-r--r--  fs/xfs/xfs_btree.h | 392
-rw-r--r--  fs/xfs/xfs_btree_trace.c | 249
-rw-r--r--  fs/xfs/xfs_btree_trace.h | 116
-rw-r--r--  fs/xfs/xfs_buf_item.c | 45
-rw-r--r--  fs/xfs/xfs_clnt.h | 105
-rw-r--r--  fs/xfs/xfs_da_btree.h | 24
-rw-r--r--  fs/xfs/xfs_dfrag.c | 8
-rw-r--r--  fs/xfs/xfs_dfrag.h | 2
-rw-r--r--  fs/xfs/xfs_dinode.h | 148
-rw-r--r--  fs/xfs/xfs_dir2_sf.h | 7
-rw-r--r--  fs/xfs/xfs_dmops.c | 5
-rw-r--r--  fs/xfs/xfs_error.c | 15
-rw-r--r--  fs/xfs/xfs_error.h | 12
-rw-r--r--  fs/xfs/xfs_extfree_item.c | 45
-rw-r--r--  fs/xfs/xfs_fs.h | 22
-rw-r--r--  fs/xfs/xfs_fsops.c | 30
-rw-r--r--  fs/xfs/xfs_ialloc.c | 449
-rw-r--r--  fs/xfs/xfs_ialloc.h | 31
-rw-r--r--  fs/xfs/xfs_ialloc_btree.c | 2193
-rw-r--r--  fs/xfs/xfs_ialloc_btree.h | 111
-rw-r--r--  fs/xfs/xfs_iget.c | 735
-rw-r--r--  fs/xfs/xfs_imap.h | 40
-rw-r--r--  fs/xfs/xfs_inode.c | 587
-rw-r--r--  fs/xfs/xfs_inode.h | 375
-rw-r--r--  fs/xfs/xfs_inode_item.c | 45
-rw-r--r--  fs/xfs/xfs_inode_item.h | 41
-rw-r--r--  fs/xfs/xfs_iomap.c | 28
-rw-r--r--  fs/xfs/xfs_itable.c | 102
-rw-r--r--  fs/xfs/xfs_itable.h | 14
-rw-r--r--  fs/xfs/xfs_log.c | 81
-rw-r--r--  fs/xfs/xfs_log.h | 4
-rw-r--r--  fs/xfs/xfs_log_priv.h | 48
-rw-r--r--  fs/xfs/xfs_log_recover.c | 416
-rw-r--r--  fs/xfs/xfs_mount.c | 81
-rw-r--r--  fs/xfs/xfs_mount.h | 73
-rw-r--r--  fs/xfs/xfs_qmops.c | 5
-rw-r--r--  fs/xfs/xfs_quota.h | 8
-rw-r--r--  fs/xfs/xfs_rename.c | 61
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 41
-rw-r--r--  fs/xfs/xfs_rw.c | 2
-rw-r--r--  fs/xfs/xfs_sb.h | 167
-rw-r--r--  fs/xfs/xfs_trans.c | 22
-rw-r--r--  fs/xfs/xfs_trans.h | 322
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 362
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 7
-rw-r--r--  fs/xfs/xfs_trans_inode.c | 30
-rw-r--r--  fs/xfs/xfs_trans_item.c | 10
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 98
-rw-r--r--  fs/xfs/xfs_utils.c | 12
-rw-r--r--  fs/xfs/xfs_vfsops.c | 757
-rw-r--r--  fs/xfs/xfs_vfsops.h | 16
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 354
-rw-r--r--  fs/xfs/xfs_vnodeops.h | 10
139 files changed, 12319 insertions, 14278 deletions
diff --git a/fs/aio.c b/fs/aio.c
index f658441d5666..d6f89d3c15e8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,20 @@ static int aio_setup_ring(struct kioctx *ctx)
 	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
 } while(0)
 
+static void ctx_rcu_free(struct rcu_head *head)
+{
+	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+	unsigned nr_events = ctx->max_reqs;
+
+	kmem_cache_free(kioctx_cachep, ctx);
+
+	if (nr_events) {
+		spin_lock(&aio_nr_lock);
+		BUG_ON(aio_nr - nr_events > aio_nr);
+		aio_nr -= nr_events;
+		spin_unlock(&aio_nr_lock);
+	}
+}
 
 /* __put_ioctx
  *	Called when the last user of an aio context has gone away,
@@ -198,8 +212,6 @@ static int aio_setup_ring(struct kioctx *ctx)
  */
 static void __put_ioctx(struct kioctx *ctx)
 {
-	unsigned nr_events = ctx->max_reqs;
-
 	BUG_ON(ctx->reqs_active);
 
 	cancel_delayed_work(&ctx->wq);
@@ -208,14 +220,7 @@ static void __put_ioctx(struct kioctx *ctx)
 	mmdrop(ctx->mm);
 	ctx->mm = NULL;
 	pr_debug("__put_ioctx: freeing %p\n", ctx);
-	kmem_cache_free(kioctx_cachep, ctx);
-
-	if (nr_events) {
-		spin_lock(&aio_nr_lock);
-		BUG_ON(aio_nr - nr_events > aio_nr);
-		aio_nr -= nr_events;
-		spin_unlock(&aio_nr_lock);
-	}
+	call_rcu(&ctx->rcu_head, ctx_rcu_free);
 }
 
 #define get_ioctx(kioctx) do { \
@@ -235,6 +240,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 {
 	struct mm_struct *mm;
 	struct kioctx *ctx;
+	int did_sync = 0;
 
 	/* Prevent overflows */
 	if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -267,21 +273,30 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 		goto out_freectx;
 
 	/* limit the number of system wide aios */
-	spin_lock(&aio_nr_lock);
-	if (aio_nr + ctx->max_reqs > aio_max_nr ||
-	    aio_nr + ctx->max_reqs < aio_nr)
-		ctx->max_reqs = 0;
-	else
-		aio_nr += ctx->max_reqs;
-	spin_unlock(&aio_nr_lock);
+	do {
+		spin_lock_bh(&aio_nr_lock);
+		if (aio_nr + nr_events > aio_max_nr ||
+		    aio_nr + nr_events < aio_nr)
+			ctx->max_reqs = 0;
+		else
+			aio_nr += ctx->max_reqs;
+		spin_unlock_bh(&aio_nr_lock);
+		if (ctx->max_reqs || did_sync)
+			break;
+
+		/* wait for rcu callbacks to have completed before giving up */
+		synchronize_rcu();
+		did_sync = 1;
+		ctx->max_reqs = nr_events;
+	} while (1);
 
 	if (ctx->max_reqs == 0)
 		goto out_cleanup;
 
 	/* now link into global list. */
-	write_lock(&mm->ioctx_list_lock);
-	ctx->next = mm->ioctx_list;
-	mm->ioctx_list = ctx;
-	write_unlock(&mm->ioctx_list_lock);
+	spin_lock(&mm->ioctx_lock);
+	hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
+	spin_unlock(&mm->ioctx_lock);
 
 	dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
 		ctx, ctx->user_id, current->mm, ctx->ring_info.nr);
@@ -375,11 +390,12 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
  */
 void exit_aio(struct mm_struct *mm)
 {
-	struct kioctx *ctx = mm->ioctx_list;
-	mm->ioctx_list = NULL;
-	while (ctx) {
-		struct kioctx *next = ctx->next;
-		ctx->next = NULL;
+	struct kioctx *ctx;
+
+	while (!hlist_empty(&mm->ioctx_list)) {
+		ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
+		hlist_del_rcu(&ctx->list);
+
 		aio_cancel_all(ctx);
 
 		wait_for_all_aios(ctx);
@@ -394,7 +410,6 @@ void exit_aio(struct mm_struct *mm)
 			atomic_read(&ctx->users), ctx->dead,
 			ctx->reqs_active);
 		put_ioctx(ctx);
-		ctx = next;
 	}
 }
 
@@ -555,19 +570,21 @@ int aio_put_req(struct kiocb *req)
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
-	struct kioctx *ioctx;
-	struct mm_struct *mm;
+	struct mm_struct *mm = current->mm;
+	struct kioctx *ctx = NULL;
+	struct hlist_node *n;
 
-	mm = current->mm;
-	read_lock(&mm->ioctx_list_lock);
-	for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next)
-		if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) {
-			get_ioctx(ioctx);
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
+		if (ctx->user_id == ctx_id && !ctx->dead) {
+			get_ioctx(ctx);
 			break;
 		}
-	read_unlock(&mm->ioctx_list_lock);
+	}
 
-	return ioctx;
+	rcu_read_unlock();
+	return ctx;
 }
 
 /*
@@ -1215,19 +1232,14 @@ out:
 static void io_destroy(struct kioctx *ioctx)
 {
 	struct mm_struct *mm = current->mm;
-	struct kioctx **tmp;
 	int was_dead;
 
 	/* delete the entry from the list is someone else hasn't already */
-	write_lock(&mm->ioctx_list_lock);
+	spin_lock(&mm->ioctx_lock);
 	was_dead = ioctx->dead;
 	ioctx->dead = 1;
-	for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx;
-	     tmp = &(*tmp)->next)
-		;
-	if (*tmp)
-		*tmp = ioctx->next;
-	write_unlock(&mm->ioctx_list_lock);
+	hlist_del_rcu(&ioctx->list);
+	spin_unlock(&mm->ioctx_lock);
 
 	dprintk("aio_release(%p)\n", ioctx);
 	if (likely(!was_dead))
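The fs/aio.c hunks above convert the per-mm ioctx list from a rwlock-protected singly linked list to an RCU-protected hlist: writers serialize on mm->ioctx_lock and use hlist_add_head_rcu()/hlist_del_rcu(), lookup_ioctx() walks the list under rcu_read_lock() and takes its reference before unlocking, and the final kmem_cache_free() is deferred through call_rcu() so a concurrent reader can never touch freed memory. A minimal sketch of the same pattern follows; my_ctx, ctx_lock and the function names are hypothetical, not part of this patch:

	#include <linux/rculist.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct my_ctx {
		unsigned long		id;
		int			dead;
		struct hlist_node	list;		/* linked under ctx_lock */
		struct rcu_head		rcu_head;	/* for deferred freeing */
	};

	static DEFINE_SPINLOCK(ctx_lock);		/* serializes writers only */
	static HLIST_HEAD(ctx_list);

	static void my_ctx_rcu_free(struct rcu_head *head)
	{
		/* Runs once every reader that could see the node has finished. */
		kfree(container_of(head, struct my_ctx, rcu_head));
	}

	static void my_ctx_unlink(struct my_ctx *ctx)
	{
		spin_lock(&ctx_lock);
		ctx->dead = 1;
		hlist_del_rcu(&ctx->list);
		spin_unlock(&ctx_lock);
		call_rcu(&ctx->rcu_head, my_ctx_rcu_free);
	}

	static struct my_ctx *my_ctx_lookup(unsigned long id)
	{
		struct my_ctx *ctx, *found = NULL;
		struct hlist_node *n;

		rcu_read_lock();
		hlist_for_each_entry_rcu(ctx, n, &ctx_list, list) {
			if (ctx->id == id && !ctx->dead) {
				/* take a real reference here, as lookup_ioctx()
				 * does with get_ioctx(), before unlocking */
				found = ctx;
				break;
			}
		}
		rcu_read_unlock();
		return found;
	}

Any reference that outlives the read-side critical section has to be taken before rcu_read_unlock(), which is exactly why lookup_ioctx() calls get_ioctx() inside the loop.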
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 19caf7c962ac..77ebc3c263d6 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -111,7 +111,7 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
 	    && bip->bip_buf != NULL)
 		kfree(bip->bip_buf);
 
-	mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
+	bvec_free_bs(bs, bip->bip_vec, bip->bip_pool);
 	mempool_free(bip, bs->bio_integrity_pool);
 
 	bio->bi_integrity = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index df99c882b807..711cee103602 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -31,7 +31,11 @@
 
 DEFINE_TRACE(block_split);
 
-static struct kmem_cache *bio_slab __read_mostly;
+/*
+ * Test patch to inline a certain number of bi_io_vec's inside the bio
+ * itself, to shrink a bio data allocation from two mempool calls to one
+ */
+#define BIO_INLINE_VECS		4
 
 static mempool_t *bio_split_pool __read_mostly;
 
@@ -40,9 +44,8 @@ static mempool_t *bio_split_pool __read_mostly;
  * break badly! cannot be bigger than what you can fit into an
  * unsigned short
  */
-
 #define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
 	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
@@ -53,12 +56,121 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
  */
 struct bio_set *fs_bio_set;
 
+/*
+ * Our slab pool management
+ */
+struct bio_slab {
+	struct kmem_cache *slab;
+	unsigned int slab_ref;
+	unsigned int slab_size;
+	char name[8];
+};
+static DEFINE_MUTEX(bio_slab_lock);
+static struct bio_slab *bio_slabs;
+static unsigned int bio_slab_nr, bio_slab_max;
+
+static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
+{
+	unsigned int sz = sizeof(struct bio) + extra_size;
+	struct kmem_cache *slab = NULL;
+	struct bio_slab *bslab;
+	unsigned int i, entry = -1;
+
+	mutex_lock(&bio_slab_lock);
+
+	i = 0;
+	while (i < bio_slab_nr) {
+		struct bio_slab *bslab = &bio_slabs[i];
+
+		if (!bslab->slab && entry == -1)
+			entry = i;
+		else if (bslab->slab_size == sz) {
+			slab = bslab->slab;
+			bslab->slab_ref++;
+			break;
+		}
+		i++;
+	}
+
+	if (slab)
+		goto out_unlock;
+
+	if (bio_slab_nr == bio_slab_max && entry == -1) {
+		bio_slab_max <<= 1;
+		bio_slabs = krealloc(bio_slabs,
+				     bio_slab_max * sizeof(struct bio_slab),
+				     GFP_KERNEL);
+		if (!bio_slabs)
+			goto out_unlock;
+	}
+	if (entry == -1)
+		entry = bio_slab_nr++;
+
+	bslab = &bio_slabs[entry];
+
+	snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
+	slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!slab)
+		goto out_unlock;
+
+	printk("bio: create slab <%s> at %d\n", bslab->name, entry);
+	bslab->slab = slab;
+	bslab->slab_ref = 1;
+	bslab->slab_size = sz;
+out_unlock:
+	mutex_unlock(&bio_slab_lock);
+	return slab;
+}
+
+static void bio_put_slab(struct bio_set *bs)
+{
+	struct bio_slab *bslab = NULL;
+	unsigned int i;
+
+	mutex_lock(&bio_slab_lock);
+
+	for (i = 0; i < bio_slab_nr; i++) {
+		if (bs->bio_slab == bio_slabs[i].slab) {
+			bslab = &bio_slabs[i];
+			break;
+		}
+	}
+
+	if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
+		goto out;
+
+	WARN_ON(!bslab->slab_ref);
+
+	if (--bslab->slab_ref)
+		goto out;
+
+	kmem_cache_destroy(bslab->slab);
+	bslab->slab = NULL;
+
+out:
+	mutex_unlock(&bio_slab_lock);
+}
+
 unsigned int bvec_nr_vecs(unsigned short idx)
 {
 	return bvec_slabs[idx].nr_vecs;
 }
 
-struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
+void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
+{
+	BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
+
+	if (idx == BIOVEC_MAX_IDX)
+		mempool_free(bv, bs->bvec_pool);
+	else {
+		struct biovec_slab *bvs = bvec_slabs + idx;
+
+		kmem_cache_free(bvs->slab, bv);
+	}
+}
+
+struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
+			      struct bio_set *bs)
 {
 	struct bio_vec *bvl;
 
@@ -67,60 +179,85 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
 	 * If not, this is a bio_kmalloc() allocation and just do a
 	 * kzalloc() for the exact number of vecs right away.
 	 */
-	if (bs) {
+	if (!bs)
+		bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask);
+
+	/*
+	 * see comment near bvec_array define!
+	 */
+	switch (nr) {
+	case 1:
+		*idx = 0;
+		break;
+	case 2 ... 4:
+		*idx = 1;
+		break;
+	case 5 ... 16:
+		*idx = 2;
+		break;
+	case 17 ... 64:
+		*idx = 3;
+		break;
+	case 65 ... 128:
+		*idx = 4;
+		break;
+	case 129 ... BIO_MAX_PAGES:
+		*idx = 5;
+		break;
+	default:
+		return NULL;
+	}
+
+	/*
+	 * idx now points to the pool we want to allocate from. only the
+	 * 1-vec entry pool is mempool backed.
+	 */
+	if (*idx == BIOVEC_MAX_IDX) {
+fallback:
+		bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
+	} else {
+		struct biovec_slab *bvs = bvec_slabs + *idx;
+		gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+
 		/*
-		 * see comment near bvec_array define!
+		 * Make this allocation restricted and don't dump info on
+		 * allocation failures, since we'll fallback to the mempool
+		 * in case of failure.
 		 */
-		switch (nr) {
-		case 1:
-			*idx = 0;
-			break;
-		case 2 ... 4:
-			*idx = 1;
-			break;
-		case 5 ... 16:
-			*idx = 2;
-			break;
-		case 17 ... 64:
-			*idx = 3;
-			break;
-		case 65 ... 128:
-			*idx = 4;
-			break;
-		case 129 ... BIO_MAX_PAGES:
-			*idx = 5;
-			break;
-		default:
-			return NULL;
-		}
+		__gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
 
 		/*
-		 * idx now points to the pool we want to allocate from
+		 * Try a slab allocation. If this fails and __GFP_WAIT
+		 * is set, retry with the 1-entry mempool
 		 */
-		bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
-		if (bvl)
-			memset(bvl, 0,
-				bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
-	} else
-		bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
+		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
+		if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
+			*idx = BIOVEC_MAX_IDX;
+			goto fallback;
+		}
+	}
 
 	return bvl;
 }
 
-void bio_free(struct bio *bio, struct bio_set *bio_set)
+void bio_free(struct bio *bio, struct bio_set *bs)
 {
-	if (bio->bi_io_vec) {
-		const int pool_idx = BIO_POOL_IDX(bio);
+	void *p;
 
-		BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
-
-		mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
-	}
+	if (bio_has_allocated_vec(bio))
+		bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
 
 	if (bio_integrity(bio))
-		bio_integrity_free(bio, bio_set);
+		bio_integrity_free(bio, bs);
+
+	/*
+	 * If we have front padding, adjust the bio pointer before freeing
+	 */
+	p = bio;
+	if (bs->front_pad)
+		p -= bs->front_pad;
 
-	mempool_free(bio, bio_set->bio_pool);
+	mempool_free(p, bs->bio_pool);
 }
 
 /*
@@ -133,7 +270,8 @@ static void bio_fs_destructor(struct bio *bio)
 
 static void bio_kmalloc_destructor(struct bio *bio)
 {
-	kfree(bio->bi_io_vec);
+	if (bio_has_allocated_vec(bio))
+		kfree(bio->bi_io_vec);
 	kfree(bio);
 }
 
@@ -157,16 +295,20 @@ void bio_init(struct bio *bio)
 * for a &struct bio to become free. If a %NULL @bs is passed in, we will
 * fall back to just using @kmalloc to allocate the required memory.
 *
- * allocate bio and iovecs from the memory pools specified by the
- * bio_set structure, or @kmalloc if none given.
+ * Note that the caller must set ->bi_destructor on succesful return
+ * of a bio, to do the appropriate freeing of the bio once the reference
+ * count drops to zero.
 **/
 struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 {
-	struct bio *bio;
+	struct bio *bio = NULL;
+
+	if (bs) {
+		void *p = mempool_alloc(bs->bio_pool, gfp_mask);
 
-	if (bs)
-		bio = mempool_alloc(bs->bio_pool, gfp_mask);
-	else
+		if (p)
+			bio = p + bs->front_pad;
+	} else
 		bio = kmalloc(sizeof(*bio), gfp_mask);
 
 	if (likely(bio)) {
@@ -176,7 +318,15 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 		if (likely(nr_iovecs)) {
 			unsigned long uninitialized_var(idx);
 
-			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+			if (nr_iovecs <= BIO_INLINE_VECS) {
+				idx = 0;
+				bvl = bio->bi_inline_vecs;
+				nr_iovecs = BIO_INLINE_VECS;
+			} else {
+				bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx,
+							bs);
+				nr_iovecs = bvec_nr_vecs(idx);
+			}
 			if (unlikely(!bvl)) {
 				if (bs)
 					mempool_free(bio, bs->bio_pool);
@@ -186,7 +336,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
 				goto out;
 			}
 			bio->bi_flags |= idx << BIO_POOL_OFFSET;
-			bio->bi_max_vecs = bvec_nr_vecs(idx);
+			bio->bi_max_vecs = nr_iovecs;
 		}
 		bio->bi_io_vec = bvl;
 	}
@@ -1346,30 +1496,18 @@ EXPORT_SYMBOL(bio_sector_offset);
 */
 static int biovec_create_pools(struct bio_set *bs, int pool_entries)
 {
-	int i;
+	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
 
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		struct biovec_slab *bp = bvec_slabs + i;
-		mempool_t **bvp = bs->bvec_pools + i;
+	bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
+	if (!bs->bvec_pool)
+		return -ENOMEM;
 
-		*bvp = mempool_create_slab_pool(pool_entries, bp->slab);
-		if (!*bvp)
-			return -ENOMEM;
-	}
 	return 0;
 }
 
 static void biovec_free_pools(struct bio_set *bs)
 {
-	int i;
-
-	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
-		mempool_t *bvp = bs->bvec_pools[i];
-
-		if (bvp)
-			mempool_destroy(bvp);
-	}
-
+	mempool_destroy(bs->bvec_pool);
 }
 
 void bioset_free(struct bio_set *bs)
@@ -1379,25 +1517,49 @@ void bioset_free(struct bio_set *bs)
 
 	bioset_integrity_free(bs);
 	biovec_free_pools(bs);
+	bio_put_slab(bs);
 
 	kfree(bs);
 }
 
-struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
+/**
+ * bioset_create  - Create a bio_set
+ * @pool_size:	Number of bio and bio_vecs to cache in the mempool
+ * @front_pad:	Number of bytes to allocate in front of the returned bio
+ *
+ * Description:
+ *    Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
+ *    to ask for a number of bytes to be allocated in front of the bio.
+ *    Front pad allocation is useful for embedding the bio inside
+ *    another structure, to avoid allocating extra data to go with the bio.
+ *    Note that the bio must be embedded at the END of that structure always,
+ *    or things will break badly.
+ */
+struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 {
-	struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL);
+	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
+	struct bio_set *bs;
 
+	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
 	if (!bs)
 		return NULL;
 
-	bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab);
+	bs->front_pad = front_pad;
+
+	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
+	if (!bs->bio_slab) {
+		kfree(bs);
+		return NULL;
+	}
+
+	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
 	if (!bs->bio_pool)
 		goto bad;
 
-	if (bioset_integrity_create(bs, bio_pool_size))
+	if (bioset_integrity_create(bs, pool_size))
 		goto bad;
 
-	if (!biovec_create_pools(bs, bvec_pool_size))
+	if (!biovec_create_pools(bs, pool_size))
 		return bs;
 
 bad:
@@ -1421,12 +1583,16 @@ static void __init biovec_init_slabs(void)
 
 static int __init init_bio(void)
 {
-	bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	bio_slab_max = 2;
+	bio_slab_nr = 0;
+	bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
+	if (!bio_slabs)
+		panic("bio: can't allocate bios\n");
 
 	bio_integrity_init_slab();
 	biovec_init_slabs();
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
+	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
 	if (!fs_bio_set)
 		panic("bio: can't allocate bios\n");
 
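The new bioset_create() kernel-doc above explains the point of @front_pad: a caller can embed the struct bio at the end of its own per-I/O structure and have bio_alloc_bioset() hand back a bio with that many bytes already reserved in front of it. A sketch of what such a caller might look like; my_io, my_bio_set and the function names are illustrative, not part of this patch:

	#include <linux/bio.h>
	#include <linux/stddef.h>

	struct my_io {
		void		*cookie;
		struct bio	bio;	/* must be the LAST field, per the doc above */
	};

	static struct bio_set *my_bio_set;

	static void my_bio_destructor(struct bio *bio)
	{
		/* The reworked bio_alloc_bioset() requires callers to set
		 * ->bi_destructor themselves. */
		bio_free(bio, my_bio_set);
	}

	static int __init my_init(void)
	{
		/* Reserve room for everything in my_io that precedes the bio. */
		my_bio_set = bioset_create(16, offsetof(struct my_io, bio));
		return my_bio_set ? 0 : -ENOMEM;
	}

	static struct my_io *my_io_alloc(gfp_t gfp, int nr_vecs)
	{
		struct bio *bio = bio_alloc_bioset(gfp, nr_vecs, my_bio_set);

		if (!bio)
			return NULL;
		bio->bi_destructor = my_bio_destructor;
		/* Walk back from the bio to the enclosing structure. */
		return container_of(bio, struct my_io, bio);
	}

On the free side, the reworked bio_free() above subtracts bs->front_pad again before returning the memory to the pool, so container and bio come and go as a single allocation.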
diff --git a/fs/buffer.c b/fs/buffer.c
index 10179cfa1152..776ae091d3b0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -99,10 +99,18 @@ __clear_page_buffers(struct page *page)
 	page_cache_release(page);
 }
 
+
+static int quiet_error(struct buffer_head *bh)
+{
+	if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
+		return 0;
+	return 1;
+}
+
+
 static void buffer_io_error(struct buffer_head *bh)
 {
 	char b[BDEVNAME_SIZE];
-
 	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
 			bdevname(bh->b_bdev, b),
 			(unsigned long long)bh->b_blocknr);
@@ -144,7 +152,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
+		if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
 			buffer_io_error(bh);
 			printk(KERN_WARNING "lost page write due to "
 					"I/O error on %s\n",
@@ -394,7 +402,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		set_buffer_uptodate(bh);
 	} else {
 		clear_buffer_uptodate(bh);
-		if (printk_ratelimit())
+		if (!quiet_error(bh))
 			buffer_io_error(bh);
 		SetPageError(page);
 	}
@@ -455,7 +463,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (printk_ratelimit()) {
+		if (!quiet_error(bh)) {
 			buffer_io_error(bh);
 			printk(KERN_WARNING "lost page write due to "
 					"I/O error on %s\n",
@@ -2913,6 +2921,9 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 		set_bit(BH_Eopnotsupp, &bh->b_state);
 	}
 
+	if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
+		set_bit(BH_Quiet, &bh->b_state);
+
 	bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
 	bio_put(bio);
 }
diff --git a/fs/exec.c b/fs/exec.c
index 1f59ea079cbb..02d2e120542d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -773,7 +773,6 @@ static int de_thread(struct task_struct *tsk)
 	struct signal_struct *sig = tsk->signal;
 	struct sighand_struct *oldsighand = tsk->sighand;
 	spinlock_t *lock = &oldsighand->siglock;
-	struct task_struct *leader = NULL;
 	int count;
 
 	if (thread_group_empty(tsk))
@@ -811,7 +810,7 @@ static int de_thread(struct task_struct *tsk)
 	 * and to assume its PID:
 	 */
 	if (!thread_group_leader(tsk)) {
-		leader = tsk->group_leader;
+		struct task_struct *leader = tsk->group_leader;
 
 		sig->notify_count = -1;	/* for exit_notify() */
 		for (;;) {
@@ -863,8 +862,9 @@ static int de_thread(struct task_struct *tsk)
 
 		BUG_ON(leader->exit_state != EXIT_ZOMBIE);
 		leader->exit_state = EXIT_DEAD;
-
 		write_unlock_irq(&tasklist_lock);
+
+		release_task(leader);
 	}
 
 	sig->group_exit_task = NULL;
@@ -873,8 +873,6 @@ static int de_thread(struct task_struct *tsk)
 no_thread_group:
 	exit_itimers(sig);
 	flush_itimer_signals();
-	if (leader)
-		release_task(leader);
 
 	if (atomic_read(&oldsighand->count) != 1) {
 		struct sighand_struct *newsighand;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c65dbe..04158ad74dbb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1721,7 +1721,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
 	/* small i_blocks in vfs inode? */
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * CONFIG_LSF is not enabled implies the inode
+		 * CONFIG_LBD is not enabled implies the inode
 		 * i_block represent total blocks in 512 bytes
 		 * 32 == size of vfs inode i_blocks * 8
 		 */
@@ -1764,7 +1764,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 
 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * !has_huge_files or CONFIG_LSF is not enabled
+		 * !has_huge_files or CONFIG_LBD is not enabled
 		 * implies the inode i_block represent total blocks in
 		 * 512 bytes 32 == size of vfs inode i_blocks * 8
 		 */
@@ -2021,13 +2021,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (has_huge_files) {
 		/*
 		 * Large file size enabled file system can only be
-		 * mount if kernel is build with CONFIG_LSF
+		 * mount if kernel is build with CONFIG_LBD
 		 */
 		if (sizeof(root->i_blocks) < sizeof(u64) &&
 				!(sb->s_flags & MS_RDONLY)) {
 			printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
 					"files cannot be mounted read-write "
-					"without CONFIG_LSF.\n", sb->s_id);
+					"without CONFIG_LBD.\n", sb->s_id);
 			goto failed_mount;
 		}
 	}
diff --git a/fs/inode.c b/fs/inode.c
index 0487ddba1397..098a2443196f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -108,84 +108,100 @@ static void wake_up_inode(struct inode *inode)
 	wake_up_bit(&inode->i_state, __I_LOCK);
 }
 
-static struct inode *alloc_inode(struct super_block *sb)
+/**
+ * inode_init_always - perform inode structure intialisation
+ * @sb - superblock inode belongs to.
+ * @inode - inode to initialise
+ *
+ * These are initializations that need to be done on every inode
+ * allocation as the fields are not initialised by slab allocation.
+ */
+struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 {
 	static const struct address_space_operations empty_aops;
 	static struct inode_operations empty_iops;
 	static const struct file_operations empty_fops;
-	struct inode *inode;
-
-	if (sb->s_op->alloc_inode)
-		inode = sb->s_op->alloc_inode(sb);
-	else
-		inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);
 
-	if (inode) {
-		struct address_space * const mapping = &inode->i_data;
-
-		inode->i_sb = sb;
-		inode->i_blkbits = sb->s_blocksize_bits;
-		inode->i_flags = 0;
-		atomic_set(&inode->i_count, 1);
-		inode->i_op = &empty_iops;
-		inode->i_fop = &empty_fops;
-		inode->i_nlink = 1;
-		atomic_set(&inode->i_writecount, 0);
-		inode->i_size = 0;
-		inode->i_blocks = 0;
-		inode->i_bytes = 0;
-		inode->i_generation = 0;
+	struct address_space * const mapping = &inode->i_data;
+
+	inode->i_sb = sb;
+	inode->i_blkbits = sb->s_blocksize_bits;
+	inode->i_flags = 0;
+	atomic_set(&inode->i_count, 1);
+	inode->i_op = &empty_iops;
+	inode->i_fop = &empty_fops;
+	inode->i_nlink = 1;
+	atomic_set(&inode->i_writecount, 0);
+	inode->i_size = 0;
+	inode->i_blocks = 0;
+	inode->i_bytes = 0;
+	inode->i_generation = 0;
 #ifdef CONFIG_QUOTA
-		memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
+	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
 #endif
-		inode->i_pipe = NULL;
-		inode->i_bdev = NULL;
-		inode->i_cdev = NULL;
-		inode->i_rdev = 0;
-		inode->dirtied_when = 0;
-		if (security_inode_alloc(inode)) {
-			if (inode->i_sb->s_op->destroy_inode)
-				inode->i_sb->s_op->destroy_inode(inode);
-			else
-				kmem_cache_free(inode_cachep, (inode));
-			return NULL;
-		}
+	inode->i_pipe = NULL;
+	inode->i_bdev = NULL;
+	inode->i_cdev = NULL;
+	inode->i_rdev = 0;
+	inode->dirtied_when = 0;
+	if (security_inode_alloc(inode)) {
+		if (inode->i_sb->s_op->destroy_inode)
+			inode->i_sb->s_op->destroy_inode(inode);
+		else
+			kmem_cache_free(inode_cachep, (inode));
+		return NULL;
+	}
 
-		spin_lock_init(&inode->i_lock);
-		lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
+	spin_lock_init(&inode->i_lock);
+	lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
 
-		mutex_init(&inode->i_mutex);
-		lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
+	mutex_init(&inode->i_mutex);
+	lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
 
-		init_rwsem(&inode->i_alloc_sem);
-		lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+	init_rwsem(&inode->i_alloc_sem);
+	lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
 
-		mapping->a_ops = &empty_aops;
-		mapping->host = inode;
-		mapping->flags = 0;
-		mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
-		mapping->assoc_mapping = NULL;
-		mapping->backing_dev_info = &default_backing_dev_info;
-		mapping->writeback_index = 0;
+	mapping->a_ops = &empty_aops;
+	mapping->host = inode;
+	mapping->flags = 0;
+	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
+	mapping->assoc_mapping = NULL;
+	mapping->backing_dev_info = &default_backing_dev_info;
+	mapping->writeback_index = 0;
 
-		/*
-		 * If the block_device provides a backing_dev_info for client
-		 * inodes then use that.  Otherwise the inode share the bdev's
-		 * backing_dev_info.
-		 */
-		if (sb->s_bdev) {
-			struct backing_dev_info *bdi;
-
-			bdi = sb->s_bdev->bd_inode_backing_dev_info;
-			if (!bdi)
-				bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
-			mapping->backing_dev_info = bdi;
-		}
-		inode->i_private = NULL;
-		inode->i_mapping = mapping;
+	/*
+	 * If the block_device provides a backing_dev_info for client
+	 * inodes then use that.  Otherwise the inode share the bdev's
+	 * backing_dev_info.
+	 */
+	if (sb->s_bdev) {
+		struct backing_dev_info *bdi;
+
+		bdi = sb->s_bdev->bd_inode_backing_dev_info;
+		if (!bdi)
+			bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
+		mapping->backing_dev_info = bdi;
 	}
+	inode->i_private = NULL;
+	inode->i_mapping = mapping;
+
 	return inode;
 }
+EXPORT_SYMBOL(inode_init_always);
+
+static struct inode *alloc_inode(struct super_block *sb)
+{
+	struct inode *inode;
+
+	if (sb->s_op->alloc_inode)
+		inode = sb->s_op->alloc_inode(sb);
+	else
+		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
+
+	if (inode)
+		return inode_init_always(sb, inode);
+	return NULL;
+}
 
 void destroy_inode(struct inode *inode)
 {
@@ -196,6 +212,7 @@ void destroy_inode(struct inode *inode)
 	else
 		kmem_cache_free(inode_cachep, (inode));
 }
+EXPORT_SYMBOL(destroy_inode);
 
 
 /*
@@ -534,6 +551,49 @@ repeat:
 	return node ? inode : NULL;
 }
 
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
+{
+	unsigned long tmp;
+
+	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+			L1_CACHE_BYTES;
+	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
+	return tmp & I_HASHMASK;
+}
+
+static inline void
+__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
+			struct inode *inode)
+{
+	inodes_stat.nr_inodes++;
+	list_add(&inode->i_list, &inode_in_use);
+	list_add(&inode->i_sb_list, &sb->s_inodes);
+	if (head)
+		hlist_add_head(&inode->i_hash, head);
+}
+
+/**
+ * inode_add_to_lists - add a new inode to relevant lists
+ * @sb - superblock inode belongs to.
+ * @inode - inode to mark in use
+ *
+ * When an inode is allocated it needs to be accounted for, added to the in use
+ * list, the owning superblock and the inode hash. This needs to be done under
+ * the inode_lock, so export a function to do this rather than the inode lock
+ * itself. We calculate the hash list to add to here so it is all internal
+ * which requires the caller to have already set up the inode number in the
+ * inode to add.
+ */
+void inode_add_to_lists(struct super_block *sb, struct inode *inode)
+{
+	struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
+
+	spin_lock(&inode_lock);
+	__inode_add_to_lists(sb, head, inode);
+	spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL_GPL(inode_add_to_lists);
+
 /**
  *	new_inode 	- obtain an inode
  *	@sb: superblock
@@ -561,9 +621,7 @@ struct inode *new_inode(struct super_block *sb)
 	inode = alloc_inode(sb);
 	if (inode) {
 		spin_lock(&inode_lock);
-		inodes_stat.nr_inodes++;
-		list_add(&inode->i_list, &inode_in_use);
-		list_add(&inode->i_sb_list, &sb->s_inodes);
+		__inode_add_to_lists(sb, NULL, inode);
 		inode->i_ino = ++last_ino;
 		inode->i_state = 0;
 		spin_unlock(&inode_lock);
@@ -622,10 +680,7 @@ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *h
 		if (set(inode, data))
 			goto set_failed;
 
-		inodes_stat.nr_inodes++;
-		list_add(&inode->i_list, &inode_in_use);
-		list_add(&inode->i_sb_list, &sb->s_inodes);
-		hlist_add_head(&inode->i_hash, head);
+		__inode_add_to_lists(sb, head, inode);
 		inode->i_state = I_LOCK|I_NEW;
 		spin_unlock(&inode_lock);
 
@@ -671,10 +726,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
 		old = find_inode_fast(sb, head, ino);
 		if (!old) {
 			inode->i_ino = ino;
-			inodes_stat.nr_inodes++;
-			list_add(&inode->i_list, &inode_in_use);
-			list_add(&inode->i_sb_list, &sb->s_inodes);
-			hlist_add_head(&inode->i_hash, head);
+			__inode_add_to_lists(sb, head, inode);
 			inode->i_state = I_LOCK|I_NEW;
 			spin_unlock(&inode_lock);
 
@@ -698,16 +750,6 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
 	return inode;
 }
 
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
-{
-	unsigned long tmp;
-
-	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
-			L1_CACHE_BYTES;
-	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
-	return tmp & I_HASHMASK;
-}
-
 /**
  *	iunique - get a unique inode number
  *	@sb: superblock
@@ -1292,6 +1334,7 @@ int inode_wait(void *word)
 	schedule();
 	return 0;
 }
+EXPORT_SYMBOL(inode_wait);
 
 /*
 * If we try to find an inode in the inode hash while it is being
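The inode.c refactoring above splits allocation from initialisation: inode_init_always() now does only the per-allocation field setup and is exported, so a filesystem that manages its own inode memory can call it on an inode it allocated itself (the XFS rework elsewhere in this merge is the consumer). Roughly, with a hypothetical foo_inode wrapper that is not part of this patch:

	#include <linux/fs.h>
	#include <linux/slab.h>

	/* Hypothetical fs-private inode wrapper, allocated from our own cache. */
	struct foo_inode {
		unsigned long	foo_flags;
		struct inode	vfs_inode;
	};

	static struct kmem_cache *foo_inode_cache;

	static struct inode *foo_alloc_inode(struct super_block *sb)
	{
		struct foo_inode *fi;

		fi = kmem_cache_alloc(foo_inode_cache, GFP_KERNEL);
		if (!fi)
			return NULL;

		/*
		 * Generic VFS setup that used to be buried in alloc_inode().
		 * On failure (the security hook) it disposes of the inode
		 * through sb->s_op->destroy_inode and returns NULL.
		 */
		if (!inode_init_always(sb, &fi->vfs_inode))
			return NULL;

		fi->foo_flags = 0;
		return &fi->vfs_inode;
	}

inode_add_to_lists() is exported for the same reason: it lets such a filesystem account and hash an inode whose number it already set up, without open-coding the inode_lock handling.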
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 210339784b56..b00ee9f05a06 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -59,8 +59,14 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino)
 		if (inode->i_size >= IDATASIZE) {
 			inode->i_op = &page_symlink_inode_operations;
 			inode->i_mapping->a_ops = &jfs_aops;
-		} else
+		} else {
 			inode->i_op = &jfs_symlink_inode_operations;
+			/*
+			 * The inline data should be null-terminated, but
+			 * don't let on-disk corruption crash the kernel
+			 */
+			JFS_IP(inode)->i_inline[inode->i_size] = '\0';
+		}
 	} else {
 		inode->i_op = &jfs_file_inode_operations;
 		init_special_inode(inode, inode->i_mode, inode->i_rdev);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 8307dd64bf46..1f3b0fc0d351 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -14,6 +14,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
 #include <linux/smp_lock.h>
+#include <linux/kthread.h>
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
@@ -60,7 +61,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 
 	host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen,
 				   nlm_init->protocol, nlm_version,
-				   nlm_init->hostname);
+				   nlm_init->hostname, nlm_init->noresvport);
 	if (host == NULL) {
 		lockd_down();
 		return ERR_PTR(-ENOLCK);
@@ -191,11 +192,15 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
 void
 nlmclnt_recovery(struct nlm_host *host)
 {
+	struct task_struct *task;
+
 	if (!host->h_reclaiming++) {
 		nlm_get_host(host);
-		__module_get(THIS_MODULE);
-		if (kernel_thread(reclaimer, host, CLONE_FS | CLONE_FILES) < 0)
-			module_put(THIS_MODULE);
+		task = kthread_run(reclaimer, host, "%s-reclaim", host->h_name);
+		if (IS_ERR(task))
+			printk(KERN_ERR "lockd: unable to spawn reclaimer "
+				"thread. Locks for %s won't be reclaimed! "
+				"(%ld)\n", host->h_name, PTR_ERR(task));
 	}
 }
 
@@ -207,7 +212,6 @@ reclaimer(void *ptr)
 	struct file_lock *fl, *next;
 	u32 nsmstate;
 
-	daemonize("%s-reclaim", host->h_name);
 	allow_signal(SIGKILL);
 
 	down_write(&host->h_rwsem);
@@ -233,7 +237,12 @@ restart:
 	list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
 		list_del_init(&fl->fl_u.nfs_fl.list);
 
-		/* Why are we leaking memory here? --okir */
+		/*
+		 * sending this thread a SIGKILL will result in any unreclaimed
+		 * locks being removed from the h_granted list. This means that
+		 * the kernel will not attempt to reclaim them again if a new
+		 * reclaimer thread is spawned for this host.
+		 */
 		if (signalled())
 			continue;
 		if (nlmclnt_reclaim(host, fl) != 0)
@@ -261,5 +270,5 @@ restart:
 	nlm_release_host(host);
 	lockd_down();
 	unlock_kernel();
-	module_put_and_exit(0);
+	return 0;
 }
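The clntlock.c hunks replace the bare kernel_thread()/daemonize() combination with kthread_run(), which creates the thread already detached and named, wakes it, and reports failure as an ERR_PTR value rather than a negative pid. The shape of the conversion, reduced to a hypothetical worker that is not the lockd reclaimer:

	#include <linux/err.h>
	#include <linux/kernel.h>
	#include <linux/kthread.h>

	static int my_worker(void *data)
	{
		/* No daemonize() needed; kthreads start detached and named. */
		/* ... do the work ... */
		return 0;	/* a plain return replaces module_put_and_exit(0) */
	}

	static void my_worker_spawn(void *cookie, const char *name)
	{
		struct task_struct *task;

		task = kthread_run(my_worker, cookie, "%s-worker", name);
		if (IS_ERR(task))
			printk(KERN_ERR "my_worker_spawn: cannot start thread "
				"for %s (%ld)\n", name, PTR_ERR(task));
	}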
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index e05d04416037..abdebf76b820 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -48,6 +48,7 @@ struct nlm_lookup_host_info {
 	const size_t		hostname_len;	/* it's length */
 	const struct sockaddr	*src_sap;	/* our address (optional) */
 	const size_t		src_len;	/* it's length */
+	const int		noresvport;	/* use non-priv port */
 };
 
 /*
@@ -222,6 +223,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 	host->h_nsmstate   = 0;			/* real NSM state */
 	host->h_nsmhandle  = nsm;
 	host->h_server     = ni->server;
+	host->h_noresvport = ni->noresvport;
 	hlist_add_head(&host->h_hash, chain);
 	INIT_LIST_HEAD(&host->h_lockowners);
 	spin_lock_init(&host->h_lock);
@@ -272,6 +274,7 @@ nlm_destroy_host(struct nlm_host *host)
 * @protocol: transport protocol to use
 * @version: NLM protocol version
 * @hostname: '\0'-terminated hostname of server
+ * @noresvport: 1 if non-privileged port should be used
 *
 * Returns an nlm_host structure that matches the passed-in
 * [server address, transport protocol, NLM version, server hostname].
@@ -281,7 +284,9 @@ nlm_destroy_host(struct nlm_host *host)
 struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 				     const size_t salen,
 				     const unsigned short protocol,
-				     const u32 version, const char *hostname)
+				     const u32 version,
+				     const char *hostname,
+				     int noresvport)
 {
 	const struct sockaddr source = {
 		.sa_family	= AF_UNSPEC,
@@ -296,6 +301,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 		.hostname_len	= strlen(hostname),
 		.src_sap	= &source,
 		.src_len	= sizeof(source),
+		.noresvport	= noresvport,
 	};
 
 	dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
@@ -417,6 +423,8 @@ nlm_bind_host(struct nlm_host *host)
 	 */
 	if (!host->h_server)
 		args.flags |= RPC_CLNT_CREATE_HARDRTRY;
+	if (host->h_noresvport)
+		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 
 	clnt = rpc_create(&args);
 	if (!IS_ERR(clnt))
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 56b076736b56..252d80163d02 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -45,7 +45,7 @@
 static struct svc_program	nlmsvc_program;
 
 struct nlmsvc_binding *		nlmsvc_ops;
-EXPORT_SYMBOL(nlmsvc_ops);
+EXPORT_SYMBOL_GPL(nlmsvc_ops);
 
 static DEFINE_MUTEX(nlmsvc_mutex);
 static unsigned int		nlmsvc_users;
@@ -300,7 +300,7 @@ out:
 	mutex_unlock(&nlmsvc_mutex);
 	return error;
 }
-EXPORT_SYMBOL(lockd_up);
+EXPORT_SYMBOL_GPL(lockd_up);
 
 /*
 * Decrement the user count and bring down lockd if we're the last.
@@ -329,7 +329,7 @@ lockd_down(void)
 out:
 	mutex_unlock(&nlmsvc_mutex);
 }
-EXPORT_SYMBOL(lockd_down);
+EXPORT_SYMBOL_GPL(lockd_down);
 
 #ifdef CONFIG_SYSCTL
 
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index c2e9cfd9e5a4..3e634f2a1083 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -16,6 +16,7 @@
 #include <linux/mutex.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
+#include <linux/sunrpc/svcauth_gss.h>
 
 #include <net/inet_sock.h>
 
@@ -182,10 +183,34 @@ void nfs_callback_down(void)
 	mutex_unlock(&nfs_callback_mutex);
 }
 
+static int check_gss_callback_principal(struct nfs_client *clp,
+					struct svc_rqst *rqstp)
+{
+	struct rpc_clnt *r = clp->cl_rpcclient;
+	char *p = svc_gss_principal(rqstp);
+
+	/*
+	 * It might just be a normal user principal, in which case
+	 * userspace won't bother to tell us the name at all.
+	 */
+	if (p == NULL)
+		return SVC_DENIED;
+
+	/* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
+
+	if (memcmp(p, "nfs@", 4) != 0)
+		return SVC_DENIED;
+	p += 4;
+	if (strcmp(p, r->cl_server) != 0)
+		return SVC_DENIED;
+	return SVC_OK;
+}
+
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
 	struct nfs_client *clp;
 	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
+	int ret = SVC_OK;
 
 	/* Don't talk to strangers */
 	clp = nfs_find_client(svc_addr(rqstp), 4);
@@ -194,21 +219,22 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 
 	dprintk("%s: %s NFSv4 callback!\n", __func__,
 			svc_print_addr(rqstp, buf, sizeof(buf)));
-	nfs_put_client(clp);
 
 	switch (rqstp->rq_authop->flavour) {
 		case RPC_AUTH_NULL:
 			if (rqstp->rq_proc != CB_NULL)
-				return SVC_DENIED;
+				ret = SVC_DENIED;
 			break;
 		case RPC_AUTH_UNIX:
 			break;
 		case RPC_AUTH_GSS:
-			/* FIXME: RPCSEC_GSS handling? */
+			ret = check_gss_callback_principal(clp, rqstp);
+			break;
 		default:
-			return SVC_DENIED;
+			ret = SVC_DENIED;
 	}
-	return SVC_OK;
+	nfs_put_client(clp);
+	return ret;
 }
 
 /*
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7547600b6174..9b728f3565a1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -143,7 +143,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_proto = cl_init->proto;
 
 #ifdef CONFIG_NFS_V4
-	init_rwsem(&clp->cl_sem);
 	INIT_LIST_HEAD(&clp->cl_delegations);
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
@@ -224,31 +223,54 @@ void nfs_put_client(struct nfs_client *clp)
 	}
 }
 
-static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
-				      const struct sockaddr_in *sa2)
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped)
 {
-	return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
+	switch (sa->sa_family) {
+	default:
+		return NULL;
+	case AF_INET6:
+		return &((const struct sockaddr_in6 *)sa)->sin6_addr;
+		break;
+	case AF_INET:
+		ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr,
+				addr_mapped);
+		return addr_mapped;
+	}
 }
 
-static int nfs_sockaddr_match_ipaddr6(const struct sockaddr_in6 *sa1,
-				      const struct sockaddr_in6 *sa2)
+static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
+				     const struct sockaddr *sa2)
+{
+	const struct in6_addr *addr1;
+	const struct in6_addr *addr2;
+	struct in6_addr addr1_mapped;
+	struct in6_addr addr2_mapped;
+
+	addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped);
+	if (likely(addr1 != NULL)) {
+		addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped);
+		if (likely(addr2 != NULL))
+			return ipv6_addr_equal(addr1, addr2);
+	}
+	return 0;
+}
+#else
+static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
+				      const struct sockaddr_in *sa2)
 {
-	return ipv6_addr_equal(&sa1->sin6_addr, &sa2->sin6_addr);
+	return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
 }
 
 static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
 				     const struct sockaddr *sa2)
 {
-	switch (sa1->sa_family) {
-	case AF_INET:
+	if (unlikely(sa1->sa_family != AF_INET || sa2->sa_family != AF_INET))
+		return 0;
 	return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
 			(const struct sockaddr_in *)sa2);
-	case AF_INET6:
-		return nfs_sockaddr_match_ipaddr6((const struct sockaddr_in6 *)sa1,
-				(const struct sockaddr_in6 *)sa2);
-	}
-	BUG();
 }
+#endif
 
 /*
 * Find a client by IP address and protocol version
@@ -270,8 +292,6 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
 		if (clp->rpc_ops->version != nfsversion)
 			continue;
 
-		if (addr->sa_family != clap->sa_family)
-			continue;
 		/* Match only the IP address, not the port number */
 		if (!nfs_sockaddr_match_ipaddr(addr, clap))
 			continue;
@@ -305,8 +325,6 @@ struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
 		if (clp->rpc_ops->version != nfsvers)
 			continue;
 
-		if (sap->sa_family != clap->sa_family)
-			continue;
 		/* Match only the IP address, not the port number */
 		if (!nfs_sockaddr_match_ipaddr(sap, clap))
 			continue;
@@ -470,7 +488,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
 static int nfs_create_rpc_client(struct nfs_client *clp,
 				 const struct rpc_timeout *timeparms,
 				 rpc_authflavor_t flavor,
-				 int flags)
+				 int discrtry, int noresvport)
 {
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_create_args args = {
@@ -482,9 +500,13 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
 		.program	= &nfs_program,
 		.version	= clp->rpc_ops->version,
 		.authflavor	= flavor,
-		.flags		= flags,
 	};
 
+	if (discrtry)
+		args.flags |= RPC_CLNT_CREATE_DISCRTRY;
+	if (noresvport)
+		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+
 	if (!IS_ERR(clp->cl_rpcclient))
 		return 0;
 
@@ -522,6 +544,8 @@ static int nfs_start_lockd(struct nfs_server *server)
 		.protocol	= server->flags & NFS_MOUNT_TCP ?
 					IPPROTO_TCP : IPPROTO_UDP,
 		.nfs_version	= clp->rpc_ops->version,
+		.noresvport	= server->flags & NFS_MOUNT_NORESVPORT ?
+					1 : 0,
 	};
 
 	if (nlm_init.nfs_version > 3)
@@ -623,7 +647,8 @@ static int nfs_init_client(struct nfs_client *clp,
 * Create a client RPC handle for doing FSSTAT with UNIX auth only
624 * - RFC 2623, sec 2.3.2 648 * - RFC 2623, sec 2.3.2
625 */ 649 */
626 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0); 650 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
651 0, data->flags & NFS_MOUNT_NORESVPORT);
627 if (error < 0) 652 if (error < 0)
628 goto error; 653 goto error;
629 nfs_mark_client_ready(clp, NFS_CS_READY); 654 nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -965,7 +990,8 @@ error:
965static int nfs4_init_client(struct nfs_client *clp, 990static int nfs4_init_client(struct nfs_client *clp,
966 const struct rpc_timeout *timeparms, 991 const struct rpc_timeout *timeparms,
967 const char *ip_addr, 992 const char *ip_addr,
968 rpc_authflavor_t authflavour) 993 rpc_authflavor_t authflavour,
994 int flags)
969{ 995{
970 int error; 996 int error;
971 997
@@ -979,7 +1005,7 @@ static int nfs4_init_client(struct nfs_client *clp,
979 clp->rpc_ops = &nfs_v4_clientops; 1005 clp->rpc_ops = &nfs_v4_clientops;
980 1006
981 error = nfs_create_rpc_client(clp, timeparms, authflavour, 1007 error = nfs_create_rpc_client(clp, timeparms, authflavour,
982 RPC_CLNT_CREATE_DISCRTRY); 1008 1, flags & NFS_MOUNT_NORESVPORT);
983 if (error < 0) 1009 if (error < 0)
984 goto error; 1010 goto error;
985 memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); 1011 memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1030,7 +1056,8 @@ static int nfs4_set_client(struct nfs_server *server,
1030 error = PTR_ERR(clp); 1056 error = PTR_ERR(clp);
1031 goto error; 1057 goto error;
1032 } 1058 }
1033 error = nfs4_init_client(clp, timeparms, ip_addr, authflavour); 1059 error = nfs4_init_client(clp, timeparms, ip_addr, authflavour,
1060 server->flags);
1034 if (error < 0) 1061 if (error < 0)
1035 goto error_put; 1062 goto error_put;
1036 1063
@@ -1059,6 +1086,10 @@ static int nfs4_init_server(struct nfs_server *server,
1059 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, 1086 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
1060 data->timeo, data->retrans); 1087 data->timeo, data->retrans);
1061 1088
1089 /* Initialise the client representation from the mount data */
1090 server->flags = data->flags;
1091 server->caps |= NFS_CAP_ATOMIC_OPEN;
1092
1062 /* Get a client record */ 1093 /* Get a client record */
1063 error = nfs4_set_client(server, 1094 error = nfs4_set_client(server,
1064 data->nfs_server.hostname, 1095 data->nfs_server.hostname,
@@ -1071,10 +1102,6 @@ static int nfs4_init_server(struct nfs_server *server,
1071 if (error < 0) 1102 if (error < 0)
1072 goto error; 1103 goto error;
1073 1104
1074 /* Initialise the client representation from the mount data */
1075 server->flags = data->flags;
1076 server->caps |= NFS_CAP_ATOMIC_OPEN;
1077
1078 if (data->rsize) 1105 if (data->rsize)
1079 server->rsize = nfs_block_size(data->rsize, NULL); 1106 server->rsize = nfs_block_size(data->rsize, NULL);
1080 if (data->wsize) 1107 if (data->wsize)
@@ -1177,6 +1204,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1177 parent_server = NFS_SB(data->sb); 1204 parent_server = NFS_SB(data->sb);
1178 parent_client = parent_server->nfs_client; 1205 parent_client = parent_server->nfs_client;
1179 1206
1207 /* Initialise the client representation from the parent server */
1208 nfs_server_copy_userdata(server, parent_server);
1209 server->caps |= NFS_CAP_ATOMIC_OPEN;
1210
1180 /* Get a client representation. 1211 /* Get a client representation.
1181 * Note: NFSv4 always uses TCP. */ 1212 * Note: NFSv4 always uses TCP. */
1182 error = nfs4_set_client(server, data->hostname, 1213 error = nfs4_set_client(server, data->hostname,
@@ -1189,10 +1220,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1189 if (error < 0) 1220 if (error < 0)
1190 goto error; 1221 goto error;
1191 1222
1192 /* Initialise the client representation from the parent server */
1193 nfs_server_copy_userdata(server, parent_server);
1194 server->caps |= NFS_CAP_ATOMIC_OPEN;
1195
1196 error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); 1223 error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
1197 if (error < 0) 1224 if (error < 0)
1198 goto error; 1225 goto error;
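
Most of the client.c hunks above are plumbing for NFS_MOUNT_NORESVPORT, but the address-matching rewrite deserves a note: rather than refusing to compare AF_INET against AF_INET6, the new nfs_sockaddr_match_ipaddr() lifts IPv4 addresses into IPv4-mapped IPv6 form (::ffff:a.b.c.d), so a single IPv6 comparison covers both families; that is also why the explicit sa_family checks in nfs_find_client() and nfs_find_client_next() could be deleted. A userspace sketch of the same idea using only the standard socket API (not the kernel implementation, which uses ipv6_addr_set_v4mapped() and ipv6_addr_equal()):

    #include <string.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    static const struct in6_addr *map_addr(const struct sockaddr *sa,
                                           struct in6_addr *mapped)
    {
        const struct sockaddr_in *sin;

        switch (sa->sa_family) {
        case AF_INET6:
            return &((const struct sockaddr_in6 *)sa)->sin6_addr;
        case AF_INET:
            sin = (const struct sockaddr_in *)sa;
            memset(mapped, 0, sizeof(*mapped));
            mapped->s6_addr[10] = 0xff;           /* ::ffff:a.b.c.d */
            mapped->s6_addr[11] = 0xff;
            memcpy(&mapped->s6_addr[12], &sin->sin_addr, 4);
            return mapped;
        }
        return NULL;                              /* unknown family: no match */
    }

    static int addrs_match(const struct sockaddr *a, const struct sockaddr *b)
    {
        struct in6_addr ma, mb;
        const struct in6_addr *pa = map_addr(a, &ma);
        const struct in6_addr *pb = map_addr(b, &mb);

        return pa && pb && memcmp(pa, pb, sizeof(*pa)) == 0;
    }

    int main(void)
    {
        struct sockaddr_in a = { .sin_family = AF_INET },
                           b = { .sin_family = AF_INET };
        a.sin_addr.s_addr = b.sin_addr.s_addr = htonl(0x7f000001);
        return !addrs_match((struct sockaddr *)&a, (struct sockaddr *)&b);
    }
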
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index cc563cfa6940..968225a88015 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -43,6 +43,27 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
43 put_rpccred(cred); 43 put_rpccred(cred);
44} 44}
45 45
46void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
47{
48 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
49}
50
51int nfs_have_delegation(struct inode *inode, fmode_t flags)
52{
53 struct nfs_delegation *delegation;
54 int ret = 0;
55
56 flags &= FMODE_READ|FMODE_WRITE;
57 rcu_read_lock();
58 delegation = rcu_dereference(NFS_I(inode)->delegation);
59 if (delegation != NULL && (delegation->type & flags) == flags) {
60 nfs_mark_delegation_referenced(delegation);
61 ret = 1;
62 }
63 rcu_read_unlock();
64 return ret;
65}
66
46static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) 67static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
47{ 68{
48 struct inode *inode = state->inode; 69 struct inode *inode = state->inode;
@@ -119,7 +140,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
119 delegation->maxsize = res->maxsize; 140 delegation->maxsize = res->maxsize;
120 oldcred = delegation->cred; 141 oldcred = delegation->cred;
121 delegation->cred = get_rpccred(cred); 142 delegation->cred = get_rpccred(cred);
122 delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM; 143 clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
123 NFS_I(inode)->delegation_state = delegation->type; 144 NFS_I(inode)->delegation_state = delegation->type;
124 smp_wmb(); 145 smp_wmb();
125 put_rpccred(oldcred); 146 put_rpccred(oldcred);
@@ -134,19 +155,35 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
134 return res; 155 return res;
135} 156}
136 157
158static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation)
159{
160 struct inode *inode = NULL;
161
162 spin_lock(&delegation->lock);
163 if (delegation->inode != NULL)
164 inode = igrab(delegation->inode);
165 spin_unlock(&delegation->lock);
166 return inode;
167}
168
137static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) 169static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
138{ 170{
139 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); 171 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
140 172
141 if (delegation == NULL) 173 if (delegation == NULL)
142 goto nomatch; 174 goto nomatch;
175 spin_lock(&delegation->lock);
143 if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data, 176 if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
144 sizeof(delegation->stateid.data)) != 0) 177 sizeof(delegation->stateid.data)) != 0)
145 goto nomatch; 178 goto nomatch_unlock;
146 list_del_rcu(&delegation->super_list); 179 list_del_rcu(&delegation->super_list);
180 delegation->inode = NULL;
147 nfsi->delegation_state = 0; 181 nfsi->delegation_state = 0;
148 rcu_assign_pointer(nfsi->delegation, NULL); 182 rcu_assign_pointer(nfsi->delegation, NULL);
183 spin_unlock(&delegation->lock);
149 return delegation; 184 return delegation;
185nomatch_unlock:
186 spin_unlock(&delegation->lock);
150nomatch: 187nomatch:
151 return NULL; 188 return NULL;
152} 189}
@@ -172,6 +209,8 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
172 delegation->change_attr = nfsi->change_attr; 209 delegation->change_attr = nfsi->change_attr;
173 delegation->cred = get_rpccred(cred); 210 delegation->cred = get_rpccred(cred);
174 delegation->inode = inode; 211 delegation->inode = inode;
212 delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
213 spin_lock_init(&delegation->lock);
175 214
176 spin_lock(&clp->cl_lock); 215 spin_lock(&clp->cl_lock);
177 if (rcu_dereference(nfsi->delegation) != NULL) { 216 if (rcu_dereference(nfsi->delegation) != NULL) {
@@ -226,22 +265,47 @@ static void nfs_msync_inode(struct inode *inode)
226 */ 265 */
227static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation) 266static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
228{ 267{
229 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
230 struct nfs_inode *nfsi = NFS_I(inode); 268 struct nfs_inode *nfsi = NFS_I(inode);
231 269
232 nfs_msync_inode(inode); 270 nfs_msync_inode(inode);
233 down_read(&clp->cl_sem);
234 /* Guard against new delegated open calls */ 271 /* Guard against new delegated open calls */
235 down_write(&nfsi->rwsem); 272 down_write(&nfsi->rwsem);
236 nfs_delegation_claim_opens(inode, &delegation->stateid); 273 nfs_delegation_claim_opens(inode, &delegation->stateid);
237 up_write(&nfsi->rwsem); 274 up_write(&nfsi->rwsem);
238 up_read(&clp->cl_sem);
239 nfs_msync_inode(inode); 275 nfs_msync_inode(inode);
240 276
241 return nfs_do_return_delegation(inode, delegation, 1); 277 return nfs_do_return_delegation(inode, delegation, 1);
242} 278}
243 279
244/* 280/*
281 * Return all delegations that have been marked for return
282 */
283void nfs_client_return_marked_delegations(struct nfs_client *clp)
284{
285 struct nfs_delegation *delegation;
286 struct inode *inode;
287
288restart:
289 rcu_read_lock();
290 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
291 if (!test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
292 continue;
293 inode = nfs_delegation_grab_inode(delegation);
294 if (inode == NULL)
295 continue;
296 spin_lock(&clp->cl_lock);
297 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
298 spin_unlock(&clp->cl_lock);
299 rcu_read_unlock();
300 if (delegation != NULL)
301 __nfs_inode_return_delegation(inode, delegation);
302 iput(inode);
303 goto restart;
304 }
305 rcu_read_unlock();
306}
307
308/*
245 * This function returns the delegation without reclaiming opens 309 * This function returns the delegation without reclaiming opens
246 * or protecting against delegation reclaims. 310 * or protecting against delegation reclaims.
247 * It is therefore really only safe to be called from 311 * It is therefore really only safe to be called from
@@ -279,83 +343,55 @@ int nfs_inode_return_delegation(struct inode *inode)
279 return err; 343 return err;
280} 344}
281 345
346static void nfs_mark_return_delegation(struct nfs_client *clp, struct nfs_delegation *delegation)
347{
348 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
349 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
350}
351
282/* 352/*
283 * Return all delegations associated with a super block 353 * Return all delegations associated with a super block

284 */ 354 */
285void nfs_return_all_delegations(struct super_block *sb) 355void nfs_super_return_all_delegations(struct super_block *sb)
286{ 356{
287 struct nfs_client *clp = NFS_SB(sb)->nfs_client; 357 struct nfs_client *clp = NFS_SB(sb)->nfs_client;
288 struct nfs_delegation *delegation; 358 struct nfs_delegation *delegation;
289 struct inode *inode;
290 359
291 if (clp == NULL) 360 if (clp == NULL)
292 return; 361 return;
293restart:
294 rcu_read_lock(); 362 rcu_read_lock();
295 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 363 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
296 if (delegation->inode->i_sb != sb) 364 spin_lock(&delegation->lock);
297 continue; 365 if (delegation->inode != NULL && delegation->inode->i_sb == sb)
298 inode = igrab(delegation->inode); 366 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
299 if (inode == NULL) 367 spin_unlock(&delegation->lock);
300 continue;
301 spin_lock(&clp->cl_lock);
302 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
303 spin_unlock(&clp->cl_lock);
304 rcu_read_unlock();
305 if (delegation != NULL)
306 __nfs_inode_return_delegation(inode, delegation);
307 iput(inode);
308 goto restart;
309 } 368 }
310 rcu_read_unlock(); 369 rcu_read_unlock();
370 nfs_client_return_marked_delegations(clp);
311} 371}
312 372
313static int nfs_do_expire_all_delegations(void *ptr) 373static void nfs_client_mark_return_all_delegations(struct nfs_client *clp)
314{ 374{
315 struct nfs_client *clp = ptr;
316 struct nfs_delegation *delegation; 375 struct nfs_delegation *delegation;
317 struct inode *inode;
318 376
319 allow_signal(SIGKILL);
320restart:
321 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
322 goto out;
323 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
324 goto out;
325 rcu_read_lock(); 377 rcu_read_lock();
326 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 378 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
327 inode = igrab(delegation->inode); 379 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
328 if (inode == NULL) 380 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
329 continue;
330 spin_lock(&clp->cl_lock);
331 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
332 spin_unlock(&clp->cl_lock);
333 rcu_read_unlock();
334 if (delegation)
335 __nfs_inode_return_delegation(inode, delegation);
336 iput(inode);
337 goto restart;
338 } 381 }
339 rcu_read_unlock(); 382 rcu_read_unlock();
340out: 383}
341 nfs_put_client(clp); 384
342 module_put_and_exit(0); 385static void nfs_delegation_run_state_manager(struct nfs_client *clp)
386{
387 if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state))
388 nfs4_schedule_state_manager(clp);
343} 389}
344 390
345void nfs_expire_all_delegations(struct nfs_client *clp) 391void nfs_expire_all_delegations(struct nfs_client *clp)
346{ 392{
347 struct task_struct *task; 393 nfs_client_mark_return_all_delegations(clp);
348 394 nfs_delegation_run_state_manager(clp);
349 __module_get(THIS_MODULE);
350 atomic_inc(&clp->cl_count);
351 task = kthread_run(nfs_do_expire_all_delegations, clp,
352 "%s-delegreturn",
353 rpc_peeraddr2str(clp->cl_rpcclient,
354 RPC_DISPLAY_ADDR));
355 if (!IS_ERR(task))
356 return;
357 nfs_put_client(clp);
358 module_put(THIS_MODULE);
359} 395}
360 396
361/* 397/*
@@ -363,68 +399,29 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
363 */ 399 */
364void nfs_handle_cb_pathdown(struct nfs_client *clp) 400void nfs_handle_cb_pathdown(struct nfs_client *clp)
365{ 401{
366 struct nfs_delegation *delegation;
367 struct inode *inode;
368
369 if (clp == NULL) 402 if (clp == NULL)
370 return; 403 return;
371restart: 404 nfs_client_mark_return_all_delegations(clp);
405}
406
407static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *clp)
408{
409 struct nfs_delegation *delegation;
410
372 rcu_read_lock(); 411 rcu_read_lock();
373 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 412 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
374 inode = igrab(delegation->inode); 413 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
375 if (inode == NULL)
376 continue; 414 continue;
377 spin_lock(&clp->cl_lock); 415 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
378 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 416 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
379 spin_unlock(&clp->cl_lock);
380 rcu_read_unlock();
381 if (delegation != NULL)
382 __nfs_inode_return_delegation(inode, delegation);
383 iput(inode);
384 goto restart;
385 } 417 }
386 rcu_read_unlock(); 418 rcu_read_unlock();
387} 419}
388 420
389struct recall_threadargs { 421void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
390 struct inode *inode;
391 struct nfs_client *clp;
392 const nfs4_stateid *stateid;
393
394 struct completion started;
395 int result;
396};
397
398static int recall_thread(void *data)
399{ 422{
400 struct recall_threadargs *args = (struct recall_threadargs *)data; 423 nfs_client_mark_return_unreferenced_delegations(clp);
401 struct inode *inode = igrab(args->inode); 424 nfs_delegation_run_state_manager(clp);
402 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
403 struct nfs_inode *nfsi = NFS_I(inode);
404 struct nfs_delegation *delegation;
405
406 daemonize("nfsv4-delegreturn");
407
408 nfs_msync_inode(inode);
409 down_read(&clp->cl_sem);
410 down_write(&nfsi->rwsem);
411 spin_lock(&clp->cl_lock);
412 delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
413 if (delegation != NULL)
414 args->result = 0;
415 else
416 args->result = -ENOENT;
417 spin_unlock(&clp->cl_lock);
418 complete(&args->started);
419 nfs_delegation_claim_opens(inode, args->stateid);
420 up_write(&nfsi->rwsem);
421 up_read(&clp->cl_sem);
422 nfs_msync_inode(inode);
423
424 if (delegation != NULL)
425 nfs_do_return_delegation(inode, delegation, 1);
426 iput(inode);
427 module_put_and_exit(0);
428} 425}
429 426
430/* 427/*
@@ -432,22 +429,20 @@ static int recall_thread(void *data)
432 */ 429 */
433int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) 430int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid)
434{ 431{
435 struct recall_threadargs data = { 432 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
436 .inode = inode, 433 struct nfs_delegation *delegation;
437 .stateid = stateid,
438 };
439 int status;
440 434
441 init_completion(&data.started); 435 rcu_read_lock();
442 __module_get(THIS_MODULE); 436 delegation = rcu_dereference(NFS_I(inode)->delegation);
443 status = kernel_thread(recall_thread, &data, CLONE_KERNEL); 437 if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
444 if (status < 0) 438 sizeof(delegation->stateid.data)) != 0) {
445 goto out_module_put; 439 rcu_read_unlock();
446 wait_for_completion(&data.started); 440 return -ENOENT;
447 return data.result; 441 }
448out_module_put: 442 nfs_mark_return_delegation(clp, delegation);
449 module_put(THIS_MODULE); 443 rcu_read_unlock();
450 return status; 444 nfs_delegation_run_state_manager(clp);
445 return 0;
451} 446}
452 447
453/* 448/*
@@ -459,10 +454,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
459 struct inode *res = NULL; 454 struct inode *res = NULL;
460 rcu_read_lock(); 455 rcu_read_lock();
461 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 456 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
462 if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { 457 spin_lock(&delegation->lock);
458 if (delegation->inode != NULL &&
459 nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
463 res = igrab(delegation->inode); 460 res = igrab(delegation->inode);
464 break;
465 } 461 }
462 spin_unlock(&delegation->lock);
463 if (res != NULL)
464 break;
466 } 465 }
467 rcu_read_unlock(); 466 rcu_read_unlock();
468 return res; 467 return res;
@@ -476,7 +475,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
476 struct nfs_delegation *delegation; 475 struct nfs_delegation *delegation;
477 rcu_read_lock(); 476 rcu_read_lock();
478 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) 477 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
479 delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; 478 set_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
480 rcu_read_unlock(); 479 rcu_read_unlock();
481} 480}
482 481
@@ -486,17 +485,22 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
486void nfs_delegation_reap_unclaimed(struct nfs_client *clp) 485void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
487{ 486{
488 struct nfs_delegation *delegation; 487 struct nfs_delegation *delegation;
488 struct inode *inode;
489restart: 489restart:
490 rcu_read_lock(); 490 rcu_read_lock();
491 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { 491 list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
492 if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) 492 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0)
493 continue;
494 inode = nfs_delegation_grab_inode(delegation);
495 if (inode == NULL)
493 continue; 496 continue;
494 spin_lock(&clp->cl_lock); 497 spin_lock(&clp->cl_lock);
495 delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL); 498 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
496 spin_unlock(&clp->cl_lock); 499 spin_unlock(&clp->cl_lock);
497 rcu_read_unlock(); 500 rcu_read_unlock();
498 if (delegation != NULL) 501 if (delegation != NULL)
499 nfs_free_delegation(delegation); 502 nfs_free_delegation(delegation);
503 iput(inode);
500 goto restart; 504 goto restart;
501 } 505 }
502 rcu_read_unlock(); 506 rcu_read_unlock();
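
The common theme of the delegation.c changes above: every ad-hoc kernel thread (nfs_do_expire_all_delegations(), recall_thread()) is replaced by a mark-and-reap scheme. Walkers holding only rcu_read_lock() set NFS_DELEGATION_RETURN and poke the state manager; nfs_client_return_marked_delegations() later detaches flagged entries one at a time, restarting its scan after each unlock because the list may have changed in the meantime. A plain-C, single-threaded sketch of that control flow (no real RCU or atomics, error handling elided):

    #include <stdio.h>
    #include <stdlib.h>

    #define FLAG_RETURN 0x1

    struct delegation {
        struct delegation *next;
        int flags;
        int id;
    };

    /* Walkers only mark; no blocking work under the (conceptual) read lock. */
    static void mark_all(struct delegation *list)
    {
        for (struct delegation *d = list; d; d = d->next)
            d->flags |= FLAG_RETURN;
    }

    /* Reaper: detach one marked entry at a time and restart the scan,
     * mirroring the goto-restart loop in nfs_client_return_marked_delegations(). */
    static void reap_marked(struct delegation **list)
    {
    restart:
        for (struct delegation **pp = list; *pp; pp = &(*pp)->next) {
            struct delegation *d = *pp;
            if (!(d->flags & FLAG_RETURN))
                continue;
            *pp = d->next;            /* detach (kernel: list_del_rcu + detach) */
            printf("returning delegation %d\n", d->id);
            free(d);
            goto restart;             /* list changed while "unlocked": rescan */
        }
    }

    int main(void)
    {
        struct delegation *list = NULL;
        for (int i = 0; i < 3; i++) {
            struct delegation *d = calloc(1, sizeof(*d));
            d->id = i;
            d->next = list;
            list = d;
        }
        mark_all(list);
        reap_marked(&list);
        return 0;
    }
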
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index f1c5e2a5d88e..09f383795174 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -17,14 +17,20 @@ struct nfs_delegation {
17 struct rpc_cred *cred; 17 struct rpc_cred *cred;
18 struct inode *inode; 18 struct inode *inode;
19 nfs4_stateid stateid; 19 nfs4_stateid stateid;
20 int type; 20 fmode_t type;
21#define NFS_DELEGATION_NEED_RECLAIM 1
22 long flags;
23 loff_t maxsize; 21 loff_t maxsize;
24 __u64 change_attr; 22 __u64 change_attr;
23 unsigned long flags;
24 spinlock_t lock;
25 struct rcu_head rcu; 25 struct rcu_head rcu;
26}; 26};
27 27
28enum {
29 NFS_DELEGATION_NEED_RECLAIM = 0,
30 NFS_DELEGATION_RETURN,
31 NFS_DELEGATION_REFERENCED,
32};
33
28int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
29void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 35void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
30int nfs_inode_return_delegation(struct inode *inode); 36int nfs_inode_return_delegation(struct inode *inode);
@@ -32,9 +38,11 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s
32void nfs_inode_return_delegation_noreclaim(struct inode *inode); 38void nfs_inode_return_delegation_noreclaim(struct inode *inode);
33 39
34struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); 40struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
35void nfs_return_all_delegations(struct super_block *sb); 41void nfs_super_return_all_delegations(struct super_block *sb);
36void nfs_expire_all_delegations(struct nfs_client *clp); 42void nfs_expire_all_delegations(struct nfs_client *clp);
43void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
37void nfs_handle_cb_pathdown(struct nfs_client *clp); 44void nfs_handle_cb_pathdown(struct nfs_client *clp);
45void nfs_client_return_marked_delegations(struct nfs_client *clp);
38 46
39void nfs_delegation_mark_reclaim(struct nfs_client *clp); 47void nfs_delegation_mark_reclaim(struct nfs_client *clp);
40void nfs_delegation_reap_unclaimed(struct nfs_client *clp); 48void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
@@ -45,22 +53,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
45int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); 53int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
46int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); 54int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
47 55
48static inline int nfs_have_delegation(struct inode *inode, int flags) 56void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
49{ 57int nfs_have_delegation(struct inode *inode, fmode_t flags);
50 struct nfs_delegation *delegation;
51 int ret = 0;
52
53 flags &= FMODE_READ|FMODE_WRITE;
54 rcu_read_lock();
55 delegation = rcu_dereference(NFS_I(inode)->delegation);
56 if (delegation != NULL && (delegation->type & flags) == flags)
57 ret = 1;
58 rcu_read_unlock();
59 return ret;
60}
61 58
62#else 59#else
63static inline int nfs_have_delegation(struct inode *inode, int flags) 60static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
64{ 61{
65 return 0; 62 return 0;
66} 63}
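
The delegation.h change above is subtler than it looks. The old flags field was a plain long updated with |= and &= under a semaphore; the new unsigned long plus an enum of bit numbers is the calling convention of the atomic set_bit()/test_bit()/test_and_clear_bit() family, which takes a bit index rather than a mask. A userspace sketch of the two conventions, using GCC/Clang atomic builtins as stand-ins for the kernel bitops:

    #include <stdio.h>

    /* Bit numbers, as in the new enum in delegation.h (values 0, 1, 2),
     * not mask values like the old "#define NFS_DELEGATION_NEED_RECLAIM 1". */
    enum { NEED_RECLAIM = 0, RETURN = 1, REFERENCED = 2 };

    static unsigned long flags;

    static void set_bit_atomic(int nr)            /* stand-in for set_bit() */
    {
        __atomic_fetch_or(&flags, 1UL << nr, __ATOMIC_SEQ_CST);
    }

    static int test_and_clear_bit_atomic(int nr)  /* stand-in for test_and_clear_bit() */
    {
        unsigned long old = __atomic_fetch_and(&flags, ~(1UL << nr),
                                               __ATOMIC_SEQ_CST);
        return (old >> nr) & 1;
    }

    int main(void)
    {
        set_bit_atomic(RETURN);
        printf("%d %d\n", test_and_clear_bit_atomic(RETURN),
                          test_and_clear_bit_atomic(RETURN)); /* prints: 1 0 */
        return 0;
    }
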
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3e64b98f3a93..e35c8199f82f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -799,6 +799,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
799 goto out_bad; 799 goto out_bad;
800 } 800 }
801 801
802 if (nfs_have_delegation(inode, FMODE_READ))
803 goto out_set_verifier;
804
802 /* Force a full look up iff the parent directory has changed */ 805 /* Force a full look up iff the parent directory has changed */
803 if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { 806 if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) {
804 if (nfs_lookup_verify_inode(inode, nd)) 807 if (nfs_lookup_verify_inode(inode, nd))
@@ -817,6 +820,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
817 if ((error = nfs_refresh_inode(inode, &fattr)) != 0) 820 if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
818 goto out_bad; 821 goto out_bad;
819 822
823out_set_verifier:
820 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 824 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
821 out_valid: 825 out_valid:
822 dput(parent); 826 dput(parent);
@@ -973,7 +977,7 @@ struct dentry_operations nfs4_dentry_operations = {
973 * Use intent information to determine whether we need to substitute 977 * Use intent information to determine whether we need to substitute
974 * the NFSv4-style stateful OPEN for the LOOKUP call 978 * the NFSv4-style stateful OPEN for the LOOKUP call
975 */ 979 */
976static int is_atomic_open(struct inode *dir, struct nameidata *nd) 980static int is_atomic_open(struct nameidata *nd)
977{ 981{
978 if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0) 982 if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0)
979 return 0; 983 return 0;
@@ -996,7 +1000,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
996 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1000 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
997 1001
998 /* Check that we are indeed trying to open this file */ 1002 /* Check that we are indeed trying to open this file */
999 if (!is_atomic_open(dir, nd)) 1003 if (!is_atomic_open(nd))
1000 goto no_open; 1004 goto no_open;
1001 1005
1002 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { 1006 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
@@ -1047,10 +1051,10 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1047 struct inode *dir; 1051 struct inode *dir;
1048 int openflags, ret = 0; 1052 int openflags, ret = 0;
1049 1053
1054 if (!is_atomic_open(nd))
1055 goto no_open;
1050 parent = dget_parent(dentry); 1056 parent = dget_parent(dentry);
1051 dir = parent->d_inode; 1057 dir = parent->d_inode;
1052 if (!is_atomic_open(dir, nd))
1053 goto no_open;
1054 /* We can't create new files in nfs_open_revalidate(), so we 1058 /* We can't create new files in nfs_open_revalidate(), so we
1055 * optimize away revalidation of negative dentries. 1059 * optimize away revalidation of negative dentries.
1056 */ 1060 */
@@ -1062,11 +1066,11 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1062 1066
1063 /* NFS only supports OPEN on regular files */ 1067 /* NFS only supports OPEN on regular files */
1064 if (!S_ISREG(inode->i_mode)) 1068 if (!S_ISREG(inode->i_mode))
1065 goto no_open; 1069 goto no_open_dput;
1066 openflags = nd->intent.open.flags; 1070 openflags = nd->intent.open.flags;
1067 /* We cannot do exclusive creation on a positive dentry */ 1071 /* We cannot do exclusive creation on a positive dentry */
1068 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 1072 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
1069 goto no_open; 1073 goto no_open_dput;
1070 /* We can't create new files, or truncate existing ones here */ 1074 /* We can't create new files, or truncate existing ones here */
1071 openflags &= ~(O_CREAT|O_TRUNC); 1075 openflags &= ~(O_CREAT|O_TRUNC);
1072 1076
@@ -1081,10 +1085,9 @@ out:
1081 if (!ret) 1085 if (!ret)
1082 d_drop(dentry); 1086 d_drop(dentry);
1083 return ret; 1087 return ret;
1084no_open: 1088no_open_dput:
1085 dput(parent); 1089 dput(parent);
1086 if (inode != NULL && nfs_have_delegation(inode, FMODE_READ)) 1090no_open:
1087 return 1;
1088 return nfs_lookup_revalidate(dentry, nd); 1091 return nfs_lookup_revalidate(dentry, nd);
1089} 1092}
1090#endif /* CONFIG_NFSV4 */ 1093#endif /* CONFIG_NFSV4 */
@@ -1794,7 +1797,8 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str
1794 cache = nfs_access_search_rbtree(inode, cred); 1797 cache = nfs_access_search_rbtree(inode, cred);
1795 if (cache == NULL) 1798 if (cache == NULL)
1796 goto out; 1799 goto out;
1797 if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) 1800 if (!nfs_have_delegation(inode, FMODE_READ) &&
1801 !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
1798 goto out_stale; 1802 goto out_stale;
1799 res->jiffies = cache->jiffies; 1803 res->jiffies = cache->jiffies;
1800 res->cred = cache->cred; 1804 res->cred = cache->cred;
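
Both dir.c hunks above exploit the same invariant: while the client holds a read delegation, the server has promised to recall it before letting anyone else change the file, so cached dentries and cached access results can be trusted without consulting the timeout. A toy sketch of that "promise beats timer" fast path (hypothetical types, not the NFS structures):

    #include <stdio.h>

    struct access_entry {
        int  delegated;     /* we hold a read delegation for this inode */
        long stamp;         /* when the entry was cached */
        long timeout;       /* attribute cache lifetime */
    };

    static int entry_usable(const struct access_entry *e, long now)
    {
        if (e->delegated)   /* no server-side change possible: trust the cache */
            return 1;
        return now - e->stamp < e->timeout;
    }

    int main(void)
    {
        struct access_entry stale = { .delegated = 0, .stamp = 0, .timeout = 10 };
        struct access_entry held  = { .delegated = 1, .stamp = 0, .timeout = 10 };
        printf("%d %d\n", entry_usable(&stale, 100),
                          entry_usable(&held, 100)); /* prints: 0 1 */
        return 0;
    }
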
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d22eb383e1cf..0c381686171e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -592,7 +592,7 @@ static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context
592/* 592/*
593 * Given an inode, search for an open context with the desired characteristics 593 * Given an inode, search for an open context with the desired characteristics
594 */ 594 */
595struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode) 595struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode)
596{ 596{
597 struct nfs_inode *nfsi = NFS_I(inode); 597 struct nfs_inode *nfsi = NFS_I(inode);
598 struct nfs_open_context *pos, *ctx = NULL; 598 struct nfs_open_context *pos, *ctx = NULL;
@@ -712,14 +712,7 @@ int nfs_attribute_timeout(struct inode *inode)
712 712
713 if (nfs_have_delegation(inode, FMODE_READ)) 713 if (nfs_have_delegation(inode, FMODE_READ))
714 return 0; 714 return 0;
715 /* 715 return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
716 * Special case: if the attribute timeout is set to 0, then always
717 * treat the cache as having expired (unless holding
718 * a delegation).
719 */
720 if (nfsi->attrtimeo == 0)
721 return 1;
722 return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
723} 716}
724 717
725/** 718/**
@@ -1182,7 +1175,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1182 nfsi->attrtimeo_timestamp = now; 1175 nfsi->attrtimeo_timestamp = now;
1183 nfsi->attr_gencount = nfs_inc_attr_generation_counter(); 1176 nfsi->attr_gencount = nfs_inc_attr_generation_counter();
1184 } else { 1177 } else {
1185 if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { 1178 if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
1186 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) 1179 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
1187 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); 1180 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
1188 nfsi->attrtimeo_timestamp = now; 1181 nfsi->attrtimeo_timestamp = now;
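
The removed special case above is subsumed by the switch from time_in_range() to time_in_range_open(): assuming the new helper is half-open at the top end, an attrtimeo of 0 yields the empty interval [stamp, stamp), so the attribute cache always reads as expired with no explicit zero test. A sketch under that assumption, using the usual wraparound-safe signed-difference idiom:

    #include <stdio.h>

    /* Half-open check: true iff stamp <= now < stamp + attrtimeo,
     * written with signed differences so counter wraparound is harmless. */
    static int attrs_still_valid(unsigned long now, unsigned long stamp,
                                 unsigned long attrtimeo)
    {
        return (long)(now - stamp) >= 0 &&
               (long)(now - (stamp + attrtimeo)) < 0;
    }

    int main(void)
    {
        printf("%d\n", attrs_still_valid(100, 100, 0));  /* 0: empty range */
        printf("%d\n", attrs_still_valid(100, 100, 60)); /* 1 */
        return 0;
    }
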
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d212ee41caf2..340ede8f608f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -63,6 +63,20 @@ struct nfs_parsed_mount_data {
63 struct security_mnt_opts lsm_opts; 63 struct security_mnt_opts lsm_opts;
64}; 64};
65 65
66/* mount_clnt.c */
67struct nfs_mount_request {
68 struct sockaddr *sap;
69 size_t salen;
70 char *hostname;
71 char *dirpath;
72 u32 version;
73 unsigned short protocol;
74 struct nfs_fh *fh;
75 int noresvport;
76};
77
78extern int nfs_mount(struct nfs_mount_request *info);
79
66/* client.c */ 80/* client.c */
67extern struct rpc_program nfs_program; 81extern struct rpc_program nfs_program;
68 82
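
The new nfs_mount_request above is a parameter-object refactor: nfs_mount() previously took seven positional arguments (see the mount_clnt.c hunk below), so an option such as noresvport would have meant touching every caller. A hypothetical miniature of the pattern (the names here are illustrative, not the kernel's):

    #include <stdio.h>

    struct mount_request {
        const char  *hostname;      /* may be NULL */
        const char  *dirpath;
        unsigned int version;
        int          noresvport;    /* new options become new fields */
    };

    static int do_mount(const struct mount_request *req)
    {
        if (req == NULL || req->dirpath == NULL)
            return -1;
        printf("MNTv%u %s:%s%s\n", req->version,
               req->hostname ? req->hostname : "server", req->dirpath,
               req->noresvport ? " (non-privileged port)" : "");
        return 0;
    }

    int main(void)
    {
        struct mount_request req = {
            .hostname = "srv",
            .dirpath  = "/export",
            .version  = 3,          /* unset fields default to zero */
        };
        return do_mount(&req);
    }

With designated initializers, unset fields default to zero, so adding a field later is backward compatible for existing call sites.
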
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 086a6830d785..ca905a5bb1ba 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -29,47 +29,43 @@ struct mnt_fhstatus {
29 29
30/** 30/**
31 * nfs_mount - Obtain an NFS file handle for the given host and path 31 * nfs_mount - Obtain an NFS file handle for the given host and path
32 * @addr: pointer to server's address 32 * @info: pointer to mount request arguments
33 * @len: size of server's address
34 * @hostname: name of server host, or NULL
35 * @path: pointer to string containing export path to mount
36 * @version: mount version to use for this request
37 * @protocol: transport protocol to use for this request
38 * @fh: pointer to location to place returned file handle
39 * 33 *
40 * Uses default timeout parameters specified by underlying transport. 34 * Uses default timeout parameters specified by underlying transport.
41 */ 35 */
42int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, 36int nfs_mount(struct nfs_mount_request *info)
43 int version, int protocol, struct nfs_fh *fh)
44{ 37{
45 struct mnt_fhstatus result = { 38 struct mnt_fhstatus result = {
46 .fh = fh 39 .fh = info->fh
47 }; 40 };
48 struct rpc_message msg = { 41 struct rpc_message msg = {
49 .rpc_argp = path, 42 .rpc_argp = info->dirpath,
50 .rpc_resp = &result, 43 .rpc_resp = &result,
51 }; 44 };
52 struct rpc_create_args args = { 45 struct rpc_create_args args = {
53 .protocol = protocol, 46 .protocol = info->protocol,
54 .address = addr, 47 .address = info->sap,
55 .addrsize = len, 48 .addrsize = info->salen,
56 .servername = hostname, 49 .servername = info->hostname,
57 .program = &mnt_program, 50 .program = &mnt_program,
58 .version = version, 51 .version = info->version,
59 .authflavor = RPC_AUTH_UNIX, 52 .authflavor = RPC_AUTH_UNIX,
60 .flags = 0,
61 }; 53 };
62 struct rpc_clnt *mnt_clnt; 54 struct rpc_clnt *mnt_clnt;
63 int status; 55 int status;
64 56
65 dprintk("NFS: sending MNT request for %s:%s\n", 57 dprintk("NFS: sending MNT request for %s:%s\n",
66 (hostname ? hostname : "server"), path); 58 (info->hostname ? info->hostname : "server"),
59 info->dirpath);
60
61 if (info->noresvport)
62 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
67 63
68 mnt_clnt = rpc_create(&args); 64 mnt_clnt = rpc_create(&args);
69 if (IS_ERR(mnt_clnt)) 65 if (IS_ERR(mnt_clnt))
70 goto out_clnt_err; 66 goto out_clnt_err;
71 67
72 if (version == NFS_MNT3_VERSION) 68 if (info->version == NFS_MNT3_VERSION)
73 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; 69 msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
74 else 70 else
75 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; 71 msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ea790645fda6..4e4d33204376 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -38,8 +38,12 @@ struct idmap;
38 ((err) != NFSERR_NOFILEHANDLE)) 38 ((err) != NFSERR_NOFILEHANDLE))
39 39
40enum nfs4_client_state { 40enum nfs4_client_state {
41 NFS4CLNT_STATE_RECOVER = 0, 41 NFS4CLNT_MANAGER_RUNNING = 0,
42 NFS4CLNT_CHECK_LEASE,
42 NFS4CLNT_LEASE_EXPIRED, 43 NFS4CLNT_LEASE_EXPIRED,
44 NFS4CLNT_RECLAIM_REBOOT,
45 NFS4CLNT_RECLAIM_NOGRACE,
46 NFS4CLNT_DELEGRETURN,
43}; 47};
44 48
45/* 49/*
@@ -90,12 +94,18 @@ struct nfs4_state_owner {
90 94
91 spinlock_t so_lock; 95 spinlock_t so_lock;
92 atomic_t so_count; 96 atomic_t so_count;
97 unsigned long so_flags;
93 struct list_head so_states; 98 struct list_head so_states;
94 struct list_head so_delegations; 99 struct list_head so_delegations;
95 struct nfs_seqid_counter so_seqid; 100 struct nfs_seqid_counter so_seqid;
96 struct rpc_sequence so_sequence; 101 struct rpc_sequence so_sequence;
97}; 102};
98 103
104enum {
105 NFS_OWNER_RECLAIM_REBOOT,
106 NFS_OWNER_RECLAIM_NOGRACE
107};
108
99/* 109/*
100 * struct nfs4_state maintains the client-side state for a given 110 * struct nfs4_state maintains the client-side state for a given
101 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). 111 * (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
@@ -128,6 +138,8 @@ enum {
128 NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */ 138 NFS_O_RDONLY_STATE, /* OPEN stateid has read-only state */
129 NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */ 139 NFS_O_WRONLY_STATE, /* OPEN stateid has write-only state */
130 NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */ 140 NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
141 NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */
142 NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */
131}; 143};
132 144
133struct nfs4_state { 145struct nfs4_state {
@@ -149,7 +161,7 @@ struct nfs4_state {
149 unsigned int n_rdonly; /* Number of read-only references */ 161 unsigned int n_rdonly; /* Number of read-only references */
150 unsigned int n_wronly; /* Number of write-only references */ 162 unsigned int n_wronly; /* Number of write-only references */
151 unsigned int n_rdwr; /* Number of read/write references */ 163 unsigned int n_rdwr; /* Number of read/write references */
152 int state; /* State on the server (R,W, or RW) */ 164 fmode_t state; /* State on the server (R,W, or RW) */
153 atomic_t count; 165 atomic_t count;
154}; 166};
155 167
@@ -157,9 +169,12 @@ struct nfs4_state {
157struct nfs4_exception { 169struct nfs4_exception {
158 long timeout; 170 long timeout;
159 int retry; 171 int retry;
172 struct nfs4_state *state;
160}; 173};
161 174
162struct nfs4_state_recovery_ops { 175struct nfs4_state_recovery_ops {
176 int owner_flag_bit;
177 int state_flag_bit;
163 int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); 178 int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
164 int (*recover_lock)(struct nfs4_state *, struct file_lock *); 179 int (*recover_lock)(struct nfs4_state *, struct file_lock *);
165}; 180};
@@ -174,7 +189,6 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
174 189
175 190
176/* nfs4proc.c */ 191/* nfs4proc.c */
177extern int nfs4_map_errors(int err);
178extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); 192extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
179extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); 193extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
180extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); 194extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
@@ -187,7 +201,7 @@ extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
187 struct nfs4_fs_locations *fs_locations, struct page *page); 201 struct nfs4_fs_locations *fs_locations, struct page *page);
188 202
189extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; 203extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
190extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; 204extern struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops;
191 205
192extern const u32 nfs4_fattr_bitmap[2]; 206extern const u32 nfs4_fattr_bitmap[2];
193extern const u32 nfs4_statfs_bitmap[2]; 207extern const u32 nfs4_statfs_bitmap[2];
@@ -202,16 +216,18 @@ extern void nfs4_kill_renewd(struct nfs_client *);
202extern void nfs4_renew_state(struct work_struct *); 216extern void nfs4_renew_state(struct work_struct *);
203 217
204/* nfs4state.c */ 218/* nfs4state.c */
205struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); 219struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
206 220
207extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 221extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
208extern void nfs4_put_state_owner(struct nfs4_state_owner *); 222extern void nfs4_put_state_owner(struct nfs4_state_owner *);
209extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 223extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
210extern void nfs4_put_open_state(struct nfs4_state *); 224extern void nfs4_put_open_state(struct nfs4_state *);
211extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); 225extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
212extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t); 226extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
213extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); 227extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
214extern void nfs4_schedule_state_recovery(struct nfs_client *); 228extern void nfs4_schedule_state_recovery(struct nfs_client *);
229extern void nfs4_schedule_state_manager(struct nfs_client *);
230extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
215extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 231extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
216extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 232extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
217extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 233extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
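
The nfs4_fs.h changes above make recovery table-driven: nfs4_state_recovery_ops grows owner_flag_bit and state_flag_bit fields, so one state-manager loop can service both the reboot and the renamed no-grace variants by testing whichever bit its ops table names instead of hard-coding each case. A self-contained miniature of the idea (hypothetical names):

    #include <stdio.h>

    struct recovery_ops {
        int state_flag_bit;                 /* which flag this table handles */
        int (*recover)(int state_id);
    };

    static int recover_reboot(int id)  { printf("reboot  %d\n", id); return 0; }
    static int recover_nograce(int id) { printf("nograce %d\n", id); return 0; }

    static const struct recovery_ops reboot_ops  = { 0, recover_reboot };
    static const struct recovery_ops nograce_ops = { 1, recover_nograce };

    /* One generic driver instead of one loop per recovery flavour. */
    static void run_recovery(const struct recovery_ops *ops,
                             unsigned long *flags, int id)
    {
        if (*flags & (1UL << ops->state_flag_bit)) {
            *flags &= ~(1UL << ops->state_flag_bit);
            ops->recover(id);
        }
    }

    int main(void)
    {
        unsigned long st = (1UL << 0) | (1UL << 1);
        run_recovery(&reboot_ops, &st, 7);
        run_recovery(&nograce_ops, &st, 7);
        return 0;
    }
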
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 83e700a2b0c0..8dde84b988d9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -62,14 +62,12 @@
62struct nfs4_opendata; 62struct nfs4_opendata;
63static int _nfs4_proc_open(struct nfs4_opendata *data); 63static int _nfs4_proc_open(struct nfs4_opendata *data);
64static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 64static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
65static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); 65static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
66static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
67static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
68static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 66static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
69static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 67static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
70 68
71/* Prevent leaks of NFSv4 errors into userland */ 69/* Prevent leaks of NFSv4 errors into userland */
72int nfs4_map_errors(int err) 70static int nfs4_map_errors(int err)
73{ 71{
74 if (err < -1000) { 72 if (err < -1000) {
75 dprintk("%s could not handle NFSv4 error %d\n", 73 dprintk("%s could not handle NFSv4 error %d\n",
@@ -195,6 +193,83 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
195 kunmap_atomic(start, KM_USER0); 193 kunmap_atomic(start, KM_USER0);
196} 194}
197 195
196static int nfs4_wait_bit_killable(void *word)
197{
198 if (fatal_signal_pending(current))
199 return -ERESTARTSYS;
200 schedule();
201 return 0;
202}
203
204static int nfs4_wait_clnt_recover(struct nfs_client *clp)
205{
206 int res;
207
208 might_sleep();
209
210 res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
211 nfs4_wait_bit_killable, TASK_KILLABLE);
212 return res;
213}
214
215static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
216{
217 int res = 0;
218
219 might_sleep();
220
221 if (*timeout <= 0)
222 *timeout = NFS4_POLL_RETRY_MIN;
223 if (*timeout > NFS4_POLL_RETRY_MAX)
224 *timeout = NFS4_POLL_RETRY_MAX;
225 schedule_timeout_killable(*timeout);
226 if (fatal_signal_pending(current))
227 res = -ERESTARTSYS;
228 *timeout <<= 1;
229 return res;
230}
231
232/* This is the error handling routine for processes that are allowed
233 * to sleep.
234 */
235static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
236{
237 struct nfs_client *clp = server->nfs_client;
238 struct nfs4_state *state = exception->state;
239 int ret = errorcode;
240
241 exception->retry = 0;
242 switch(errorcode) {
243 case 0:
244 return 0;
245 case -NFS4ERR_ADMIN_REVOKED:
246 case -NFS4ERR_BAD_STATEID:
247 case -NFS4ERR_OPENMODE:
248 if (state == NULL)
249 break;
250 nfs4_state_mark_reclaim_nograce(clp, state);
251 case -NFS4ERR_STALE_CLIENTID:
252 case -NFS4ERR_STALE_STATEID:
253 case -NFS4ERR_EXPIRED:
254 nfs4_schedule_state_recovery(clp);
255 ret = nfs4_wait_clnt_recover(clp);
256 if (ret == 0)
257 exception->retry = 1;
258 break;
259 case -NFS4ERR_FILE_OPEN:
260 case -NFS4ERR_GRACE:
261 case -NFS4ERR_DELAY:
262 ret = nfs4_delay(server->client, &exception->timeout);
263 if (ret != 0)
264 break;
265 case -NFS4ERR_OLD_STATEID:
266 exception->retry = 1;
267 }
268 /* We failed to handle the error */
269 return nfs4_map_errors(ret);
270}
271
272
198static void renew_lease(const struct nfs_server *server, unsigned long timestamp) 273static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
199{ 274{
200 struct nfs_client *clp = server->nfs_client; 275 struct nfs_client *clp = server->nfs_client;
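
The hunk above moves nfs4_handle_exception() and its helpers out of line; the fall-throughs in its switch are deliberate (a state error marks the state for no-grace reclaim and then schedules recovery; a delay error sleeps and then retries). nfs4_delay() itself is a capped exponential backoff: clamp the timeout into [NFS4_POLL_RETRY_MIN, NFS4_POLL_RETRY_MAX], sleep, then double the bound for the next attempt. A userspace sketch of that policy with illustrative constants:

    #include <stdio.h>

    #define POLL_RETRY_MIN 1    /* illustrative values, not the kernel's */
    #define POLL_RETRY_MAX 16

    static long next_delay(long *timeout)
    {
        if (*timeout <= 0)
            *timeout = POLL_RETRY_MIN;
        if (*timeout > POLL_RETRY_MAX)
            *timeout = POLL_RETRY_MAX;
        /* a real client would sleep *timeout here, interruptibly */
        long waited = *timeout;
        *timeout <<= 1;         /* double the bound for the next retry */
        return waited;
    }

    int main(void)
    {
        long t = 0;
        for (int i = 0; i < 6; i++)
            printf("retry %d: waited %ld\n", i, next_delay(&t));
        return 0;               /* waits: 1 2 4 8 16 16 */
    }
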
@@ -248,7 +323,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
248} 323}
249 324
250static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 325static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
251 struct nfs4_state_owner *sp, int flags, 326 struct nfs4_state_owner *sp, fmode_t fmode, int flags,
252 const struct iattr *attrs) 327 const struct iattr *attrs)
253{ 328{
254 struct dentry *parent = dget_parent(path->dentry); 329 struct dentry *parent = dget_parent(path->dentry);
@@ -268,7 +343,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
268 p->owner = sp; 343 p->owner = sp;
269 atomic_inc(&sp->so_count); 344 atomic_inc(&sp->so_count);
270 p->o_arg.fh = NFS_FH(dir); 345 p->o_arg.fh = NFS_FH(dir);
271 p->o_arg.open_flags = flags, 346 p->o_arg.open_flags = flags;
347 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
272 p->o_arg.clientid = server->nfs_client->cl_clientid; 348 p->o_arg.clientid = server->nfs_client->cl_clientid;
273 p->o_arg.id = sp->so_owner_id.id; 349 p->o_arg.id = sp->so_owner_id.id;
274 p->o_arg.name = &p->path.dentry->d_name; 350 p->o_arg.name = &p->path.dentry->d_name;
@@ -324,10 +400,13 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
324 return ret; 400 return ret;
325} 401}
326 402
327static int can_open_cached(struct nfs4_state *state, int mode) 403static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
328{ 404{
329 int ret = 0; 405 int ret = 0;
330 switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) { 406
407 if (open_mode & O_EXCL)
408 goto out;
409 switch (mode & (FMODE_READ|FMODE_WRITE)) {
331 case FMODE_READ: 410 case FMODE_READ:
332 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0; 411 ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
333 break; 412 break;
@@ -337,21 +416,23 @@ static int can_open_cached(struct nfs4_state *state, int mode)
337 case FMODE_READ|FMODE_WRITE: 416 case FMODE_READ|FMODE_WRITE:
338 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; 417 ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
339 } 418 }
419out:
340 return ret; 420 return ret;
341} 421}
342 422
343static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags) 423static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
344{ 424{
345 if ((delegation->type & open_flags) != open_flags) 425 if ((delegation->type & fmode) != fmode)
346 return 0; 426 return 0;
347 if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) 427 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
348 return 0; 428 return 0;
429 nfs_mark_delegation_referenced(delegation);
349 return 1; 430 return 1;
350} 431}
351 432
352static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags) 433static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
353{ 434{
354 switch (open_flags) { 435 switch (fmode) {
355 case FMODE_WRITE: 436 case FMODE_WRITE:
356 state->n_wronly++; 437 state->n_wronly++;
357 break; 438 break;
@@ -361,15 +442,15 @@ static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
361 case FMODE_READ|FMODE_WRITE: 442 case FMODE_READ|FMODE_WRITE:
362 state->n_rdwr++; 443 state->n_rdwr++;
363 } 444 }
364 nfs4_state_set_mode_locked(state, state->state | open_flags); 445 nfs4_state_set_mode_locked(state, state->state | fmode);
365} 446}
366 447
367static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) 448static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
368{ 449{
369 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) 450 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
370 memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); 451 memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
371 memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); 452 memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
372 switch (open_flags) { 453 switch (fmode) {
373 case FMODE_READ: 454 case FMODE_READ:
374 set_bit(NFS_O_RDONLY_STATE, &state->flags); 455 set_bit(NFS_O_RDONLY_STATE, &state->flags);
375 break; 456 break;
@@ -381,16 +462,15 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
381 } 462 }
382} 463}
383 464
384static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) 465static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
385{ 466{
386 write_seqlock(&state->seqlock); 467 write_seqlock(&state->seqlock);
387 nfs_set_open_stateid_locked(state, stateid, open_flags); 468 nfs_set_open_stateid_locked(state, stateid, fmode);
388 write_sequnlock(&state->seqlock); 469 write_sequnlock(&state->seqlock);
389} 470}
390 471
391static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags) 472static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
392{ 473{
393 open_flags &= (FMODE_READ|FMODE_WRITE);
394 /* 474 /*
395 * Protect the call to nfs4_state_set_mode_locked and 475 * Protect the call to nfs4_state_set_mode_locked and
396 * serialise the stateid update 476 * serialise the stateid update
@@ -401,20 +481,60 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_sta
401 set_bit(NFS_DELEGATED_STATE, &state->flags); 481 set_bit(NFS_DELEGATED_STATE, &state->flags);
402 } 482 }
403 if (open_stateid != NULL) 483 if (open_stateid != NULL)
404 nfs_set_open_stateid_locked(state, open_stateid, open_flags); 484 nfs_set_open_stateid_locked(state, open_stateid, fmode);
405 write_sequnlock(&state->seqlock); 485 write_sequnlock(&state->seqlock);
406 spin_lock(&state->owner->so_lock); 486 spin_lock(&state->owner->so_lock);
407 update_open_stateflags(state, open_flags); 487 update_open_stateflags(state, fmode);
408 spin_unlock(&state->owner->so_lock); 488 spin_unlock(&state->owner->so_lock);
409} 489}
410 490
411static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags) 491static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode)
492{
493 struct nfs_inode *nfsi = NFS_I(state->inode);
494 struct nfs_delegation *deleg_cur;
495 int ret = 0;
496
497 fmode &= (FMODE_READ|FMODE_WRITE);
498
499 rcu_read_lock();
500 deleg_cur = rcu_dereference(nfsi->delegation);
501 if (deleg_cur == NULL)
502 goto no_delegation;
503
504 spin_lock(&deleg_cur->lock);
505 if (nfsi->delegation != deleg_cur ||
506 (deleg_cur->type & fmode) != fmode)
507 goto no_delegation_unlock;
508
509 if (delegation == NULL)
510 delegation = &deleg_cur->stateid;
511 else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
512 goto no_delegation_unlock;
513
514 nfs_mark_delegation_referenced(deleg_cur);
515 __update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode);
516 ret = 1;
517no_delegation_unlock:
518 spin_unlock(&deleg_cur->lock);
519no_delegation:
520 rcu_read_unlock();
521
522 if (!ret && open_stateid != NULL) {
523 __update_open_stateid(state, open_stateid, NULL, fmode);
524 ret = 1;
525 }
526
527 return ret;
528}
529
530
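
Aside: the rewritten update_open_stateid() above uses a classic lockless-lookup shape — dereference the delegation under rcu_read_lock(), take the object's own spinlock, then re-check that nfsi->delegation still points at it before trusting its fields, since the delegation can be detached in between. A rough userspace sketch of that shape, with C11 atomics and pthreads standing in for RCU and spinlocks; object reclamation is ignored and all names are invented:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct deleg {
        pthread_mutex_t lock;
        int type;                       /* FMODE_* bits the server granted */
};

static _Atomic(struct deleg *) cur_deleg;

static int try_use_delegation(int fmode)
{
        struct deleg *d = atomic_load(&cur_deleg);      /* "rcu_dereference" */

        if (d == NULL)
                return 0;
        pthread_mutex_lock(&d->lock);
        /* Re-check under the lock: still attached, and mode compatible? */
        if (atomic_load(&cur_deleg) != d || (d->type & fmode) != fmode) {
                pthread_mutex_unlock(&d->lock);
                return 0;
        }
        /* ...safe to copy the delegation stateid here... */
        pthread_mutex_unlock(&d->lock);
        return 1;
}

int main(void)
{
        static struct deleg d = { PTHREAD_MUTEX_INITIALIZER, 0x1 };

        atomic_store(&cur_deleg, &d);
        printf("delegation usable: %d\n", try_use_delegation(0x1));
        return 0;
}
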
531static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode)
412{ 532{
413 struct nfs_delegation *delegation; 533 struct nfs_delegation *delegation;
414 534
415 rcu_read_lock(); 535 rcu_read_lock();
416 delegation = rcu_dereference(NFS_I(inode)->delegation); 536 delegation = rcu_dereference(NFS_I(inode)->delegation);
417 if (delegation == NULL || (delegation->type & open_flags) == open_flags) { 537 if (delegation == NULL || (delegation->type & fmode) == fmode) {
418 rcu_read_unlock(); 538 rcu_read_unlock();
419 return; 539 return;
420 } 540 }
@@ -427,27 +547,28 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
427 struct nfs4_state *state = opendata->state; 547 struct nfs4_state *state = opendata->state;
428 struct nfs_inode *nfsi = NFS_I(state->inode); 548 struct nfs_inode *nfsi = NFS_I(state->inode);
429 struct nfs_delegation *delegation; 549 struct nfs_delegation *delegation;
430 int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL); 550 int open_mode = opendata->o_arg.open_flags & O_EXCL;
551 fmode_t fmode = opendata->o_arg.fmode;
431 nfs4_stateid stateid; 552 nfs4_stateid stateid;
432 int ret = -EAGAIN; 553 int ret = -EAGAIN;
433 554
434 rcu_read_lock();
435 delegation = rcu_dereference(nfsi->delegation);
436 for (;;) { 555 for (;;) {
437 if (can_open_cached(state, open_mode)) { 556 if (can_open_cached(state, fmode, open_mode)) {
438 spin_lock(&state->owner->so_lock); 557 spin_lock(&state->owner->so_lock);
439 if (can_open_cached(state, open_mode)) { 558 if (can_open_cached(state, fmode, open_mode)) {
440 update_open_stateflags(state, open_mode); 559 update_open_stateflags(state, fmode);
441 spin_unlock(&state->owner->so_lock); 560 spin_unlock(&state->owner->so_lock);
442 rcu_read_unlock();
443 goto out_return_state; 561 goto out_return_state;
444 } 562 }
445 spin_unlock(&state->owner->so_lock); 563 spin_unlock(&state->owner->so_lock);
446 } 564 }
447 if (delegation == NULL) 565 rcu_read_lock();
448 break; 566 delegation = rcu_dereference(nfsi->delegation);
449 if (!can_open_delegated(delegation, open_mode)) 567 if (delegation == NULL ||
568 !can_open_delegated(delegation, fmode)) {
569 rcu_read_unlock();
450 break; 570 break;
571 }
451 /* Save the delegation */ 572 /* Save the delegation */
452 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); 573 memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
453 rcu_read_unlock(); 574 rcu_read_unlock();
@@ -455,19 +576,11 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
455 if (ret != 0) 576 if (ret != 0)
456 goto out; 577 goto out;
457 ret = -EAGAIN; 578 ret = -EAGAIN;
458 rcu_read_lock(); 579
459 delegation = rcu_dereference(nfsi->delegation); 580 /* Try to update the stateid using the delegation */
460 /* If no delegation, try a cached open */ 581 if (update_open_stateid(state, NULL, &stateid, fmode))
461 if (delegation == NULL) 582 goto out_return_state;
462 continue;
463 /* Is the delegation still valid? */
464 if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
465 continue;
466 rcu_read_unlock();
467 update_open_stateid(state, NULL, &stateid, open_mode);
468 goto out_return_state;
469 } 583 }
470 rcu_read_unlock();
471out: 584out:
472 return ERR_PTR(ret); 585 return ERR_PTR(ret);
473out_return_state: 586out_return_state:
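
Aside: nfs4_try_open_cached() now tests can_open_cached() twice — once without the owner lock as a cheap hint, then again under so_lock before the counters are bumped, because the open state can change in between. The double-checked pattern in isolation (names invented; the unlocked read of the shared flag is the usual benign hint, which is exactly why the locked re-check exists):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t owner_lock = PTHREAD_MUTEX_INITIALIZER;
static int n_readers;                   /* protected by owner_lock   */
static bool readable = true;            /* may flip concurrently     */

static bool can_open_cached(void)
{
        return readable;
}

static bool try_cached_open(void)
{
        bool ok;

        if (!can_open_cached())         /* unlocked fast-path test   */
                return false;
        pthread_mutex_lock(&owner_lock);
        ok = can_open_cached();         /* re-check holding the lock */
        if (ok)
                n_readers++;            /* counters only touched here */
        pthread_mutex_unlock(&owner_lock);
        return ok;
}

int main(void)
{
        printf("cached open %s\n",
               try_cached_open() ? "succeeded" : "fell through");
        return 0;
}
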
@@ -480,7 +593,6 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
480 struct inode *inode; 593 struct inode *inode;
481 struct nfs4_state *state = NULL; 594 struct nfs4_state *state = NULL;
482 struct nfs_delegation *delegation; 595 struct nfs_delegation *delegation;
483 nfs4_stateid *deleg_stateid = NULL;
484 int ret; 596 int ret;
485 597
486 if (!data->rpc_done) { 598 if (!data->rpc_done) {
@@ -507,7 +619,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
507 if (delegation) 619 if (delegation)
508 delegation_flags = delegation->flags; 620 delegation_flags = delegation->flags;
509 rcu_read_unlock(); 621 rcu_read_unlock();
510 if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM)) 622 if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
511 nfs_inode_set_delegation(state->inode, 623 nfs_inode_set_delegation(state->inode,
512 data->owner->so_cred, 624 data->owner->so_cred,
513 &data->o_res); 625 &data->o_res);
@@ -516,12 +628,9 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
516 data->owner->so_cred, 628 data->owner->so_cred,
517 &data->o_res); 629 &data->o_res);
518 } 630 }
519 rcu_read_lock(); 631
520 delegation = rcu_dereference(NFS_I(inode)->delegation); 632 update_open_stateid(state, &data->o_res.stateid, NULL,
521 if (delegation != NULL) 633 data->o_arg.fmode);
522 deleg_stateid = &delegation->stateid;
523 update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
524 rcu_read_unlock();
525 iput(inode); 634 iput(inode);
526out: 635out:
527 return state; 636 return state;
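
Aside on the NFS_DELEGATION_NEED_RECLAIM test above: delegation->flags stops being a set of pre-shifted mask constants tested with '&' and becomes a field of bit numbers for the atomic set_bit()/test_bit() helpers — hence the explicit 1UL << shift here and the test_bit() calls elsewhere in this patch. The two conventions side by side (values invented; they only agree because MASK == 1 << BIT):

#include <assert.h>

#define NEED_RECLAIM_MASK 0x4   /* old style: pre-shifted mask        */
#define NEED_RECLAIM_BIT  2     /* new style: bit number for *_bit()  */

int main(void)
{
        unsigned long flags = 0;

        flags |= 1UL << NEED_RECLAIM_BIT;       /* models set_bit()   */
        assert((flags & (1UL << NEED_RECLAIM_BIT)) != 0); /* test_bit */
        assert((flags & NEED_RECLAIM_MASK) != 0); /* same storage     */

        flags &= ~(1UL << NEED_RECLAIM_BIT);    /* models clear_bit() */
        assert(flags == 0);
        return 0;
}
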
@@ -552,7 +661,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
552{ 661{
553 struct nfs4_opendata *opendata; 662 struct nfs4_opendata *opendata;
554 663
555 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); 664 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL);
556 if (opendata == NULL) 665 if (opendata == NULL)
557 return ERR_PTR(-ENOMEM); 666 return ERR_PTR(-ENOMEM);
558 opendata->state = state; 667 opendata->state = state;
@@ -560,12 +669,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
560 return opendata; 669 return opendata;
561} 670}
562 671
563static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res) 672static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res)
564{ 673{
565 struct nfs4_state *newstate; 674 struct nfs4_state *newstate;
566 int ret; 675 int ret;
567 676
568 opendata->o_arg.open_flags = openflags; 677 opendata->o_arg.open_flags = 0;
678 opendata->o_arg.fmode = fmode;
569 memset(&opendata->o_res, 0, sizeof(opendata->o_res)); 679 memset(&opendata->o_res, 0, sizeof(opendata->o_res));
570 memset(&opendata->c_res, 0, sizeof(opendata->c_res)); 680 memset(&opendata->c_res, 0, sizeof(opendata->c_res));
571 nfs4_init_opendata_res(opendata); 681 nfs4_init_opendata_res(opendata);
@@ -575,7 +685,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openf
575 newstate = nfs4_opendata_to_nfs4_state(opendata); 685 newstate = nfs4_opendata_to_nfs4_state(opendata);
576 if (IS_ERR(newstate)) 686 if (IS_ERR(newstate))
577 return PTR_ERR(newstate); 687 return PTR_ERR(newstate);
578 nfs4_close_state(&opendata->path, newstate, openflags); 688 nfs4_close_state(&opendata->path, newstate, fmode);
579 *res = newstate; 689 *res = newstate;
580 return 0; 690 return 0;
581} 691}
@@ -631,7 +741,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
631{ 741{
632 struct nfs_delegation *delegation; 742 struct nfs_delegation *delegation;
633 struct nfs4_opendata *opendata; 743 struct nfs4_opendata *opendata;
634 int delegation_type = 0; 744 fmode_t delegation_type = 0;
635 int status; 745 int status;
636 746
637 opendata = nfs4_open_recoverdata_alloc(ctx, state); 747 opendata = nfs4_open_recoverdata_alloc(ctx, state);
@@ -641,7 +751,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
641 opendata->o_arg.fh = NFS_FH(state->inode); 751 opendata->o_arg.fh = NFS_FH(state->inode);
642 rcu_read_lock(); 752 rcu_read_lock();
643 delegation = rcu_dereference(NFS_I(state->inode)->delegation); 753 delegation = rcu_dereference(NFS_I(state->inode)->delegation);
644 if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) 754 if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
645 delegation_type = delegation->type; 755 delegation_type = delegation->type;
646 rcu_read_unlock(); 756 rcu_read_unlock();
647 opendata->o_arg.u.delegation_type = delegation_type; 757 opendata->o_arg.u.delegation_type = delegation_type;
@@ -744,7 +854,7 @@ static void nfs4_open_confirm_release(void *calldata)
744 goto out_free; 854 goto out_free;
745 state = nfs4_opendata_to_nfs4_state(data); 855 state = nfs4_opendata_to_nfs4_state(data);
746 if (!IS_ERR(state)) 856 if (!IS_ERR(state))
747 nfs4_close_state(&data->path, state, data->o_arg.open_flags); 857 nfs4_close_state(&data->path, state, data->o_arg.fmode);
748out_free: 858out_free:
749 nfs4_opendata_put(data); 859 nfs4_opendata_put(data);
750} 860}
@@ -808,12 +918,12 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
808 if (data->state != NULL) { 918 if (data->state != NULL) {
809 struct nfs_delegation *delegation; 919 struct nfs_delegation *delegation;
810 920
811 if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL))) 921 if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
812 goto out_no_action; 922 goto out_no_action;
813 rcu_read_lock(); 923 rcu_read_lock();
814 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); 924 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
815 if (delegation != NULL && 925 if (delegation != NULL &&
816 (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) { 926 test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) {
817 rcu_read_unlock(); 927 rcu_read_unlock();
818 goto out_no_action; 928 goto out_no_action;
819 } 929 }
@@ -877,7 +987,7 @@ static void nfs4_open_release(void *calldata)
877 goto out_free; 987 goto out_free;
878 state = nfs4_opendata_to_nfs4_state(data); 988 state = nfs4_opendata_to_nfs4_state(data);
879 if (!IS_ERR(state)) 989 if (!IS_ERR(state))
880 nfs4_close_state(&data->path, state, data->o_arg.open_flags); 990 nfs4_close_state(&data->path, state, data->o_arg.fmode);
881out_free: 991out_free:
882 nfs4_opendata_put(data); 992 nfs4_opendata_put(data);
883} 993}
@@ -955,10 +1065,11 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
955 int ret; 1065 int ret;
956 1066
957 for (;;) { 1067 for (;;) {
958 ret = nfs4_wait_clnt_recover(server->client, clp); 1068 ret = nfs4_wait_clnt_recover(clp);
959 if (ret != 0) 1069 if (ret != 0)
960 return ret; 1070 return ret;
961 if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1071 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1072 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
962 break; 1073 break;
963 nfs4_schedule_state_recovery(clp); 1074 nfs4_schedule_state_recovery(clp);
964 } 1075 }
@@ -993,8 +1104,9 @@ static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4
993 1104
994 do { 1105 do {
995 err = _nfs4_open_expired(ctx, state); 1106 err = _nfs4_open_expired(ctx, state);
996 if (err == -NFS4ERR_DELAY) 1107 if (err != -NFS4ERR_DELAY)
997 nfs4_handle_exception(server, err, &exception); 1108 break;
1109 nfs4_handle_exception(server, err, &exception);
998 } while (exception.retry); 1110 } while (exception.retry);
999 return err; 1111 return err;
1000} 1112}
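
Aside: the reworked nfs4_do_open_expired() inverts the old test so that anything other than -NFS4ERR_DELAY (10008 on the wire) leaves the loop immediately, rather than trusting a possibly stale exception.retry flag. A standalone model of the corrected retry shape (all helpers invented):

#include <stdbool.h>
#include <stdio.h>

#define ERR_DELAY (-10008)      /* -NFS4ERR_DELAY: server says "back off" */

struct exception { bool retry; };

static int do_operation(int *attempts)
{
        /* Pretend the server asks us to wait twice, then succeeds. */
        return (*attempts)++ < 2 ? ERR_DELAY : 0;
}

static void handle_exception(int err, struct exception *exc)
{
        /* In the kernel this sleeps with backoff; here it just flags retry. */
        exc->retry = (err == ERR_DELAY);
}

int main(void)
{
        struct exception exc = { false };
        int attempts = 0, err;

        do {
                err = do_operation(&attempts);
                if (err != ERR_DELAY)
                        break;  /* any other result returns immediately */
                handle_exception(err, &exc);
        } while (exc.retry);

        printf("err=%d after %d attempts\n", err, attempts);
        return 0;
}
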
@@ -1031,12 +1143,11 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1031/* 1143/*
1032 * Returns a referenced nfs4_state 1144 * Returns a referenced nfs4_state
1033 */ 1145 */
1034static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1146static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
1035{ 1147{
1036 struct nfs4_state_owner *sp; 1148 struct nfs4_state_owner *sp;
1037 struct nfs4_state *state = NULL; 1149 struct nfs4_state *state = NULL;
1038 struct nfs_server *server = NFS_SERVER(dir); 1150 struct nfs_server *server = NFS_SERVER(dir);
1039 struct nfs_client *clp = server->nfs_client;
1040 struct nfs4_opendata *opendata; 1151 struct nfs4_opendata *opendata;
1041 int status; 1152 int status;
1042 1153
@@ -1050,12 +1161,11 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
1050 if (status != 0) 1161 if (status != 0)
1051 goto err_put_state_owner; 1162 goto err_put_state_owner;
1052 if (path->dentry->d_inode != NULL) 1163 if (path->dentry->d_inode != NULL)
1053 nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE)); 1164 nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
1054 down_read(&clp->cl_sem);
1055 status = -ENOMEM; 1165 status = -ENOMEM;
1056 opendata = nfs4_opendata_alloc(path, sp, flags, sattr); 1166 opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr);
1057 if (opendata == NULL) 1167 if (opendata == NULL)
1058 goto err_release_rwsem; 1168 goto err_put_state_owner;
1059 1169
1060 if (path->dentry->d_inode != NULL) 1170 if (path->dentry->d_inode != NULL)
1061 opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); 1171 opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
@@ -1073,13 +1183,10 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct
1073 goto err_opendata_put; 1183 goto err_opendata_put;
1074 nfs4_opendata_put(opendata); 1184 nfs4_opendata_put(opendata);
1075 nfs4_put_state_owner(sp); 1185 nfs4_put_state_owner(sp);
1076 up_read(&clp->cl_sem);
1077 *res = state; 1186 *res = state;
1078 return 0; 1187 return 0;
1079err_opendata_put: 1188err_opendata_put:
1080 nfs4_opendata_put(opendata); 1189 nfs4_opendata_put(opendata);
1081err_release_rwsem:
1082 up_read(&clp->cl_sem);
1083err_put_state_owner: 1190err_put_state_owner:
1084 nfs4_put_state_owner(sp); 1191 nfs4_put_state_owner(sp);
1085out_err: 1192out_err:
@@ -1088,14 +1195,14 @@ out_err:
1088} 1195}
1089 1196
1090 1197
1091static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred) 1198static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
1092{ 1199{
1093 struct nfs4_exception exception = { }; 1200 struct nfs4_exception exception = { };
1094 struct nfs4_state *res; 1201 struct nfs4_state *res;
1095 int status; 1202 int status;
1096 1203
1097 do { 1204 do {
1098 status = _nfs4_do_open(dir, path, flags, sattr, cred, &res); 1205 status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res);
1099 if (status == 0) 1206 if (status == 0)
1100 break; 1207 break;
1101 /* NOTE: BAD_SEQID means the server and client disagree about the 1208 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1230,10 +1337,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
1230 renew_lease(server, calldata->timestamp); 1337 renew_lease(server, calldata->timestamp);
1231 break; 1338 break;
1232 case -NFS4ERR_STALE_STATEID: 1339 case -NFS4ERR_STALE_STATEID:
1340 case -NFS4ERR_OLD_STATEID:
1341 case -NFS4ERR_BAD_STATEID:
1233 case -NFS4ERR_EXPIRED: 1342 case -NFS4ERR_EXPIRED:
1234 break; 1343 if (calldata->arg.fmode == 0)
1344 break;
1235 default: 1345 default:
1236 if (nfs4_async_handle_error(task, server) == -EAGAIN) { 1346 if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
1237 rpc_restart_call(task); 1347 rpc_restart_call(task);
1238 return; 1348 return;
1239 } 1349 }
@@ -1272,10 +1382,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
1272 nfs_fattr_init(calldata->res.fattr); 1382 nfs_fattr_init(calldata->res.fattr);
1273 if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) { 1383 if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
1274 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 1384 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
1275 calldata->arg.open_flags = FMODE_READ; 1385 calldata->arg.fmode = FMODE_READ;
1276 } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) { 1386 } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
1277 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 1387 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
1278 calldata->arg.open_flags = FMODE_WRITE; 1388 calldata->arg.fmode = FMODE_WRITE;
1279 } 1389 }
1280 calldata->timestamp = jiffies; 1390 calldata->timestamp = jiffies;
1281 rpc_call_start(task); 1391 rpc_call_start(task);
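
Aside: nfs4_close_prepare() chooses between a full CLOSE and an OPEN_DOWNGRADE from whichever open modes survive this close, and now records the survivors in arg.fmode (initialised to 0 in nfs4_do_close() below). A toy model of that decision — the FMODE_* values mirror the kernel, the helper is invented:

#include <stdio.h>

#define FMODE_READ  0x1
#define FMODE_WRITE 0x2

/*
 * Given the opens that remain after this close, return the fmode the
 * server must keep honouring (the OPEN_DOWNGRADE target), or 0 when
 * nothing remains and a full CLOSE is due.
 */
static unsigned int remaining_fmode(int n_rdonly, int n_wronly, int n_rdwr)
{
        unsigned int fmode = 0;

        if (n_rdonly > 0 || n_rdwr > 0)
                fmode |= FMODE_READ;
        if (n_wronly > 0 || n_rdwr > 0)
                fmode |= FMODE_WRITE;
        return fmode;
}

int main(void)
{
        printf("%#x\n", remaining_fmode(1, 0, 0)); /* 0x1: downgrade to read */
        printf("%#x\n", remaining_fmode(0, 0, 0)); /* 0: send a full CLOSE   */
        return 0;
}
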
@@ -1328,6 +1438,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1328 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1438 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
1329 if (calldata->arg.seqid == NULL) 1439 if (calldata->arg.seqid == NULL)
1330 goto out_free_calldata; 1440 goto out_free_calldata;
1441 calldata->arg.fmode = 0;
1331 calldata->arg.bitmask = server->attr_bitmask; 1442 calldata->arg.bitmask = server->attr_bitmask;
1332 calldata->res.fattr = &calldata->fattr; 1443 calldata->res.fattr = &calldata->fattr;
1333 calldata->res.seqid = calldata->arg.seqid; 1444 calldata->res.seqid = calldata->arg.seqid;
@@ -1354,13 +1465,13 @@ out:
1354 return status; 1465 return status;
1355} 1466}
1356 1467
1357static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state) 1468static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode)
1358{ 1469{
1359 struct file *filp; 1470 struct file *filp;
1360 int ret; 1471 int ret;
1361 1472
1362 /* If the open_intent is for execute, we have an extra check to make */ 1473 /* If the open_intent is for execute, we have an extra check to make */
1363 if (nd->intent.open.flags & FMODE_EXEC) { 1474 if (fmode & FMODE_EXEC) {
1364 ret = nfs_may_open(state->inode, 1475 ret = nfs_may_open(state->inode,
1365 state->owner->so_cred, 1476 state->owner->so_cred,
1366 nd->intent.open.flags); 1477 nd->intent.open.flags);
@@ -1376,7 +1487,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct
1376 } 1487 }
1377 ret = PTR_ERR(filp); 1488 ret = PTR_ERR(filp);
1378out_close: 1489out_close:
1379 nfs4_close_sync(path, state, nd->intent.open.flags); 1490 nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE));
1380 return ret; 1491 return ret;
1381} 1492}
1382 1493
@@ -1392,6 +1503,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1392 struct rpc_cred *cred; 1503 struct rpc_cred *cred;
1393 struct nfs4_state *state; 1504 struct nfs4_state *state;
1394 struct dentry *res; 1505 struct dentry *res;
1506 fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
1395 1507
1396 if (nd->flags & LOOKUP_CREATE) { 1508 if (nd->flags & LOOKUP_CREATE) {
1397 attr.ia_mode = nd->intent.open.create_mode; 1509 attr.ia_mode = nd->intent.open.create_mode;
@@ -1409,7 +1521,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1409 parent = dentry->d_parent; 1521 parent = dentry->d_parent;
1410 /* Protect against concurrent sillydeletes */ 1522 /* Protect against concurrent sillydeletes */
1411 nfs_block_sillyrename(parent); 1523 nfs_block_sillyrename(parent);
1412 state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); 1524 state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred);
1413 put_rpccred(cred); 1525 put_rpccred(cred);
1414 if (IS_ERR(state)) { 1526 if (IS_ERR(state)) {
1415 if (PTR_ERR(state) == -ENOENT) { 1527 if (PTR_ERR(state) == -ENOENT) {
@@ -1424,7 +1536,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1424 path.dentry = res; 1536 path.dentry = res;
1425 nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); 1537 nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
1426 nfs_unblock_sillyrename(parent); 1538 nfs_unblock_sillyrename(parent);
1427 nfs4_intent_set_file(nd, &path, state); 1539 nfs4_intent_set_file(nd, &path, state, fmode);
1428 return res; 1540 return res;
1429} 1541}
1430 1542
@@ -1437,11 +1549,12 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1437 }; 1549 };
1438 struct rpc_cred *cred; 1550 struct rpc_cred *cred;
1439 struct nfs4_state *state; 1551 struct nfs4_state *state;
1552 fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE);
1440 1553
1441 cred = rpc_lookup_cred(); 1554 cred = rpc_lookup_cred();
1442 if (IS_ERR(cred)) 1555 if (IS_ERR(cred))
1443 return PTR_ERR(cred); 1556 return PTR_ERR(cred);
1444 state = nfs4_do_open(dir, &path, openflags, NULL, cred); 1557 state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred);
1445 put_rpccred(cred); 1558 put_rpccred(cred);
1446 if (IS_ERR(state)) { 1559 if (IS_ERR(state)) {
1447 switch (PTR_ERR(state)) { 1560 switch (PTR_ERR(state)) {
@@ -1458,10 +1571,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
1458 } 1571 }
1459 if (state->inode == dentry->d_inode) { 1572 if (state->inode == dentry->d_inode) {
1460 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1573 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1461 nfs4_intent_set_file(nd, &path, state); 1574 nfs4_intent_set_file(nd, &path, state, fmode);
1462 return 1; 1575 return 1;
1463 } 1576 }
1464 nfs4_close_sync(&path, state, openflags); 1577 nfs4_close_sync(&path, state, fmode);
1465out_drop: 1578out_drop:
1466 d_drop(dentry); 1579 d_drop(dentry);
1467 return 0; 1580 return 0;
@@ -1887,6 +2000,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1887 }; 2000 };
1888 struct nfs4_state *state; 2001 struct nfs4_state *state;
1889 struct rpc_cred *cred; 2002 struct rpc_cred *cred;
2003 fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE);
1890 int status = 0; 2004 int status = 0;
1891 2005
1892 cred = rpc_lookup_cred(); 2006 cred = rpc_lookup_cred();
@@ -1894,7 +2008,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1894 status = PTR_ERR(cred); 2008 status = PTR_ERR(cred);
1895 goto out; 2009 goto out;
1896 } 2010 }
1897 state = nfs4_do_open(dir, &path, flags, sattr, cred); 2011 state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred);
1898 d_drop(dentry); 2012 d_drop(dentry);
1899 if (IS_ERR(state)) { 2013 if (IS_ERR(state)) {
1900 status = PTR_ERR(state); 2014 status = PTR_ERR(state);
@@ -1910,9 +2024,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
1910 nfs_post_op_update_inode(state->inode, &fattr); 2024 nfs_post_op_update_inode(state->inode, &fattr);
1911 } 2025 }
1912 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) 2026 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
1913 status = nfs4_intent_set_file(nd, &path, state); 2027 status = nfs4_intent_set_file(nd, &path, state, fmode);
1914 else 2028 else
1915 nfs4_close_sync(&path, state, flags); 2029 nfs4_close_sync(&path, state, fmode);
1916out_putcred: 2030out_putcred:
1917 put_rpccred(cred); 2031 put_rpccred(cred);
1918out: 2032out:
@@ -1974,7 +2088,7 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
1974{ 2088{
1975 struct nfs_removeres *res = task->tk_msg.rpc_resp; 2089 struct nfs_removeres *res = task->tk_msg.rpc_resp;
1976 2090
1977 if (nfs4_async_handle_error(task, res->server) == -EAGAIN) 2091 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
1978 return 0; 2092 return 0;
1979 update_changeattr(dir, &res->cinfo); 2093 update_changeattr(dir, &res->cinfo);
1980 nfs_post_op_update_inode(dir, &res->dir_attr); 2094 nfs_post_op_update_inode(dir, &res->dir_attr);
@@ -2402,7 +2516,7 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
2402{ 2516{
2403 struct nfs_server *server = NFS_SERVER(data->inode); 2517 struct nfs_server *server = NFS_SERVER(data->inode);
2404 2518
2405 if (nfs4_async_handle_error(task, server) == -EAGAIN) { 2519 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
2406 rpc_restart_call(task); 2520 rpc_restart_call(task);
2407 return -EAGAIN; 2521 return -EAGAIN;
2408 } 2522 }
@@ -2423,7 +2537,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
2423{ 2537{
2424 struct inode *inode = data->inode; 2538 struct inode *inode = data->inode;
2425 2539
2426 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { 2540 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
2427 rpc_restart_call(task); 2541 rpc_restart_call(task);
2428 return -EAGAIN; 2542 return -EAGAIN;
2429 } 2543 }
@@ -2449,7 +2563,7 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
2449{ 2563{
2450 struct inode *inode = data->inode; 2564 struct inode *inode = data->inode;
2451 2565
2452 if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { 2566 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
2453 rpc_restart_call(task); 2567 rpc_restart_call(task);
2454 return -EAGAIN; 2568 return -EAGAIN;
2455 } 2569 }
@@ -2742,19 +2856,25 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
2742} 2856}
2743 2857
2744static int 2858static int
2745nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) 2859nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state)
2746{ 2860{
2747 struct nfs_client *clp = server->nfs_client; 2861 struct nfs_client *clp = server->nfs_client;
2748 2862
2749 if (!clp || task->tk_status >= 0) 2863 if (!clp || task->tk_status >= 0)
2750 return 0; 2864 return 0;
2751 switch(task->tk_status) { 2865 switch(task->tk_status) {
2866 case -NFS4ERR_ADMIN_REVOKED:
2867 case -NFS4ERR_BAD_STATEID:
2868 case -NFS4ERR_OPENMODE:
2869 if (state == NULL)
2870 break;
2871 nfs4_state_mark_reclaim_nograce(clp, state);
2752 case -NFS4ERR_STALE_CLIENTID: 2872 case -NFS4ERR_STALE_CLIENTID:
2753 case -NFS4ERR_STALE_STATEID: 2873 case -NFS4ERR_STALE_STATEID:
2754 case -NFS4ERR_EXPIRED: 2874 case -NFS4ERR_EXPIRED:
2755 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 2875 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
2756 nfs4_schedule_state_recovery(clp); 2876 nfs4_schedule_state_recovery(clp);
2757 if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) 2877 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
2758 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 2878 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
2759 task->tk_status = 0; 2879 task->tk_status = 0;
2760 return -EAGAIN; 2880 return -EAGAIN;
@@ -2772,79 +2892,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
2772 return 0; 2892 return 0;
2773} 2893}
2774 2894
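
Aside: the extended nfs4_async_handle_error() groups the stateid-class errors (ADMIN_REVOKED, BAD_STATEID, OPENMODE) so they first mark the open state for no-grace reclaim and then fall through into the existing lease-recovery cases. That deliberate-fallthrough grouping, in isolation (error values and handlers invented):

#include <stdio.h>

enum { ERR_BAD_STATEID = 1, ERR_EXPIRED, ERR_OTHER };

static void mark_state_for_reclaim(void) { puts("mark state: reclaim nograce"); }
static void schedule_recovery(void)      { puts("schedule lease recovery"); }

static void classify(int err)
{
        switch (err) {
        case ERR_BAD_STATEID:
                mark_state_for_reclaim();
                /* fall through: bad stateids also need recovery to run */
        case ERR_EXPIRED:
                schedule_recovery();
                break;
        default:
                printf("err %d: no recovery action\n", err);
        }
}

int main(void)
{
        classify(ERR_BAD_STATEID);  /* marks state, then schedules recovery */
        classify(ERR_EXPIRED);      /* schedules recovery only              */
        classify(ERR_OTHER);
        return 0;
}
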
2775static int nfs4_wait_bit_killable(void *word)
2776{
2777 if (fatal_signal_pending(current))
2778 return -ERESTARTSYS;
2779 schedule();
2780 return 0;
2781}
2782
2783static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
2784{
2785 int res;
2786
2787 might_sleep();
2788
2789 rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_);
2790
2791 res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
2792 nfs4_wait_bit_killable, TASK_KILLABLE);
2793
2794 rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_);
2795 return res;
2796}
2797
2798static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
2799{
2800 int res = 0;
2801
2802 might_sleep();
2803
2804 if (*timeout <= 0)
2805 *timeout = NFS4_POLL_RETRY_MIN;
2806 if (*timeout > NFS4_POLL_RETRY_MAX)
2807 *timeout = NFS4_POLL_RETRY_MAX;
2808 schedule_timeout_killable(*timeout);
2809 if (fatal_signal_pending(current))
2810 res = -ERESTARTSYS;
2811 *timeout <<= 1;
2812 return res;
2813}
2814
2815/* This is the error handling routine for processes that are allowed
2816 * to sleep.
2817 */
2818static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
2819{
2820 struct nfs_client *clp = server->nfs_client;
2821 int ret = errorcode;
2822
2823 exception->retry = 0;
2824 switch(errorcode) {
2825 case 0:
2826 return 0;
2827 case -NFS4ERR_STALE_CLIENTID:
2828 case -NFS4ERR_STALE_STATEID:
2829 case -NFS4ERR_EXPIRED:
2830 nfs4_schedule_state_recovery(clp);
2831 ret = nfs4_wait_clnt_recover(server->client, clp);
2832 if (ret == 0)
2833 exception->retry = 1;
2834 break;
2835 case -NFS4ERR_FILE_OPEN:
2836 case -NFS4ERR_GRACE:
2837 case -NFS4ERR_DELAY:
2838 ret = nfs4_delay(server->client, &exception->timeout);
2839 if (ret != 0)
2840 break;
2841 case -NFS4ERR_OLD_STATEID:
2842 exception->retry = 1;
2843 }
2844 /* We failed to handle the error */
2845 return nfs4_map_errors(ret);
2846}
2847
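
Aside: the block deleted above includes nfs4_delay(), the sleeping path's backoff helper — clamp the timeout between NFS4_POLL_RETRY_MIN and NFS4_POLL_RETRY_MAX, sleep, then double it for the next round (the helpers appear to be re-homed elsewhere in the file by this reshuffle rather than dropped). A userspace model of that capped exponential backoff (limits invented):

#include <stdio.h>

#define RETRY_MIN 1     /* stand-ins for NFS4_POLL_RETRY_MIN/MAX, which */
#define RETRY_MAX 16    /* are jiffies in the kernel; plain ints here   */

/* Clamp the timeout, "sleep" for it, and double it for the next call. */
static int backoff(long *timeout)
{
        if (*timeout <= 0)
                *timeout = RETRY_MIN;
        if (*timeout > RETRY_MAX)
                *timeout = RETRY_MAX;
        printf("sleep %ld\n", *timeout);  /* schedule_timeout_killable() */
        *timeout <<= 1;
        return 0;       /* -ERESTARTSYS here if a fatal signal arrived */
}

int main(void)
{
        long timeout = 0;

        for (int i = 0; i < 6; i++)
                backoff(&timeout);      /* prints 1 2 4 8 16 16 */
        return 0;
}
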
2848int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) 2895int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
2849{ 2896{
2850 nfs4_verifier sc_verifier; 2897 nfs4_verifier sc_verifier;
@@ -2916,7 +2963,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre
2916 spin_lock(&clp->cl_lock); 2963 spin_lock(&clp->cl_lock);
2917 clp->cl_lease_time = fsinfo.lease_time * HZ; 2964 clp->cl_lease_time = fsinfo.lease_time * HZ;
2918 clp->cl_last_renewal = now; 2965 clp->cl_last_renewal = now;
2919 clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
2920 spin_unlock(&clp->cl_lock); 2966 spin_unlock(&clp->cl_lock);
2921 } 2967 }
2922 return status; 2968 return status;
@@ -3074,7 +3120,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3074 struct nfs4_lock_state *lsp; 3120 struct nfs4_lock_state *lsp;
3075 int status; 3121 int status;
3076 3122
3077 down_read(&clp->cl_sem);
3078 arg.lock_owner.clientid = clp->cl_clientid; 3123 arg.lock_owner.clientid = clp->cl_clientid;
3079 status = nfs4_set_lock_state(state, request); 3124 status = nfs4_set_lock_state(state, request);
3080 if (status != 0) 3125 if (status != 0)
@@ -3091,7 +3136,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3091 } 3136 }
3092 request->fl_ops->fl_release_private(request); 3137 request->fl_ops->fl_release_private(request);
3093out: 3138out:
3094 up_read(&clp->cl_sem);
3095 return status; 3139 return status;
3096} 3140}
3097 3141
@@ -3181,11 +3225,13 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
3181 sizeof(calldata->lsp->ls_stateid.data)); 3225 sizeof(calldata->lsp->ls_stateid.data));
3182 renew_lease(calldata->server, calldata->timestamp); 3226 renew_lease(calldata->server, calldata->timestamp);
3183 break; 3227 break;
3228 case -NFS4ERR_BAD_STATEID:
3229 case -NFS4ERR_OLD_STATEID:
3184 case -NFS4ERR_STALE_STATEID: 3230 case -NFS4ERR_STALE_STATEID:
3185 case -NFS4ERR_EXPIRED: 3231 case -NFS4ERR_EXPIRED:
3186 break; 3232 break;
3187 default: 3233 default:
3188 if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) 3234 if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
3189 rpc_restart_call(task); 3235 rpc_restart_call(task);
3190 } 3236 }
3191} 3237}
@@ -3248,6 +3294,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3248 3294
3249static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) 3295static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
3250{ 3296{
3297 struct nfs_inode *nfsi = NFS_I(state->inode);
3251 struct nfs_seqid *seqid; 3298 struct nfs_seqid *seqid;
3252 struct nfs4_lock_state *lsp; 3299 struct nfs4_lock_state *lsp;
3253 struct rpc_task *task; 3300 struct rpc_task *task;
@@ -3257,8 +3304,12 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
3257 status = nfs4_set_lock_state(state, request); 3304 status = nfs4_set_lock_state(state, request);
3258 /* Unlock _before_ we do the RPC call */ 3305 /* Unlock _before_ we do the RPC call */
3259 request->fl_flags |= FL_EXISTS; 3306 request->fl_flags |= FL_EXISTS;
3260 if (do_vfs_lock(request->fl_file, request) == -ENOENT) 3307 down_read(&nfsi->rwsem);
3308 if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
3309 up_read(&nfsi->rwsem);
3261 goto out; 3310 goto out;
3311 }
3312 up_read(&nfsi->rwsem);
3262 if (status != 0) 3313 if (status != 0)
3263 goto out; 3314 goto out;
3264 /* Is this a delegated lock? */ 3315 /* Is this a delegated lock? */
@@ -3484,7 +3535,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
3484 3535
3485static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 3536static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
3486{ 3537{
3487 struct nfs_client *clp = state->owner->so_client; 3538 struct nfs_inode *nfsi = NFS_I(state->inode);
3488 unsigned char fl_flags = request->fl_flags; 3539 unsigned char fl_flags = request->fl_flags;
3489 int status; 3540 int status;
3490 3541
@@ -3496,19 +3547,13 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
3496 status = do_vfs_lock(request->fl_file, request); 3547 status = do_vfs_lock(request->fl_file, request);
3497 if (status < 0) 3548 if (status < 0)
3498 goto out; 3549 goto out;
3499 down_read(&clp->cl_sem); 3550 down_read(&nfsi->rwsem);
3500 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { 3551 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
3501 struct nfs_inode *nfsi = NFS_I(state->inode);
3502 /* Yes: cache locks! */ 3552 /* Yes: cache locks! */
3503 down_read(&nfsi->rwsem);
3504 /* ...but avoid races with delegation recall... */ 3553 /* ...but avoid races with delegation recall... */
3505 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { 3554 request->fl_flags = fl_flags & ~FL_SLEEP;
3506 request->fl_flags = fl_flags & ~FL_SLEEP; 3555 status = do_vfs_lock(request->fl_file, request);
3507 status = do_vfs_lock(request->fl_file, request); 3556 goto out_unlock;
3508 up_read(&nfsi->rwsem);
3509 goto out_unlock;
3510 }
3511 up_read(&nfsi->rwsem);
3512 } 3557 }
3513 status = _nfs4_do_setlk(state, cmd, request, 0); 3558 status = _nfs4_do_setlk(state, cmd, request, 0);
3514 if (status != 0) 3559 if (status != 0)
@@ -3518,7 +3563,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
3518 if (do_vfs_lock(request->fl_file, request) < 0) 3563 if (do_vfs_lock(request->fl_file, request) < 0)
3519 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); 3564 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
3520out_unlock: 3565out_unlock:
3521 up_read(&clp->cl_sem); 3566 up_read(&nfsi->rwsem);
3522out: 3567out:
3523 request->fl_flags = fl_flags; 3568 request->fl_flags = fl_flags;
3524 return status; 3569 return status;
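
Aside: the lock paths now serialise against delegation recall with the per-inode nfsi->rwsem instead of the client-wide cl_sem — lock requests take it shared, while recall (and lock reclaim, in nfs4state.c below) takes it exclusive, so the "is this file still delegated?" test stays stable across the cached do_vfs_lock() call. A compressed model with a POSIX rwlock (all names invented):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t inode_rwsem = PTHREAD_RWLOCK_INITIALIZER;
static bool delegated = true;           /* cleared by the recall path */

static void set_lock(void)
{
        pthread_rwlock_rdlock(&inode_rwsem);    /* down_read(&nfsi->rwsem) */
        if (delegated)
                puts("cache the lock locally"); /* do_vfs_lock() only      */
        else
                puts("send LOCK to the server");
        pthread_rwlock_unlock(&inode_rwsem);
}

static void recall_delegation(void)
{
        pthread_rwlock_wrlock(&inode_rwsem);    /* excludes all lockers    */
        delegated = false;                      /* then flush cached state */
        pthread_rwlock_unlock(&inode_rwsem);
}

int main(void)
{
        set_lock();
        recall_delegation();
        set_lock();
        return 0;
}
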
@@ -3664,11 +3709,15 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
3664} 3709}
3665 3710
3666struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { 3711struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = {
3712 .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT,
3713 .state_flag_bit = NFS_STATE_RECLAIM_REBOOT,
3667 .recover_open = nfs4_open_reclaim, 3714 .recover_open = nfs4_open_reclaim,
3668 .recover_lock = nfs4_lock_reclaim, 3715 .recover_lock = nfs4_lock_reclaim,
3669}; 3716};
3670 3717
3671struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { 3718struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops = {
3719 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
3720 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
3672 .recover_open = nfs4_open_expired, 3721 .recover_open = nfs4_open_expired,
3673 .recover_lock = nfs4_lock_expired, 3722 .recover_lock = nfs4_lock_expired,
3674}; 3723};
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 3305acbbe2ae..f524e932ff7b 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -65,7 +65,6 @@ nfs4_renew_state(struct work_struct *work)
65 long lease, timeout; 65 long lease, timeout;
66 unsigned long last, now; 66 unsigned long last, now;
67 67
68 down_read(&clp->cl_sem);
69 dprintk("%s: start\n", __func__); 68 dprintk("%s: start\n", __func__);
70 /* Are there any active superblocks? */ 69 /* Are there any active superblocks? */
71 if (list_empty(&clp->cl_superblocks)) 70 if (list_empty(&clp->cl_superblocks))
@@ -77,17 +76,19 @@ nfs4_renew_state(struct work_struct *work)
77 timeout = (2 * lease) / 3 + (long)last - (long)now; 76 timeout = (2 * lease) / 3 + (long)last - (long)now;
78 /* Are we close to a lease timeout? */ 77 /* Are we close to a lease timeout? */
79 if (time_after(now, last + lease/3)) { 78 if (time_after(now, last + lease/3)) {
80 cred = nfs4_get_renew_cred(clp); 79 cred = nfs4_get_renew_cred_locked(clp);
80 spin_unlock(&clp->cl_lock);
81 if (cred == NULL) { 81 if (cred == NULL) {
82 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 82 if (list_empty(&clp->cl_delegations)) {
83 spin_unlock(&clp->cl_lock); 83 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
84 goto out;
85 }
84 nfs_expire_all_delegations(clp); 86 nfs_expire_all_delegations(clp);
85 goto out; 87 } else {
88 /* Queue an asynchronous RENEW. */
89 nfs4_proc_async_renew(clp, cred);
90 put_rpccred(cred);
86 } 91 }
87 spin_unlock(&clp->cl_lock);
88 /* Queue an asynchronous RENEW. */
89 nfs4_proc_async_renew(clp, cred);
90 put_rpccred(cred);
91 timeout = (2 * lease) / 3; 92 timeout = (2 * lease) / 3;
92 spin_lock(&clp->cl_lock); 93 spin_lock(&clp->cl_lock);
93 } else 94 } else
@@ -100,12 +101,11 @@ nfs4_renew_state(struct work_struct *work)
100 cancel_delayed_work(&clp->cl_renewd); 101 cancel_delayed_work(&clp->cl_renewd);
101 schedule_delayed_work(&clp->cl_renewd, timeout); 102 schedule_delayed_work(&clp->cl_renewd, timeout);
102 spin_unlock(&clp->cl_lock); 103 spin_unlock(&clp->cl_lock);
104 nfs_expire_unreferenced_delegations(clp);
103out: 105out:
104 up_read(&clp->cl_sem);
105 dprintk("%s: done\n", __func__); 106 dprintk("%s: done\n", __func__);
106} 107}
107 108
108/* Must be called with clp->cl_sem locked for writes */
109void 109void
110nfs4_schedule_state_renewal(struct nfs_client *clp) 110nfs4_schedule_state_renewal(struct nfs_client *clp)
111{ 111{
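
Aside on the timing visible in this hunk's context lines: the renew daemon sends a RENEW once more than lease/3 has elapsed since the last renewal, and otherwise sleeps until 2*lease/3 past it, per timeout = (2 * lease) / 3 + last - now. A small model in plain seconds — the 5-second floor mirrors the kernel's 5 * HZ clamp, which sits just outside the lines shown; names invented:

#include <stdio.h>

/* When should the renew daemon run next? 'last' is the last renewal. */
static long next_renew_timeout(long lease, long last, long now)
{
        long timeout = (2 * lease) / 3 + last - now;

        if (now > last + lease / 3)         /* past the 1/3 mark: renew */
                timeout = (2 * lease) / 3;  /* now, re-arm 2/3 out      */
        if (timeout < 5)                    /* floor, like 5 * HZ       */
                timeout = 5;
        return timeout;
}

int main(void)
{
        /* 90s lease, renewed 40s ago: past lease/3, renew immediately */
        printf("next wakeup in %lds\n", next_renew_timeout(90, 0, 40)); /* 60 */
        /* renewed 10s ago: just sleep out the remainder */
        printf("next wakeup in %lds\n", next_renew_timeout(90, 0, 10)); /* 50 */
        return 0;
}
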
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 401ef8b28f97..2022fe47966f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -71,14 +71,12 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
71 return status; 71 return status;
72} 72}
73 73
74static struct rpc_cred *nfs4_get_machine_cred(struct nfs_client *clp) 74static struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
75{ 75{
76 struct rpc_cred *cred = NULL; 76 struct rpc_cred *cred = NULL;
77 77
78 spin_lock(&clp->cl_lock);
79 if (clp->cl_machine_cred != NULL) 78 if (clp->cl_machine_cred != NULL)
80 cred = get_rpccred(clp->cl_machine_cred); 79 cred = get_rpccred(clp->cl_machine_cred);
81 spin_unlock(&clp->cl_lock);
82 return cred; 80 return cred;
83} 81}
84 82
@@ -94,7 +92,7 @@ static void nfs4_clear_machine_cred(struct nfs_client *clp)
94 put_rpccred(cred); 92 put_rpccred(cred);
95} 93}
96 94
97struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) 95struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
98{ 96{
99 struct nfs4_state_owner *sp; 97 struct nfs4_state_owner *sp;
100 struct rb_node *pos; 98 struct rb_node *pos;
@@ -110,13 +108,24 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
110 return cred; 108 return cred;
111} 109}
112 110
111static struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
112{
113 struct rpc_cred *cred;
114
115 spin_lock(&clp->cl_lock);
116 cred = nfs4_get_renew_cred_locked(clp);
117 spin_unlock(&clp->cl_lock);
118 return cred;
119}
120
113static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) 121static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
114{ 122{
115 struct nfs4_state_owner *sp; 123 struct nfs4_state_owner *sp;
116 struct rb_node *pos; 124 struct rb_node *pos;
117 struct rpc_cred *cred; 125 struct rpc_cred *cred;
118 126
119 cred = nfs4_get_machine_cred(clp); 127 spin_lock(&clp->cl_lock);
128 cred = nfs4_get_machine_cred_locked(clp);
120 if (cred != NULL) 129 if (cred != NULL)
121 goto out; 130 goto out;
122 pos = rb_first(&clp->cl_state_owners); 131 pos = rb_first(&clp->cl_state_owners);
@@ -125,6 +134,7 @@ static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
125 cred = get_rpccred(sp->so_cred); 134 cred = get_rpccred(sp->so_cred);
126 } 135 }
127out: 136out:
137 spin_unlock(&clp->cl_lock);
128 return cred; 138 return cred;
129} 139}
130 140
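
Aside: the cred lookup grows the usual _locked/unlocked split — nfs4_get_renew_cred_locked() assumes cl_lock is held (the renew daemon above calls it that way), while the wrapper merely brackets it with the spinlock for everyone else. The shape in miniature (invented names):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cl_lock = PTHREAD_MUTEX_INITIALIZER;
static int cred_store = 42;     /* protected by cl_lock */

/* Caller must hold cl_lock. */
static int get_cred_locked(void)
{
        return cred_store;
}

/* Convenience wrapper for callers that do not hold the lock. */
static int get_cred(void)
{
        int cred;

        pthread_mutex_lock(&cl_lock);
        cred = get_cred_locked();
        pthread_mutex_unlock(&cl_lock);
        return cred;
}

int main(void)
{
        printf("cred=%d\n", get_cred());
        return 0;
}
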
@@ -295,10 +305,6 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
295 } 305 }
296} 306}
297 307
298/*
299 * Note: must be called with clp->cl_sem held in order to prevent races
300 * with reboot recovery!
301 */
302struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) 308struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
303{ 309{
304 struct nfs_client *clp = server->nfs_client; 310 struct nfs_client *clp = server->nfs_client;
@@ -327,10 +333,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
327 return sp; 333 return sp;
328} 334}
329 335
330/*
331 * Must be called with clp->cl_sem held in order to avoid races
332 * with state recovery...
333 */
334void nfs4_put_state_owner(struct nfs4_state_owner *sp) 336void nfs4_put_state_owner(struct nfs4_state_owner *sp)
335{ 337{
336 struct nfs_client *clp = sp->so_client; 338 struct nfs_client *clp = sp->so_client;
@@ -361,18 +363,18 @@ nfs4_alloc_open_state(void)
361} 363}
362 364
363void 365void
364nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode) 366nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode)
365{ 367{
366 if (state->state == mode) 368 if (state->state == fmode)
367 return; 369 return;
368 /* NB! List reordering - see the reclaim code for why. */ 370 /* NB! List reordering - see the reclaim code for why. */
369 if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) { 371 if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
370 if (mode & FMODE_WRITE) 372 if (fmode & FMODE_WRITE)
371 list_move(&state->open_states, &state->owner->so_states); 373 list_move(&state->open_states, &state->owner->so_states);
372 else 374 else
373 list_move_tail(&state->open_states, &state->owner->so_states); 375 list_move_tail(&state->open_states, &state->owner->so_states);
374 } 376 }
375 state->state = mode; 377 state->state = fmode;
376} 378}
377 379
378static struct nfs4_state * 380static struct nfs4_state *
@@ -432,10 +434,6 @@ out:
432 return state; 434 return state;
433} 435}
434 436
435/*
436 * Beware! Caller must be holding exactly one
437 * reference to clp->cl_sem!
438 */
439void nfs4_put_open_state(struct nfs4_state *state) 437void nfs4_put_open_state(struct nfs4_state *state)
440{ 438{
441 struct inode *inode = state->inode; 439 struct inode *inode = state->inode;
@@ -456,16 +454,16 @@ void nfs4_put_open_state(struct nfs4_state *state)
456/* 454/*
457 * Close the current file. 455 * Close the current file.
458 */ 456 */
459static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mode, int wait) 457static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait)
460{ 458{
461 struct nfs4_state_owner *owner = state->owner; 459 struct nfs4_state_owner *owner = state->owner;
462 int call_close = 0; 460 int call_close = 0;
463 int newstate; 461 fmode_t newstate;
464 462
465 atomic_inc(&owner->so_count); 463 atomic_inc(&owner->so_count);
466 /* Protect against nfs4_find_state() */ 464 /* Protect against nfs4_find_state() */
467 spin_lock(&owner->so_lock); 465 spin_lock(&owner->so_lock);
468 switch (mode & (FMODE_READ | FMODE_WRITE)) { 466 switch (fmode & (FMODE_READ | FMODE_WRITE)) {
469 case FMODE_READ: 467 case FMODE_READ:
470 state->n_rdonly--; 468 state->n_rdonly--;
471 break; 469 break;
@@ -500,14 +498,14 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mod
500 nfs4_do_close(path, state, wait); 498 nfs4_do_close(path, state, wait);
501} 499}
502 500
503void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) 501void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
504{ 502{
505 __nfs4_close(path, state, mode, 0); 503 __nfs4_close(path, state, fmode, 0);
506} 504}
507 505
508void nfs4_close_sync(struct path *path, struct nfs4_state *state, mode_t mode) 506void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
509{ 507{
510 __nfs4_close(path, state, mode, 1); 508 __nfs4_close(path, state, fmode, 1);
511} 509}
512 510
513/* 511/*
@@ -568,7 +566,6 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
568 * Return a compatible lock_state. If no initialized lock_state structure 566 * Return a compatible lock_state. If no initialized lock_state structure
569 * exists, return an uninitialized one. 567 * exists, return an uninitialized one.
570 * 568 *
571 * The caller must be holding clp->cl_sem
572 */ 569 */
573static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) 570static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
574{ 571{
@@ -770,32 +767,34 @@ unlock:
770 return status; 767 return status;
771} 768}
772 769
773static int reclaimer(void *); 770static int nfs4_run_state_manager(void *);
774 771
775static inline void nfs4_clear_recover_bit(struct nfs_client *clp) 772static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
776{ 773{
777 smp_mb__before_clear_bit(); 774 smp_mb__before_clear_bit();
778 clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state); 775 clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
779 smp_mb__after_clear_bit(); 776 smp_mb__after_clear_bit();
780 wake_up_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER); 777 wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING);
781 rpc_wake_up(&clp->cl_rpcwaitq); 778 rpc_wake_up(&clp->cl_rpcwaitq);
782} 779}
783 780
784/* 781/*
785 * State recovery routine 782 * Schedule the nfs_client asynchronous state management routine
786 */ 783 */
787static void nfs4_recover_state(struct nfs_client *clp) 784void nfs4_schedule_state_manager(struct nfs_client *clp)
788{ 785{
789 struct task_struct *task; 786 struct task_struct *task;
790 787
788 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
789 return;
791 __module_get(THIS_MODULE); 790 __module_get(THIS_MODULE);
792 atomic_inc(&clp->cl_count); 791 atomic_inc(&clp->cl_count);
793 task = kthread_run(reclaimer, clp, "%s-reclaim", 792 task = kthread_run(nfs4_run_state_manager, clp, "%s-manager",
794 rpc_peeraddr2str(clp->cl_rpcclient, 793 rpc_peeraddr2str(clp->cl_rpcclient,
795 RPC_DISPLAY_ADDR)); 794 RPC_DISPLAY_ADDR));
796 if (!IS_ERR(task)) 795 if (!IS_ERR(task))
797 return; 796 return;
798 nfs4_clear_recover_bit(clp); 797 nfs4_clear_state_manager_bit(clp);
799 nfs_put_client(clp); 798 nfs_put_client(clp);
800 module_put(THIS_MODULE); 799 module_put(THIS_MODULE);
801} 800}
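
Aside: nfs4_schedule_state_manager() above is the standard single-instance kthread launch — test_and_set_bit() on NFS4CLNT_MANAGER_RUNNING guarantees at most one manager thread per client, and the bit (plus the client and module references) is rolled back if kthread_run() fails. A userspace approximation with C11 atomic_flag and pthreads; all names are invented and the refcounting is elided:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_flag manager_running = ATOMIC_FLAG_INIT;

static void *state_manager(void *arg)
{
        puts("manager: recovering state...");
        atomic_flag_clear(&manager_running); /* nfs4_clear_state_manager_bit */
        return NULL;
}

static void schedule_state_manager(void)
{
        pthread_t task;

        /* test_and_set: only the first caller actually spawns the thread */
        if (atomic_flag_test_and_set(&manager_running))
                return;
        if (pthread_create(&task, NULL, state_manager, NULL) == 0) {
                pthread_detach(task);
                return;
        }
        atomic_flag_clear(&manager_running);    /* roll back on failure */
}

int main(void)
{
        schedule_state_manager();
        schedule_state_manager();  /* no-op while the first still runs */
        sleep(1);                  /* crude: let the manager finish    */
        return 0;
}
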
@@ -807,16 +806,42 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
807{ 806{
808 if (!clp) 807 if (!clp)
809 return; 808 return;
810 if (test_and_set_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) 809 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
811 nfs4_recover_state(clp); 810 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
811 nfs4_schedule_state_manager(clp);
812} 812}
813 813
814static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state) 814static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
815{
816
817 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
818 /* Don't recover state that expired before the reboot */
819 if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) {
820 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
821 return 0;
822 }
823 set_bit(NFS_OWNER_RECLAIM_REBOOT, &state->owner->so_flags);
824 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
825 return 1;
826}
827
828int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
829{
830 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
831 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
832 set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags);
833 set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
834 return 1;
835}
836
837static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
815{ 838{
816 struct inode *inode = state->inode; 839 struct inode *inode = state->inode;
840 struct nfs_inode *nfsi = NFS_I(inode);
817 struct file_lock *fl; 841 struct file_lock *fl;
818 int status = 0; 842 int status = 0;
819 843
844 down_write(&nfsi->rwsem);
820 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 845 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
821 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) 846 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
822 continue; 847 continue;
@@ -839,12 +864,14 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s
839 goto out_err; 864 goto out_err;
840 } 865 }
841 } 866 }
867 up_write(&nfsi->rwsem);
842 return 0; 868 return 0;
843out_err: 869out_err:
870 up_write(&nfsi->rwsem);
844 return status; 871 return status;
845} 872}
846 873
847static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp) 874static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
848{ 875{
849 struct nfs4_state *state; 876 struct nfs4_state *state;
850 struct nfs4_lock_state *lock; 877 struct nfs4_lock_state *lock;
@@ -858,28 +885,34 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n
858 * recovering after a network partition or a reboot from a 885 * recovering after a network partition or a reboot from a
859 * server that doesn't support a grace period. 886 * server that doesn't support a grace period.
860 */ 887 */
888restart:
889 spin_lock(&sp->so_lock);
861 list_for_each_entry(state, &sp->so_states, open_states) { 890 list_for_each_entry(state, &sp->so_states, open_states) {
891 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
892 continue;
862 if (state->state == 0) 893 if (state->state == 0)
863 continue; 894 continue;
895 atomic_inc(&state->count);
896 spin_unlock(&sp->so_lock);
864 status = ops->recover_open(sp, state); 897 status = ops->recover_open(sp, state);
865 if (status >= 0) { 898 if (status >= 0) {
866 status = nfs4_reclaim_locks(ops, state); 899 status = nfs4_reclaim_locks(state, ops);
867 if (status < 0) 900 if (status >= 0) {
868 goto out_err; 901 list_for_each_entry(lock, &state->lock_states, ls_locks) {
869 list_for_each_entry(lock, &state->lock_states, ls_locks) { 902 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
870 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) 903 printk("%s: Lock reclaim failed!\n",
871 printk("%s: Lock reclaim failed!\n",
872 __func__); 904 __func__);
905 }
906 nfs4_put_open_state(state);
907 goto restart;
873 } 908 }
874 continue;
875 } 909 }
876 switch (status) { 910 switch (status) {
877 default: 911 default:
878 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", 912 printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n",
879 __func__, status); 913 __func__, status);
880 case -ENOENT: 914 case -ENOENT:
881 case -NFS4ERR_RECLAIM_BAD: 915 case -ESTALE:
882 case -NFS4ERR_RECLAIM_CONFLICT:
883 /* 916 /*
884 * Open state on this file cannot be recovered 917 * Open state on this file cannot be recovered
885 * All we can do is revert to using the zero stateid. 918 * All we can do is revert to using the zero stateid.
@@ -889,84 +922,176 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n
889 /* Mark the file as being 'closed' */ 922 /* Mark the file as being 'closed' */
890 state->state = 0; 923 state->state = 0;
891 break; 924 break;
925 case -NFS4ERR_RECLAIM_BAD:
926 case -NFS4ERR_RECLAIM_CONFLICT:
927 nfs4_state_mark_reclaim_nograce(sp->so_client, state);
928 break;
892 case -NFS4ERR_EXPIRED: 929 case -NFS4ERR_EXPIRED:
893 case -NFS4ERR_NO_GRACE: 930 case -NFS4ERR_NO_GRACE:
931 nfs4_state_mark_reclaim_nograce(sp->so_client, state);
894 case -NFS4ERR_STALE_CLIENTID: 932 case -NFS4ERR_STALE_CLIENTID:
895 goto out_err; 933 goto out_err;
896 } 934 }
935 nfs4_put_open_state(state);
936 goto restart;
897 } 937 }
938 spin_unlock(&sp->so_lock);
898 return 0; 939 return 0;
899out_err: 940out_err:
941 nfs4_put_open_state(state);
900 return status; 942 return status;
901} 943}
902 944
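
Aside: the new nfs4_reclaim_open_state() — and nfs4_do_reclaim() further down — walk their lists with a drop-lock-and-restart idiom: pin the entry, clear its work bit, drop the lock for the blocking RPC, then restart the walk from the head. Forward progress is guaranteed because test_and_clear_bit() ensures each entry is handled once. A generic model (invented list and flags):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct entry { bool needs_work; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry entries[3] = { { true }, { true }, { true } };

static void do_work(int i)      /* stands in for the blocking RPC */
{
        printf("recovering entry %d\n", i);
}

static void reclaim_all(void)
{
restart:
        pthread_mutex_lock(&list_lock);
        for (int i = 0; i < 3; i++) {
                if (!entries[i].needs_work)
                        continue;
                entries[i].needs_work = false;    /* test_and_clear_bit() */
                pthread_mutex_unlock(&list_lock); /* can't sleep locked   */
                do_work(i);
                goto restart;                     /* list may have changed */
        }
        pthread_mutex_unlock(&list_lock);
}

int main(void)
{
        reclaim_all();
        return 0;
}
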
903static void nfs4_state_mark_reclaim(struct nfs_client *clp) 945static void nfs4_clear_open_state(struct nfs4_state *state)
946{
947 struct nfs4_lock_state *lock;
948
949 clear_bit(NFS_DELEGATED_STATE, &state->flags);
950 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
951 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
952 clear_bit(NFS_O_RDWR_STATE, &state->flags);
953 list_for_each_entry(lock, &state->lock_states, ls_locks) {
954 lock->ls_seqid.flags = 0;
955 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
956 }
957}
958
959static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
904{ 960{
905 struct nfs4_state_owner *sp; 961 struct nfs4_state_owner *sp;
906 struct rb_node *pos; 962 struct rb_node *pos;
907 struct nfs4_state *state; 963 struct nfs4_state *state;
908 struct nfs4_lock_state *lock;
909 964
910 /* Reset all sequence ids to zero */ 965 /* Reset all sequence ids to zero */
911 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 966 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
912 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 967 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
913 sp->so_seqid.counter = 0;
914 sp->so_seqid.flags = 0; 968 sp->so_seqid.flags = 0;
915 spin_lock(&sp->so_lock); 969 spin_lock(&sp->so_lock);
916 list_for_each_entry(state, &sp->so_states, open_states) { 970 list_for_each_entry(state, &sp->so_states, open_states) {
917 clear_bit(NFS_DELEGATED_STATE, &state->flags); 971 if (mark_reclaim(clp, state))
918 clear_bit(NFS_O_RDONLY_STATE, &state->flags); 972 nfs4_clear_open_state(state);
919 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
920 clear_bit(NFS_O_RDWR_STATE, &state->flags);
921 list_for_each_entry(lock, &state->lock_states, ls_locks) {
922 lock->ls_seqid.counter = 0;
923 lock->ls_seqid.flags = 0;
924 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
925 }
926 } 973 }
927 spin_unlock(&sp->so_lock); 974 spin_unlock(&sp->so_lock);
928 } 975 }
929} 976}
930 977
931static int reclaimer(void *ptr) 978static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
979{
980 /* Mark all delegations for reclaim */
981 nfs_delegation_mark_reclaim(clp);
982 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
983}
984
985static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
932{ 986{
933 struct nfs_client *clp = ptr;
934 struct nfs4_state_owner *sp; 987 struct nfs4_state_owner *sp;
935 struct rb_node *pos; 988 struct rb_node *pos;
936 struct nfs4_state_recovery_ops *ops; 989 struct nfs4_state *state;
937 struct rpc_cred *cred; 990
991 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
992 return;
993
994 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
995 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
996 spin_lock(&sp->so_lock);
997 list_for_each_entry(state, &sp->so_states, open_states) {
998 if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags))
999 continue;
1000 nfs4_state_mark_reclaim_nograce(clp, state);
1001 }
1002 spin_unlock(&sp->so_lock);
1003 }
1004
1005 nfs_delegation_reap_unclaimed(clp);
1006}
1007
1008static void nfs_delegation_clear_all(struct nfs_client *clp)
1009{
1010 nfs_delegation_mark_reclaim(clp);
1011 nfs_delegation_reap_unclaimed(clp);
1012}
1013
1014static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1015{
1016 nfs_delegation_clear_all(clp);
1017 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1018}
1019
1020static void nfs4_state_end_reclaim_nograce(struct nfs_client *clp)
1021{
1022 clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
1023}
1024
1025static void nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1026{
1027 switch (error) {
1028 case -NFS4ERR_CB_PATH_DOWN:
1029 nfs_handle_cb_pathdown(clp);
1030 break;
1031 case -NFS4ERR_STALE_CLIENTID:
1032 case -NFS4ERR_LEASE_MOVED:
1033 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1034 nfs4_state_start_reclaim_reboot(clp);
1035 break;
1036 case -NFS4ERR_EXPIRED:
1037 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1038 nfs4_state_start_reclaim_nograce(clp);
1039 }
1040}
1041
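Editor's note: nfs4_recovery_handle_error() above centralizes the translation from an NFSv4 error code into recovery work for the state manager: STALE_CLIENTID and LEASE_MOVED schedule a lease re-establishment plus a reboot-style (grace period) reclaim, while EXPIRED schedules the harsher no-grace reclaim. A hedged sketch of the same dispatch over a plain flag word (error and bit names mirror the diff; folding the reclaim scheduling into a single bit is a simplification):

#include <stdio.h>

enum { ERR_CB_PATH_DOWN = 1, ERR_STALE_CLIENTID, ERR_LEASE_MOVED, ERR_EXPIRED };
enum {
        LEASE_EXPIRED   = 1u << 0,
        RECLAIM_REBOOT  = 1u << 1,
        RECLAIM_NOGRACE = 1u << 2,
};

static void recovery_handle_error(unsigned *cl_state, int error)
{
        switch (error) {
        case ERR_CB_PATH_DOWN:
                /* the kernel recalls delegations here (nfs_handle_cb_pathdown) */
                break;
        case ERR_STALE_CLIENTID:
        case ERR_LEASE_MOVED:
                *cl_state |= LEASE_EXPIRED | RECLAIM_REBOOT;
                break;
        case ERR_EXPIRED:
                *cl_state |= LEASE_EXPIRED | RECLAIM_NOGRACE;
                break;
        }
}

int main(void)
{
        unsigned cl_state = 0;
        recovery_handle_error(&cl_state, ERR_EXPIRED);
        printf("cl_state = %#x\n", cl_state);  /* LEASE_EXPIRED|RECLAIM_NOGRACE */
        return 0;
}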
1042static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
1043{
1044 struct rb_node *pos;
938 int status = 0; 1045 int status = 0;
939 1046
940 allow_signal(SIGKILL); 1047restart:
1048 spin_lock(&clp->cl_lock);
1049 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
1050 struct nfs4_state_owner *sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
1051 if (!test_and_clear_bit(ops->owner_flag_bit, &sp->so_flags))
1052 continue;
1053 atomic_inc(&sp->so_count);
1054 spin_unlock(&clp->cl_lock);
1055 status = nfs4_reclaim_open_state(sp, ops);
1056 if (status < 0) {
1057 set_bit(ops->owner_flag_bit, &sp->so_flags);
1058 nfs4_put_state_owner(sp);
1059 nfs4_recovery_handle_error(clp, status);
1060 return status;
1061 }
1062 nfs4_put_state_owner(sp);
1063 goto restart;
1064 }
1065 spin_unlock(&clp->cl_lock);
1066 return status;
1067}
941 1068
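Editor's note: nfs4_do_reclaim() is a textbook drop-lock-and-restart scan. The owner tree must be walked under clp->cl_lock, but nfs4_reclaim_open_state() can sleep, so each hit takes a reference, drops the spinlock, does the blocking work, and then restarts from rb_first() because the tree may have changed while the lock was released. A simplified model over an array, with a mutex standing in for the spinlock (purely illustrative):

#include <pthread.h>
#include <stdio.h>

#define NOWNERS 4

static pthread_mutex_t cl_lock = PTHREAD_MUTEX_INITIALIZER;
static int flagged[NOWNERS] = { 1, 0, 1, 1 };  /* stand-in for owner_flag_bit */

static void reclaim_one(int i)
{
        printf("reclaiming owner %d\n", i);    /* may sleep: lock not held */
}

static void do_reclaim(void)
{
restart:
        pthread_mutex_lock(&cl_lock);
        for (int i = 0; i < NOWNERS; i++) {
                if (!flagged[i])
                        continue;
                flagged[i] = 0;                /* test_and_clear_bit() */
                pthread_mutex_unlock(&cl_lock);
                reclaim_one(i);
                goto restart;                  /* the set may have changed */
        }
        pthread_mutex_unlock(&cl_lock);
}

int main(void)
{
        do_reclaim();
        return 0;
}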
942 /* Ensure exclusive access to NFSv4 state */ 1069static int nfs4_check_lease(struct nfs_client *clp)
943 down_write(&clp->cl_sem); 1070{
944 /* Are there any NFS mounts out there? */ 1071 struct rpc_cred *cred;
945 if (list_empty(&clp->cl_superblocks)) 1072 int status = -NFS4ERR_EXPIRED;
946 goto out; 1073
947restart_loop: 1074 /* Is the client already known to have an expired lease? */
948 ops = &nfs4_network_partition_recovery_ops; 1075 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
949 /* Are there any open files on this volume? */ 1076 return 0;
950 cred = nfs4_get_renew_cred(clp); 1077 cred = nfs4_get_renew_cred(clp);
951 if (cred != NULL) { 1078 if (cred == NULL) {
952 /* Yes there are: try to renew the old lease */ 1079 cred = nfs4_get_setclientid_cred(clp);
953 status = nfs4_proc_renew(clp, cred); 1080 if (cred == NULL)
954 put_rpccred(cred); 1081 goto out;
955 switch (status) {
956 case 0:
957 case -NFS4ERR_CB_PATH_DOWN:
958 goto out;
959 case -NFS4ERR_STALE_CLIENTID:
960 case -NFS4ERR_LEASE_MOVED:
961 ops = &nfs4_reboot_recovery_ops;
962 }
963 } else {
964 /* "reboot" to ensure we clear all state on the server */
965 clp->cl_boot_time = CURRENT_TIME;
966 } 1082 }
967 /* We're going to have to re-establish a clientid */ 1083 status = nfs4_proc_renew(clp, cred);
968 nfs4_state_mark_reclaim(clp); 1084 put_rpccred(cred);
969 status = -ENOENT; 1085out:
1086 nfs4_recovery_handle_error(clp, status);
1087 return status;
1088}
1089
1090static int nfs4_reclaim_lease(struct nfs_client *clp)
1091{
1092 struct rpc_cred *cred;
1093 int status = -ENOENT;
1094
970 cred = nfs4_get_setclientid_cred(clp); 1095 cred = nfs4_get_setclientid_cred(clp);
971 if (cred != NULL) { 1096 if (cred != NULL) {
972 status = nfs4_init_client(clp, cred); 1097 status = nfs4_init_client(clp, cred);
@@ -974,42 +1099,90 @@ restart_loop:
974 /* Handle case where the user hasn't set up machine creds */ 1099 /* Handle case where the user hasn't set up machine creds */
975 if (status == -EACCES && cred == clp->cl_machine_cred) { 1100 if (status == -EACCES && cred == clp->cl_machine_cred) {
976 nfs4_clear_machine_cred(clp); 1101 nfs4_clear_machine_cred(clp);
977 goto restart_loop; 1102 status = -EAGAIN;
978 } 1103 }
979 } 1104 }
980 if (status) 1105 return status;
981 goto out_error; 1106}
982 /* Mark all delegations for reclaim */ 1107
983 nfs_delegation_mark_reclaim(clp); 1108static void nfs4_state_manager(struct nfs_client *clp)
984 /* Note: list is protected by exclusive lock on cl->cl_sem */ 1109{
985 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1110 int status = 0;
986 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1111
987 status = nfs4_reclaim_open_state(ops, sp); 1112 /* Ensure exclusive access to NFSv4 state */
988 if (status < 0) { 1113 for(;;) {
989 if (status == -NFS4ERR_NO_GRACE) { 1114 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
990 ops = &nfs4_network_partition_recovery_ops; 1115 /* We're going to have to re-establish a clientid */
991 status = nfs4_reclaim_open_state(ops, sp); 1116 status = nfs4_reclaim_lease(clp);
1117 if (status) {
1118 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1119 if (status == -EAGAIN)
1120 continue;
1121 goto out_error;
992 } 1122 }
1123 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1124 }
1125
1126 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
1127 status = nfs4_check_lease(clp);
1128 if (status != 0)
1129 continue;
1130 }
1131
1132 /* First recover reboot state... */
1133 if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
1134 status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops);
993 if (status == -NFS4ERR_STALE_CLIENTID) 1135 if (status == -NFS4ERR_STALE_CLIENTID)
994 goto restart_loop; 1136 continue;
995 if (status == -NFS4ERR_EXPIRED) 1137 nfs4_state_end_reclaim_reboot(clp);
996 goto restart_loop; 1138 continue;
1139 }
1140
1141 /* Now recover expired state... */
1142 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
1143 status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops);
1144 if (status < 0) {
1145 set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
1146 if (status == -NFS4ERR_STALE_CLIENTID)
1147 continue;
1148 if (status == -NFS4ERR_EXPIRED)
1149 continue;
1150 goto out_error;
1151 } else
1152 nfs4_state_end_reclaim_nograce(clp);
1153 continue;
997 } 1154 }
1155
1156 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
1157 nfs_client_return_marked_delegations(clp);
1158 continue;
1159 }
1160
1161 nfs4_clear_state_manager_bit(clp);
1162 /* Did we race with an attempt to give us more work? */
1163 if (clp->cl_state == 0)
1164 break;
1165 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
1166 break;
998 } 1167 }
999 nfs_delegation_reap_unclaimed(clp); 1168 return;
1000out: 1169out_error:
1001 up_write(&clp->cl_sem); 1170 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
1002 if (status == -NFS4ERR_CB_PATH_DOWN) 1171 " with error %d\n", clp->cl_hostname, -status);
1003 nfs_handle_cb_pathdown(clp); 1172 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1004 nfs4_clear_recover_bit(clp); 1173 nfs4_state_end_reclaim_reboot(clp);
1174 nfs4_clear_state_manager_bit(clp);
1175}
1176
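Editor's note: nfs4_state_manager() replaces the old one-shot reclaimer with a long-lived work loop. Each pass consumes at most one class of work, in a fixed priority order (re-establish the lease, then reboot reclaim, then no-grace reclaim, then delegation returns), and jumps back to the top after every step so a higher-priority event can preempt lower-priority work. The exit path clears the manager bit and then re-tests cl_state, closing the race with anyone queuing new work. A compact model of that ordering (flag names follow the diff; the lease/check-lease interplay is simplified):

#include <stdio.h>

enum {
        MANAGER_RUNNING = 1u << 0,
        LEASE_EXPIRED   = 1u << 1,
        RECLAIM_REBOOT  = 1u << 2,
        RECLAIM_NOGRACE = 1u << 3,
        DELEGRETURN     = 1u << 4,
};

static int test_and_clear(unsigned *word, unsigned bit)
{
        int was_set = (*word & bit) != 0;
        *word &= ~bit;
        return was_set;
}

static void state_manager(unsigned *cl_state)
{
        for (;;) {
                if (test_and_clear(cl_state, LEASE_EXPIRED)) {
                        puts("re-establishing clientid");
                        continue;              /* re-evaluate from the top */
                }
                if (test_and_clear(cl_state, RECLAIM_REBOOT)) {
                        puts("reboot (grace period) reclaim");
                        continue;
                }
                if (test_and_clear(cl_state, RECLAIM_NOGRACE)) {
                        puts("no-grace reclaim");
                        continue;
                }
                if (test_and_clear(cl_state, DELEGRETURN)) {
                        puts("returning marked delegations");
                        continue;
                }
                *cl_state &= ~MANAGER_RUNNING;
                if (*cl_state == 0)            /* did new work race in? */
                        break;
                *cl_state |= MANAGER_RUNNING;  /* yes: keep running */
        }
}

int main(void)
{
        unsigned cl_state = MANAGER_RUNNING | LEASE_EXPIRED |
                            RECLAIM_REBOOT | DELEGRETURN;
        state_manager(&cl_state);
        return 0;
}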
1177static int nfs4_run_state_manager(void *ptr)
1178{
1179 struct nfs_client *clp = ptr;
1180
1181 allow_signal(SIGKILL);
1182 nfs4_state_manager(clp);
1005 nfs_put_client(clp); 1183 nfs_put_client(clp);
1006 module_put_and_exit(0); 1184 module_put_and_exit(0);
1007 return 0; 1185 return 0;
1008out_error:
1009 printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %s"
1010 " with error %d\n", clp->cl_hostname, -status);
1011 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1012 goto out;
1013} 1186}
1014 1187
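Editor's note: nfs4_run_state_manager() is then only the kthread shim: allow SIGKILL so the manager can be killed, run the loop, and drop the client reference and module pin on the way out, so the thread can reap the client if it held the last reference. A rough userspace analogue with a worker thread that releases its own refcount when it finishes (illustrative only; the kernel's module_put_and_exit() has no direct pthread equivalent):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct client { atomic_int refs; };

static void put_client(struct client *clp)
{
        if (atomic_fetch_sub(&clp->refs, 1) == 1) {
                puts("last reference dropped: freeing client");
                free(clp);
        }
}

static void *run_state_manager(void *ptr)
{
        struct client *clp = ptr;
        puts("state manager running");
        put_client(clp);                 /* the thread owns one reference */
        return NULL;
}

int main(void)
{
        struct client *clp = malloc(sizeof(*clp));
        atomic_init(&clp->refs, 2);      /* one for main, one for the thread */
        pthread_t t;
        pthread_create(&t, NULL, run_state_manager, clp);
        pthread_join(t, NULL);
        put_client(clp);
        return 0;
}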
1015/* 1188/*
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b916297d2334..d1e4c8f8a0a9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -8,7 +8,7 @@
8 * 8 *
9 * Kendrick Smith <kmsmith@umich.edu> 9 * Kendrick Smith <kmsmith@umich.edu>
10 * Andy Adamson <andros@umich.edu> 10 * Andy Adamson <andros@umich.edu>
11 * 11 *
12 * Redistribution and use in source and binary forms, with or without 12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions 13 * modification, are permitted provided that the following conditions
14 * are met: 14 * are met:
@@ -67,7 +67,7 @@ static int nfs4_stat_to_errno(int);
67#define NFS4_MAXTAGLEN 0 67#define NFS4_MAXTAGLEN 0
68#endif 68#endif
69 69
70/* lock,open owner id: 70/* lock,open owner id:
71 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) 71 * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
72 */ 72 */
73#define open_owner_id_maxsz (1 + 4) 73#define open_owner_id_maxsz (1 + 4)
@@ -541,6 +541,7 @@ static struct {
541struct compound_hdr { 541struct compound_hdr {
542 int32_t status; 542 int32_t status;
543 uint32_t nops; 543 uint32_t nops;
544 __be32 * nops_p;
544 uint32_t taglen; 545 uint32_t taglen;
545 char * tag; 546 char * tag;
546}; 547};
@@ -578,7 +579,7 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *
578 xdr_encode_opaque(p, str, len); 579 xdr_encode_opaque(p, str, len);
579} 580}
580 581
581static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) 582static void encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
582{ 583{
583 __be32 *p; 584 __be32 *p;
584 585
@@ -588,8 +589,13 @@ static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
588 WRITE32(hdr->taglen); 589 WRITE32(hdr->taglen);
589 WRITEMEM(hdr->tag, hdr->taglen); 590 WRITEMEM(hdr->tag, hdr->taglen);
590 WRITE32(NFS4_MINOR_VERSION); 591 WRITE32(NFS4_MINOR_VERSION);
592 hdr->nops_p = p;
591 WRITE32(hdr->nops); 593 WRITE32(hdr->nops);
592 return 0; 594}
595
596static void encode_nops(struct compound_hdr *hdr)
597{
598 *hdr->nops_p = htonl(hdr->nops);
593} 599}
594 600
595static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) 601static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf)
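Editor's note: the compound_hdr change above is the pivot of the whole nfs4xdr.c rewrite: encode_compound_hdr() no longer trusts a hand-counted hdr->nops; it remembers the buffer slot of the operation-count word in hdr->nops_p, every encode_*() helper bumps hdr->nops as it emits its op, and encode_nops() backfills the true count just before the request goes out. A self-contained sketch of the reserve-then-backfill technique over a flat buffer (modeled on the diff, not copied from it; 22 and 9 are the RFC 3530 numbers for PUTFH and GETATTR):

#include <arpa/inet.h>                   /* htonl, ntohl */
#include <stdint.h>
#include <stdio.h>

struct compound_hdr {
        uint32_t  nops;
        uint32_t *nops_p;                /* where the count word lives */
};

static uint32_t xdr_buf[64];
static unsigned xdr_pos;

static void write32(uint32_t v) { xdr_buf[xdr_pos++] = htonl(v); }

static void encode_compound_hdr(struct compound_hdr *hdr)
{
        hdr->nops_p = &xdr_buf[xdr_pos]; /* remember the slot... */
        write32(hdr->nops);              /* ...and emit a placeholder */
}

static void encode_putfh(struct compound_hdr *hdr)    { write32(22); hdr->nops++; }
static void encode_getfattr(struct compound_hdr *hdr) { write32(9);  hdr->nops++; }

static void encode_nops(struct compound_hdr *hdr)
{
        *hdr->nops_p = htonl(hdr->nops); /* backfill the real count */
}

int main(void)
{
        struct compound_hdr hdr = { .nops = 0 };
        encode_compound_hdr(&hdr);
        encode_putfh(&hdr);
        encode_getfattr(&hdr);
        encode_nops(&hdr);
        printf("nops on the wire: %u\n", ntohl(xdr_buf[0]));  /* 2 */
        return 0;
}

This removes the class of bugs where a hardcoded ".nops = N" in an nfs4_xdr_enc_*() routine silently drifted out of sync with the operations actually encoded.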
@@ -601,7 +607,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
601 xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); 607 xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE);
602} 608}
603 609
604static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) 610static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server)
605{ 611{
606 char owner_name[IDMAP_NAMESZ]; 612 char owner_name[IDMAP_NAMESZ];
607 char owner_group[IDMAP_NAMESZ]; 613 char owner_group[IDMAP_NAMESZ];
@@ -612,7 +618,6 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
612 int len; 618 int len;
613 uint32_t bmval0 = 0; 619 uint32_t bmval0 = 0;
614 uint32_t bmval1 = 0; 620 uint32_t bmval1 = 0;
615 int status;
616 621
617 /* 622 /*
618 * We reserve enough space to write the entire attribute buffer at once. 623 * We reserve enough space to write the entire attribute buffer at once.
@@ -709,7 +714,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
709 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 714 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
710 WRITE32(NFS4_SET_TO_SERVER_TIME); 715 WRITE32(NFS4_SET_TO_SERVER_TIME);
711 } 716 }
712 717
713 /* 718 /*
714 * Now we backfill the bitmap and the attribute buffer length. 719 * Now we backfill the bitmap and the attribute buffer length.
715 */ 720 */
@@ -723,23 +728,20 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
723 *q++ = htonl(bmval1); 728 *q++ = htonl(bmval1);
724 *q++ = htonl(len); 729 *q++ = htonl(len);
725 730
726 status = 0;
727/* out: */ 731/* out: */
728 return status;
729} 732}
730 733
731static int encode_access(struct xdr_stream *xdr, u32 access) 734static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr)
732{ 735{
733 __be32 *p; 736 __be32 *p;
734 737
735 RESERVE_SPACE(8); 738 RESERVE_SPACE(8);
736 WRITE32(OP_ACCESS); 739 WRITE32(OP_ACCESS);
737 WRITE32(access); 740 WRITE32(access);
738 741 hdr->nops++;
739 return 0;
740} 742}
741 743
742static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg) 744static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
743{ 745{
744 __be32 *p; 746 __be32 *p;
745 747
@@ -747,26 +749,24 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
747 WRITE32(OP_CLOSE); 749 WRITE32(OP_CLOSE);
748 WRITE32(arg->seqid->sequence->counter); 750 WRITE32(arg->seqid->sequence->counter);
749 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 751 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
750 752 hdr->nops++;
751 return 0;
752} 753}
753 754
754static int encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args) 755static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
755{ 756{
756 __be32 *p; 757 __be32 *p;
757
758 RESERVE_SPACE(16);
759 WRITE32(OP_COMMIT);
760 WRITE64(args->offset);
761 WRITE32(args->count);
762 758
763 return 0; 759 RESERVE_SPACE(16);
760 WRITE32(OP_COMMIT);
761 WRITE64(args->offset);
762 WRITE32(args->count);
763 hdr->nops++;
764} 764}
765 765
766static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create) 766static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr)
767{ 767{
768 __be32 *p; 768 __be32 *p;
769 769
770 RESERVE_SPACE(8); 770 RESERVE_SPACE(8);
771 WRITE32(OP_CREATE); 771 WRITE32(OP_CREATE);
772 WRITE32(create->ftype); 772 WRITE32(create->ftype);
@@ -791,64 +791,62 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c
791 RESERVE_SPACE(4 + create->name->len); 791 RESERVE_SPACE(4 + create->name->len);
792 WRITE32(create->name->len); 792 WRITE32(create->name->len);
793 WRITEMEM(create->name->name, create->name->len); 793 WRITEMEM(create->name->name, create->name->len);
794 hdr->nops++;
794 795
795 return encode_attrs(xdr, create->attrs, create->server); 796 encode_attrs(xdr, create->attrs, create->server);
796} 797}
797 798
798static int encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap) 799static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr)
799{ 800{
800 __be32 *p; 801 __be32 *p;
801 802
802 RESERVE_SPACE(12); 803 RESERVE_SPACE(12);
803 WRITE32(OP_GETATTR); 804 WRITE32(OP_GETATTR);
804 WRITE32(1); 805 WRITE32(1);
805 WRITE32(bitmap); 806 WRITE32(bitmap);
806 return 0; 807 hdr->nops++;
807} 808}
808 809
809static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1) 810static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr)
810{ 811{
811 __be32 *p; 812 __be32 *p;
812 813
813 RESERVE_SPACE(16); 814 RESERVE_SPACE(16);
814 WRITE32(OP_GETATTR); 815 WRITE32(OP_GETATTR);
815 WRITE32(2); 816 WRITE32(2);
816 WRITE32(bm0); 817 WRITE32(bm0);
817 WRITE32(bm1); 818 WRITE32(bm1);
818 return 0; 819 hdr->nops++;
819} 820}
820 821
821static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) 822static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
822{ 823{
823 return encode_getattr_two(xdr, 824 encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
824 bitmask[0] & nfs4_fattr_bitmap[0], 825 bitmask[1] & nfs4_fattr_bitmap[1], hdr);
825 bitmask[1] & nfs4_fattr_bitmap[1]);
826} 826}
827 827
828static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) 828static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
829{ 829{
830 return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], 830 encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0],
831 bitmask[1] & nfs4_fsinfo_bitmap[1]); 831 bitmask[1] & nfs4_fsinfo_bitmap[1], hdr);
832} 832}
833 833
834static int encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask) 834static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
835{ 835{
836 return encode_getattr_two(xdr, 836 encode_getattr_two(xdr, bitmask[0] & nfs4_fs_locations_bitmap[0],
837 bitmask[0] & nfs4_fs_locations_bitmap[0], 837 bitmask[1] & nfs4_fs_locations_bitmap[1], hdr);
838 bitmask[1] & nfs4_fs_locations_bitmap[1]);
839} 838}
840 839
841static int encode_getfh(struct xdr_stream *xdr) 840static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
842{ 841{
843 __be32 *p; 842 __be32 *p;
844 843
845 RESERVE_SPACE(4); 844 RESERVE_SPACE(4);
846 WRITE32(OP_GETFH); 845 WRITE32(OP_GETFH);
847 846 hdr->nops++;
848 return 0;
849} 847}
850 848
851static int encode_link(struct xdr_stream *xdr, const struct qstr *name) 849static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
852{ 850{
853 __be32 *p; 851 __be32 *p;
854 852
@@ -856,8 +854,7 @@ static int encode_link(struct xdr_stream *xdr, const struct qstr *name)
856 WRITE32(OP_LINK); 854 WRITE32(OP_LINK);
857 WRITE32(name->len); 855 WRITE32(name->len);
858 WRITEMEM(name->name, name->len); 856 WRITEMEM(name->name, name->len);
859 857 hdr->nops++;
860 return 0;
861} 858}
862 859
863static inline int nfs4_lock_type(struct file_lock *fl, int block) 860static inline int nfs4_lock_type(struct file_lock *fl, int block)
@@ -878,7 +875,7 @@ static inline uint64_t nfs4_lock_length(struct file_lock *fl)
878 * opcode,type,reclaim,offset,length,new_lock_owner = 32 875 * opcode,type,reclaim,offset,length,new_lock_owner = 32
879 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 876 * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
880 */ 877 */
881static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args) 878static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args, struct compound_hdr *hdr)
882{ 879{
883 __be32 *p; 880 __be32 *p;
884 881
@@ -904,11 +901,10 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
904 WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); 901 WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE);
905 WRITE32(args->lock_seqid->sequence->counter); 902 WRITE32(args->lock_seqid->sequence->counter);
906 } 903 }
907 904 hdr->nops++;
908 return 0;
909} 905}
910 906
911static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args) 907static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr)
912{ 908{
913 __be32 *p; 909 __be32 *p;
914 910
@@ -921,11 +917,10 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
921 WRITE32(16); 917 WRITE32(16);
922 WRITEMEM("lock id:", 8); 918 WRITEMEM("lock id:", 8);
923 WRITE64(args->lock_owner.id); 919 WRITE64(args->lock_owner.id);
924 920 hdr->nops++;
925 return 0;
926} 921}
927 922
928static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args) 923static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr)
929{ 924{
930 __be32 *p; 925 __be32 *p;
931 926
@@ -936,11 +931,10 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *arg
936 WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); 931 WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE);
937 WRITE64(args->fl->fl_start); 932 WRITE64(args->fl->fl_start);
938 WRITE64(nfs4_lock_length(args->fl)); 933 WRITE64(nfs4_lock_length(args->fl));
939 934 hdr->nops++;
940 return 0;
941} 935}
942 936
943static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name) 937static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
944{ 938{
945 int len = name->len; 939 int len = name->len;
946 __be32 *p; 940 __be32 *p;
@@ -949,27 +943,26 @@ static int encode_lookup(struct xdr_stream *xdr, const struct qstr *name)
949 WRITE32(OP_LOOKUP); 943 WRITE32(OP_LOOKUP);
950 WRITE32(len); 944 WRITE32(len);
951 WRITEMEM(name->name, len); 945 WRITEMEM(name->name, len);
952 946 hdr->nops++;
953 return 0;
954} 947}
955 948
956static void encode_share_access(struct xdr_stream *xdr, int open_flags) 949static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
957{ 950{
958 __be32 *p; 951 __be32 *p;
959 952
960 RESERVE_SPACE(8); 953 RESERVE_SPACE(8);
961 switch (open_flags & (FMODE_READ|FMODE_WRITE)) { 954 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
962 case FMODE_READ: 955 case FMODE_READ:
963 WRITE32(NFS4_SHARE_ACCESS_READ); 956 WRITE32(NFS4_SHARE_ACCESS_READ);
964 break; 957 break;
965 case FMODE_WRITE: 958 case FMODE_WRITE:
966 WRITE32(NFS4_SHARE_ACCESS_WRITE); 959 WRITE32(NFS4_SHARE_ACCESS_WRITE);
967 break; 960 break;
968 case FMODE_READ|FMODE_WRITE: 961 case FMODE_READ|FMODE_WRITE:
969 WRITE32(NFS4_SHARE_ACCESS_BOTH); 962 WRITE32(NFS4_SHARE_ACCESS_BOTH);
970 break; 963 break;
971 default: 964 default:
972 BUG(); 965 WRITE32(0);
973 } 966 }
974 WRITE32(0); /* for linux, share_deny = 0 always */ 967 WRITE32(0); /* for linux, share_deny = 0 always */
975} 968}
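Editor's note: the encode_share_access() hunk makes two quiet changes: the parameter becomes the dedicated fmode_t type instead of raw open flags, and the BUG() on an empty mode is replaced by encoding a zero share_access word, so a confused caller now produces one malformed request instead of an oops. A small model of the mapping (the share_access constants are the RFC 3530 OPEN4 values; everything else is simplified):

#include <stdio.h>

#define FMODE_READ  0x1
#define FMODE_WRITE 0x2

#define SHARE_ACCESS_READ  1             /* RFC 3530 OPEN4 share_access */
#define SHARE_ACCESS_WRITE 2
#define SHARE_ACCESS_BOTH  3

static unsigned share_access(unsigned fmode)
{
        switch (fmode & (FMODE_READ | FMODE_WRITE)) {
        case FMODE_READ:                return SHARE_ACCESS_READ;
        case FMODE_WRITE:               return SHARE_ACCESS_WRITE;
        case FMODE_READ | FMODE_WRITE:  return SHARE_ACCESS_BOTH;
        default:                        return 0;   /* was BUG() */
        }
}

int main(void)
{
        printf("%u %u %u\n", share_access(FMODE_READ),
               share_access(FMODE_READ | FMODE_WRITE), share_access(0));
        return 0;
}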
@@ -984,7 +977,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
984 RESERVE_SPACE(8); 977 RESERVE_SPACE(8);
985 WRITE32(OP_OPEN); 978 WRITE32(OP_OPEN);
986 WRITE32(arg->seqid->sequence->counter); 979 WRITE32(arg->seqid->sequence->counter);
987 encode_share_access(xdr, arg->open_flags); 980 encode_share_access(xdr, arg->fmode);
988 RESERVE_SPACE(28); 981 RESERVE_SPACE(28);
989 WRITE64(arg->clientid); 982 WRITE64(arg->clientid);
990 WRITE32(16); 983 WRITE32(16);
@@ -998,13 +991,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
998 991
999 RESERVE_SPACE(4); 992 RESERVE_SPACE(4);
1000 switch(arg->open_flags & O_EXCL) { 993 switch(arg->open_flags & O_EXCL) {
1001 case 0: 994 case 0:
1002 WRITE32(NFS4_CREATE_UNCHECKED); 995 WRITE32(NFS4_CREATE_UNCHECKED);
1003 encode_attrs(xdr, arg->u.attrs, arg->server); 996 encode_attrs(xdr, arg->u.attrs, arg->server);
1004 break; 997 break;
1005 default: 998 default:
1006 WRITE32(NFS4_CREATE_EXCLUSIVE); 999 WRITE32(NFS4_CREATE_EXCLUSIVE);
1007 encode_nfs4_verifier(xdr, &arg->u.verifier); 1000 encode_nfs4_verifier(xdr, &arg->u.verifier);
1008 } 1001 }
1009} 1002}
1010 1003
@@ -1014,33 +1007,33 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
1014 1007
1015 RESERVE_SPACE(4); 1008 RESERVE_SPACE(4);
1016 switch (arg->open_flags & O_CREAT) { 1009 switch (arg->open_flags & O_CREAT) {
1017 case 0: 1010 case 0:
1018 WRITE32(NFS4_OPEN_NOCREATE); 1011 WRITE32(NFS4_OPEN_NOCREATE);
1019 break; 1012 break;
1020 default: 1013 default:
1021 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); 1014 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
1022 WRITE32(NFS4_OPEN_CREATE); 1015 WRITE32(NFS4_OPEN_CREATE);
1023 encode_createmode(xdr, arg); 1016 encode_createmode(xdr, arg);
1024 } 1017 }
1025} 1018}
1026 1019
1027static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type) 1020static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delegation_type)
1028{ 1021{
1029 __be32 *p; 1022 __be32 *p;
1030 1023
1031 RESERVE_SPACE(4); 1024 RESERVE_SPACE(4);
1032 switch (delegation_type) { 1025 switch (delegation_type) {
1033 case 0: 1026 case 0:
1034 WRITE32(NFS4_OPEN_DELEGATE_NONE); 1027 WRITE32(NFS4_OPEN_DELEGATE_NONE);
1035 break; 1028 break;
1036 case FMODE_READ: 1029 case FMODE_READ:
1037 WRITE32(NFS4_OPEN_DELEGATE_READ); 1030 WRITE32(NFS4_OPEN_DELEGATE_READ);
1038 break; 1031 break;
1039 case FMODE_WRITE|FMODE_READ: 1032 case FMODE_WRITE|FMODE_READ:
1040 WRITE32(NFS4_OPEN_DELEGATE_WRITE); 1033 WRITE32(NFS4_OPEN_DELEGATE_WRITE);
1041 break; 1034 break;
1042 default: 1035 default:
1043 BUG(); 1036 BUG();
1044 } 1037 }
1045} 1038}
1046 1039
@@ -1053,7 +1046,7 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
1053 encode_string(xdr, name->len, name->name); 1046 encode_string(xdr, name->len, name->name);
1054} 1047}
1055 1048
1056static inline void encode_claim_previous(struct xdr_stream *xdr, int type) 1049static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
1057{ 1050{
1058 __be32 *p; 1051 __be32 *p;
1059 1052
@@ -1072,27 +1065,27 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
1072 encode_string(xdr, name->len, name->name); 1065 encode_string(xdr, name->len, name->name);
1073} 1066}
1074 1067
1075static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1068static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr)
1076{ 1069{
1077 encode_openhdr(xdr, arg); 1070 encode_openhdr(xdr, arg);
1078 encode_opentype(xdr, arg); 1071 encode_opentype(xdr, arg);
1079 switch (arg->claim) { 1072 switch (arg->claim) {
1080 case NFS4_OPEN_CLAIM_NULL: 1073 case NFS4_OPEN_CLAIM_NULL:
1081 encode_claim_null(xdr, arg->name); 1074 encode_claim_null(xdr, arg->name);
1082 break; 1075 break;
1083 case NFS4_OPEN_CLAIM_PREVIOUS: 1076 case NFS4_OPEN_CLAIM_PREVIOUS:
1084 encode_claim_previous(xdr, arg->u.delegation_type); 1077 encode_claim_previous(xdr, arg->u.delegation_type);
1085 break; 1078 break;
1086 case NFS4_OPEN_CLAIM_DELEGATE_CUR: 1079 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
1087 encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); 1080 encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation);
1088 break; 1081 break;
1089 default: 1082 default:
1090 BUG(); 1083 BUG();
1091 } 1084 }
1092 return 0; 1085 hdr->nops++;
1093} 1086}
1094 1087
1095static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg) 1088static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr)
1096{ 1089{
1097 __be32 *p; 1090 __be32 *p;
1098 1091
@@ -1100,11 +1093,10 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
1100 WRITE32(OP_OPEN_CONFIRM); 1093 WRITE32(OP_OPEN_CONFIRM);
1101 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1094 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
1102 WRITE32(arg->seqid->sequence->counter); 1095 WRITE32(arg->seqid->sequence->counter);
1103 1096 hdr->nops++;
1104 return 0;
1105} 1097}
1106 1098
1107static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg) 1099static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
1108{ 1100{
1109 __be32 *p; 1101 __be32 *p;
1110 1102
@@ -1112,12 +1104,12 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
1112 WRITE32(OP_OPEN_DOWNGRADE); 1104 WRITE32(OP_OPEN_DOWNGRADE);
1113 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1105 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
1114 WRITE32(arg->seqid->sequence->counter); 1106 WRITE32(arg->seqid->sequence->counter);
1115 encode_share_access(xdr, arg->open_flags); 1107 encode_share_access(xdr, arg->fmode);
1116 return 0; 1108 hdr->nops++;
1117} 1109}
1118 1110
1119static int 1111static void
1120encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh) 1112encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr)
1121{ 1113{
1122 int len = fh->size; 1114 int len = fh->size;
1123 __be32 *p; 1115 __be32 *p;
@@ -1126,18 +1118,16 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh)
1126 WRITE32(OP_PUTFH); 1118 WRITE32(OP_PUTFH);
1127 WRITE32(len); 1119 WRITE32(len);
1128 WRITEMEM(fh->data, len); 1120 WRITEMEM(fh->data, len);
1129 1121 hdr->nops++;
1130 return 0;
1131} 1122}
1132 1123
1133static int encode_putrootfh(struct xdr_stream *xdr) 1124static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1134{ 1125{
1135 __be32 *p; 1126 __be32 *p;
1136
1137 RESERVE_SPACE(4);
1138 WRITE32(OP_PUTROOTFH);
1139 1127
1140 return 0; 1128 RESERVE_SPACE(4);
1129 WRITE32(OP_PUTROOTFH);
1130 hdr->nops++;
1141} 1131}
1142 1132
1143static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) 1133static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx)
@@ -1153,7 +1143,7 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1153 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1143 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE);
1154} 1144}
1155 1145
1156static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) 1146static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
1157{ 1147{
1158 __be32 *p; 1148 __be32 *p;
1159 1149
@@ -1165,11 +1155,10 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
1165 RESERVE_SPACE(12); 1155 RESERVE_SPACE(12);
1166 WRITE64(args->offset); 1156 WRITE64(args->offset);
1167 WRITE32(args->count); 1157 WRITE32(args->count);
1168 1158 hdr->nops++;
1169 return 0;
1170} 1159}
1171 1160
1172static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) 1161static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
1173{ 1162{
1174 uint32_t attrs[2] = { 1163 uint32_t attrs[2] = {
1175 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, 1164 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
@@ -1191,6 +1180,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1191 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 1180 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1192 WRITE32(attrs[0] & readdir->bitmask[0]); 1181 WRITE32(attrs[0] & readdir->bitmask[0]);
1193 WRITE32(attrs[1] & readdir->bitmask[1]); 1182 WRITE32(attrs[1] & readdir->bitmask[1]);
1183 hdr->nops++;
1194 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", 1184 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
1195 __func__, 1185 __func__,
1196 (unsigned long long)readdir->cookie, 1186 (unsigned long long)readdir->cookie,
@@ -1198,21 +1188,18 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1198 ((u32 *)readdir->verifier.data)[1], 1188 ((u32 *)readdir->verifier.data)[1],
1199 attrs[0] & readdir->bitmask[0], 1189 attrs[0] & readdir->bitmask[0],
1200 attrs[1] & readdir->bitmask[1]); 1190 attrs[1] & readdir->bitmask[1]);
1201
1202 return 0;
1203} 1191}
1204 1192
1205static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req) 1193static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr)
1206{ 1194{
1207 __be32 *p; 1195 __be32 *p;
1208 1196
1209 RESERVE_SPACE(4); 1197 RESERVE_SPACE(4);
1210 WRITE32(OP_READLINK); 1198 WRITE32(OP_READLINK);
1211 1199 hdr->nops++;
1212 return 0;
1213} 1200}
1214 1201
1215static int encode_remove(struct xdr_stream *xdr, const struct qstr *name) 1202static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1216{ 1203{
1217 __be32 *p; 1204 __be32 *p;
1218 1205
@@ -1220,11 +1207,10 @@ static int encode_remove(struct xdr_stream *xdr, const struct qstr *name)
1220 WRITE32(OP_REMOVE); 1207 WRITE32(OP_REMOVE);
1221 WRITE32(name->len); 1208 WRITE32(name->len);
1222 WRITEMEM(name->name, name->len); 1209 WRITEMEM(name->name, name->len);
1223 1210 hdr->nops++;
1224 return 0;
1225} 1211}
1226 1212
1227static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname) 1213static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr)
1228{ 1214{
1229 __be32 *p; 1215 __be32 *p;
1230 1216
@@ -1232,38 +1218,35 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con
1232 WRITE32(OP_RENAME); 1218 WRITE32(OP_RENAME);
1233 WRITE32(oldname->len); 1219 WRITE32(oldname->len);
1234 WRITEMEM(oldname->name, oldname->len); 1220 WRITEMEM(oldname->name, oldname->len);
1235 1221
1236 RESERVE_SPACE(4 + newname->len); 1222 RESERVE_SPACE(4 + newname->len);
1237 WRITE32(newname->len); 1223 WRITE32(newname->len);
1238 WRITEMEM(newname->name, newname->len); 1224 WRITEMEM(newname->name, newname->len);
1239 1225 hdr->nops++;
1240 return 0;
1241} 1226}
1242 1227
1243static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid) 1228static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr)
1244{ 1229{
1245 __be32 *p; 1230 __be32 *p;
1246 1231
1247 RESERVE_SPACE(12); 1232 RESERVE_SPACE(12);
1248 WRITE32(OP_RENEW); 1233 WRITE32(OP_RENEW);
1249 WRITE64(client_stateid->cl_clientid); 1234 WRITE64(client_stateid->cl_clientid);
1250 1235 hdr->nops++;
1251 return 0;
1252} 1236}
1253 1237
1254static int 1238static void
1255encode_restorefh(struct xdr_stream *xdr) 1239encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1256{ 1240{
1257 __be32 *p; 1241 __be32 *p;
1258 1242
1259 RESERVE_SPACE(4); 1243 RESERVE_SPACE(4);
1260 WRITE32(OP_RESTOREFH); 1244 WRITE32(OP_RESTOREFH);
1261 1245 hdr->nops++;
1262 return 0;
1263} 1246}
1264 1247
1265static int 1248static int
1266encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) 1249encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compound_hdr *hdr)
1267{ 1250{
1268 __be32 *p; 1251 __be32 *p;
1269 1252
@@ -1278,36 +1261,32 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
1278 RESERVE_SPACE(4); 1261 RESERVE_SPACE(4);
1279 WRITE32(arg->acl_len); 1262 WRITE32(arg->acl_len);
1280 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1263 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1264 hdr->nops++;
1281 return 0; 1265 return 0;
1282} 1266}
1283 1267
1284static int 1268static void
1285encode_savefh(struct xdr_stream *xdr) 1269encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1286{ 1270{
1287 __be32 *p; 1271 __be32 *p;
1288 1272
1289 RESERVE_SPACE(4); 1273 RESERVE_SPACE(4);
1290 WRITE32(OP_SAVEFH); 1274 WRITE32(OP_SAVEFH);
1291 1275 hdr->nops++;
1292 return 0;
1293} 1276}
1294 1277
1295static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server) 1278static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr)
1296{ 1279{
1297 int status;
1298 __be32 *p; 1280 __be32 *p;
1299
1300 RESERVE_SPACE(4+NFS4_STATEID_SIZE);
1301 WRITE32(OP_SETATTR);
1302 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE);
1303 1281
1304 if ((status = encode_attrs(xdr, arg->iap, server))) 1282 RESERVE_SPACE(4+NFS4_STATEID_SIZE);
1305 return status; 1283 WRITE32(OP_SETATTR);
1306 1284 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE);
1307 return 0; 1285 hdr->nops++;
1286 encode_attrs(xdr, arg->iap, server);
1308} 1287}
1309 1288
1310static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid) 1289static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
1311{ 1290{
1312 __be32 *p; 1291 __be32 *p;
1313 1292
@@ -1322,23 +1301,21 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
1322 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); 1301 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
1323 RESERVE_SPACE(4); 1302 RESERVE_SPACE(4);
1324 WRITE32(setclientid->sc_cb_ident); 1303 WRITE32(setclientid->sc_cb_ident);
1325 1304 hdr->nops++;
1326 return 0;
1327} 1305}
1328 1306
1329static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state) 1307static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr)
1330{ 1308{
1331 __be32 *p; 1309 __be32 *p;
1332
1333 RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE);
1334 WRITE32(OP_SETCLIENTID_CONFIRM);
1335 WRITE64(client_state->cl_clientid);
1336 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1337 1310
1338 return 0; 1311 RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE);
1312 WRITE32(OP_SETCLIENTID_CONFIRM);
1313 WRITE64(client_state->cl_clientid);
1314 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1315 hdr->nops++;
1339} 1316}
1340 1317
1341static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args) 1318static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
1342{ 1319{
1343 __be32 *p; 1320 __be32 *p;
1344 1321
@@ -1353,11 +1330,10 @@ static int encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args
1353 WRITE32(args->count); 1330 WRITE32(args->count);
1354 1331
1355 xdr_write_pages(xdr, args->pages, args->pgbase, args->count); 1332 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1356 1333 hdr->nops++;
1357 return 0;
1358} 1334}
1359 1335
1360static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid) 1336static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr)
1361{ 1337{
1362 __be32 *p; 1338 __be32 *p;
1363 1339
@@ -1365,8 +1341,7 @@ static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *statei
1365 1341
1366 WRITE32(OP_DELEGRETURN); 1342 WRITE32(OP_DELEGRETURN);
1367 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1343 WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
1368 return 0; 1344 hdr->nops++;
1369
1370} 1345}
1371/* 1346/*
1372 * END OF "GENERIC" ENCODE ROUTINES. 1347 * END OF "GENERIC" ENCODE ROUTINES.
@@ -1379,21 +1354,16 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs
1379{ 1354{
1380 struct xdr_stream xdr; 1355 struct xdr_stream xdr;
1381 struct compound_hdr hdr = { 1356 struct compound_hdr hdr = {
1382 .nops = 3, 1357 .nops = 0,
1383 }; 1358 };
1384 int status;
1385 1359
1386 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1360 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1387 encode_compound_hdr(&xdr, &hdr); 1361 encode_compound_hdr(&xdr, &hdr);
1388 status = encode_putfh(&xdr, args->fh); 1362 encode_putfh(&xdr, args->fh, &hdr);
1389 if (status != 0) 1363 encode_access(&xdr, args->access, &hdr);
1390 goto out; 1364 encode_getfattr(&xdr, args->bitmask, &hdr);
1391 status = encode_access(&xdr, args->access); 1365 encode_nops(&hdr);
1392 if (status != 0) 1366 return 0;
1393 goto out;
1394 status = encode_getfattr(&xdr, args->bitmask);
1395out:
1396 return status;
1397} 1367}
1398 1368
1399/* 1369/*
@@ -1403,21 +1373,17 @@ static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs
1403{ 1373{
1404 struct xdr_stream xdr; 1374 struct xdr_stream xdr;
1405 struct compound_hdr hdr = { 1375 struct compound_hdr hdr = {
1406 .nops = 4, 1376 .nops = 0,
1407 }; 1377 };
1408 int status;
1409 1378
1410 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1379 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1411 encode_compound_hdr(&xdr, &hdr); 1380 encode_compound_hdr(&xdr, &hdr);
1412 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 1381 encode_putfh(&xdr, args->dir_fh, &hdr);
1413 goto out; 1382 encode_lookup(&xdr, args->name, &hdr);
1414 if ((status = encode_lookup(&xdr, args->name)) != 0) 1383 encode_getfh(&xdr, &hdr);
1415 goto out; 1384 encode_getfattr(&xdr, args->bitmask, &hdr);
1416 if ((status = encode_getfh(&xdr)) != 0) 1385 encode_nops(&hdr);
1417 goto out; 1386 return 0;
1418 status = encode_getfattr(&xdr, args->bitmask);
1419out:
1420 return status;
1421} 1387}
1422 1388
1423/* 1389/*
@@ -1427,18 +1393,16 @@ static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struc
1427{ 1393{
1428 struct xdr_stream xdr; 1394 struct xdr_stream xdr;
1429 struct compound_hdr hdr = { 1395 struct compound_hdr hdr = {
1430 .nops = 3, 1396 .nops = 0,
1431 }; 1397 };
1432 int status;
1433 1398
1434 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1399 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1435 encode_compound_hdr(&xdr, &hdr); 1400 encode_compound_hdr(&xdr, &hdr);
1436 if ((status = encode_putrootfh(&xdr)) != 0) 1401 encode_putrootfh(&xdr, &hdr);
1437 goto out; 1402 encode_getfh(&xdr, &hdr);
1438 if ((status = encode_getfh(&xdr)) == 0) 1403 encode_getfattr(&xdr, args->bitmask, &hdr);
1439 status = encode_getfattr(&xdr, args->bitmask); 1404 encode_nops(&hdr);
1440out: 1405 return 0;
1441 return status;
1442} 1406}
1443 1407
1444/* 1408/*
@@ -1448,19 +1412,16 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs
1448{ 1412{
1449 struct xdr_stream xdr; 1413 struct xdr_stream xdr;
1450 struct compound_hdr hdr = { 1414 struct compound_hdr hdr = {
1451 .nops = 3, 1415 .nops = 0,
1452 }; 1416 };
1453 int status;
1454 1417
1455 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1418 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1456 encode_compound_hdr(&xdr, &hdr); 1419 encode_compound_hdr(&xdr, &hdr);
1457 if ((status = encode_putfh(&xdr, args->fh)) != 0) 1420 encode_putfh(&xdr, args->fh, &hdr);
1458 goto out; 1421 encode_remove(&xdr, &args->name, &hdr);
1459 if ((status = encode_remove(&xdr, &args->name)) != 0) 1422 encode_getfattr(&xdr, args->bitmask, &hdr);
1460 goto out; 1423 encode_nops(&hdr);
1461 status = encode_getfattr(&xdr, args->bitmask); 1424 return 0;
1462out:
1463 return status;
1464} 1425}
1465 1426
1466/* 1427/*
@@ -1470,27 +1431,20 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs
1470{ 1431{
1471 struct xdr_stream xdr; 1432 struct xdr_stream xdr;
1472 struct compound_hdr hdr = { 1433 struct compound_hdr hdr = {
1473 .nops = 7, 1434 .nops = 0,
1474 }; 1435 };
1475 int status;
1476 1436
1477 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1437 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1478 encode_compound_hdr(&xdr, &hdr); 1438 encode_compound_hdr(&xdr, &hdr);
1479 if ((status = encode_putfh(&xdr, args->old_dir)) != 0) 1439 encode_putfh(&xdr, args->old_dir, &hdr);
1480 goto out; 1440 encode_savefh(&xdr, &hdr);
1481 if ((status = encode_savefh(&xdr)) != 0) 1441 encode_putfh(&xdr, args->new_dir, &hdr);
1482 goto out; 1442 encode_rename(&xdr, args->old_name, args->new_name, &hdr);
1483 if ((status = encode_putfh(&xdr, args->new_dir)) != 0) 1443 encode_getfattr(&xdr, args->bitmask, &hdr);
1484 goto out; 1444 encode_restorefh(&xdr, &hdr);
1485 if ((status = encode_rename(&xdr, args->old_name, args->new_name)) != 0) 1445 encode_getfattr(&xdr, args->bitmask, &hdr);
1486 goto out; 1446 encode_nops(&hdr);
1487 if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) 1447 return 0;
1488 goto out;
1489 if ((status = encode_restorefh(&xdr)) != 0)
1490 goto out;
1491 status = encode_getfattr(&xdr, args->bitmask);
1492out:
1493 return status;
1494} 1448}
1495 1449
1496/* 1450/*
@@ -1500,27 +1454,20 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_
1500{ 1454{
1501 struct xdr_stream xdr; 1455 struct xdr_stream xdr;
1502 struct compound_hdr hdr = { 1456 struct compound_hdr hdr = {
1503 .nops = 7, 1457 .nops = 0,
1504 }; 1458 };
1505 int status;
1506 1459
1507 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1460 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1508 encode_compound_hdr(&xdr, &hdr); 1461 encode_compound_hdr(&xdr, &hdr);
1509 if ((status = encode_putfh(&xdr, args->fh)) != 0) 1462 encode_putfh(&xdr, args->fh, &hdr);
1510 goto out; 1463 encode_savefh(&xdr, &hdr);
1511 if ((status = encode_savefh(&xdr)) != 0) 1464 encode_putfh(&xdr, args->dir_fh, &hdr);
1512 goto out; 1465 encode_link(&xdr, args->name, &hdr);
1513 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 1466 encode_getfattr(&xdr, args->bitmask, &hdr);
1514 goto out; 1467 encode_restorefh(&xdr, &hdr);
1515 if ((status = encode_link(&xdr, args->name)) != 0) 1468 encode_getfattr(&xdr, args->bitmask, &hdr);
1516 goto out; 1469 encode_nops(&hdr);
1517 if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) 1470 return 0;
1518 goto out;
1519 if ((status = encode_restorefh(&xdr)) != 0)
1520 goto out;
1521 status = encode_getfattr(&xdr, args->bitmask);
1522out:
1523 return status;
1524} 1471}
1525 1472
1526/* 1473/*
@@ -1530,27 +1477,20 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs
1530{ 1477{
1531 struct xdr_stream xdr; 1478 struct xdr_stream xdr;
1532 struct compound_hdr hdr = { 1479 struct compound_hdr hdr = {
1533 .nops = 7, 1480 .nops = 0,
1534 }; 1481 };
1535 int status;
1536 1482
1537 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1483 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1538 encode_compound_hdr(&xdr, &hdr); 1484 encode_compound_hdr(&xdr, &hdr);
1539 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 1485 encode_putfh(&xdr, args->dir_fh, &hdr);
1540 goto out; 1486 encode_savefh(&xdr, &hdr);
1541 if ((status = encode_savefh(&xdr)) != 0) 1487 encode_create(&xdr, args, &hdr);
1542 goto out; 1488 encode_getfh(&xdr, &hdr);
1543 if ((status = encode_create(&xdr, args)) != 0) 1489 encode_getfattr(&xdr, args->bitmask, &hdr);
1544 goto out; 1490 encode_restorefh(&xdr, &hdr);
1545 if ((status = encode_getfh(&xdr)) != 0) 1491 encode_getfattr(&xdr, args->bitmask, &hdr);
1546 goto out; 1492 encode_nops(&hdr);
1547 if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) 1493 return 0;
1548 goto out;
1549 if ((status = encode_restorefh(&xdr)) != 0)
1550 goto out;
1551 status = encode_getfattr(&xdr, args->bitmask);
1552out:
1553 return status;
1554} 1494}
1555 1495
1556/* 1496/*
@@ -1568,15 +1508,15 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf
1568{ 1508{
1569 struct xdr_stream xdr; 1509 struct xdr_stream xdr;
1570 struct compound_hdr hdr = { 1510 struct compound_hdr hdr = {
1571 .nops = 2, 1511 .nops = 0,
1572 }; 1512 };
1573 int status;
1574 1513
1575 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1514 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1576 encode_compound_hdr(&xdr, &hdr); 1515 encode_compound_hdr(&xdr, &hdr);
1577 if ((status = encode_putfh(&xdr, args->fh)) == 0) 1516 encode_putfh(&xdr, args->fh, &hdr);
1578 status = encode_getfattr(&xdr, args->bitmask); 1517 encode_getfattr(&xdr, args->bitmask, &hdr);
1579 return status; 1518 encode_nops(&hdr);
1519 return 0;
1580} 1520}
1581 1521
1582/* 1522/*
@@ -1584,23 +1524,18 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf
1584 */ 1524 */
1585static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args) 1525static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closeargs *args)
1586{ 1526{
1587 struct xdr_stream xdr; 1527 struct xdr_stream xdr;
1588 struct compound_hdr hdr = { 1528 struct compound_hdr hdr = {
1589 .nops = 3, 1529 .nops = 0,
1590 }; 1530 };
1591 int status; 1531
1592 1532 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1593 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1533 encode_compound_hdr(&xdr, &hdr);
1594 encode_compound_hdr(&xdr, &hdr); 1534 encode_putfh(&xdr, args->fh, &hdr);
1595 status = encode_putfh(&xdr, args->fh); 1535 encode_close(&xdr, args, &hdr);
1596 if(status) 1536 encode_getfattr(&xdr, args->bitmask, &hdr);
1597 goto out; 1537 encode_nops(&hdr);
1598 status = encode_close(&xdr, args); 1538 return 0;
1599 if (status != 0)
1600 goto out;
1601 status = encode_getfattr(&xdr, args->bitmask);
1602out:
1603 return status;
1604} 1539}
1605 1540
1606/* 1541/*
@@ -1610,33 +1545,20 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openarg
1610{ 1545{
1611 struct xdr_stream xdr; 1546 struct xdr_stream xdr;
1612 struct compound_hdr hdr = { 1547 struct compound_hdr hdr = {
1613 .nops = 7, 1548 .nops = 0,
1614 }; 1549 };
1615 int status;
1616 1550
1617 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1551 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1618 encode_compound_hdr(&xdr, &hdr); 1552 encode_compound_hdr(&xdr, &hdr);
1619 status = encode_putfh(&xdr, args->fh); 1553 encode_putfh(&xdr, args->fh, &hdr);
1620 if (status) 1554 encode_savefh(&xdr, &hdr);
1621 goto out; 1555 encode_open(&xdr, args, &hdr);
1622 status = encode_savefh(&xdr); 1556 encode_getfh(&xdr, &hdr);
1623 if (status) 1557 encode_getfattr(&xdr, args->bitmask, &hdr);
1624 goto out; 1558 encode_restorefh(&xdr, &hdr);
1625 status = encode_open(&xdr, args); 1559 encode_getfattr(&xdr, args->bitmask, &hdr);
1626 if (status) 1560 encode_nops(&hdr);
1627 goto out; 1561 return 0;
1628 status = encode_getfh(&xdr);
1629 if (status)
1630 goto out;
1631 status = encode_getfattr(&xdr, args->bitmask);
1632 if (status)
1633 goto out;
1634 status = encode_restorefh(&xdr);
1635 if (status)
1636 goto out;
1637 status = encode_getfattr(&xdr, args->bitmask);
1638out:
1639 return status;
1640} 1562}
1641 1563
1642/* 1564/*
@@ -1646,18 +1568,15 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs
1646{ 1568{
1647 struct xdr_stream xdr; 1569 struct xdr_stream xdr;
1648 struct compound_hdr hdr = { 1570 struct compound_hdr hdr = {
1649 .nops = 2, 1571 .nops = 0,
1650 }; 1572 };
1651 int status;
1652 1573
1653 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1574 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1654 encode_compound_hdr(&xdr, &hdr); 1575 encode_compound_hdr(&xdr, &hdr);
1655 status = encode_putfh(&xdr, args->fh); 1576 encode_putfh(&xdr, args->fh, &hdr);
1656 if(status) 1577 encode_open_confirm(&xdr, args, &hdr);
1657 goto out; 1578 encode_nops(&hdr);
1658 status = encode_open_confirm(&xdr, args); 1579 return 0;
1659out:
1660 return status;
1661} 1580}
1662 1581
1663/* 1582/*
@@ -1667,21 +1586,16 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_
1667{ 1586{
1668 struct xdr_stream xdr; 1587 struct xdr_stream xdr;
1669 struct compound_hdr hdr = { 1588 struct compound_hdr hdr = {
1670 .nops = 3, 1589 .nops = 0,
1671 }; 1590 };
1672 int status;
1673 1591
1674 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1592 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1675 encode_compound_hdr(&xdr, &hdr); 1593 encode_compound_hdr(&xdr, &hdr);
1676 status = encode_putfh(&xdr, args->fh); 1594 encode_putfh(&xdr, args->fh, &hdr);
1677 if (status) 1595 encode_open(&xdr, args, &hdr);
1678 goto out; 1596 encode_getfattr(&xdr, args->bitmask, &hdr);
1679 status = encode_open(&xdr, args); 1597 encode_nops(&hdr);
1680 if (status) 1598 return 0;
1681 goto out;
1682 status = encode_getfattr(&xdr, args->bitmask);
1683out:
1684 return status;
1685} 1599}
1686 1600
1687/* 1601/*
@@ -1691,21 +1605,16 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct n
1691{ 1605{
1692 struct xdr_stream xdr; 1606 struct xdr_stream xdr;
1693 struct compound_hdr hdr = { 1607 struct compound_hdr hdr = {
1694 .nops = 3, 1608 .nops = 0,
1695 }; 1609 };
1696 int status;
1697 1610
1698 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1611 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1699 encode_compound_hdr(&xdr, &hdr); 1612 encode_compound_hdr(&xdr, &hdr);
1700 status = encode_putfh(&xdr, args->fh); 1613 encode_putfh(&xdr, args->fh, &hdr);
1701 if (status) 1614 encode_open_downgrade(&xdr, args, &hdr);
1702 goto out; 1615 encode_getfattr(&xdr, args->bitmask, &hdr);
1703 status = encode_open_downgrade(&xdr, args); 1616 encode_nops(&hdr);
1704 if (status != 0) 1617 return 0;
1705 goto out;
1706 status = encode_getfattr(&xdr, args->bitmask);
1707out:
1708 return status;
1709} 1618}
1710 1619
1711/* 1620/*
@@ -1715,18 +1624,15 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_ar
1715{ 1624{
1716 struct xdr_stream xdr; 1625 struct xdr_stream xdr;
1717 struct compound_hdr hdr = { 1626 struct compound_hdr hdr = {
1718 .nops = 2, 1627 .nops = 0,
1719 }; 1628 };
1720 int status;
1721 1629
1722 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1630 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1723 encode_compound_hdr(&xdr, &hdr); 1631 encode_compound_hdr(&xdr, &hdr);
1724 status = encode_putfh(&xdr, args->fh); 1632 encode_putfh(&xdr, args->fh, &hdr);
1725 if(status) 1633 encode_lock(&xdr, args, &hdr);
1726 goto out; 1634 encode_nops(&hdr);
1727 status = encode_lock(&xdr, args); 1635 return 0;
1728out:
1729 return status;
1730} 1636}
1731 1637
1732/* 1638/*
@@ -1736,18 +1642,15 @@ static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_
1736{ 1642{
1737 struct xdr_stream xdr; 1643 struct xdr_stream xdr;
1738 struct compound_hdr hdr = { 1644 struct compound_hdr hdr = {
1739 .nops = 2, 1645 .nops = 0,
1740 }; 1646 };
1741 int status;
1742 1647
1743 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1648 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1744 encode_compound_hdr(&xdr, &hdr); 1649 encode_compound_hdr(&xdr, &hdr);
1745 status = encode_putfh(&xdr, args->fh); 1650 encode_putfh(&xdr, args->fh, &hdr);
1746 if(status) 1651 encode_lockt(&xdr, args, &hdr);
1747 goto out; 1652 encode_nops(&hdr);
1748 status = encode_lockt(&xdr, args); 1653 return 0;
1749out:
1750 return status;
1751} 1654}
1752 1655
1753/* 1656/*
@@ -1757,18 +1660,15 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_
1757{ 1660{
1758 struct xdr_stream xdr; 1661 struct xdr_stream xdr;
1759 struct compound_hdr hdr = { 1662 struct compound_hdr hdr = {
1760 .nops = 2, 1663 .nops = 0,
1761 }; 1664 };
1762 int status;
1763 1665
1764 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1666 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1765 encode_compound_hdr(&xdr, &hdr); 1667 encode_compound_hdr(&xdr, &hdr);
1766 status = encode_putfh(&xdr, args->fh); 1668 encode_putfh(&xdr, args->fh, &hdr);
1767 if(status) 1669 encode_locku(&xdr, args, &hdr);
1768 goto out; 1670 encode_nops(&hdr);
1769 status = encode_locku(&xdr, args); 1671 return 0;
1770out:
1771 return status;
1772} 1672}
1773 1673
1774/* 1674/*
@@ -1778,18 +1678,15 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n
1778{ 1678{
1779 struct xdr_stream xdr; 1679 struct xdr_stream xdr;
1780 struct compound_hdr hdr = { 1680 struct compound_hdr hdr = {
1781 .nops = 2, 1681 .nops = 0,
1782 }; 1682 };
1783 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 1683 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1784 unsigned int replen; 1684 unsigned int replen;
1785 int status;
1786 1685
1787 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1686 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1788 encode_compound_hdr(&xdr, &hdr); 1687 encode_compound_hdr(&xdr, &hdr);
1789 status = encode_putfh(&xdr, args->fh); 1688 encode_putfh(&xdr, args->fh, &hdr);
1790 if(status) 1689 encode_readlink(&xdr, args, req, &hdr);
1791 goto out;
1792 status = encode_readlink(&xdr, args, req);
1793 1690
1794 /* set up reply kvec 1691 /* set up reply kvec
1795 * toplevel_status + taglen + rescount + OP_PUTFH + status 1692 * toplevel_status + taglen + rescount + OP_PUTFH + status
@@ -1798,9 +1695,8 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n
1798 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readlink_sz) << 2; 1695 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readlink_sz) << 2;
1799 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 1696 xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages,
1800 args->pgbase, args->pglen); 1697 args->pgbase, args->pglen);
1801 1698 encode_nops(&hdr);
1802out: 1699 return 0;
1803 return status;
1804} 1700}
1805 1701
1806/* 1702/*
@@ -1810,18 +1706,15 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
1810{ 1706{
1811 struct xdr_stream xdr; 1707 struct xdr_stream xdr;
1812 struct compound_hdr hdr = { 1708 struct compound_hdr hdr = {
1813 .nops = 2, 1709 .nops = 0,
1814 }; 1710 };
1815 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 1711 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1816 int replen; 1712 int replen;
1817 int status;
1818 1713
1819 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1714 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1820 encode_compound_hdr(&xdr, &hdr); 1715 encode_compound_hdr(&xdr, &hdr);
1821 status = encode_putfh(&xdr, args->fh); 1716 encode_putfh(&xdr, args->fh, &hdr);
1822 if(status) 1717 encode_readdir(&xdr, args, req, &hdr);
1823 goto out;
1824 status = encode_readdir(&xdr, args, req);
1825 1718
1826 /* set up reply kvec 1719 /* set up reply kvec
1827 * toplevel_status + taglen + rescount + OP_PUTFH + status 1720 * toplevel_status + taglen + rescount + OP_PUTFH + status
@@ -1833,9 +1726,8 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf
1833 dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", 1726 dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
1834 __func__, replen, args->pages, 1727 __func__, replen, args->pages,
1835 args->pgbase, args->count); 1728 args->pgbase, args->count);
1836 1729 encode_nops(&hdr);
1837out: 1730 return 0;
1838 return status;
1839} 1731}
1840 1732
1841/* 1733/*
@@ -1846,18 +1738,14 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg
1846 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 1738 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1847 struct xdr_stream xdr; 1739 struct xdr_stream xdr;
1848 struct compound_hdr hdr = { 1740 struct compound_hdr hdr = {
1849 .nops = 2, 1741 .nops = 0,
1850 }; 1742 };
1851 int replen, status; 1743 int replen;
1852 1744
1853 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1745 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1854 encode_compound_hdr(&xdr, &hdr); 1746 encode_compound_hdr(&xdr, &hdr);
1855 status = encode_putfh(&xdr, args->fh); 1747 encode_putfh(&xdr, args->fh, &hdr);
1856 if (status) 1748 encode_read(&xdr, args, &hdr);
1857 goto out;
1858 status = encode_read(&xdr, args);
1859 if (status)
1860 goto out;
1861 1749
1862 /* set up reply kvec 1750 /* set up reply kvec
1863 * toplevel status + taglen=0 + rescount + OP_PUTFH + status 1751 * toplevel status + taglen=0 + rescount + OP_PUTFH + status
@@ -1867,33 +1755,27 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg
1867 xdr_inline_pages(&req->rq_rcv_buf, replen, 1755 xdr_inline_pages(&req->rq_rcv_buf, replen,
1868 args->pages, args->pgbase, args->count); 1756 args->pages, args->pgbase, args->count);
1869 req->rq_rcv_buf.flags |= XDRBUF_READ; 1757 req->rq_rcv_buf.flags |= XDRBUF_READ;
1870out: 1758 encode_nops(&hdr);
1871 return status; 1759 return 0;
1872} 1760}
1873 1761
1874/* 1762/*
1875 * Encode an SETATTR request 1763 * Encode an SETATTR request
1876 */ 1764 */
1877static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args) 1765static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_setattrargs *args)
1878
1879{ 1766{
1880 struct xdr_stream xdr; 1767 struct xdr_stream xdr;
1881 struct compound_hdr hdr = { 1768 struct compound_hdr hdr = {
1882 .nops = 3, 1769 .nops = 0,
1883 }; 1770 };
1884 int status; 1771
1885 1772 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1886 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1773 encode_compound_hdr(&xdr, &hdr);
1887 encode_compound_hdr(&xdr, &hdr); 1774 encode_putfh(&xdr, args->fh, &hdr);
1888 status = encode_putfh(&xdr, args->fh); 1775 encode_setattr(&xdr, args, args->server, &hdr);
1889 if(status) 1776 encode_getfattr(&xdr, args->bitmask, &hdr);
1890 goto out; 1777 encode_nops(&hdr);
1891 status = encode_setattr(&xdr, args, args->server); 1778 return 0;
1892 if(status)
1893 goto out;
1894 status = encode_getfattr(&xdr, args->bitmask);
1895out:
1896 return status;
1897} 1779}
1898 1780
1899/* 1781/*
@@ -1906,22 +1788,21 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
1906 struct xdr_stream xdr; 1788 struct xdr_stream xdr;
1907 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 1789 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
1908 struct compound_hdr hdr = { 1790 struct compound_hdr hdr = {
1909 .nops = 2, 1791 .nops = 0,
1910 }; 1792 };
1911 int replen, status; 1793 int replen;
1912 1794
1913 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1795 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1914 encode_compound_hdr(&xdr, &hdr); 1796 encode_compound_hdr(&xdr, &hdr);
1915 status = encode_putfh(&xdr, args->fh); 1797 encode_putfh(&xdr, args->fh, &hdr);
1916 if (status) 1798 encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr);
1917 goto out; 1799
1918 status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0);
1919 /* set up reply buffer: */ 1800 /* set up reply buffer: */
1920 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; 1801 replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2;
1921 xdr_inline_pages(&req->rq_rcv_buf, replen, 1802 xdr_inline_pages(&req->rq_rcv_buf, replen,
1922 args->acl_pages, args->acl_pgbase, args->acl_len); 1803 args->acl_pages, args->acl_pgbase, args->acl_len);
1923out: 1804 encode_nops(&hdr);
1924 return status; 1805 return 0;
1925} 1806}
1926 1807
1927/* 1808/*
@@ -1931,22 +1812,17 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea
1931{ 1812{
1932 struct xdr_stream xdr; 1813 struct xdr_stream xdr;
1933 struct compound_hdr hdr = { 1814 struct compound_hdr hdr = {
1934 .nops = 3, 1815 .nops = 0,
1935 }; 1816 };
1936 int status;
1937 1817
1938 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1818 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1939 encode_compound_hdr(&xdr, &hdr); 1819 encode_compound_hdr(&xdr, &hdr);
1940 status = encode_putfh(&xdr, args->fh); 1820 encode_putfh(&xdr, args->fh, &hdr);
1941 if (status) 1821 encode_write(&xdr, args, &hdr);
1942 goto out;
1943 status = encode_write(&xdr, args);
1944 if (status)
1945 goto out;
1946 req->rq_snd_buf.flags |= XDRBUF_WRITE; 1822 req->rq_snd_buf.flags |= XDRBUF_WRITE;
1947 status = encode_getfattr(&xdr, args->bitmask); 1823 encode_getfattr(&xdr, args->bitmask, &hdr);
1948out: 1824 encode_nops(&hdr);
1949 return status; 1825 return 0;
1950} 1826}
1951 1827
1952/* 1828/*
@@ -1956,21 +1832,16 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_write
1956{ 1832{
1957 struct xdr_stream xdr; 1833 struct xdr_stream xdr;
1958 struct compound_hdr hdr = { 1834 struct compound_hdr hdr = {
1959 .nops = 3, 1835 .nops = 0,
1960 }; 1836 };
1961 int status;
1962 1837
1963 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1838 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1964 encode_compound_hdr(&xdr, &hdr); 1839 encode_compound_hdr(&xdr, &hdr);
1965 status = encode_putfh(&xdr, args->fh); 1840 encode_putfh(&xdr, args->fh, &hdr);
1966 if (status) 1841 encode_commit(&xdr, args, &hdr);
1967 goto out; 1842 encode_getfattr(&xdr, args->bitmask, &hdr);
1968 status = encode_commit(&xdr, args); 1843 encode_nops(&hdr);
1969 if (status) 1844 return 0;
1970 goto out;
1971 status = encode_getfattr(&xdr, args->bitmask);
1972out:
1973 return status;
1974} 1845}
1975 1846
1976/* 1847/*
@@ -1980,16 +1851,15 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsin
1980{ 1851{
1981 struct xdr_stream xdr; 1852 struct xdr_stream xdr;
1982 struct compound_hdr hdr = { 1853 struct compound_hdr hdr = {
1983 .nops = 2, 1854 .nops = 0,
1984 }; 1855 };
1985 int status;
1986 1856
1987 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1857 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
1988 encode_compound_hdr(&xdr, &hdr); 1858 encode_compound_hdr(&xdr, &hdr);
1989 status = encode_putfh(&xdr, args->fh); 1859 encode_putfh(&xdr, args->fh, &hdr);
1990 if (!status) 1860 encode_fsinfo(&xdr, args->bitmask, &hdr);
1991 status = encode_fsinfo(&xdr, args->bitmask); 1861 encode_nops(&hdr);
1992 return status; 1862 return 0;
1993} 1863}
1994 1864
1995/* 1865/*
@@ -1999,17 +1869,16 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct n
1999{ 1869{
2000 struct xdr_stream xdr; 1870 struct xdr_stream xdr;
2001 struct compound_hdr hdr = { 1871 struct compound_hdr hdr = {
2002 .nops = 2, 1872 .nops = 0,
2003 }; 1873 };
2004 int status;
2005 1874
2006 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1875 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2007 encode_compound_hdr(&xdr, &hdr); 1876 encode_compound_hdr(&xdr, &hdr);
2008 status = encode_putfh(&xdr, args->fh); 1877 encode_putfh(&xdr, args->fh, &hdr);
2009 if (!status) 1878 encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
2010 status = encode_getattr_one(&xdr, 1879 &hdr);
2011 args->bitmask[0] & nfs4_pathconf_bitmap[0]); 1880 encode_nops(&hdr);
2012 return status; 1881 return 0;
2013} 1882}
2014 1883
2015/* 1884/*
@@ -2019,18 +1888,16 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs
2019{ 1888{
2020 struct xdr_stream xdr; 1889 struct xdr_stream xdr;
2021 struct compound_hdr hdr = { 1890 struct compound_hdr hdr = {
2022 .nops = 2, 1891 .nops = 0,
2023 }; 1892 };
2024 int status;
2025 1893
2026 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1894 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2027 encode_compound_hdr(&xdr, &hdr); 1895 encode_compound_hdr(&xdr, &hdr);
2028 status = encode_putfh(&xdr, args->fh); 1896 encode_putfh(&xdr, args->fh, &hdr);
2029 if (status == 0) 1897 encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
2030 status = encode_getattr_two(&xdr, 1898 args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
2031 args->bitmask[0] & nfs4_statfs_bitmap[0], 1899 encode_nops(&hdr);
2032 args->bitmask[1] & nfs4_statfs_bitmap[1]); 1900 return 0;
2033 return status;
2034} 1901}
2035 1902
2036/* 1903/*
@@ -2040,19 +1907,18 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struc
2040{ 1907{
2041 struct xdr_stream xdr; 1908 struct xdr_stream xdr;
2042 struct compound_hdr hdr = { 1909 struct compound_hdr hdr = {
2043 .nops = 2, 1910 .nops = 0,
2044 }; 1911 };
2045 int status;
2046 1912
2047 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1913 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2048 encode_compound_hdr(&xdr, &hdr); 1914 encode_compound_hdr(&xdr, &hdr);
2049 status = encode_putfh(&xdr, fhandle); 1915 encode_putfh(&xdr, fhandle, &hdr);
2050 if (status == 0) 1916 encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS|
2051 status = encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| 1917 FATTR4_WORD0_LINK_SUPPORT|
2052 FATTR4_WORD0_LINK_SUPPORT| 1918 FATTR4_WORD0_SYMLINK_SUPPORT|
2053 FATTR4_WORD0_SYMLINK_SUPPORT| 1919 FATTR4_WORD0_ACLSUPPORT, &hdr);
2054 FATTR4_WORD0_ACLSUPPORT); 1920 encode_nops(&hdr);
2055 return status; 1921 return 0;
2056} 1922}
2057 1923
2058/* 1924/*
@@ -2062,12 +1928,14 @@ static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client
2062{ 1928{
2063 struct xdr_stream xdr; 1929 struct xdr_stream xdr;
2064 struct compound_hdr hdr = { 1930 struct compound_hdr hdr = {
2065 .nops = 1, 1931 .nops = 0,
2066 }; 1932 };
2067 1933
2068 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1934 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2069 encode_compound_hdr(&xdr, &hdr); 1935 encode_compound_hdr(&xdr, &hdr);
2070 return encode_renew(&xdr, clp); 1936 encode_renew(&xdr, clp, &hdr);
1937 encode_nops(&hdr);
1938 return 0;
2071} 1939}
2072 1940
2073/* 1941/*
@@ -2077,12 +1945,14 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4
2077{ 1945{
2078 struct xdr_stream xdr; 1946 struct xdr_stream xdr;
2079 struct compound_hdr hdr = { 1947 struct compound_hdr hdr = {
2080 .nops = 1, 1948 .nops = 0,
2081 }; 1949 };
2082 1950
2083 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1951 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2084 encode_compound_hdr(&xdr, &hdr); 1952 encode_compound_hdr(&xdr, &hdr);
2085 return encode_setclientid(&xdr, sc); 1953 encode_setclientid(&xdr, sc, &hdr);
1954 encode_nops(&hdr);
1955 return 0;
2086} 1956}
2087 1957
2088/* 1958/*
@@ -2092,19 +1962,17 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
2092{ 1962{
2093 struct xdr_stream xdr; 1963 struct xdr_stream xdr;
2094 struct compound_hdr hdr = { 1964 struct compound_hdr hdr = {
2095 .nops = 3, 1965 .nops = 0,
2096 }; 1966 };
2097 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; 1967 const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 };
2098 int status;
2099 1968
2100 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1969 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2101 encode_compound_hdr(&xdr, &hdr); 1970 encode_compound_hdr(&xdr, &hdr);
2102 status = encode_setclientid_confirm(&xdr, clp); 1971 encode_setclientid_confirm(&xdr, clp, &hdr);
2103 if (!status) 1972 encode_putrootfh(&xdr, &hdr);
2104 status = encode_putrootfh(&xdr); 1973 encode_fsinfo(&xdr, lease_bitmap, &hdr);
2105 if (!status) 1974 encode_nops(&hdr);
2106 status = encode_fsinfo(&xdr, lease_bitmap); 1975 return 0;
2107 return status;
2108} 1976}
2109 1977
2110/* 1978/*
@@ -2114,21 +1982,16 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struc
2114{ 1982{
2115 struct xdr_stream xdr; 1983 struct xdr_stream xdr;
2116 struct compound_hdr hdr = { 1984 struct compound_hdr hdr = {
2117 .nops = 3, 1985 .nops = 0,
2118 }; 1986 };
2119 int status;
2120 1987
2121 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 1988 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2122 encode_compound_hdr(&xdr, &hdr); 1989 encode_compound_hdr(&xdr, &hdr);
2123 status = encode_putfh(&xdr, args->fhandle); 1990 encode_putfh(&xdr, args->fhandle, &hdr);
2124 if (status != 0) 1991 encode_delegreturn(&xdr, args->stateid, &hdr);
2125 goto out; 1992 encode_getfattr(&xdr, args->bitmask, &hdr);
2126 status = encode_delegreturn(&xdr, args->stateid); 1993 encode_nops(&hdr);
2127 if (status != 0) 1994 return 0;
2128 goto out;
2129 status = encode_getfattr(&xdr, args->bitmask);
2130out:
2131 return status;
2132} 1995}
2133 1996
2134/* 1997/*
@@ -2138,20 +2001,17 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
2138{ 2001{
2139 struct xdr_stream xdr; 2002 struct xdr_stream xdr;
2140 struct compound_hdr hdr = { 2003 struct compound_hdr hdr = {
2141 .nops = 3, 2004 .nops = 0,
2142 }; 2005 };
2143 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; 2006 struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
2144 int replen; 2007 int replen;
2145 int status;
2146 2008
2147 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2009 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2148 encode_compound_hdr(&xdr, &hdr); 2010 encode_compound_hdr(&xdr, &hdr);
2149 if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) 2011 encode_putfh(&xdr, args->dir_fh, &hdr);
2150 goto out; 2012 encode_lookup(&xdr, args->name, &hdr);
2151 if ((status = encode_lookup(&xdr, args->name)) != 0) 2013 encode_fs_locations(&xdr, args->bitmask, &hdr);
2152 goto out; 2014
2153 if ((status = encode_fs_locations(&xdr, args->bitmask)) != 0)
2154 goto out;
2155 /* set up reply 2015 /* set up reply
2156 * toplevel_status + OP_PUTFH + status 2016 * toplevel_status + OP_PUTFH + status
2157 * + OP_LOOKUP + status + OP_GETATTR + status = 7 2017 * + OP_LOOKUP + status + OP_GETATTR + status = 7
@@ -2159,8 +2019,8 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
2159 replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; 2019 replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2;
2160 xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, 2020 xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page,
2161 0, PAGE_SIZE); 2021 0, PAGE_SIZE);
2162out: 2022 encode_nops(&hdr);
2163 return status; 2023 return 0;
2164} 2024}
2165 2025
2166/* 2026/*
@@ -2217,11 +2077,13 @@ static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
2217 READ_BUF(8); 2077 READ_BUF(8);
2218 READ32(hdr->status); 2078 READ32(hdr->status);
2219 READ32(hdr->taglen); 2079 READ32(hdr->taglen);
2220 2080
2221 READ_BUF(hdr->taglen + 4); 2081 READ_BUF(hdr->taglen + 4);
2222 hdr->tag = (char *)p; 2082 hdr->tag = (char *)p;
2223 p += XDR_QUADLEN(hdr->taglen); 2083 p += XDR_QUADLEN(hdr->taglen);
2224 READ32(hdr->nops); 2084 READ32(hdr->nops);
2085 if (unlikely(hdr->nops < 1))
2086 return nfs4_stat_to_errno(hdr->status);
2225 return 0; 2087 return 0;
2226} 2088}
2227 2089
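The two lines added to decode_compound_hdr() cover a server that fails the compound before executing any operation: with hdr->nops == 0 there are no per-op results to decode, so the compound status must be converted to an errno immediately. That is also what lets the dec_fsinfo and dec_setclientid hunks further down drop their trailing nfs4_stat_to_errno(hdr.status) calls. A simplified sketch of the control flow; the types and the status mapping here are stand-ins for the real ones in nfs4xdr.c.

#include <errno.h>
#include <stdio.h>

struct compound_hdr {
	int status;
	unsigned int nops;
};

/* Illustrative: map an NFSv4 status to a negative errno. The real
 * nfs4_stat_to_errno() table covers every NFS4ERR_* code. */
static int stat_to_errno(int stat)
{
	return stat ? -EIO : 0;
}

static int decode_compound_hdr(struct compound_hdr *hdr)
{
	if (hdr->nops < 1)
		/* Server ran zero ops: surface its status now; callers
		 * never reach the per-op decoders. */
		return stat_to_errno(hdr->status);
	return 0;
}

int main(void)
{
	struct compound_hdr bad = { .status = 10004, .nops = 0 };
	struct compound_hdr ok  = { .status = 0, .nops = 2 };

	printf("%d %d\n", decode_compound_hdr(&bad), decode_compound_hdr(&ok));
	return 0;
}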
@@ -3047,8 +2909,7 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
3047static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) 2909static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
3048{ 2910{
3049 __be32 *savep; 2911 __be32 *savep;
3050 uint32_t attrlen, 2912 uint32_t attrlen, bitmap[2] = {0};
3051 bitmap[2] = {0};
3052 int status; 2913 int status;
3053 2914
3054 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 2915 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
@@ -3070,14 +2931,13 @@ xdr_error:
3070 dprintk("%s: xdr returned %d!\n", __func__, -status); 2931 dprintk("%s: xdr returned %d!\n", __func__, -status);
3071 return status; 2932 return status;
3072} 2933}
3073 2934
3074static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) 2935static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
3075{ 2936{
3076 __be32 *savep; 2937 __be32 *savep;
3077 uint32_t attrlen, 2938 uint32_t attrlen, bitmap[2] = {0};
3078 bitmap[2] = {0};
3079 int status; 2939 int status;
3080 2940
3081 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 2941 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
3082 goto xdr_error; 2942 goto xdr_error;
3083 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) 2943 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
@@ -3107,10 +2967,9 @@ xdr_error:
3107static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) 2967static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
3108{ 2968{
3109 __be32 *savep; 2969 __be32 *savep;
3110 uint32_t attrlen, 2970 uint32_t attrlen, bitmap[2] = {0};
3111 bitmap[2] = {0};
3112 int status; 2971 int status;
3113 2972
3114 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 2973 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
3115 goto xdr_error; 2974 goto xdr_error;
3116 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) 2975 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
@@ -3256,7 +3115,7 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
3256static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3115static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
3257{ 3116{
3258 int status; 3117 int status;
3259 3118
3260 status = decode_op_hdr(xdr, OP_LINK); 3119 status = decode_op_hdr(xdr, OP_LINK);
3261 if (status) 3120 if (status)
3262 return status; 3121 return status;
@@ -3344,27 +3203,27 @@ static int decode_lookup(struct xdr_stream *xdr)
3344/* This is too sick! */ 3203/* This is too sick! */
3345static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize) 3204static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
3346{ 3205{
3347 __be32 *p; 3206 __be32 *p;
3348 uint32_t limit_type, nblocks, blocksize; 3207 uint32_t limit_type, nblocks, blocksize;
3349 3208
3350 READ_BUF(12); 3209 READ_BUF(12);
3351 READ32(limit_type); 3210 READ32(limit_type);
3352 switch (limit_type) { 3211 switch (limit_type) {
3353 case 1: 3212 case 1:
3354 READ64(*maxsize); 3213 READ64(*maxsize);
3355 break; 3214 break;
3356 case 2: 3215 case 2:
3357 READ32(nblocks); 3216 READ32(nblocks);
3358 READ32(blocksize); 3217 READ32(blocksize);
3359 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; 3218 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
3360 } 3219 }
3361 return 0; 3220 return 0;
3362} 3221}
3363 3222
3364static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) 3223static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3365{ 3224{
3366 __be32 *p; 3225 __be32 *p;
3367 uint32_t delegation_type; 3226 uint32_t delegation_type;
3368 3227
3369 READ_BUF(4); 3228 READ_BUF(4);
3370 READ32(delegation_type); 3229 READ32(delegation_type);
@@ -3375,13 +3234,14 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3375 READ_BUF(NFS4_STATEID_SIZE+4); 3234 READ_BUF(NFS4_STATEID_SIZE+4);
3376 COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); 3235 COPYMEM(res->delegation.data, NFS4_STATEID_SIZE);
3377 READ32(res->do_recall); 3236 READ32(res->do_recall);
3237
3378 switch (delegation_type) { 3238 switch (delegation_type) {
3379 case NFS4_OPEN_DELEGATE_READ: 3239 case NFS4_OPEN_DELEGATE_READ:
3380 res->delegation_type = FMODE_READ; 3240 res->delegation_type = FMODE_READ;
3381 break; 3241 break;
3382 case NFS4_OPEN_DELEGATE_WRITE: 3242 case NFS4_OPEN_DELEGATE_WRITE:
3383 res->delegation_type = FMODE_WRITE|FMODE_READ; 3243 res->delegation_type = FMODE_WRITE|FMODE_READ;
3384 if (decode_space_limit(xdr, &res->maxsize) < 0) 3244 if (decode_space_limit(xdr, &res->maxsize) < 0)
3385 return -EIO; 3245 return -EIO;
3386 } 3246 }
3387 return decode_ace(xdr, NULL, res->server->nfs_client); 3247 return decode_ace(xdr, NULL, res->server->nfs_client);
@@ -3389,27 +3249,27 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3389 3249
3390static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3250static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3391{ 3251{
3392 __be32 *p; 3252 __be32 *p;
3393 uint32_t savewords, bmlen, i; 3253 uint32_t savewords, bmlen, i;
3394 int status; 3254 int status;
3395 3255
3396 status = decode_op_hdr(xdr, OP_OPEN); 3256 status = decode_op_hdr(xdr, OP_OPEN);
3397 if (status != -EIO) 3257 if (status != -EIO)
3398 nfs_increment_open_seqid(status, res->seqid); 3258 nfs_increment_open_seqid(status, res->seqid);
3399 if (status) 3259 if (status)
3400 return status; 3260 return status;
3401 READ_BUF(NFS4_STATEID_SIZE); 3261 READ_BUF(NFS4_STATEID_SIZE);
3402 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3262 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3403 3263
3404 decode_change_info(xdr, &res->cinfo); 3264 decode_change_info(xdr, &res->cinfo);
3405 3265
3406 READ_BUF(8); 3266 READ_BUF(8);
3407 READ32(res->rflags); 3267 READ32(res->rflags);
3408 READ32(bmlen); 3268 READ32(bmlen);
3409 if (bmlen > 10) 3269 if (bmlen > 10)
3410 goto xdr_error; 3270 goto xdr_error;
3411 3271
3412 READ_BUF(bmlen << 2); 3272 READ_BUF(bmlen << 2);
3413 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); 3273 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
3414 for (i = 0; i < savewords; ++i) 3274 for (i = 0; i < savewords; ++i)
3415 READ32(res->attrset[i]); 3275 READ32(res->attrset[i]);
@@ -3424,17 +3284,17 @@ xdr_error:
3424 3284
3425static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) 3285static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
3426{ 3286{
3427 __be32 *p; 3287 __be32 *p;
3428 int status; 3288 int status;
3429 3289
3430 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); 3290 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
3431 if (status != -EIO) 3291 if (status != -EIO)
3432 nfs_increment_open_seqid(status, res->seqid); 3292 nfs_increment_open_seqid(status, res->seqid);
3433 if (status) 3293 if (status)
3434 return status; 3294 return status;
3435 READ_BUF(NFS4_STATEID_SIZE); 3295 READ_BUF(NFS4_STATEID_SIZE);
3436 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3296 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3437 return 0; 3297 return 0;
3438} 3298}
3439 3299
3440static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) 3300static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
@@ -3562,7 +3422,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3562 dprintk("NFS: readdir reply truncated!\n"); 3422 dprintk("NFS: readdir reply truncated!\n");
3563 entry[1] = 1; 3423 entry[1] = 1;
3564 } 3424 }
3565out: 3425out:
3566 kunmap_atomic(kaddr, KM_USER0); 3426 kunmap_atomic(kaddr, KM_USER0);
3567 return 0; 3427 return 0;
3568short_pkt: 3428short_pkt:
@@ -3718,7 +3578,6 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
3718 uint32_t bmlen; 3578 uint32_t bmlen;
3719 int status; 3579 int status;
3720 3580
3721
3722 status = decode_op_hdr(xdr, OP_SETATTR); 3581 status = decode_op_hdr(xdr, OP_SETATTR);
3723 if (status) 3582 if (status)
3724 return status; 3583 return status;
@@ -3738,7 +3597,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
3738 READ32(opnum); 3597 READ32(opnum);
3739 if (opnum != OP_SETCLIENTID) { 3598 if (opnum != OP_SETCLIENTID) {
3740 dprintk("nfs: decode_setclientid: Server returned operation" 3599 dprintk("nfs: decode_setclientid: Server returned operation"
3741 " %d\n", opnum); 3600 " %d\n", opnum);
3742 return -EIO; 3601 return -EIO;
3743 } 3602 }
3744 READ32(nfserr); 3603 READ32(nfserr);
@@ -3792,34 +3651,34 @@ static int decode_delegreturn(struct xdr_stream *xdr)
3792} 3651}
3793 3652
3794/* 3653/*
3654 * END OF "GENERIC" DECODE ROUTINES.
3655 */
3656
3657/*
3795 * Decode OPEN_DOWNGRADE response 3658 * Decode OPEN_DOWNGRADE response
3796 */ 3659 */
3797static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) 3660static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
3798{ 3661{
3799 struct xdr_stream xdr; 3662 struct xdr_stream xdr;
3800 struct compound_hdr hdr; 3663 struct compound_hdr hdr;
3801 int status; 3664 int status;
3802 3665
3803 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3666 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3804 status = decode_compound_hdr(&xdr, &hdr); 3667 status = decode_compound_hdr(&xdr, &hdr);
3805 if (status) 3668 if (status)
3806 goto out; 3669 goto out;
3807 status = decode_putfh(&xdr); 3670 status = decode_putfh(&xdr);
3808 if (status) 3671 if (status)
3809 goto out; 3672 goto out;
3810 status = decode_open_downgrade(&xdr, res); 3673 status = decode_open_downgrade(&xdr, res);
3811 if (status != 0) 3674 if (status != 0)
3812 goto out; 3675 goto out;
3813 decode_getfattr(&xdr, res->fattr, res->server); 3676 decode_getfattr(&xdr, res->fattr, res->server);
3814out: 3677out:
3815 return status; 3678 return status;
3816} 3679}
3817 3680
3818/* 3681/*
3819 * END OF "GENERIC" DECODE ROUTINES.
3820 */
3821
3822/*
3823 * Decode ACCESS response 3682 * Decode ACCESS response
3824 */ 3683 */
3825static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res) 3684static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_accessres *res)
@@ -3827,7 +3686,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
3827 struct xdr_stream xdr; 3686 struct xdr_stream xdr;
3828 struct compound_hdr hdr; 3687 struct compound_hdr hdr;
3829 int status; 3688 int status;
3830 3689
3831 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3690 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3832 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3691 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3833 goto out; 3692 goto out;
@@ -3850,7 +3709,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo
3850 struct xdr_stream xdr; 3709 struct xdr_stream xdr;
3851 struct compound_hdr hdr; 3710 struct compound_hdr hdr;
3852 int status; 3711 int status;
3853 3712
3854 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3713 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3855 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3714 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3856 goto out; 3715 goto out;
@@ -3873,7 +3732,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf
3873 struct xdr_stream xdr; 3732 struct xdr_stream xdr;
3874 struct compound_hdr hdr; 3733 struct compound_hdr hdr;
3875 int status; 3734 int status;
3876 3735
3877 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3736 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3878 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3737 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3879 goto out; 3738 goto out;
@@ -3893,7 +3752,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
3893 struct xdr_stream xdr; 3752 struct xdr_stream xdr;
3894 struct compound_hdr hdr; 3753 struct compound_hdr hdr;
3895 int status; 3754 int status;
3896 3755
3897 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3756 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3898 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3757 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3899 goto out; 3758 goto out;
@@ -3914,7 +3773,7 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re
3914 struct xdr_stream xdr; 3773 struct xdr_stream xdr;
3915 struct compound_hdr hdr; 3774 struct compound_hdr hdr;
3916 int status; 3775 int status;
3917 3776
3918 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3777 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3919 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3778 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3920 goto out; 3779 goto out;
@@ -3944,7 +3803,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link
3944 struct xdr_stream xdr; 3803 struct xdr_stream xdr;
3945 struct compound_hdr hdr; 3804 struct compound_hdr hdr;
3946 int status; 3805 int status;
3947 3806
3948 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3807 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3949 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3808 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3950 goto out; 3809 goto out;
@@ -3977,7 +3836,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr
3977 struct xdr_stream xdr; 3836 struct xdr_stream xdr;
3978 struct compound_hdr hdr; 3837 struct compound_hdr hdr;
3979 int status; 3838 int status;
3980 3839
3981 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3840 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
3982 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) 3841 if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
3983 goto out; 3842 goto out;
@@ -4014,7 +3873,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
4014 struct xdr_stream xdr; 3873 struct xdr_stream xdr;
4015 struct compound_hdr hdr; 3874 struct compound_hdr hdr;
4016 int status; 3875 int status;
4017 3876
4018 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3877 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4019 status = decode_compound_hdr(&xdr, &hdr); 3878 status = decode_compound_hdr(&xdr, &hdr);
4020 if (status) 3879 if (status)
@@ -4025,7 +3884,6 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
4025 status = decode_getfattr(&xdr, res->fattr, res->server); 3884 status = decode_getfattr(&xdr, res->fattr, res->server);
4026out: 3885out:
4027 return status; 3886 return status;
4028
4029} 3887}
4030 3888
4031/* 3889/*
@@ -4034,21 +3892,20 @@ out:
4034static int 3892static int
4035nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args) 3893nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args)
4036{ 3894{
4037 struct xdr_stream xdr; 3895 struct xdr_stream xdr;
4038 struct compound_hdr hdr = { 3896 struct compound_hdr hdr = {
4039 .nops = 2, 3897 .nops = 0,
4040 }; 3898 };
4041 int status; 3899 int status;
4042 3900
4043 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 3901 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
4044 encode_compound_hdr(&xdr, &hdr); 3902 encode_compound_hdr(&xdr, &hdr);
4045 status = encode_putfh(&xdr, args->fh); 3903 encode_putfh(&xdr, args->fh, &hdr);
4046 if (status) 3904 status = encode_setacl(&xdr, args, &hdr);
4047 goto out; 3905 encode_nops(&hdr);
4048 status = encode_setacl(&xdr, args); 3906 return status;
4049out:
4050 return status;
4051} 3907}
3908
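Note that enc_setacl is the one encoder in this series that keeps an int status: encode_setacl() can still fail because the ACL payload is caller-supplied and may not fit the send buffer, so its return value is propagated even though encode_nops() now runs unconditionally. A hedged sketch of why that one failure path survives; the function name and error code below are illustrative.

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Unlike fixed-size operations, the ACL blob has arbitrary length,
 * so marshalling it can legitimately fail. */
static int encode_setacl_sketch(char *buf, size_t buflen,
				const char *acl, size_t acllen)
{
	if (acllen > buflen)
		return -ERANGE;		/* payload too large to marshal */
	memcpy(buf, acl, acllen);
	return 0;
}

int main(void)
{
	char buf[8];

	printf("%d\n", encode_setacl_sketch(buf, sizeof(buf), "short", 5));
	printf("%d\n", encode_setacl_sketch(buf, sizeof(buf),
					    "far too long an acl", 19));
	return 0;
}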
4052/* 3909/*
4053 * Decode SETACL response 3910 * Decode SETACL response
4054 */ 3911 */
@@ -4099,18 +3956,18 @@ out:
4099 */ 3956 */
4100static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res) 3957static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_closeres *res)
4101{ 3958{
4102 struct xdr_stream xdr; 3959 struct xdr_stream xdr;
4103 struct compound_hdr hdr; 3960 struct compound_hdr hdr;
4104 int status; 3961 int status;
4105 3962
4106 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3963 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4107 status = decode_compound_hdr(&xdr, &hdr); 3964 status = decode_compound_hdr(&xdr, &hdr);
4108 if (status) 3965 if (status)
4109 goto out; 3966 goto out;
4110 status = decode_putfh(&xdr); 3967 status = decode_putfh(&xdr);
4111 if (status) 3968 if (status)
4112 goto out; 3969 goto out;
4113 status = decode_close(&xdr, res); 3970 status = decode_close(&xdr, res);
4114 if (status != 0) 3971 if (status != 0)
4115 goto out; 3972 goto out;
4116 /* 3973 /*
@@ -4121,7 +3978,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
4121 */ 3978 */
4122 decode_getfattr(&xdr, res->fattr, res->server); 3979 decode_getfattr(&xdr, res->fattr, res->server);
4123out: 3980out:
4124 return status; 3981 return status;
4125} 3982}
4126 3983
4127/* 3984/*
@@ -4129,23 +3986,23 @@ out:
4129 */ 3986 */
4130static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) 3987static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
4131{ 3988{
4132 struct xdr_stream xdr; 3989 struct xdr_stream xdr;
4133 struct compound_hdr hdr; 3990 struct compound_hdr hdr;
4134 int status; 3991 int status;
4135 3992
4136 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 3993 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4137 status = decode_compound_hdr(&xdr, &hdr); 3994 status = decode_compound_hdr(&xdr, &hdr);
4138 if (status) 3995 if (status)
4139 goto out; 3996 goto out;
4140 status = decode_putfh(&xdr); 3997 status = decode_putfh(&xdr);
4141 if (status) 3998 if (status)
4142 goto out; 3999 goto out;
4143 status = decode_savefh(&xdr); 4000 status = decode_savefh(&xdr);
4001 if (status)
4002 goto out;
4003 status = decode_open(&xdr, res);
4144 if (status) 4004 if (status)
4145 goto out; 4005 goto out;
4146 status = decode_open(&xdr, res);
4147 if (status)
4148 goto out;
4149 if (decode_getfh(&xdr, &res->fh) != 0) 4006 if (decode_getfh(&xdr, &res->fh) != 0)
4150 goto out; 4007 goto out;
4151 if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) 4008 if (decode_getfattr(&xdr, res->f_attr, res->server) != 0)
@@ -4154,7 +4011,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr
4154 goto out; 4011 goto out;
4155 decode_getfattr(&xdr, res->dir_attr, res->server); 4012 decode_getfattr(&xdr, res->dir_attr, res->server);
4156out: 4013out:
4157 return status; 4014 return status;
4158} 4015}
4159 4016
4160/* 4017/*
@@ -4162,20 +4019,20 @@ out:
4162 */ 4019 */
4163static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res) 4020static int nfs4_xdr_dec_open_confirm(struct rpc_rqst *rqstp, __be32 *p, struct nfs_open_confirmres *res)
4164{ 4021{
4165 struct xdr_stream xdr; 4022 struct xdr_stream xdr;
4166 struct compound_hdr hdr; 4023 struct compound_hdr hdr;
4167 int status; 4024 int status;
4168 4025
4169 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4026 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4170 status = decode_compound_hdr(&xdr, &hdr); 4027 status = decode_compound_hdr(&xdr, &hdr);
4171 if (status) 4028 if (status)
4172 goto out; 4029 goto out;
4173 status = decode_putfh(&xdr); 4030 status = decode_putfh(&xdr);
4174 if (status) 4031 if (status)
4175 goto out; 4032 goto out;
4176 status = decode_open_confirm(&xdr, res); 4033 status = decode_open_confirm(&xdr, res);
4177out: 4034out:
4178 return status; 4035 return status;
4179} 4036}
4180 4037
4181/* 4038/*
@@ -4183,23 +4040,23 @@ out:
4183 */ 4040 */
4184static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res) 4041static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openres *res)
4185{ 4042{
4186 struct xdr_stream xdr; 4043 struct xdr_stream xdr;
4187 struct compound_hdr hdr; 4044 struct compound_hdr hdr;
4188 int status; 4045 int status;
4189 4046
4190 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4047 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4191 status = decode_compound_hdr(&xdr, &hdr); 4048 status = decode_compound_hdr(&xdr, &hdr);
4192 if (status) 4049 if (status)
4193 goto out; 4050 goto out;
4194 status = decode_putfh(&xdr); 4051 status = decode_putfh(&xdr);
4195 if (status) 4052 if (status)
4196 goto out; 4053 goto out;
4197 status = decode_open(&xdr, res); 4054 status = decode_open(&xdr, res);
4198 if (status) 4055 if (status)
4199 goto out; 4056 goto out;
4200 decode_getfattr(&xdr, res->f_attr, res->server); 4057 decode_getfattr(&xdr, res->f_attr, res->server);
4201out: 4058out:
4202 return status; 4059 return status;
4203} 4060}
4204 4061
4205/* 4062/*
@@ -4207,25 +4064,25 @@ out:
4207 */ 4064 */
4208static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res) 4065static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_setattrres *res)
4209{ 4066{
4210 struct xdr_stream xdr; 4067 struct xdr_stream xdr;
4211 struct compound_hdr hdr; 4068 struct compound_hdr hdr;
4212 int status; 4069 int status;
4213 4070
4214 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); 4071 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
4215 status = decode_compound_hdr(&xdr, &hdr); 4072 status = decode_compound_hdr(&xdr, &hdr);
4216 if (status) 4073 if (status)
4217 goto out; 4074 goto out;
4218 status = decode_putfh(&xdr); 4075 status = decode_putfh(&xdr);
4219 if (status) 4076 if (status)
4220 goto out; 4077 goto out;
4221 status = decode_setattr(&xdr, res); 4078 status = decode_setattr(&xdr, res);
4222 if (status) 4079 if (status)
4223 goto out; 4080 goto out;
4224 status = decode_getfattr(&xdr, res->fattr, res->server); 4081 status = decode_getfattr(&xdr, res->fattr, res->server);
4225 if (status == NFS4ERR_DELAY) 4082 if (status == NFS4ERR_DELAY)
4226 status = 0; 4083 status = 0;
4227out: 4084out:
4228 return status; 4085 return status;
4229} 4086}
4230 4087
4231/* 4088/*
@@ -4421,8 +4278,6 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf
4421 status = decode_putfh(&xdr); 4278 status = decode_putfh(&xdr);
4422 if (!status) 4279 if (!status)
4423 status = decode_fsinfo(&xdr, fsinfo); 4280 status = decode_fsinfo(&xdr, fsinfo);
4424 if (!status)
4425 status = nfs4_stat_to_errno(hdr.status);
4426 return status; 4281 return status;
4427} 4282}
4428 4283
@@ -4511,8 +4366,6 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
4511 status = decode_compound_hdr(&xdr, &hdr); 4366 status = decode_compound_hdr(&xdr, &hdr);
4512 if (!status) 4367 if (!status)
4513 status = decode_setclientid(&xdr, clp); 4368 status = decode_setclientid(&xdr, clp);
4514 if (!status)
4515 status = nfs4_stat_to_errno(hdr.status);
4516 return status; 4369 return status;
4517} 4370}
4518 4371
@@ -4533,8 +4386,6 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
4533 status = decode_putrootfh(&xdr); 4386 status = decode_putrootfh(&xdr);
4534 if (!status) 4387 if (!status)
4535 status = decode_fsinfo(&xdr, fsinfo); 4388 status = decode_fsinfo(&xdr, fsinfo);
4536 if (!status)
4537 status = nfs4_stat_to_errno(hdr.status);
4538 return status; 4389 return status;
4539} 4390}
4540 4391
@@ -4715,7 +4566,7 @@ nfs4_stat_to_errno(int stat)
4715 .p_replen = NFS4_##restype##_sz, \ 4566 .p_replen = NFS4_##restype##_sz, \
4716 .p_statidx = NFSPROC4_CLNT_##proc, \ 4567 .p_statidx = NFSPROC4_CLNT_##proc, \
4717 .p_name = #proc, \ 4568 .p_name = #proc, \
4718 } 4569}
4719 4570
4720struct rpc_procinfo nfs4_procedures[] = { 4571struct rpc_procinfo nfs4_procedures[] = {
4721 PROC(READ, enc_read, dec_read), 4572 PROC(READ, enc_read, dec_read),
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index d74d16ce0d49..d9ef602fbc5a 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,6 +86,8 @@
86#include <net/ipconfig.h> 86#include <net/ipconfig.h>
87#include <linux/parser.h> 87#include <linux/parser.h>
88 88
89#include "internal.h"
90
89/* Define this to allow debugging output */ 91/* Define this to allow debugging output */
90#undef NFSROOT_DEBUG 92#undef NFSROOT_DEBUG
91#define NFSDBG_FACILITY NFSDBG_ROOT 93#define NFSDBG_FACILITY NFSDBG_ROOT
@@ -100,7 +102,7 @@ static char nfs_root_name[256] __initdata = "";
100static __be32 servaddr __initdata = 0; 102static __be32 servaddr __initdata = 0;
101 103
102/* Name of directory to mount */ 104/* Name of directory to mount */
103static char nfs_path[NFS_MAXPATHLEN] __initdata = { 0, }; 105static char nfs_export_path[NFS_MAXPATHLEN] __initdata = { 0, };
104 106
105/* NFS-related data */ 107/* NFS-related data */
106static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ 108static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
@@ -312,7 +314,7 @@ static int __init root_nfs_name(char *name)
312 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); 314 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
313 return -1; 315 return -1;
314 } 316 }
315 sprintf(nfs_path, buf, cp); 317 sprintf(nfs_export_path, buf, cp);
316 318
317 return 1; 319 return 1;
318} 320}
@@ -340,7 +342,7 @@ static int __init root_nfs_addr(void)
340static void __init root_nfs_print(void) 342static void __init root_nfs_print(void)
341{ 343{
342 printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", 344 printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n",
343 nfs_path, nfs_data.hostname); 345 nfs_export_path, nfs_data.hostname);
344 printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", 346 printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
345 nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans); 347 nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
346 printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n", 348 printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
@@ -485,18 +487,23 @@ static int __init root_nfs_get_handle(void)
485{ 487{
486 struct nfs_fh fh; 488 struct nfs_fh fh;
487 struct sockaddr_in sin; 489 struct sockaddr_in sin;
490 struct nfs_mount_request request = {
491 .sap = (struct sockaddr *)&sin,
492 .salen = sizeof(sin),
493 .dirpath = nfs_export_path,
494 .version = (nfs_data.flags & NFS_MOUNT_VER3) ?
495 NFS_MNT3_VERSION : NFS_MNT_VERSION,
496 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
497 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
498 .fh = &fh,
499 };
488 int status; 500 int status;
489 int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
490 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP;
491 int version = (nfs_data.flags & NFS_MOUNT_VER3) ?
492 NFS_MNT3_VERSION : NFS_MNT_VERSION;
493 501
494 set_sockaddr(&sin, servaddr, htons(mount_port)); 502 set_sockaddr(&sin, servaddr, htons(mount_port));
495 status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL, 503 status = nfs_mount(&request);
496 nfs_path, version, protocol, &fh);
497 if (status < 0) 504 if (status < 0)
498 printk(KERN_ERR "Root-NFS: Server returned error %d " 505 printk(KERN_ERR "Root-NFS: Server returned error %d "
499 "while mounting %s\n", status, nfs_path); 506 "while mounting %s\n", status, nfs_export_path);
500 else { 507 else {
501 nfs_data.root.size = fh.size; 508 nfs_data.root.size = fh.size;
502 memcpy(nfs_data.root.data, fh.data, fh.size); 509 memcpy(nfs_data.root.data, fh.data, fh.size);
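The nfs_mount() conversion here, and again in fs/nfs/super.c below, is a parameter-object refactor: seven positional arguments collapse into a single struct nfs_mount_request that both call sites fill in, which is what later lets a new knob such as noresvport ride along without touching every caller's signature. A small standalone sketch of the shape; the field names follow the patch, but the body is illustrative only.

#include <stdio.h>

struct mount_request {
	const char *hostname;
	const char *dirpath;
	int         version;
	int         protocol;
	int         noresvport;
};

static int do_mount(const struct mount_request *req)
{
	printf("mount %s:%s v%d proto=%d noresvport=%d\n",
	       req->hostname, req->dirpath, req->version,
	       req->protocol, req->noresvport);
	return 0;
}

int main(void)
{
	/* Designated initializers keep unset fields zeroed, exactly how
	 * both converted call sites build their request. */
	struct mount_request req = {
		.hostname = "server",
		.dirpath  = "/export",
		.version  = 3,
		.protocol = 6,		/* TCP, illustrative value */
	};
	return do_mount(&req);
}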
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 40d17987d0e8..f856004bb7fa 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -533,12 +533,6 @@ readpage_async_filler(void *data, struct page *page)
533 unsigned int len; 533 unsigned int len;
534 int error; 534 int error;
535 535
536 error = nfs_wb_page(inode, page);
537 if (error)
538 goto out_unlock;
539 if (PageUptodate(page))
540 goto out_unlock;
541
542 len = nfs_page_length(page); 536 len = nfs_page_length(page);
543 if (len == 0) 537 if (len == 0)
544 return nfs_return_empty_page(page); 538 return nfs_return_empty_page(page);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index bb0313ac9e1f..d6686f4786dc 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -75,6 +75,7 @@ enum {
75 Opt_acl, Opt_noacl, 75 Opt_acl, Opt_noacl,
76 Opt_rdirplus, Opt_nordirplus, 76 Opt_rdirplus, Opt_nordirplus,
77 Opt_sharecache, Opt_nosharecache, 77 Opt_sharecache, Opt_nosharecache,
78 Opt_resvport, Opt_noresvport,
78 79
79 /* Mount options that take integer arguments */ 80 /* Mount options that take integer arguments */
80 Opt_port, 81 Opt_port,
@@ -129,6 +130,8 @@ static const match_table_t nfs_mount_option_tokens = {
129 { Opt_nordirplus, "nordirplus" }, 130 { Opt_nordirplus, "nordirplus" },
130 { Opt_sharecache, "sharecache" }, 131 { Opt_sharecache, "sharecache" },
131 { Opt_nosharecache, "nosharecache" }, 132 { Opt_nosharecache, "nosharecache" },
133 { Opt_resvport, "resvport" },
134 { Opt_noresvport, "noresvport" },
132 135
133 { Opt_port, "port=%u" }, 136 { Opt_port, "port=%u" },
134 { Opt_rsize, "rsize=%u" }, 137 { Opt_rsize, "rsize=%u" },
@@ -512,7 +515,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
512 { NFS_MOUNT_NONLM, ",nolock", "" }, 515 { NFS_MOUNT_NONLM, ",nolock", "" },
513 { NFS_MOUNT_NOACL, ",noacl", "" }, 516 { NFS_MOUNT_NOACL, ",noacl", "" },
514 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, 517 { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
515 { NFS_MOUNT_UNSHARED, ",nosharecache", ""}, 518 { NFS_MOUNT_UNSHARED, ",nosharecache", "" },
519 { NFS_MOUNT_NORESVPORT, ",noresvport", "" },
516 { 0, NULL, NULL } 520 { 0, NULL, NULL }
517 }; 521 };
518 const struct proc_nfs_info *nfs_infop; 522 const struct proc_nfs_info *nfs_infop;
@@ -1033,6 +1037,12 @@ static int nfs_parse_mount_options(char *raw,
1033 case Opt_nosharecache: 1037 case Opt_nosharecache:
1034 mnt->flags |= NFS_MOUNT_UNSHARED; 1038 mnt->flags |= NFS_MOUNT_UNSHARED;
1035 break; 1039 break;
1040 case Opt_resvport:
1041 mnt->flags &= ~NFS_MOUNT_NORESVPORT;
1042 break;
1043 case Opt_noresvport:
1044 mnt->flags |= NFS_MOUNT_NORESVPORT;
1045 break;
1036 1046
1037 /* 1047 /*
1038 * options that take numeric values 1048 * options that take numeric values
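Adding a mount option to this parser is a three-part pattern, visible across the hunks above: an Opt_* token, a match-table string, and a switch case that sets or clears a flag bit. The resvport/noresvport pair deliberately toggles the same NFS_MOUNT_NORESVPORT bit in opposite directions. A minimal sketch of that toggle; the flag value and helper below are made up.

#include <stdio.h>
#include <string.h>

#define MNT_NORESVPORT 0x1	/* illustrative flag bit */

static void parse_opt(const char *opt, unsigned int *flags)
{
	if (strcmp(opt, "resvport") == 0)
		*flags &= ~MNT_NORESVPORT;  /* default: privileged source port */
	else if (strcmp(opt, "noresvport") == 0)
		*flags |= MNT_NORESVPORT;   /* allow a non-privileged port */
}

int main(void)
{
	unsigned int flags = 0;

	parse_opt("noresvport", &flags);
	printf("flags=%#x\n", flags);
	return 0;
}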
@@ -1327,8 +1337,14 @@ out_security_failure:
1327static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1337static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1328 struct nfs_fh *root_fh) 1338 struct nfs_fh *root_fh)
1329{ 1339{
1330 struct sockaddr *sap = (struct sockaddr *)&args->mount_server.address; 1340 struct nfs_mount_request request = {
1331 char *hostname; 1341 .sap = (struct sockaddr *)
1342 &args->mount_server.address,
1343 .dirpath = args->nfs_server.export_path,
1344 .protocol = args->mount_server.protocol,
1345 .fh = root_fh,
1346 .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
1347 };
1332 int status; 1348 int status;
1333 1349
1334 if (args->mount_server.version == 0) { 1350 if (args->mount_server.version == 0) {
@@ -1337,42 +1353,38 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1337 else 1353 else
1338 args->mount_server.version = NFS_MNT_VERSION; 1354 args->mount_server.version = NFS_MNT_VERSION;
1339 } 1355 }
1356 request.version = args->mount_server.version;
1340 1357
1341 if (args->mount_server.hostname) 1358 if (args->mount_server.hostname)
1342 hostname = args->mount_server.hostname; 1359 request.hostname = args->mount_server.hostname;
1343 else 1360 else
1344 hostname = args->nfs_server.hostname; 1361 request.hostname = args->nfs_server.hostname;
1345 1362
1346 /* 1363 /*
1347 * Construct the mount server's address. 1364 * Construct the mount server's address.
1348 */ 1365 */
1349 if (args->mount_server.address.ss_family == AF_UNSPEC) { 1366 if (args->mount_server.address.ss_family == AF_UNSPEC) {
1350 memcpy(sap, &args->nfs_server.address, 1367 memcpy(request.sap, &args->nfs_server.address,
1351 args->nfs_server.addrlen); 1368 args->nfs_server.addrlen);
1352 args->mount_server.addrlen = args->nfs_server.addrlen; 1369 args->mount_server.addrlen = args->nfs_server.addrlen;
1353 } 1370 }
1371 request.salen = args->mount_server.addrlen;
1354 1372
1355 /* 1373 /*
1356 * autobind will be used if mount_server.port == 0 1374 * autobind will be used if mount_server.port == 0
1357 */ 1375 */
1358 nfs_set_port(sap, args->mount_server.port); 1376 nfs_set_port(request.sap, args->mount_server.port);
1359 1377
1360 /* 1378 /*
1361 * Now ask the mount server to map our export path 1379 * Now ask the mount server to map our export path
1362 * to a file handle. 1380 * to a file handle.
1363 */ 1381 */
1364 status = nfs_mount(sap, 1382 status = nfs_mount(&request);
1365 args->mount_server.addrlen,
1366 hostname,
1367 args->nfs_server.export_path,
1368 args->mount_server.version,
1369 args->mount_server.protocol,
1370 root_fh);
1371 if (status == 0) 1383 if (status == 0)
1372 return 0; 1384 return 0;
1373 1385
1374 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", 1386 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
1375 hostname, status); 1387 request.hostname, status);
1376 return status; 1388 return status;
1377} 1389}
1378 1390
@@ -2419,7 +2431,7 @@ static void nfs4_kill_super(struct super_block *sb)
2419{ 2431{
2420 struct nfs_server *server = NFS_SB(sb); 2432 struct nfs_server *server = NFS_SB(sb);
2421 2433
2422 nfs_return_all_delegations(sb); 2434 nfs_super_return_all_delegations(sb);
2423 kill_anon_super(sb); 2435 kill_anon_super(sb);
2424 2436
2425 nfs4_renewd_prepare_shutdown(server); 2437 nfs4_renewd_prepare_shutdown(server);
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index c11f5375d7c1..04133aacb1e5 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -29,8 +29,8 @@
29 29
30MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
31 31
32EXPORT_SYMBOL(nfsacl_encode); 32EXPORT_SYMBOL_GPL(nfsacl_encode);
33EXPORT_SYMBOL(nfsacl_decode); 33EXPORT_SYMBOL_GPL(nfsacl_decode);
34 34
35struct nfsacl_encode_desc { 35struct nfsacl_encode_desc {
36 struct xdr_array2_desc desc; 36 struct xdr_array2_desc desc;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 094747a1227c..6d7d8c02c197 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -358,6 +358,7 @@ static struct rpc_program cb_program = {
358 .nrvers = ARRAY_SIZE(nfs_cb_version), 358 .nrvers = ARRAY_SIZE(nfs_cb_version),
359 .version = nfs_cb_version, 359 .version = nfs_cb_version,
360 .stats = &cb_stats, 360 .stats = &cb_stats,
361 .pipe_dir_name = "/nfsd4_cb",
361}; 362};
362 363
363/* Reference counting, callback cleanup, etc., all look racy as heck. 364/* Reference counting, callback cleanup, etc., all look racy as heck.
@@ -382,8 +383,9 @@ static int do_probe_callback(void *data)
382 .program = &cb_program, 383 .program = &cb_program,
383 .prognumber = cb->cb_prog, 384 .prognumber = cb->cb_prog,
384 .version = nfs_cb_version[1]->number, 385 .version = nfs_cb_version[1]->number,
385 .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ 386 .authflavor = clp->cl_flavor,
386 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 387 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
388 .client_name = clp->cl_principal,
387 }; 389 };
388 struct rpc_message msg = { 390 struct rpc_message msg = {
389 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 391 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
@@ -392,6 +394,11 @@ static int do_probe_callback(void *data)
392 struct rpc_clnt *client; 394 struct rpc_clnt *client;
393 int status; 395 int status;
394 396
397 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) {
398 status = nfserr_cb_path_down;
399 goto out_err;
400 }
401
395 /* Initialize address */ 402 /* Initialize address */
396 memset(&addr, 0, sizeof(addr)); 403 memset(&addr, 0, sizeof(addr));
397 addr.sin_family = AF_INET; 404 addr.sin_family = AF_INET;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bf4cd46a5a11..13e0e074dbb8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -54,6 +54,7 @@
54#include <linux/mutex.h> 54#include <linux/mutex.h>
55#include <linux/lockd/bind.h> 55#include <linux/lockd/bind.h>
56#include <linux/module.h> 56#include <linux/module.h>
57#include <linux/sunrpc/svcauth_gss.h>
57 58
58#define NFSDDBG_FACILITY NFSDDBG_PROC 59#define NFSDDBG_FACILITY NFSDDBG_PROC
59 60
@@ -377,6 +378,7 @@ free_client(struct nfs4_client *clp)
377 shutdown_callback_client(clp); 378 shutdown_callback_client(clp);
378 if (clp->cl_cred.cr_group_info) 379 if (clp->cl_cred.cr_group_info)
379 put_group_info(clp->cl_cred.cr_group_info); 380 put_group_info(clp->cl_cred.cr_group_info);
381 kfree(clp->cl_principal);
380 kfree(clp->cl_name.data); 382 kfree(clp->cl_name.data);
381 kfree(clp); 383 kfree(clp);
382} 384}
@@ -696,6 +698,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
696 unsigned int strhashval; 698 unsigned int strhashval;
697 struct nfs4_client *conf, *unconf, *new; 699 struct nfs4_client *conf, *unconf, *new;
698 __be32 status; 700 __be32 status;
701 char *princ;
699 char dname[HEXDIR_LEN]; 702 char dname[HEXDIR_LEN];
700 703
701 if (!check_name(clname)) 704 if (!check_name(clname))
@@ -783,6 +786,15 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
783 } 786 }
784 copy_verf(new, &clverifier); 787 copy_verf(new, &clverifier);
785 new->cl_addr = sin->sin_addr.s_addr; 788 new->cl_addr = sin->sin_addr.s_addr;
789 new->cl_flavor = rqstp->rq_flavor;
790 princ = svc_gss_principal(rqstp);
791 if (princ) {
792 new->cl_principal = kstrdup(princ, GFP_KERNEL);
793 if (new->cl_principal == NULL) {
794 free_client(new);
795 goto out;
796 }
797 }
786 copy_cred(&new->cl_cred, &rqstp->rq_cred); 798 copy_cred(&new->cl_cred, &rqstp->rq_cred);
787 gen_confirm(new); 799 gen_confirm(new);
788 gen_callback(new, setclid); 800 gen_callback(new, setclid);
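On the nfsd side these hunks record the RPC auth flavor and a private copy of the GSS principal on the client at SETCLIENTID time; free_client() releases the copy, and do_probe_callback() above refuses to attempt a GSS-authenticated callback when no principal was recorded. A sketch of just the ownership rule, with strdup/free standing in for kstrdup/kfree and the struct trimmed to the relevant field.

#include <stdlib.h>
#include <string.h>

struct client {
	char *principal;	/* NULL when the flavor carries no principal */
};

static int client_set_principal(struct client *clp, const char *princ)
{
	if (!princ)
		return 0;		/* nothing to record */
	clp->principal = strdup(princ);	/* client owns its own copy */
	return clp->principal ? 0 : -1;	/* allocation failure aborts setup */
}

static void free_client(struct client *clp)
{
	free(clp->principal);		/* free(NULL) is a no-op */
}

int main(void)
{
	struct client clp = { 0 };

	if (client_set_principal(&clp, "nfs/server@REALM") == 0)
		free_client(&clp);
	return 0;
}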
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 81904f07679d..3bb1cf1e7425 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -44,10 +44,13 @@ static int show_stat(struct seq_file *p, void *v)
44 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 44 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
45 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 45 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
46 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 46 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
47 47 for_each_irq_nr(j) {
48 for_each_irq_nr(j) 48#ifdef CONFIG_SPARSE_IRQ
49 if (!irq_to_desc(j))
50 continue;
51#endif
49 sum += kstat_irqs_cpu(j, i); 52 sum += kstat_irqs_cpu(j, i);
50 53 }
51 sum += arch_irq_stat_cpu(i); 54 sum += arch_irq_stat_cpu(i);
52 } 55 }
53 sum += arch_irq_stat(); 56 sum += arch_irq_stat();
@@ -92,7 +95,12 @@ static int show_stat(struct seq_file *p, void *v)
92 /* sum again ? it could be updated? */ 95 /* sum again ? it could be updated? */
93 for_each_irq_nr(j) { 96 for_each_irq_nr(j) {
94 per_irq_sum = 0; 97 per_irq_sum = 0;
95 98#ifdef CONFIG_SPARSE_IRQ
99 if (!irq_to_desc(j)) {
100 seq_printf(p, " %u", per_irq_sum);
101 continue;
102 }
103#endif
96 for_each_possible_cpu(i) 104 for_each_possible_cpu(i)
97 per_irq_sum += kstat_irqs_cpu(j, i); 105 per_irq_sum += kstat_irqs_cpu(j, i);
98 106
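With CONFIG_SPARSE_IRQ the interrupt number space has holes, and irq_to_desc() returns NULL for numbers that never got a descriptor, so reading their stats would oops. The patch skips such slots when summing, but still prints a 0 in the per-irq list so the /proc/stat column layout stays stable for userspace parsers. A userspace sketch of that policy; all data here is invented.

#include <stdio.h>

#define NR_IRQS 8

static const int has_desc[NR_IRQS] = { 1, 1, 0, 1, 0, 0, 1, 1 };
static const unsigned int irqs[NR_IRQS] = { 5, 0, 9, 2, 7, 7, 1, 3 };

int main(void)
{
	unsigned int sum = 0;
	int j;

	for (j = 0; j < NR_IRQS; j++) {
		if (!has_desc[j])
			continue;	/* no descriptor: skip for the total */
		sum += irqs[j];
	}
	printf("intr %u", sum);
	for (j = 0; j < NR_IRQS; j++)
		printf(" %u", has_desc[j] ? irqs[j] : 0);	/* keep columns */
	printf("\n");
	return 0;
}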
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 737c9a425361..c3dc491fff89 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -85,13 +85,13 @@ xfs-y += xfs_alloc.o \
85 xfs_trans_inode.o \ 85 xfs_trans_inode.o \
86 xfs_trans_item.o \ 86 xfs_trans_item.o \
87 xfs_utils.o \ 87 xfs_utils.o \
88 xfs_vfsops.o \
89 xfs_vnodeops.o \ 88 xfs_vnodeops.o \
90 xfs_rw.o \ 89 xfs_rw.o \
91 xfs_dmops.o \ 90 xfs_dmops.o \
92 xfs_qmops.o 91 xfs_qmops.o
93 92
94xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o 93xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \
94 xfs_dir2_trace.o
95 95
96# Objects in linux/ 96# Objects in linux/
97xfs-y += $(addprefix $(XFS_LINUX)/, \ 97xfs-y += $(addprefix $(XFS_LINUX)/, \
@@ -106,7 +106,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
106 xfs_iops.o \ 106 xfs_iops.o \
107 xfs_lrw.o \ 107 xfs_lrw.o \
108 xfs_super.o \ 108 xfs_super.o \
109 xfs_vnode.o \ 109 xfs_sync.o \
110 xfs_xattr.o) 110 xfs_xattr.o)
111 111
112# Objects in support/ 112# Objects in support/
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
index 351a8f454bd1..4dfc7c370819 100644
--- a/fs/xfs/linux-2.6/sv.h
+++ b/fs/xfs/linux-2.6/sv.h
@@ -32,23 +32,15 @@ typedef struct sv_s {
32 wait_queue_head_t waiters; 32 wait_queue_head_t waiters;
33} sv_t; 33} sv_t;
34 34
35#define SV_FIFO 0x0 /* sv_t is FIFO type */ 35static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
36#define SV_LIFO 0x2 /* sv_t is LIFO type */
37#define SV_PRIO 0x4 /* sv_t is PRIO type */
38#define SV_KEYED 0x6 /* sv_t is KEYED type */
39#define SV_DEFAULT SV_FIFO
40
41
42static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
43 unsigned long timeout)
44{ 36{
45 DECLARE_WAITQUEUE(wait, current); 37 DECLARE_WAITQUEUE(wait, current);
46 38
47 add_wait_queue_exclusive(&sv->waiters, &wait); 39 add_wait_queue_exclusive(&sv->waiters, &wait);
48 __set_current_state(state); 40 __set_current_state(TASK_UNINTERRUPTIBLE);
49 spin_unlock(lock); 41 spin_unlock(lock);
50 42
51 schedule_timeout(timeout); 43 schedule();
52 44
53 remove_wait_queue(&sv->waiters, &wait); 45 remove_wait_queue(&sv->waiters, &wait);
54} 46}
@@ -58,13 +50,7 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state,
58#define sv_destroy(sv) \ 50#define sv_destroy(sv) \
59 /*NOTHING*/ 51 /*NOTHING*/
60#define sv_wait(sv, pri, lock, s) \ 52#define sv_wait(sv, pri, lock, s) \
61 _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) 53 _sv_wait(sv, lock)
62#define sv_wait_sig(sv, pri, lock, s) \
63 _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT)
64#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \
65 _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts))
66#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \
67 _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts))
68#define sv_signal(sv) \ 54#define sv_signal(sv) \
69 wake_up(&(sv)->waiters) 55 wake_up(&(sv)->waiters)
70#define sv_broadcast(sv) \ 56#define sv_broadcast(sv) \
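With the LIFO/PRIO/timeout/signal variants gone, sv.h is reduced to the classic exclusive-waitqueue idiom: enqueue, go TASK_UNINTERRUPTIBLE, drop the caller's spinlock, schedule, dequeue. A hedged usage sketch of the surviving API; 'obj' and its fields are illustrative, not taken from XFS:

/*
 * Sketch: waiter and waker for an sv_t. sv_wait() returns with the
 * spinlock dropped, so the waiter retakes it before re-checking.
 */
spin_lock(&obj->lock);
while (!obj->done) {
	sv_wait(&obj->sv, 0, &obj->lock, 0);	/* drops obj->lock, sleeps */
	spin_lock(&obj->lock);
}
spin_unlock(&obj->lock);

/* waker side */
spin_lock(&obj->lock);
obj->done = 1;
sv_signal(&obj->sv);	/* wakes one exclusive waiter */
spin_unlock(&obj->lock);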
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a44d68eb50b5..de3a198f771e 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -42,6 +42,40 @@
42#include <linux/pagevec.h> 42#include <linux/pagevec.h>
43#include <linux/writeback.h> 43#include <linux/writeback.h>
44 44
45
46/*
47 * Prime number of hash buckets since address is used as the key.
48 */
49#define NVSYNC 37
50#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
51static wait_queue_head_t xfs_ioend_wq[NVSYNC];
52
53void __init
54xfs_ioend_init(void)
55{
56 int i;
57
58 for (i = 0; i < NVSYNC; i++)
59 init_waitqueue_head(&xfs_ioend_wq[i]);
60}
61
62void
63xfs_ioend_wait(
64 xfs_inode_t *ip)
65{
66 wait_queue_head_t *wq = to_ioend_wq(ip);
67
68 wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
69}
70
71STATIC void
72xfs_ioend_wake(
73 xfs_inode_t *ip)
74{
75 if (atomic_dec_and_test(&ip->i_iocount))
76 wake_up(to_ioend_wq(ip));
77}
78
45STATIC void 79STATIC void
46xfs_count_page_state( 80xfs_count_page_state(
47 struct page *page, 81 struct page *page,
@@ -146,16 +180,25 @@ xfs_destroy_ioend(
146 xfs_ioend_t *ioend) 180 xfs_ioend_t *ioend)
147{ 181{
148 struct buffer_head *bh, *next; 182 struct buffer_head *bh, *next;
183 struct xfs_inode *ip = XFS_I(ioend->io_inode);
149 184
150 for (bh = ioend->io_buffer_head; bh; bh = next) { 185 for (bh = ioend->io_buffer_head; bh; bh = next) {
151 next = bh->b_private; 186 next = bh->b_private;
152 bh->b_end_io(bh, !ioend->io_error); 187 bh->b_end_io(bh, !ioend->io_error);
153 } 188 }
154 if (unlikely(ioend->io_error)) { 189
155 vn_ioerror(XFS_I(ioend->io_inode), ioend->io_error, 190 /*
156 __FILE__,__LINE__); 191 * Volume managers supporting multiple paths can send back ENODEV
192 * when the final path disappears. In this case continuing to fill
193 * the page cache with dirty data which cannot be written out is
194 * evil, so prevent that.
195 */
196 if (unlikely(ioend->io_error == -ENODEV)) {
197 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
198 __FILE__, __LINE__);
157 } 199 }
158 vn_iowake(XFS_I(ioend->io_inode)); 200
201 xfs_ioend_wake(ip);
159 mempool_free(ioend, xfs_ioend_pool); 202 mempool_free(ioend, xfs_ioend_pool);
160} 203}
161 204
@@ -191,7 +234,7 @@ xfs_setfilesize(
191 ip->i_d.di_size = isize; 234 ip->i_d.di_size = isize;
192 ip->i_update_core = 1; 235 ip->i_update_core = 1;
193 ip->i_update_size = 1; 236 ip->i_update_size = 1;
194 mark_inode_dirty_sync(ioend->io_inode); 237 xfs_mark_inode_dirty_sync(ip);
195 } 238 }
196 239
197 xfs_iunlock(ip, XFS_ILOCK_EXCL); 240 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -317,14 +360,9 @@ xfs_map_blocks(
317 xfs_iomap_t *mapp, 360 xfs_iomap_t *mapp,
318 int flags) 361 int flags)
319{ 362{
320 xfs_inode_t *ip = XFS_I(inode); 363 int nmaps = 1;
321 int error, nmaps = 1; 364
322 365 return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
323 error = xfs_iomap(ip, offset, count,
324 flags, mapp, &nmaps);
325 if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
326 xfs_iflags_set(ip, XFS_IMODIFIED);
327 return -error;
328} 366}
329 367
330STATIC_INLINE int 368STATIC_INLINE int
@@ -512,7 +550,7 @@ xfs_cancel_ioend(
512 unlock_buffer(bh); 550 unlock_buffer(bh);
513 } while ((bh = next_bh) != NULL); 551 } while ((bh = next_bh) != NULL);
514 552
515 vn_iowake(XFS_I(ioend->io_inode)); 553 xfs_ioend_wake(XFS_I(ioend->io_inode));
516 mempool_free(ioend, xfs_ioend_pool); 554 mempool_free(ioend, xfs_ioend_pool);
517 } while ((ioend = next) != NULL); 555 } while ((ioend = next) != NULL);
518} 556}
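The xfs_aops.c hunk replaces the old vnode iowait machinery with a small table of wait queues hashed by inode address; a prime bucket count (NVSYNC) spreads pointer values across buckets, and hash collisions cost only spurious wakeups because waiters re-check i_iocount. The same idea in isolation, with illustrative names:

/*
 * Sketch: hashed wait queues keyed by object address. Waiters do
 * wait_event(*obj_waitqueue(obj), <condition>); wakers call
 * wake_up(obj_waitqueue(obj)) after updating the condition.
 */
#define NBUCKETS 37	/* prime, as with NVSYNC above */
static wait_queue_head_t wq_table[NBUCKETS];

static wait_queue_head_t *obj_waitqueue(void *obj)
{
	return &wq_table[(unsigned long)obj % NBUCKETS];
}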
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 3ba0631a3818..7b26f5ff9692 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -43,4 +43,7 @@ typedef struct xfs_ioend {
43extern const struct address_space_operations xfs_address_space_operations; 43extern const struct address_space_operations xfs_address_space_operations;
44extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); 44extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
45 45
46extern void xfs_ioend_init(void);
47extern void xfs_ioend_wait(struct xfs_inode *);
48
46#endif /* __XFS_AOPS_H__ */ 49#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 36d5fcd3f593..cb329edc925b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -630,6 +630,29 @@ xfs_buf_get_flags(
630 return NULL; 630 return NULL;
631} 631}
632 632
633STATIC int
634_xfs_buf_read(
635 xfs_buf_t *bp,
636 xfs_buf_flags_t flags)
637{
638 int status;
639
640 XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags);
641
642 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
643 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
644
645 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
646 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
647 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
648 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
649
650 status = xfs_buf_iorequest(bp);
651 if (!status && !(flags & XBF_ASYNC))
652 status = xfs_buf_iowait(bp);
653 return status;
654}
655
633xfs_buf_t * 656xfs_buf_t *
634xfs_buf_read_flags( 657xfs_buf_read_flags(
635 xfs_buftarg_t *target, 658 xfs_buftarg_t *target,
@@ -646,7 +669,7 @@ xfs_buf_read_flags(
646 if (!XFS_BUF_ISDONE(bp)) { 669 if (!XFS_BUF_ISDONE(bp)) {
647 XB_TRACE(bp, "read", (unsigned long)flags); 670 XB_TRACE(bp, "read", (unsigned long)flags);
648 XFS_STATS_INC(xb_get_read); 671 XFS_STATS_INC(xb_get_read);
649 xfs_buf_iostart(bp, flags); 672 _xfs_buf_read(bp, flags);
650 } else if (flags & XBF_ASYNC) { 673 } else if (flags & XBF_ASYNC) {
651 XB_TRACE(bp, "read_async", (unsigned long)flags); 674 XB_TRACE(bp, "read_async", (unsigned long)flags);
652 /* 675 /*
@@ -1048,50 +1071,39 @@ xfs_buf_ioerror(
1048 XB_TRACE(bp, "ioerror", (unsigned long)error); 1071 XB_TRACE(bp, "ioerror", (unsigned long)error);
1049} 1072}
1050 1073
1051/*
1052 * Initiate I/O on a buffer, based on the flags supplied.
1053 * The b_iodone routine in the buffer supplied will only be called
1054 * when all of the subsidiary I/O requests, if any, have been completed.
1055 */
1056int 1074int
1057xfs_buf_iostart( 1075xfs_bawrite(
1058 xfs_buf_t *bp, 1076 void *mp,
1059 xfs_buf_flags_t flags) 1077 struct xfs_buf *bp)
1060{ 1078{
1061 int status = 0; 1079 XB_TRACE(bp, "bawrite", 0);
1062 1080
1063 XB_TRACE(bp, "iostart", (unsigned long)flags); 1081 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
1064 1082
1065 if (flags & XBF_DELWRI) { 1083 xfs_buf_delwri_dequeue(bp);
1066 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC);
1067 bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC);
1068 xfs_buf_delwri_queue(bp, 1);
1069 return 0;
1070 }
1071 1084
1072 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ 1085 bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
1073 XBF_READ_AHEAD | _XBF_RUN_QUEUES); 1086 bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
1074 bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \ 1087
1075 XBF_READ_AHEAD | _XBF_RUN_QUEUES); 1088 bp->b_mount = mp;
1089 bp->b_strat = xfs_bdstrat_cb;
1090 return xfs_bdstrat_cb(bp);
1091}
1076 1092
1077 BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL); 1093void
1094xfs_bdwrite(
1095 void *mp,
1096 struct xfs_buf *bp)
1097{
1098 XB_TRACE(bp, "bdwrite", 0);
1078 1099
1079 /* For writes allow an alternate strategy routine to precede 1100 bp->b_strat = xfs_bdstrat_cb;
1080 * the actual I/O request (which may not be issued at all in 1101 bp->b_mount = mp;
1081 * a shutdown situation, for example).
1082 */
1083 status = (flags & XBF_WRITE) ?
1084 xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp);
1085 1102
1086 /* Wait for I/O if we are not an async request. 1103 bp->b_flags &= ~XBF_READ;
1087 * Note: async I/O request completion will release the buffer, 1104 bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
1088 * and that can already be done by this point. So using the
1089 * buffer pointer from here on, after async I/O, is invalid.
1090 */
1091 if (!status && !(flags & XBF_ASYNC))
1092 status = xfs_buf_iowait(bp);
1093 1105
1094 return status; 1106 xfs_buf_delwri_queue(bp, 1);
1095} 1107}
1096 1108
1097STATIC_INLINE void 1109STATIC_INLINE void
@@ -1114,8 +1126,7 @@ xfs_buf_bio_end_io(
1114 unsigned int blocksize = bp->b_target->bt_bsize; 1126 unsigned int blocksize = bp->b_target->bt_bsize;
1115 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1127 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1116 1128
1117 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1129 xfs_buf_ioerror(bp, -error);
1118 bp->b_error = EIO;
1119 1130
1120 do { 1131 do {
1121 struct page *page = bvec->bv_page; 1132 struct page *page = bvec->bv_page;
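xfs_bawrite() and xfs_bdwrite() are now real functions here rather than inlines layered over xfs_buf_iostart(): the async path dequeues any pending delayed write and issues the buffer immediately through xfs_bdstrat_cb(), while the delayed path only flags the buffer and queues it. A hedged caller-side sketch; the wrapper and its policy flag are illustrative:

/*
 * Sketch: choosing between the two write paths exported above.
 * Assumes bp came from the buffer cache and mp is its mount.
 */
static int write_buf(struct xfs_mount *mp, xfs_buf_t *bp, int urgent)
{
	if (urgent)
		return xfs_bawrite(mp, bp);	/* issue async write now */
	xfs_bdwrite(mp, bp);			/* queue for delayed write */
	return 0;
}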
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 456519a088c7..288ae7c4c800 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -168,7 +168,7 @@ typedef struct xfs_buf {
168 struct completion b_iowait; /* queue for I/O waiters */ 168 struct completion b_iowait; /* queue for I/O waiters */
169 void *b_fspriv; 169 void *b_fspriv;
170 void *b_fspriv2; 170 void *b_fspriv2;
171 void *b_fspriv3; 171 struct xfs_mount *b_mount;
172 unsigned short b_error; /* error code on I/O */ 172 unsigned short b_error; /* error code on I/O */
173 unsigned int b_page_count; /* size of page array */ 173 unsigned int b_page_count; /* size of page array */
174 unsigned int b_offset; /* page offset in first page */ 174 unsigned int b_offset; /* page offset in first page */
@@ -214,9 +214,10 @@ extern void xfs_buf_lock(xfs_buf_t *);
214extern void xfs_buf_unlock(xfs_buf_t *); 214extern void xfs_buf_unlock(xfs_buf_t *);
215 215
216/* Buffer Read and Write Routines */ 216/* Buffer Read and Write Routines */
217extern int xfs_bawrite(void *mp, xfs_buf_t *bp);
218extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
217extern void xfs_buf_ioend(xfs_buf_t *, int); 219extern void xfs_buf_ioend(xfs_buf_t *, int);
218extern void xfs_buf_ioerror(xfs_buf_t *, int); 220extern void xfs_buf_ioerror(xfs_buf_t *, int);
219extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t);
220extern int xfs_buf_iorequest(xfs_buf_t *); 221extern int xfs_buf_iorequest(xfs_buf_t *);
221extern int xfs_buf_iowait(xfs_buf_t *); 222extern int xfs_buf_iowait(xfs_buf_t *);
222extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, 223extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t,
@@ -311,10 +312,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
311#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) 312#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
312#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) 313#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
313 314
314#define XFS_BUF_SHUT(bp) do { } while (0)
315#define XFS_BUF_UNSHUT(bp) do { } while (0)
316#define XFS_BUF_ISSHUT(bp) (0)
317
318#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) 315#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
319#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) 316#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
320#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) 317#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
@@ -334,8 +331,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
334#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) 331#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
335#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) 332#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
336#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) 333#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
337#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3)
338#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val))
339#define XFS_BUF_SET_START(bp) do { } while (0) 334#define XFS_BUF_SET_START(bp) do { } while (0)
340#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) 335#define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func))
341 336
@@ -366,14 +361,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
366#define XFS_BUF_TARGET(bp) ((bp)->b_target) 361#define XFS_BUF_TARGET(bp) ((bp)->b_target)
367#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) 362#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)
368 363
369static inline int xfs_bawrite(void *mp, xfs_buf_t *bp)
370{
371 bp->b_fspriv3 = mp;
372 bp->b_strat = xfs_bdstrat_cb;
373 xfs_buf_delwri_dequeue(bp);
374 return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
375}
376
377static inline void xfs_buf_relse(xfs_buf_t *bp) 364static inline void xfs_buf_relse(xfs_buf_t *bp)
378{ 365{
379 if (!bp->b_relse) 366 if (!bp->b_relse)
@@ -414,17 +401,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp)
414 return error; 401 return error;
415} 402}
416 403
417/*
418 * No error can be returned from xfs_buf_iostart for delwri
419 * buffers as they are queued and no I/O is issued.
420 */
421static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
422{
423 bp->b_strat = xfs_bdstrat_cb;
424 bp->b_fspriv3 = mp;
425 (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC);
426}
427
428#define XFS_bdstrat(bp) xfs_buf_iorequest(bp) 404#define XFS_bdstrat(bp) xfs_buf_iorequest(bp)
429 405
430#define xfs_iowait(bp) xfs_buf_iowait(bp) 406#define xfs_iowait(bp) xfs_buf_iowait(bp)
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
index 8c022cd0ad67..55bddf3b6091 100644
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ b/fs/xfs/linux-2.6/xfs_cred.h
@@ -25,12 +25,4 @@
25 */ 25 */
26typedef const struct cred cred_t; 26typedef const struct cred cred_t;
27 27
28extern cred_t *sys_cred;
29
30/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */
31static inline int capable_cred(cred_t *cr, int cid)
32{
33 return (cr == sys_cred) ? 1 : capable(cid);
34}
35
36#endif /* __XFS_CRED_H__ */ 28#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 7f7abec25e14..595751f78350 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -29,7 +29,6 @@
29#include "xfs_vnodeops.h" 29#include "xfs_vnodeops.h"
30#include "xfs_bmap_btree.h" 30#include "xfs_bmap_btree.h"
31#include "xfs_inode.h" 31#include "xfs_inode.h"
32#include "xfs_vfsops.h"
33 32
34/* 33/*
35 * Note that we only accept fileids which are long enough rather than allow 34 * Note that we only accept fileids which are long enough rather than allow
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 3fee790f138b..e14c4e3aea0c 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -36,89 +36,54 @@
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_error.h" 37#include "xfs_error.h"
38#include "xfs_rw.h" 38#include "xfs_rw.h"
39#include "xfs_ioctl32.h"
40#include "xfs_vnodeops.h" 39#include "xfs_vnodeops.h"
40#include "xfs_da_btree.h"
41#include "xfs_ioctl.h"
41 42
42#include <linux/dcache.h> 43#include <linux/dcache.h>
43#include <linux/smp_lock.h> 44#include <linux/smp_lock.h>
44 45
45static struct vm_operations_struct xfs_file_vm_ops; 46static struct vm_operations_struct xfs_file_vm_ops;
46 47
47STATIC_INLINE ssize_t 48STATIC ssize_t
48__xfs_file_read( 49xfs_file_aio_read(
49 struct kiocb *iocb, 50 struct kiocb *iocb,
50 const struct iovec *iov, 51 const struct iovec *iov,
51 unsigned long nr_segs, 52 unsigned long nr_segs,
52 int ioflags,
53 loff_t pos) 53 loff_t pos)
54{ 54{
55 struct file *file = iocb->ki_filp; 55 struct file *file = iocb->ki_filp;
56 int ioflags = IO_ISAIO;
56 57
57 BUG_ON(iocb->ki_pos != pos); 58 BUG_ON(iocb->ki_pos != pos);
58 if (unlikely(file->f_flags & O_DIRECT)) 59 if (unlikely(file->f_flags & O_DIRECT))
59 ioflags |= IO_ISDIRECT; 60 ioflags |= IO_ISDIRECT;
61 if (file->f_mode & FMODE_NOCMTIME)
62 ioflags |= IO_INVIS;
60 return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov, 63 return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
61 nr_segs, &iocb->ki_pos, ioflags); 64 nr_segs, &iocb->ki_pos, ioflags);
62} 65}
63 66
64STATIC ssize_t 67STATIC ssize_t
65xfs_file_aio_read( 68xfs_file_aio_write(
66 struct kiocb *iocb,
67 const struct iovec *iov,
68 unsigned long nr_segs,
69 loff_t pos)
70{
71 return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
72}
73
74STATIC ssize_t
75xfs_file_aio_read_invis(
76 struct kiocb *iocb,
77 const struct iovec *iov,
78 unsigned long nr_segs,
79 loff_t pos)
80{
81 return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
82}
83
84STATIC_INLINE ssize_t
85__xfs_file_write(
86 struct kiocb *iocb, 69 struct kiocb *iocb,
87 const struct iovec *iov, 70 const struct iovec *iov,
88 unsigned long nr_segs, 71 unsigned long nr_segs,
89 int ioflags,
90 loff_t pos) 72 loff_t pos)
91{ 73{
92 struct file *file = iocb->ki_filp; 74 struct file *file = iocb->ki_filp;
75 int ioflags = IO_ISAIO;
93 76
94 BUG_ON(iocb->ki_pos != pos); 77 BUG_ON(iocb->ki_pos != pos);
95 if (unlikely(file->f_flags & O_DIRECT)) 78 if (unlikely(file->f_flags & O_DIRECT))
96 ioflags |= IO_ISDIRECT; 79 ioflags |= IO_ISDIRECT;
80 if (file->f_mode & FMODE_NOCMTIME)
81 ioflags |= IO_INVIS;
97 return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs, 82 return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
98 &iocb->ki_pos, ioflags); 83 &iocb->ki_pos, ioflags);
99} 84}
100 85
101STATIC ssize_t 86STATIC ssize_t
102xfs_file_aio_write(
103 struct kiocb *iocb,
104 const struct iovec *iov,
105 unsigned long nr_segs,
106 loff_t pos)
107{
108 return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
109}
110
111STATIC ssize_t
112xfs_file_aio_write_invis(
113 struct kiocb *iocb,
114 const struct iovec *iov,
115 unsigned long nr_segs,
116 loff_t pos)
117{
118 return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
119}
120
121STATIC ssize_t
122xfs_file_splice_read( 87xfs_file_splice_read(
123 struct file *infilp, 88 struct file *infilp,
124 loff_t *ppos, 89 loff_t *ppos,
@@ -126,20 +91,13 @@ xfs_file_splice_read(
126 size_t len, 91 size_t len,
127 unsigned int flags) 92 unsigned int flags)
128{ 93{
129 return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), 94 int ioflags = 0;
130 infilp, ppos, pipe, len, flags, 0); 95
131} 96 if (infilp->f_mode & FMODE_NOCMTIME)
97 ioflags |= IO_INVIS;
132 98
133STATIC ssize_t
134xfs_file_splice_read_invis(
135 struct file *infilp,
136 loff_t *ppos,
137 struct pipe_inode_info *pipe,
138 size_t len,
139 unsigned int flags)
140{
141 return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), 99 return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
142 infilp, ppos, pipe, len, flags, IO_INVIS); 100 infilp, ppos, pipe, len, flags, ioflags);
143} 101}
144 102
145STATIC ssize_t 103STATIC ssize_t
@@ -150,30 +108,49 @@ xfs_file_splice_write(
150 size_t len, 108 size_t len,
151 unsigned int flags) 109 unsigned int flags)
152{ 110{
153 return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), 111 int ioflags = 0;
154 pipe, outfilp, ppos, len, flags, 0); 112
155} 113 if (outfilp->f_mode & FMODE_NOCMTIME)
114 ioflags |= IO_INVIS;
156 115
157STATIC ssize_t
158xfs_file_splice_write_invis(
159 struct pipe_inode_info *pipe,
160 struct file *outfilp,
161 loff_t *ppos,
162 size_t len,
163 unsigned int flags)
164{
165 return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), 116 return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
166 pipe, outfilp, ppos, len, flags, IO_INVIS); 117 pipe, outfilp, ppos, len, flags, ioflags);
167} 118}
168 119
169STATIC int 120STATIC int
170xfs_file_open( 121xfs_file_open(
171 struct inode *inode, 122 struct inode *inode,
172 struct file *filp) 123 struct file *file)
173{ 124{
174 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 125 if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
175 return -EFBIG; 126 return -EFBIG;
176 return -xfs_open(XFS_I(inode)); 127 if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
128 return -EIO;
129 return 0;
130}
131
132STATIC int
133xfs_dir_open(
134 struct inode *inode,
135 struct file *file)
136{
137 struct xfs_inode *ip = XFS_I(inode);
138 int mode;
139 int error;
140
141 error = xfs_file_open(inode, file);
142 if (error)
143 return error;
144
145 /*
146 * If there are any blocks, read-ahead block 0 as we're almost
147 * certain to have the next operation be a read there.
148 */
149 mode = xfs_ilock_map_shared(ip);
150 if (ip->i_d.di_nextents > 0)
151 xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
152 xfs_iunlock(ip, mode);
153 return 0;
177} 154}
178 155
179STATIC int 156STATIC int
@@ -227,7 +204,7 @@ xfs_file_readdir(
227 * point we can change the ->readdir prototype to include the 204 * point we can change the ->readdir prototype to include the
228 * buffer size. 205 * buffer size.
229 */ 206 */
230 bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size); 207 bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size);
231 208
232 error = xfs_readdir(ip, dirent, bufsize, 209 error = xfs_readdir(ip, dirent, bufsize,
233 (xfs_off_t *)&filp->f_pos, filldir); 210 (xfs_off_t *)&filp->f_pos, filldir);
@@ -248,48 +225,6 @@ xfs_file_mmap(
248 return 0; 225 return 0;
249} 226}
250 227
251STATIC long
252xfs_file_ioctl(
253 struct file *filp,
254 unsigned int cmd,
255 unsigned long p)
256{
257 int error;
258 struct inode *inode = filp->f_path.dentry->d_inode;
259
260 error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
261 xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
262
263 /* NOTE: some of the ioctl's return positive #'s as a
264 * byte count indicating success, such as
265 * readlink_by_handle. So we don't "sign flip"
266 * like most other routines. This means true
267 * errors need to be returned as a negative value.
268 */
269 return error;
270}
271
272STATIC long
273xfs_file_ioctl_invis(
274 struct file *filp,
275 unsigned int cmd,
276 unsigned long p)
277{
278 int error;
279 struct inode *inode = filp->f_path.dentry->d_inode;
280
281 error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
282 xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
283
284 /* NOTE: some of the ioctl's return positive #'s as a
285 * byte count indicating success, such as
286 * readlink_by_handle. So we don't "sign flip"
287 * like most other routines. This means true
288 * errors need to be returned as a negative value.
289 */
290 return error;
291}
292
293/* 228/*
294 * mmap()d file has taken write protection fault and is being made 229 * mmap()d file has taken write protection fault and is being made
295 * writable. We can set the page state up correctly for a writable 230 * writable. We can set the page state up correctly for a writable
@@ -325,26 +260,8 @@ const struct file_operations xfs_file_operations = {
325#endif 260#endif
326}; 261};
327 262
328const struct file_operations xfs_invis_file_operations = {
329 .llseek = generic_file_llseek,
330 .read = do_sync_read,
331 .write = do_sync_write,
332 .aio_read = xfs_file_aio_read_invis,
333 .aio_write = xfs_file_aio_write_invis,
334 .splice_read = xfs_file_splice_read_invis,
335 .splice_write = xfs_file_splice_write_invis,
336 .unlocked_ioctl = xfs_file_ioctl_invis,
337#ifdef CONFIG_COMPAT
338 .compat_ioctl = xfs_file_compat_invis_ioctl,
339#endif
340 .mmap = xfs_file_mmap,
341 .open = xfs_file_open,
342 .release = xfs_file_release,
343 .fsync = xfs_file_fsync,
344};
345
346
347const struct file_operations xfs_dir_file_operations = { 263const struct file_operations xfs_dir_file_operations = {
264 .open = xfs_dir_open,
348 .read = generic_read_dir, 265 .read = generic_read_dir,
349 .readdir = xfs_file_readdir, 266 .readdir = xfs_file_readdir,
350 .llseek = generic_file_llseek, 267 .llseek = generic_file_llseek,
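The xfs_file.c rework folds the *_invis variants into the regular file operations: handle-based opens set FMODE_NOCMTIME on the struct file, and each entry point derives its ioflags from the open flags instead of needing a second operations table. The derivation each aio entry point repeats, pulled out as a hypothetical helper (the splice paths do the same but start from 0 rather than IO_ISAIO):

/*
 * Sketch: per-open ioflags translation used by the entry points
 * above. This helper is not in the patch; each path open-codes it.
 */
static int xfs_ioflags_from_file(struct file *file)
{
	int ioflags = IO_ISAIO;

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;	/* "invisible" handle-based open */
	return ioflags;
}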
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 36caa6d957df..5aeb77776961 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -24,6 +24,10 @@ int fs_noerr(void) { return 0; }
24int fs_nosys(void) { return ENOSYS; } 24int fs_nosys(void) { return ENOSYS; }
25void fs_noval(void) { return; } 25void fs_noval(void) { return; }
26 26
27/*
28 * note: all filemap functions return negative error codes. These
29 * need to be inverted before returning to the xfs core functions.
30 */
27void 31void
28xfs_tosspages( 32xfs_tosspages(
29 xfs_inode_t *ip, 33 xfs_inode_t *ip,
@@ -53,7 +57,7 @@ xfs_flushinval_pages(
53 if (!ret) 57 if (!ret)
54 truncate_inode_pages(mapping, first); 58 truncate_inode_pages(mapping, first);
55 } 59 }
56 return ret; 60 return -ret;
57} 61}
58 62
59int 63int
@@ -72,10 +76,23 @@ xfs_flush_pages(
72 xfs_iflags_clear(ip, XFS_ITRUNCATED); 76 xfs_iflags_clear(ip, XFS_ITRUNCATED);
73 ret = filemap_fdatawrite(mapping); 77 ret = filemap_fdatawrite(mapping);
74 if (flags & XFS_B_ASYNC) 78 if (flags & XFS_B_ASYNC)
75 return ret; 79 return -ret;
76 ret2 = filemap_fdatawait(mapping); 80 ret2 = filemap_fdatawait(mapping);
77 if (!ret) 81 if (!ret)
78 ret = ret2; 82 ret = ret2;
79 } 83 }
80 return ret; 84 return -ret;
85}
86
87int
88xfs_wait_on_pages(
89 xfs_inode_t *ip,
90 xfs_off_t first,
91 xfs_off_t last)
92{
93 struct address_space *mapping = VFS_I(ip)->i_mapping;
94
95 if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
96 return -filemap_fdatawait(mapping);
97 return 0;
81} 98}
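The new comment at the top of xfs_fs_subr.c states the convention these wrappers enforce: Linux filemap helpers return negative errnos, the XFS core expects positive ones, so every return is negated at the boundary. One hypothetical wrapper showing just the sign flip:

/*
 * Sketch: bridging the error-sign convention described above.
 */
static int example_flush(struct address_space *mapping)
{
	int ret = filemap_fdatawrite(mapping);	/* 0 or negative errno */

	return -ret;	/* 0 or positive errno for the XFS core */
}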
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index ef90e64641e6..2ae8b1ccb02e 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -26,7 +26,6 @@
26 */ 26 */
27xfs_param_t xfs_params = { 27xfs_param_t xfs_params = {
28 /* MIN DFLT MAX */ 28 /* MIN DFLT MAX */
29 .restrict_chown = { 0, 1, 1 },
30 .sgid_inherit = { 0, 0, 1 }, 29 .sgid_inherit = { 0, 0, 1 },
31 .symlink_mode = { 0, 0, 1 }, 30 .symlink_mode = { 0, 0, 1 },
32 .panic_mask = { 0, 0, 255 }, 31 .panic_mask = { 0, 0, 255 },
@@ -43,10 +42,3 @@ xfs_param_t xfs_params = {
43 .inherit_nodfrg = { 0, 1, 1 }, 42 .inherit_nodfrg = { 0, 1, 1 },
44 .fstrm_timer = { 1, 30*100, 3600*100}, 43 .fstrm_timer = { 1, 30*100, 3600*100},
45}; 44};
46
47/*
48 * Global system credential structure.
49 */
50static cred_t sys_cred_val;
51cred_t *sys_cred = &sys_cred_val;
52
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
index 6eda8a3eb6f1..69f71caf061c 100644
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ b/fs/xfs/linux-2.6/xfs_globals.h
@@ -19,6 +19,5 @@
19#define __XFS_GLOBALS_H__ 19#define __XFS_GLOBALS_H__
20 20
21extern uint64_t xfs_panic_mask; /* set to cause more panics */ 21extern uint64_t xfs_panic_mask; /* set to cause more panics */
22extern cred_t *sys_cred;
23 22
24#endif /* __XFS_GLOBALS_H__ */ 23#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 281cbd5a25cf..67205f6198ba 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -68,26 +68,22 @@
68 * XFS_IOC_PATH_TO_HANDLE 68 * XFS_IOC_PATH_TO_HANDLE
69 * returns full handle for a path 69 * returns full handle for a path
70 */ 70 */
71STATIC int 71int
72xfs_find_handle( 72xfs_find_handle(
73 unsigned int cmd, 73 unsigned int cmd,
74 void __user *arg) 74 xfs_fsop_handlereq_t *hreq)
75{ 75{
76 int hsize; 76 int hsize;
77 xfs_handle_t handle; 77 xfs_handle_t handle;
78 xfs_fsop_handlereq_t hreq;
79 struct inode *inode; 78 struct inode *inode;
80 79
81 if (copy_from_user(&hreq, arg, sizeof(hreq)))
82 return -XFS_ERROR(EFAULT);
83
84 memset((char *)&handle, 0, sizeof(handle)); 80 memset((char *)&handle, 0, sizeof(handle));
85 81
86 switch (cmd) { 82 switch (cmd) {
87 case XFS_IOC_PATH_TO_FSHANDLE: 83 case XFS_IOC_PATH_TO_FSHANDLE:
88 case XFS_IOC_PATH_TO_HANDLE: { 84 case XFS_IOC_PATH_TO_HANDLE: {
89 struct path path; 85 struct path path;
90 int error = user_lpath((const char __user *)hreq.path, &path); 86 int error = user_lpath((const char __user *)hreq->path, &path);
91 if (error) 87 if (error)
92 return error; 88 return error;
93 89
@@ -101,7 +97,7 @@ xfs_find_handle(
101 case XFS_IOC_FD_TO_HANDLE: { 97 case XFS_IOC_FD_TO_HANDLE: {
102 struct file *file; 98 struct file *file;
103 99
104 file = fget(hreq.fd); 100 file = fget(hreq->fd);
105 if (!file) 101 if (!file)
106 return -EBADF; 102 return -EBADF;
107 103
@@ -158,8 +154,8 @@ xfs_find_handle(
158 } 154 }
159 155
160 /* now copy our handle into the user buffer & write out the size */ 156 /* now copy our handle into the user buffer & write out the size */
161 if (copy_to_user(hreq.ohandle, &handle, hsize) || 157 if (copy_to_user(hreq->ohandle, &handle, hsize) ||
162 copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { 158 copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) {
163 iput(inode); 159 iput(inode);
164 return -XFS_ERROR(EFAULT); 160 return -XFS_ERROR(EFAULT);
165 } 161 }
@@ -249,10 +245,10 @@ xfs_vget_fsop_handlereq(
249 return 0; 245 return 0;
250} 246}
251 247
252STATIC int 248int
253xfs_open_by_handle( 249xfs_open_by_handle(
254 xfs_mount_t *mp, 250 xfs_mount_t *mp,
255 void __user *arg, 251 xfs_fsop_handlereq_t *hreq,
256 struct file *parfilp, 252 struct file *parfilp,
257 struct inode *parinode) 253 struct inode *parinode)
258{ 254{
@@ -263,14 +259,11 @@ xfs_open_by_handle(
263 struct file *filp; 259 struct file *filp;
264 struct inode *inode; 260 struct inode *inode;
265 struct dentry *dentry; 261 struct dentry *dentry;
266 xfs_fsop_handlereq_t hreq;
267 262
268 if (!capable(CAP_SYS_ADMIN)) 263 if (!capable(CAP_SYS_ADMIN))
269 return -XFS_ERROR(EPERM); 264 return -XFS_ERROR(EPERM);
270 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
271 return -XFS_ERROR(EFAULT);
272 265
273 error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); 266 error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode);
274 if (error) 267 if (error)
275 return -error; 268 return -error;
276 269
@@ -281,10 +274,10 @@ xfs_open_by_handle(
281 } 274 }
282 275
283#if BITS_PER_LONG != 32 276#if BITS_PER_LONG != 32
284 hreq.oflags |= O_LARGEFILE; 277 hreq->oflags |= O_LARGEFILE;
285#endif 278#endif
286 /* Put open permission in namei format. */ 279 /* Put open permission in namei format. */
287 permflag = hreq.oflags; 280 permflag = hreq->oflags;
288 if ((permflag+1) & O_ACCMODE) 281 if ((permflag+1) & O_ACCMODE)
289 permflag++; 282 permflag++;
290 if (permflag & O_TRUNC) 283 if (permflag & O_TRUNC)
@@ -322,15 +315,16 @@ xfs_open_by_handle(
322 mntget(parfilp->f_path.mnt); 315 mntget(parfilp->f_path.mnt);
323 316
324 /* Create file pointer. */ 317 /* Create file pointer. */
325 filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags, cred); 318 filp = dentry_open(dentry, parfilp->f_path.mnt, hreq->oflags, cred);
326 if (IS_ERR(filp)) { 319 if (IS_ERR(filp)) {
327 put_unused_fd(new_fd); 320 put_unused_fd(new_fd);
328 return -XFS_ERROR(-PTR_ERR(filp)); 321 return -XFS_ERROR(-PTR_ERR(filp));
329 } 322 }
323
330 if (inode->i_mode & S_IFREG) { 324 if (inode->i_mode & S_IFREG) {
331 /* invisible operation should not change atime */ 325 /* invisible operation should not change atime */
332 filp->f_flags |= O_NOATIME; 326 filp->f_flags |= O_NOATIME;
333 filp->f_op = &xfs_invis_file_operations; 327 filp->f_mode |= FMODE_NOCMTIME;
334 } 328 }
335 329
336 fd_install(new_fd, filp); 330 fd_install(new_fd, filp);
@@ -363,24 +357,21 @@ do_readlink(
363} 357}
364 358
365 359
366STATIC int 360int
367xfs_readlink_by_handle( 361xfs_readlink_by_handle(
368 xfs_mount_t *mp, 362 xfs_mount_t *mp,
369 void __user *arg, 363 xfs_fsop_handlereq_t *hreq,
370 struct inode *parinode) 364 struct inode *parinode)
371{ 365{
372 struct inode *inode; 366 struct inode *inode;
373 xfs_fsop_handlereq_t hreq;
374 __u32 olen; 367 __u32 olen;
375 void *link; 368 void *link;
376 int error; 369 int error;
377 370
378 if (!capable(CAP_SYS_ADMIN)) 371 if (!capable(CAP_SYS_ADMIN))
379 return -XFS_ERROR(EPERM); 372 return -XFS_ERROR(EPERM);
380 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
381 return -XFS_ERROR(EFAULT);
382 373
383 error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); 374 error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode);
384 if (error) 375 if (error)
385 return -error; 376 return -error;
386 377
@@ -390,7 +381,7 @@ xfs_readlink_by_handle(
390 goto out_iput; 381 goto out_iput;
391 } 382 }
392 383
393 if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) { 384 if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
394 error = -XFS_ERROR(EFAULT); 385 error = -XFS_ERROR(EFAULT);
395 goto out_iput; 386 goto out_iput;
396 } 387 }
@@ -402,7 +393,7 @@ xfs_readlink_by_handle(
402 error = -xfs_readlink(XFS_I(inode), link); 393 error = -xfs_readlink(XFS_I(inode), link);
403 if (error) 394 if (error)
404 goto out_kfree; 395 goto out_kfree;
405 error = do_readlink(hreq.ohandle, olen, link); 396 error = do_readlink(hreq->ohandle, olen, link);
406 if (error) 397 if (error)
407 goto out_kfree; 398 goto out_kfree;
408 399
@@ -501,7 +492,7 @@ xfs_attrlist_by_handle(
501 return -error; 492 return -error;
502} 493}
503 494
504STATIC int 495int
505xfs_attrmulti_attr_get( 496xfs_attrmulti_attr_get(
506 struct inode *inode, 497 struct inode *inode,
507 char *name, 498 char *name,
@@ -530,7 +521,7 @@ xfs_attrmulti_attr_get(
530 return error; 521 return error;
531} 522}
532 523
533STATIC int 524int
534xfs_attrmulti_attr_set( 525xfs_attrmulti_attr_set(
535 struct inode *inode, 526 struct inode *inode,
536 char *name, 527 char *name,
@@ -560,7 +551,7 @@ xfs_attrmulti_attr_set(
560 return error; 551 return error;
561} 552}
562 553
563STATIC int 554int
564xfs_attrmulti_attr_remove( 555xfs_attrmulti_attr_remove(
565 struct inode *inode, 556 struct inode *inode,
566 char *name, 557 char *name,
@@ -662,19 +653,26 @@ xfs_attrmulti_by_handle(
662 return -error; 653 return -error;
663} 654}
664 655
665STATIC int 656int
666xfs_ioc_space( 657xfs_ioc_space(
667 struct xfs_inode *ip, 658 struct xfs_inode *ip,
668 struct inode *inode, 659 struct inode *inode,
669 struct file *filp, 660 struct file *filp,
670 int ioflags, 661 int ioflags,
671 unsigned int cmd, 662 unsigned int cmd,
672 void __user *arg) 663 xfs_flock64_t *bf)
673{ 664{
674 xfs_flock64_t bf;
675 int attr_flags = 0; 665 int attr_flags = 0;
676 int error; 666 int error;
677 667
668 /*
669 * Only allow the sys admin to reserve space unless
670 * unwritten extents are enabled.
671 */
672 if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
673 !capable(CAP_SYS_ADMIN))
674 return -XFS_ERROR(EPERM);
675
678 if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) 676 if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
679 return -XFS_ERROR(EPERM); 677 return -XFS_ERROR(EPERM);
680 678
@@ -684,16 +682,12 @@ xfs_ioc_space(
684 if (!S_ISREG(inode->i_mode)) 682 if (!S_ISREG(inode->i_mode))
685 return -XFS_ERROR(EINVAL); 683 return -XFS_ERROR(EINVAL);
686 684
687 if (copy_from_user(&bf, arg, sizeof(bf)))
688 return -XFS_ERROR(EFAULT);
689
690 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 685 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
691 attr_flags |= XFS_ATTR_NONBLOCK; 686 attr_flags |= XFS_ATTR_NONBLOCK;
692 if (ioflags & IO_INVIS) 687 if (ioflags & IO_INVIS)
693 attr_flags |= XFS_ATTR_DMI; 688 attr_flags |= XFS_ATTR_DMI;
694 689
695 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, 690 error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
696 NULL, attr_flags);
697 return -error; 691 return -error;
698} 692}
699 693
@@ -1105,10 +1099,6 @@ xfs_ioctl_setattr(
1105 1099
1106 /* 1100 /*
1107 * Change file ownership. Must be the owner or privileged. 1101 * Change file ownership. Must be the owner or privileged.
1108 * If the system was configured with the "restricted_chown"
1109 * option, the owner is not permitted to give away the file,
1110 * and can change the group id only to a group of which he
1111 * or she is a member.
1112 */ 1102 */
1113 if (mask & FSX_PROJID) { 1103 if (mask & FSX_PROJID) {
1114 /* 1104 /*
@@ -1137,7 +1127,7 @@ xfs_ioctl_setattr(
1137 * the superblock version number since projids didn't 1127 * the superblock version number since projids didn't
1138 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. 1128 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
1139 */ 1129 */
1140 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) 1130 if (ip->i_d.di_version == 1)
1141 xfs_bump_ino_vers2(tp, ip); 1131 xfs_bump_ino_vers2(tp, ip);
1142 } 1132 }
1143 1133
@@ -1256,43 +1246,67 @@ xfs_ioc_setxflags(
1256} 1246}
1257 1247
1258STATIC int 1248STATIC int
1249xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
1250{
1251 struct getbmap __user *base = *ap;
1252
1253 /* copy only getbmap portion (not getbmapx) */
1254 if (copy_to_user(base, bmv, sizeof(struct getbmap)))
1255 return XFS_ERROR(EFAULT);
1256
1257 *ap += sizeof(struct getbmap);
1258 return 0;
1259}
1260
1261STATIC int
1259xfs_ioc_getbmap( 1262xfs_ioc_getbmap(
1260 struct xfs_inode *ip, 1263 struct xfs_inode *ip,
1261 int ioflags, 1264 int ioflags,
1262 unsigned int cmd, 1265 unsigned int cmd,
1263 void __user *arg) 1266 void __user *arg)
1264{ 1267{
1265 struct getbmap bm; 1268 struct getbmapx bmx;
1266 int iflags;
1267 int error; 1269 int error;
1268 1270
1269 if (copy_from_user(&bm, arg, sizeof(bm))) 1271 if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
1270 return -XFS_ERROR(EFAULT); 1272 return -XFS_ERROR(EFAULT);
1271 1273
1272 if (bm.bmv_count < 2) 1274 if (bmx.bmv_count < 2)
1273 return -XFS_ERROR(EINVAL); 1275 return -XFS_ERROR(EINVAL);
1274 1276
1275 iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); 1277 bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
1276 if (ioflags & IO_INVIS) 1278 if (ioflags & IO_INVIS)
1277 iflags |= BMV_IF_NO_DMAPI_READ; 1279 bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
1278 1280
1279 error = xfs_getbmap(ip, &bm, (struct getbmap __user *)arg+1, iflags); 1281 error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
1282 (struct getbmap *)arg+1);
1280 if (error) 1283 if (error)
1281 return -error; 1284 return -error;
1282 1285
1283 if (copy_to_user(arg, &bm, sizeof(bm))) 1286 /* copy back header - only size of getbmap */
1287 if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
1284 return -XFS_ERROR(EFAULT); 1288 return -XFS_ERROR(EFAULT);
1285 return 0; 1289 return 0;
1286} 1290}
1287 1291
1288STATIC int 1292STATIC int
1293xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
1294{
1295 struct getbmapx __user *base = *ap;
1296
1297 if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
1298 return XFS_ERROR(EFAULT);
1299
1300 *ap += sizeof(struct getbmapx);
1301 return 0;
1302}
1303
1304STATIC int
1289xfs_ioc_getbmapx( 1305xfs_ioc_getbmapx(
1290 struct xfs_inode *ip, 1306 struct xfs_inode *ip,
1291 void __user *arg) 1307 void __user *arg)
1292{ 1308{
1293 struct getbmapx bmx; 1309 struct getbmapx bmx;
1294 struct getbmap bm;
1295 int iflags;
1296 int error; 1310 int error;
1297 1311
1298 if (copy_from_user(&bmx, arg, sizeof(bmx))) 1312 if (copy_from_user(&bmx, arg, sizeof(bmx)))
@@ -1301,46 +1315,46 @@ xfs_ioc_getbmapx(
1301 if (bmx.bmv_count < 2) 1315 if (bmx.bmv_count < 2)
1302 return -XFS_ERROR(EINVAL); 1316 return -XFS_ERROR(EINVAL);
1303 1317
1304 /* 1318 if (bmx.bmv_iflags & (~BMV_IF_VALID))
1305 * Map input getbmapx structure to a getbmap
1306 * structure for xfs_getbmap.
1307 */
1308 GETBMAP_CONVERT(bmx, bm);
1309
1310 iflags = bmx.bmv_iflags;
1311
1312 if (iflags & (~BMV_IF_VALID))
1313 return -XFS_ERROR(EINVAL); 1319 return -XFS_ERROR(EINVAL);
1314 1320
1315 iflags |= BMV_IF_EXTENDED; 1321 error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
1316 1322 (struct getbmapx *)arg+1);
1317 error = xfs_getbmap(ip, &bm, (struct getbmapx __user *)arg+1, iflags);
1318 if (error) 1323 if (error)
1319 return -error; 1324 return -error;
1320 1325
1321 GETBMAP_CONVERT(bm, bmx); 1326 /* copy back header */
1322 1327 if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
1323 if (copy_to_user(arg, &bmx, sizeof(bmx)))
1324 return -XFS_ERROR(EFAULT); 1328 return -XFS_ERROR(EFAULT);
1325 1329
1326 return 0; 1330 return 0;
1327} 1331}
1328 1332
1329int 1333/*
1330xfs_ioctl( 1334 * Note: some of the ioctls return positive numbers as a
1331 xfs_inode_t *ip, 1335 * byte count indicating success, such as readlink_by_handle.
1336 * So we don't "sign flip" like most other routines. This means
1337 * true errors need to be returned as a negative value.
1338 */
1339long
1340xfs_file_ioctl(
1332 struct file *filp, 1341 struct file *filp,
1333 int ioflags,
1334 unsigned int cmd, 1342 unsigned int cmd,
1335 void __user *arg) 1343 unsigned long p)
1336{ 1344{
1337 struct inode *inode = filp->f_path.dentry->d_inode; 1345 struct inode *inode = filp->f_path.dentry->d_inode;
1338 xfs_mount_t *mp = ip->i_mount; 1346 struct xfs_inode *ip = XFS_I(inode);
1347 struct xfs_mount *mp = ip->i_mount;
1348 void __user *arg = (void __user *)p;
1349 int ioflags = 0;
1339 int error; 1350 int error;
1340 1351
1341 xfs_itrace_entry(XFS_I(inode)); 1352 if (filp->f_mode & FMODE_NOCMTIME)
1342 switch (cmd) { 1353 ioflags |= IO_INVIS;
1343 1354
1355 xfs_itrace_entry(ip);
1356
1357 switch (cmd) {
1344 case XFS_IOC_ALLOCSP: 1358 case XFS_IOC_ALLOCSP:
1345 case XFS_IOC_FREESP: 1359 case XFS_IOC_FREESP:
1346 case XFS_IOC_RESVSP: 1360 case XFS_IOC_RESVSP:
@@ -1348,17 +1362,13 @@ xfs_ioctl(
1348 case XFS_IOC_ALLOCSP64: 1362 case XFS_IOC_ALLOCSP64:
1349 case XFS_IOC_FREESP64: 1363 case XFS_IOC_FREESP64:
1350 case XFS_IOC_RESVSP64: 1364 case XFS_IOC_RESVSP64:
1351 case XFS_IOC_UNRESVSP64: 1365 case XFS_IOC_UNRESVSP64: {
1352 /* 1366 xfs_flock64_t bf;
1353 * Only allow the sys admin to reserve space unless
1354 * unwritten extents are enabled.
1355 */
1356 if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
1357 !capable(CAP_SYS_ADMIN))
1358 return -EPERM;
1359
1360 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
1361 1367
1368 if (copy_from_user(&bf, arg, sizeof(bf)))
1369 return -XFS_ERROR(EFAULT);
1370 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
1371 }
1362 case XFS_IOC_DIOINFO: { 1372 case XFS_IOC_DIOINFO: {
1363 struct dioattr da; 1373 struct dioattr da;
1364 xfs_buftarg_t *target = 1374 xfs_buftarg_t *target =
@@ -1418,18 +1428,30 @@ xfs_ioctl(
1418 1428
1419 case XFS_IOC_FD_TO_HANDLE: 1429 case XFS_IOC_FD_TO_HANDLE:
1420 case XFS_IOC_PATH_TO_HANDLE: 1430 case XFS_IOC_PATH_TO_HANDLE:
1421 case XFS_IOC_PATH_TO_FSHANDLE: 1431 case XFS_IOC_PATH_TO_FSHANDLE: {
1422 return xfs_find_handle(cmd, arg); 1432 xfs_fsop_handlereq_t hreq;
1423 1433
1424 case XFS_IOC_OPEN_BY_HANDLE: 1434 if (copy_from_user(&hreq, arg, sizeof(hreq)))
1425 return xfs_open_by_handle(mp, arg, filp, inode); 1435 return -XFS_ERROR(EFAULT);
1436 return xfs_find_handle(cmd, &hreq);
1437 }
1438 case XFS_IOC_OPEN_BY_HANDLE: {
1439 xfs_fsop_handlereq_t hreq;
1426 1440
1441 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
1442 return -XFS_ERROR(EFAULT);
1443 return xfs_open_by_handle(mp, &hreq, filp, inode);
1444 }
1427 case XFS_IOC_FSSETDM_BY_HANDLE: 1445 case XFS_IOC_FSSETDM_BY_HANDLE:
1428 return xfs_fssetdm_by_handle(mp, arg, inode); 1446 return xfs_fssetdm_by_handle(mp, arg, inode);
1429 1447
1430 case XFS_IOC_READLINK_BY_HANDLE: 1448 case XFS_IOC_READLINK_BY_HANDLE: {
1431 return xfs_readlink_by_handle(mp, arg, inode); 1449 xfs_fsop_handlereq_t hreq;
1432 1450
1451 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
1452 return -XFS_ERROR(EFAULT);
1453 return xfs_readlink_by_handle(mp, &hreq, inode);
1454 }
1433 case XFS_IOC_ATTRLIST_BY_HANDLE: 1455 case XFS_IOC_ATTRLIST_BY_HANDLE:
1434 return xfs_attrlist_by_handle(mp, arg, inode); 1456 return xfs_attrlist_by_handle(mp, arg, inode);
1435 1457
@@ -1437,7 +1459,11 @@ xfs_ioctl(
1437 return xfs_attrmulti_by_handle(mp, arg, filp, inode); 1459 return xfs_attrmulti_by_handle(mp, arg, filp, inode);
1438 1460
1439 case XFS_IOC_SWAPEXT: { 1461 case XFS_IOC_SWAPEXT: {
1440 error = xfs_swapext((struct xfs_swapext __user *)arg); 1462 struct xfs_swapext sxp;
1463
1464 if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
1465 return -XFS_ERROR(EFAULT);
1466 error = xfs_swapext(&sxp);
1441 return -error; 1467 return -error;
1442 } 1468 }
1443 1469
@@ -1493,9 +1519,6 @@ xfs_ioctl(
1493 case XFS_IOC_FSGROWFSDATA: { 1519 case XFS_IOC_FSGROWFSDATA: {
1494 xfs_growfs_data_t in; 1520 xfs_growfs_data_t in;
1495 1521
1496 if (!capable(CAP_SYS_ADMIN))
1497 return -EPERM;
1498
1499 if (copy_from_user(&in, arg, sizeof(in))) 1522 if (copy_from_user(&in, arg, sizeof(in)))
1500 return -XFS_ERROR(EFAULT); 1523 return -XFS_ERROR(EFAULT);
1501 1524
@@ -1506,9 +1529,6 @@ xfs_ioctl(
1506 case XFS_IOC_FSGROWFSLOG: { 1529 case XFS_IOC_FSGROWFSLOG: {
1507 xfs_growfs_log_t in; 1530 xfs_growfs_log_t in;
1508 1531
1509 if (!capable(CAP_SYS_ADMIN))
1510 return -EPERM;
1511
1512 if (copy_from_user(&in, arg, sizeof(in))) 1532 if (copy_from_user(&in, arg, sizeof(in)))
1513 return -XFS_ERROR(EFAULT); 1533 return -XFS_ERROR(EFAULT);
1514 1534
@@ -1519,9 +1539,6 @@ xfs_ioctl(
1519 case XFS_IOC_FSGROWFSRT: { 1539 case XFS_IOC_FSGROWFSRT: {
1520 xfs_growfs_rt_t in; 1540 xfs_growfs_rt_t in;
1521 1541
1522 if (!capable(CAP_SYS_ADMIN))
1523 return -EPERM;
1524
1525 if (copy_from_user(&in, arg, sizeof(in))) 1542 if (copy_from_user(&in, arg, sizeof(in)))
1526 return -XFS_ERROR(EFAULT); 1543 return -XFS_ERROR(EFAULT);
1527 1544
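A recurring shape in the xfs_ioctl.c rework above: xfs_file_ioctl() copies each command's argument structure in itself and hands a kernel pointer to a typed helper (xfs_find_handle(), xfs_open_by_handle(), xfs_ioc_space(), ...), which is what lets xfs_ioctl32.c build the same structure from a compat layout and share the helper. One case in isolation, with XFS_IOC_EXAMPLE as a hypothetical command:

/*
 * Sketch: per-command copy-in at the dispatch layer. The typed
 * helper never touches user space for its header argument.
 */
switch (cmd) {
case XFS_IOC_EXAMPLE: {		/* hypothetical command number */
	xfs_fsop_handlereq_t hreq;

	if (copy_from_user(&hreq, arg, sizeof(hreq)))
		return -XFS_ERROR(EFAULT);
	return xfs_find_handle(cmd, &hreq);
}
}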
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
new file mode 100644
index 000000000000..8c16bf2d7e03
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ioctl.h
@@ -0,0 +1,82 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_IOCTL_H__
19#define __XFS_IOCTL_H__
20
21extern int
22xfs_ioc_space(
23 struct xfs_inode *ip,
24 struct inode *inode,
25 struct file *filp,
26 int ioflags,
27 unsigned int cmd,
28 xfs_flock64_t *bf);
29
30extern int
31xfs_find_handle(
32 unsigned int cmd,
33 xfs_fsop_handlereq_t *hreq);
34
35extern int
36xfs_open_by_handle(
37 xfs_mount_t *mp,
38 xfs_fsop_handlereq_t *hreq,
39 struct file *parfilp,
40 struct inode *parinode);
41
42extern int
43xfs_readlink_by_handle(
44 xfs_mount_t *mp,
45 xfs_fsop_handlereq_t *hreq,
46 struct inode *parinode);
47
48extern int
49xfs_attrmulti_attr_get(
50 struct inode *inode,
51 char *name,
52 char __user *ubuf,
53 __uint32_t *len,
54 __uint32_t flags);
55
56extern int
57xfs_attrmulti_attr_set(
58 struct inode *inode,
59 char *name,
60 const char __user *ubuf,
61 __uint32_t len,
62 __uint32_t flags);
63
64extern int
65xfs_attrmulti_attr_remove(
66 struct inode *inode,
67 char *name,
68 __uint32_t flags);
69
70extern long
71xfs_file_ioctl(
72 struct file *filp,
73 unsigned int cmd,
74 unsigned long p);
75
76extern long
77xfs_file_compat_ioctl(
78 struct file *file,
79 unsigned int cmd,
80 unsigned long arg);
81
82#endif
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index a4b254eb43b2..0504cece9f66 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -16,11 +16,7 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#include <linux/compat.h> 18#include <linux/compat.h>
19#include <linux/init.h>
20#include <linux/ioctl.h> 19#include <linux/ioctl.h>
21#include <linux/syscalls.h>
22#include <linux/types.h>
23#include <linux/fs.h>
24#include <asm/uaccess.h> 20#include <asm/uaccess.h>
25#include "xfs.h" 21#include "xfs.h"
26#include "xfs_fs.h" 22#include "xfs_fs.h"
@@ -36,7 +32,6 @@
36#include "xfs_bmap_btree.h" 32#include "xfs_bmap_btree.h"
37#include "xfs_attr_sf.h" 33#include "xfs_attr_sf.h"
38#include "xfs_dir2_sf.h" 34#include "xfs_dir2_sf.h"
39#include "xfs_vfs.h"
40#include "xfs_vnode.h" 35#include "xfs_vnode.h"
41#include "xfs_dinode.h" 36#include "xfs_dinode.h"
42#include "xfs_inode.h" 37#include "xfs_inode.h"
@@ -44,221 +39,219 @@
44#include "xfs_error.h" 39#include "xfs_error.h"
45#include "xfs_dfrag.h" 40#include "xfs_dfrag.h"
46#include "xfs_vnodeops.h" 41#include "xfs_vnodeops.h"
42#include "xfs_fsops.h"
43#include "xfs_alloc.h"
44#include "xfs_rtalloc.h"
45#include "xfs_attr.h"
46#include "xfs_ioctl.h"
47#include "xfs_ioctl32.h" 47#include "xfs_ioctl32.h"
48 48
49#define _NATIVE_IOC(cmd, type) \ 49#define _NATIVE_IOC(cmd, type) \
50 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) 50 _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
51 51
52#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 52#ifdef BROKEN_X86_ALIGNMENT
53#define BROKEN_X86_ALIGNMENT 53STATIC int
54#define _PACKED __attribute__((packed)) 54xfs_compat_flock64_copyin(
55/* on ia32 l_start is on a 32-bit boundary */ 55 xfs_flock64_t *bf,
56typedef struct xfs_flock64_32 { 56 compat_xfs_flock64_t __user *arg32)
57 __s16 l_type;
58 __s16 l_whence;
59 __s64 l_start __attribute__((packed));
60 /* len == 0 means until end of file */
61 __s64 l_len __attribute__((packed));
62 __s32 l_sysid;
63 __u32 l_pid;
64 __s32 l_pad[4]; /* reserve area */
65} xfs_flock64_32_t;
66
67#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32)
68#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32)
69#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32)
70#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32)
71#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32)
72#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32)
73#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32)
74#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32)
75
76/* just account for different alignment */
77STATIC unsigned long
78xfs_ioctl32_flock(
79 unsigned long arg)
80{ 57{
81 xfs_flock64_32_t __user *p32 = (void __user *)arg; 58 if (get_user(bf->l_type, &arg32->l_type) ||
82 xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); 59 get_user(bf->l_whence, &arg32->l_whence) ||
83 60 get_user(bf->l_start, &arg32->l_start) ||
84 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || 61 get_user(bf->l_len, &arg32->l_len) ||
85 copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || 62 get_user(bf->l_sysid, &arg32->l_sysid) ||
86 copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || 63 get_user(bf->l_pid, &arg32->l_pid) ||
87 copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || 64 copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
88 copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || 65 return -XFS_ERROR(EFAULT);
89 copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || 66 return 0;
90 copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
91 return -EFAULT;
92
93 return (unsigned long)p;
94} 67}
95 68
96typedef struct compat_xfs_fsop_geom_v1 { 69STATIC int
97 __u32 blocksize; /* filesystem (data) block size */ 70xfs_compat_ioc_fsgeometry_v1(
98 __u32 rtextsize; /* realtime extent size */ 71 struct xfs_mount *mp,
99 __u32 agblocks; /* fsblocks in an AG */ 72 compat_xfs_fsop_geom_v1_t __user *arg32)
100 __u32 agcount; /* number of allocation groups */
101 __u32 logblocks; /* fsblocks in the log */
102 __u32 sectsize; /* (data) sector size, bytes */
103 __u32 inodesize; /* inode size in bytes */
104 __u32 imaxpct; /* max allowed inode space(%) */
105 __u64 datablocks; /* fsblocks in data subvolume */
106 __u64 rtblocks; /* fsblocks in realtime subvol */
107 __u64 rtextents; /* rt extents in realtime subvol*/
108 __u64 logstart; /* starting fsblock of the log */
109 unsigned char uuid[16]; /* unique id of the filesystem */
110 __u32 sunit; /* stripe unit, fsblocks */
111 __u32 swidth; /* stripe width, fsblocks */
112 __s32 version; /* structure version */
113 __u32 flags; /* superblock version flags */
114 __u32 logsectsize; /* log sector size, bytes */
115 __u32 rtsectsize; /* realtime sector size, bytes */
116 __u32 dirblocksize; /* directory block size, bytes */
117} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
118
119#define XFS_IOC_FSGEOMETRY_V1_32 \
120 _IOR ('X', 100, struct compat_xfs_fsop_geom_v1)
121
122STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg)
123{ 73{
124 compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg; 74 xfs_fsop_geom_t fsgeo;
125 xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p)); 75 int error;
126 76
127 if (copy_in_user(p, p32, sizeof(*p32))) 77 error = xfs_fs_geometry(mp, &fsgeo, 3);
128 return -EFAULT; 78 if (error)
129 return (unsigned long)p; 79 return -error;
80 /* The 32-bit variant simply has some padding at the end */
81 if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
82 return -XFS_ERROR(EFAULT);
83 return 0;
130} 84}
131 85
132typedef struct compat_xfs_inogrp { 86STATIC int
133 __u64 xi_startino; /* starting inode number */ 87xfs_compat_growfs_data_copyin(
134 __s32 xi_alloccount; /* # bits set in allocmask */ 88 struct xfs_growfs_data *in,
135 __u64 xi_allocmask; /* mask of allocated inodes */ 89 compat_xfs_growfs_data_t __user *arg32)
136} __attribute__((packed)) compat_xfs_inogrp_t;
137
138STATIC int xfs_inumbers_fmt_compat(
139 void __user *ubuffer,
140 const xfs_inogrp_t *buffer,
141 long count,
142 long *written)
143{ 90{
144 compat_xfs_inogrp_t __user *p32 = ubuffer; 91 if (get_user(in->newblocks, &arg32->newblocks) ||
145 long i; 92 get_user(in->imaxpct, &arg32->imaxpct))
93 return -XFS_ERROR(EFAULT);
94 return 0;
95}
96
97STATIC int
98xfs_compat_growfs_rt_copyin(
99 struct xfs_growfs_rt *in,
100 compat_xfs_growfs_rt_t __user *arg32)
101{
102 if (get_user(in->newblocks, &arg32->newblocks) ||
103 get_user(in->extsize, &arg32->extsize))
104 return -XFS_ERROR(EFAULT);
105 return 0;
106}
107
108STATIC int
109xfs_inumbers_fmt_compat(
110 void __user *ubuffer,
111 const xfs_inogrp_t *buffer,
112 long count,
113 long *written)
114{
115 compat_xfs_inogrp_t __user *p32 = ubuffer;
116 long i;
146 117
147 for (i = 0; i < count; i++) { 118 for (i = 0; i < count; i++) {
148 if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || 119 if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
149 put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || 120 put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
150 put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) 121 put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
151 return -EFAULT; 122 return -XFS_ERROR(EFAULT);
152 } 123 }
153 *written = count * sizeof(*p32); 124 *written = count * sizeof(*p32);
154 return 0; 125 return 0;
155} 126}
156 127
157#else 128#else
158
159#define xfs_inumbers_fmt_compat xfs_inumbers_fmt 129#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
160#define _PACKED 130#endif /* BROKEN_X86_ALIGNMENT */
161 131
162#endif 132STATIC int
133xfs_ioctl32_bstime_copyin(
134 xfs_bstime_t *bstime,
135 compat_xfs_bstime_t __user *bstime32)
136{
137 compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */
163 138
164/* XFS_IOC_FSBULKSTAT and friends */ 139 if (get_user(sec32, &bstime32->tv_sec) ||
140 get_user(bstime->tv_nsec, &bstime32->tv_nsec))
141 return -XFS_ERROR(EFAULT);
142 bstime->tv_sec = sec32;
143 return 0;
144}
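/*
 * Illustrative note, not part of this patch: compat_time_t is the 32-bit
 * ABI's time type (32 bits wide on i386), while the native
 * xfs_bstime_t.tv_sec is a long. Reading into a compat_time_t
 * intermediate and then assigning widens the seconds value to the
 * kernel's native width.
 */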
145
 146/* xfs_bstat_t has differing alignment on intel, and bstime_t sizes differ everywhere */
147STATIC int
148xfs_ioctl32_bstat_copyin(
149 xfs_bstat_t *bstat,
150 compat_xfs_bstat_t __user *bstat32)
151{
152 if (get_user(bstat->bs_ino, &bstat32->bs_ino) ||
153 get_user(bstat->bs_mode, &bstat32->bs_mode) ||
154 get_user(bstat->bs_nlink, &bstat32->bs_nlink) ||
155 get_user(bstat->bs_uid, &bstat32->bs_uid) ||
156 get_user(bstat->bs_gid, &bstat32->bs_gid) ||
157 get_user(bstat->bs_rdev, &bstat32->bs_rdev) ||
158 get_user(bstat->bs_blksize, &bstat32->bs_blksize) ||
159 get_user(bstat->bs_size, &bstat32->bs_size) ||
160 xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
161 xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
162 xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
 163	    get_user(bstat->bs_blocks,	&bstat32->bs_blocks)	||
 164	    get_user(bstat->bs_xflags,	&bstat32->bs_xflags)	||
165 get_user(bstat->bs_extsize, &bstat32->bs_extsize) ||
166 get_user(bstat->bs_extents, &bstat32->bs_extents) ||
167 get_user(bstat->bs_gen, &bstat32->bs_gen) ||
168 get_user(bstat->bs_projid, &bstat32->bs_projid) ||
169 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
170 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
171 get_user(bstat->bs_aextents, &bstat32->bs_aextents))
172 return -XFS_ERROR(EFAULT);
173 return 0;
174}
165 175
166typedef struct compat_xfs_bstime { 176/* XFS_IOC_FSBULKSTAT and friends */
167 __s32 tv_sec; /* seconds */
168 __s32 tv_nsec; /* and nanoseconds */
169} compat_xfs_bstime_t;
170 177
171STATIC int xfs_bstime_store_compat( 178STATIC int
172 compat_xfs_bstime_t __user *p32, 179xfs_bstime_store_compat(
173 const xfs_bstime_t *p) 180 compat_xfs_bstime_t __user *p32,
181 const xfs_bstime_t *p)
174{ 182{
175 __s32 sec32; 183 __s32 sec32;
176 184
177 sec32 = p->tv_sec; 185 sec32 = p->tv_sec;
178 if (put_user(sec32, &p32->tv_sec) || 186 if (put_user(sec32, &p32->tv_sec) ||
179 put_user(p->tv_nsec, &p32->tv_nsec)) 187 put_user(p->tv_nsec, &p32->tv_nsec))
180 return -EFAULT; 188 return -XFS_ERROR(EFAULT);
181 return 0; 189 return 0;
182} 190}
183 191
184typedef struct compat_xfs_bstat { 192/* Return 0 on success or positive error (to xfs_bulkstat()) */
185 __u64 bs_ino; /* inode number */ 193STATIC int
186 __u16 bs_mode; /* type and mode */ 194xfs_bulkstat_one_fmt_compat(
187 __u16 bs_nlink; /* number of links */
188 __u32 bs_uid; /* user id */
189 __u32 bs_gid; /* group id */
190 __u32 bs_rdev; /* device value */
191 __s32 bs_blksize; /* block size */
192 __s64 bs_size; /* file size */
193 compat_xfs_bstime_t bs_atime; /* access time */
194 compat_xfs_bstime_t bs_mtime; /* modify time */
195 compat_xfs_bstime_t bs_ctime; /* inode change time */
196 int64_t bs_blocks; /* number of blocks */
197 __u32 bs_xflags; /* extended flags */
198 __s32 bs_extsize; /* extent size */
199 __s32 bs_extents; /* number of extents */
200 __u32 bs_gen; /* generation count */
201 __u16 bs_projid; /* project id */
202 unsigned char bs_pad[14]; /* pad space, unused */
203 __u32 bs_dmevmask; /* DMIG event mask */
204 __u16 bs_dmstate; /* DMIG state info */
205 __u16 bs_aextents; /* attribute number of extents */
206} _PACKED compat_xfs_bstat_t;
207
208STATIC int xfs_bulkstat_one_fmt_compat(
209 void __user *ubuffer, 195 void __user *ubuffer,
196 int ubsize,
197 int *ubused,
210 const xfs_bstat_t *buffer) 198 const xfs_bstat_t *buffer)
211{ 199{
212 compat_xfs_bstat_t __user *p32 = ubuffer; 200 compat_xfs_bstat_t __user *p32 = ubuffer;
213 201
214 if (put_user(buffer->bs_ino, &p32->bs_ino) || 202 if (ubsize < sizeof(*p32))
215 put_user(buffer->bs_mode, &p32->bs_mode) || 203 return XFS_ERROR(ENOMEM);
216 put_user(buffer->bs_nlink, &p32->bs_nlink) || 204
217 put_user(buffer->bs_uid, &p32->bs_uid) || 205 if (put_user(buffer->bs_ino, &p32->bs_ino) ||
218 put_user(buffer->bs_gid, &p32->bs_gid) || 206 put_user(buffer->bs_mode, &p32->bs_mode) ||
219 put_user(buffer->bs_rdev, &p32->bs_rdev) || 207 put_user(buffer->bs_nlink, &p32->bs_nlink) ||
220 put_user(buffer->bs_blksize, &p32->bs_blksize) || 208 put_user(buffer->bs_uid, &p32->bs_uid) ||
221 put_user(buffer->bs_size, &p32->bs_size) || 209 put_user(buffer->bs_gid, &p32->bs_gid) ||
210 put_user(buffer->bs_rdev, &p32->bs_rdev) ||
211 put_user(buffer->bs_blksize, &p32->bs_blksize) ||
212 put_user(buffer->bs_size, &p32->bs_size) ||
222 xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || 213 xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
223 xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || 214 xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
224 xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || 215 xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
225 put_user(buffer->bs_blocks, &p32->bs_blocks) || 216 put_user(buffer->bs_blocks, &p32->bs_blocks) ||
226 put_user(buffer->bs_xflags, &p32->bs_xflags) || 217 put_user(buffer->bs_xflags, &p32->bs_xflags) ||
227 put_user(buffer->bs_extsize, &p32->bs_extsize) || 218 put_user(buffer->bs_extsize, &p32->bs_extsize) ||
228 put_user(buffer->bs_extents, &p32->bs_extents) || 219 put_user(buffer->bs_extents, &p32->bs_extents) ||
229 put_user(buffer->bs_gen, &p32->bs_gen) || 220 put_user(buffer->bs_gen, &p32->bs_gen) ||
230 put_user(buffer->bs_projid, &p32->bs_projid) || 221 put_user(buffer->bs_projid, &p32->bs_projid) ||
231 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || 222 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
232 put_user(buffer->bs_dmstate, &p32->bs_dmstate) || 223 put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
233 put_user(buffer->bs_aextents, &p32->bs_aextents)) 224 put_user(buffer->bs_aextents, &p32->bs_aextents))
234 return -EFAULT; 225 return XFS_ERROR(EFAULT);
235 return sizeof(*p32); 226 if (ubused)
227 *ubused = sizeof(*p32);
228 return 0;
236} 229}
237 230
238 231STATIC int
239 232xfs_bulkstat_one_compat(
240typedef struct compat_xfs_fsop_bulkreq { 233 xfs_mount_t *mp, /* mount point for filesystem */
241 compat_uptr_t lastip; /* last inode # pointer */ 234 xfs_ino_t ino, /* inode number to get data for */
242 __s32 icount; /* count of entries in buffer */ 235 void __user *buffer, /* buffer to place output in */
243 compat_uptr_t ubuffer; /* user buffer for inode desc. */ 236 int ubsize, /* size of buffer */
244 compat_uptr_t ocount; /* output count pointer */ 237 void *private_data, /* my private data */
245} compat_xfs_fsop_bulkreq_t; 238 xfs_daddr_t bno, /* starting bno of inode cluster */
246 239 int *ubused, /* bytes used by me */
247#define XFS_IOC_FSBULKSTAT_32 \ 240 void *dibuff, /* on-disk inode buffer */
248 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) 241 int *stat) /* BULKSTAT_RV_... */
249#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ 242{
250 _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) 243 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
251#define XFS_IOC_FSINUMBERS_32 \ 244 xfs_bulkstat_one_fmt_compat, bno,
252 _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) 245 ubused, dibuff, stat);
246}
253 247
254/* copied from xfs_ioctl.c */ 248/* copied from xfs_ioctl.c */
255STATIC int 249STATIC int
256xfs_ioc_bulkstat_compat( 250xfs_compat_ioc_bulkstat(
257 xfs_mount_t *mp, 251 xfs_mount_t *mp,
258 unsigned int cmd, 252 unsigned int cmd,
259 void __user *arg) 253 compat_xfs_fsop_bulkreq_t __user *p32)
260{ 254{
261 compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg;
262 u32 addr; 255 u32 addr;
263 xfs_fsop_bulkreq_t bulkreq; 256 xfs_fsop_bulkreq_t bulkreq;
264 int count; /* # of records returned */ 257 int count; /* # of records returned */
@@ -270,20 +263,20 @@ xfs_ioc_bulkstat_compat(
270 /* should be called again (unused here, but used in dmapi) */ 263 /* should be called again (unused here, but used in dmapi) */
271 264
272 if (!capable(CAP_SYS_ADMIN)) 265 if (!capable(CAP_SYS_ADMIN))
273 return -EPERM; 266 return -XFS_ERROR(EPERM);
274 267
275 if (XFS_FORCED_SHUTDOWN(mp)) 268 if (XFS_FORCED_SHUTDOWN(mp))
276 return -XFS_ERROR(EIO); 269 return -XFS_ERROR(EIO);
277 270
278 if (get_user(addr, &p32->lastip)) 271 if (get_user(addr, &p32->lastip))
279 return -EFAULT; 272 return -XFS_ERROR(EFAULT);
280 bulkreq.lastip = compat_ptr(addr); 273 bulkreq.lastip = compat_ptr(addr);
281 if (get_user(bulkreq.icount, &p32->icount) || 274 if (get_user(bulkreq.icount, &p32->icount) ||
282 get_user(addr, &p32->ubuffer)) 275 get_user(addr, &p32->ubuffer))
283 return -EFAULT; 276 return -XFS_ERROR(EFAULT);
284 bulkreq.ubuffer = compat_ptr(addr); 277 bulkreq.ubuffer = compat_ptr(addr);
285 if (get_user(addr, &p32->ocount)) 278 if (get_user(addr, &p32->ocount))
286 return -EFAULT; 279 return -XFS_ERROR(EFAULT);
287 bulkreq.ocount = compat_ptr(addr); 280 bulkreq.ocount = compat_ptr(addr);
288 281
289 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 282 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
@@ -295,17 +288,22 @@ xfs_ioc_bulkstat_compat(
295 if (bulkreq.ubuffer == NULL) 288 if (bulkreq.ubuffer == NULL)
296 return -XFS_ERROR(EINVAL); 289 return -XFS_ERROR(EINVAL);
297 290
298 if (cmd == XFS_IOC_FSINUMBERS) 291 if (cmd == XFS_IOC_FSINUMBERS_32) {
299 error = xfs_inumbers(mp, &inlast, &count, 292 error = xfs_inumbers(mp, &inlast, &count,
300 bulkreq.ubuffer, xfs_inumbers_fmt_compat); 293 bulkreq.ubuffer, xfs_inumbers_fmt_compat);
301 else { 294 } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
302 /* declare a var to get a warning in case the type changes */ 295 int res;
303 bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat; 296
297 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
298 sizeof(compat_xfs_bstat_t),
299 NULL, 0, NULL, NULL, &res);
300 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
304 error = xfs_bulkstat(mp, &inlast, &count, 301 error = xfs_bulkstat(mp, &inlast, &count,
305 xfs_bulkstat_one, formatter, 302 xfs_bulkstat_one_compat, NULL,
306 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, 303 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
307 BULKSTAT_FG_QUICK, &done); 304 BULKSTAT_FG_QUICK, &done);
308 } 305 } else
306 error = XFS_ERROR(EINVAL);
309 if (error) 307 if (error)
310 return -error; 308 return -error;
311 309
@@ -321,63 +319,306 @@ xfs_ioc_bulkstat_compat(
321 return 0; 319 return 0;
322} 320}
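/*
 * Illustrative note, not part of this patch: 32-bit userspace encodes the
 * compat structure size into the ioctl number, so this function must
 * compare against the *_32 command values; the native XFS_IOC_FSINUMBERS
 * (built from the 64-bit xfs_fsop_bulkreq) can never match a request
 * arriving through the compat path.
 */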
323 321
322STATIC int
323xfs_compat_handlereq_copyin(
324 xfs_fsop_handlereq_t *hreq,
325 compat_xfs_fsop_handlereq_t __user *arg32)
326{
327 compat_xfs_fsop_handlereq_t hreq32;
328
329 if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
330 return -XFS_ERROR(EFAULT);
331
332 hreq->fd = hreq32.fd;
333 hreq->path = compat_ptr(hreq32.path);
334 hreq->oflags = hreq32.oflags;
335 hreq->ihandle = compat_ptr(hreq32.ihandle);
336 hreq->ihandlen = hreq32.ihandlen;
337 hreq->ohandle = compat_ptr(hreq32.ohandle);
338 hreq->ohandlen = compat_ptr(hreq32.ohandlen);
324 339
340 return 0;
341}
325 342
326typedef struct compat_xfs_fsop_handlereq { 343/*
327 __u32 fd; /* fd for FD_TO_HANDLE */ 344 * Convert userspace handle data into inode.
328 compat_uptr_t path; /* user pathname */ 345 *
329 __u32 oflags; /* open flags */ 346 * We use the fact that all the fsop_handlereq ioctl calls have a data
 330	compat_uptr_t	ihandle;	/* user supplied handle */ 347 * structure argument whose first component is always an xfs_fsop_handlereq_t,
331 __u32 ihandlen; /* user supplied length */ 348 * so we can pass that sub structure into this handy, shared routine.
332 compat_uptr_t ohandle; /* user buffer for handle */ 349 *
333 compat_uptr_t ohandlen; /* user buffer length */ 350 * If no error, caller must always iput the returned inode.
334} compat_xfs_fsop_handlereq_t; 351 */
335 352STATIC int
336#define XFS_IOC_PATH_TO_FSHANDLE_32 \ 353xfs_vget_fsop_handlereq_compat(
337 _IOWR('X', 104, struct compat_xfs_fsop_handlereq) 354 xfs_mount_t *mp,
338#define XFS_IOC_PATH_TO_HANDLE_32 \ 355 struct inode *parinode, /* parent inode pointer */
339 _IOWR('X', 105, struct compat_xfs_fsop_handlereq) 356 compat_xfs_fsop_handlereq_t *hreq,
340#define XFS_IOC_FD_TO_HANDLE_32 \ 357 struct inode **inode)
341 _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
342#define XFS_IOC_OPEN_BY_HANDLE_32 \
343 _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
344#define XFS_IOC_READLINK_BY_HANDLE_32 \
345 _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
346
347STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg)
348{ 358{
349 compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg; 359 void __user *hanp;
350 xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p)); 360 size_t hlen;
351 u32 addr; 361 xfs_fid_t *xfid;
352 362 xfs_handle_t *handlep;
353 if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) || 363 xfs_handle_t handle;
354 get_user(addr, &p32->path) || 364 xfs_inode_t *ip;
355 put_user(compat_ptr(addr), &p->path) || 365 xfs_ino_t ino;
356 copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) || 366 __u32 igen;
357 get_user(addr, &p32->ihandle) || 367 int error;
358 put_user(compat_ptr(addr), &p->ihandle) || 368
359 copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) || 369 /*
360 get_user(addr, &p32->ohandle) || 370 * Only allow handle opens under a directory.
361 put_user(compat_ptr(addr), &p->ohandle) || 371 */
362 get_user(addr, &p32->ohandlen) || 372 if (!S_ISDIR(parinode->i_mode))
363 put_user(compat_ptr(addr), &p->ohandlen)) 373 return XFS_ERROR(ENOTDIR);
364 return -EFAULT; 374
365 375 hanp = compat_ptr(hreq->ihandle);
366 return (unsigned long)p; 376 hlen = hreq->ihandlen;
377 handlep = &handle;
378
379 if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
380 return XFS_ERROR(EINVAL);
381 if (copy_from_user(handlep, hanp, hlen))
382 return XFS_ERROR(EFAULT);
383 if (hlen < sizeof(*handlep))
384 memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
385 if (hlen > sizeof(handlep->ha_fsid)) {
386 if (handlep->ha_fid.fid_len !=
387 (hlen - sizeof(handlep->ha_fsid) -
388 sizeof(handlep->ha_fid.fid_len)) ||
389 handlep->ha_fid.fid_pad)
390 return XFS_ERROR(EINVAL);
391 }
392
393 /*
394 * Crack the handle, obtain the inode # & generation #
395 */
396 xfid = (struct xfs_fid *)&handlep->ha_fid;
397 if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) {
398 ino = xfid->fid_ino;
399 igen = xfid->fid_gen;
400 } else {
401 return XFS_ERROR(EINVAL);
402 }
403
404 /*
405 * Get the XFS inode, building a Linux inode to go with it.
406 */
407 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
408 if (error)
409 return error;
410 if (ip == NULL)
411 return XFS_ERROR(EIO);
412 if (ip->i_d.di_gen != igen) {
413 xfs_iput_new(ip, XFS_ILOCK_SHARED);
414 return XFS_ERROR(ENOENT);
415 }
416
417 xfs_iunlock(ip, XFS_ILOCK_SHARED);
418
419 *inode = VFS_I(ip);
420 return 0;
367} 421}
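/*
 * Illustrative sketch, not part of this patch (layout assumed from the
 * xfs_fs.h of this era): a decoded handle is an 8-byte ha_fsid optionally
 * followed by struct xfs_fid { __u16 fid_len; __u16 fid_pad;
 * __u32 fid_gen; __u64 fid_ino; }, so a full handle is 24 bytes and
 * fid_len must equal sizeof(xfs_fid) - sizeof(fid_len) == 14 for the
 * inode/generation branch above to accept it.
 */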
368 422
423STATIC int
424xfs_compat_attrlist_by_handle(
425 xfs_mount_t *mp,
426 void __user *arg,
427 struct inode *parinode)
428{
429 int error;
430 attrlist_cursor_kern_t *cursor;
431 compat_xfs_fsop_attrlist_handlereq_t al_hreq;
432 struct inode *inode;
433 char *kbuf;
434
435 if (!capable(CAP_SYS_ADMIN))
436 return -XFS_ERROR(EPERM);
437 if (copy_from_user(&al_hreq, arg,
438 sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
439 return -XFS_ERROR(EFAULT);
440 if (al_hreq.buflen > XATTR_LIST_MAX)
441 return -XFS_ERROR(EINVAL);
442
443 /*
444 * Reject flags, only allow namespaces.
445 */
446 if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
447 return -XFS_ERROR(EINVAL);
448
449 error = xfs_vget_fsop_handlereq_compat(mp, parinode, &al_hreq.hreq,
450 &inode);
451 if (error)
452 goto out;
453
 454	error = ENOMEM;
 455	kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
 456	if (!kbuf)
 457		goto out_vn_rele;
458 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
459 error = xfs_attr_list(XFS_I(inode), kbuf, al_hreq.buflen,
460 al_hreq.flags, cursor);
461 if (error)
462 goto out_kfree;
463
464 if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
 465		error = EFAULT;
466
467 out_kfree:
468 kfree(kbuf);
469 out_vn_rele:
470 iput(inode);
471 out:
472 return -error;
473}
369 474
370STATIC long 475STATIC int
371xfs_compat_ioctl( 476xfs_compat_attrmulti_by_handle(
372 int mode, 477 xfs_mount_t *mp,
373 struct file *file, 478 void __user *arg,
374 unsigned cmd, 479 struct inode *parinode)
375 unsigned long arg) 480{
481 int error;
482 compat_xfs_attr_multiop_t *ops;
483 compat_xfs_fsop_attrmulti_handlereq_t am_hreq;
484 struct inode *inode;
485 unsigned int i, size;
486 char *attr_name;
487
488 if (!capable(CAP_SYS_ADMIN))
489 return -XFS_ERROR(EPERM);
490 if (copy_from_user(&am_hreq, arg,
491 sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
492 return -XFS_ERROR(EFAULT);
493
494 error = xfs_vget_fsop_handlereq_compat(mp, parinode, &am_hreq.hreq,
495 &inode);
496 if (error)
497 goto out;
498
499 error = E2BIG;
500 size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
501 if (!size || size > 16 * PAGE_SIZE)
502 goto out_vn_rele;
503
504 error = ENOMEM;
505 ops = kmalloc(size, GFP_KERNEL);
506 if (!ops)
507 goto out_vn_rele;
508
509 error = EFAULT;
510 if (copy_from_user(ops, compat_ptr(am_hreq.ops), size))
511 goto out_kfree_ops;
512
 513	error = ENOMEM;
 514	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
 515	if (!attr_name)
 516		goto out_kfree_ops;
 517
518 error = 0;
519 for (i = 0; i < am_hreq.opcount; i++) {
520 ops[i].am_error = strncpy_from_user(attr_name,
521 compat_ptr(ops[i].am_attrname),
522 MAXNAMELEN);
523 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
524 error = -ERANGE;
525 if (ops[i].am_error < 0)
526 break;
527
528 switch (ops[i].am_opcode) {
529 case ATTR_OP_GET:
530 ops[i].am_error = xfs_attrmulti_attr_get(inode,
531 attr_name,
532 compat_ptr(ops[i].am_attrvalue),
533 &ops[i].am_length, ops[i].am_flags);
534 break;
535 case ATTR_OP_SET:
536 ops[i].am_error = xfs_attrmulti_attr_set(inode,
537 attr_name,
538 compat_ptr(ops[i].am_attrvalue),
539 ops[i].am_length, ops[i].am_flags);
540 break;
541 case ATTR_OP_REMOVE:
542 ops[i].am_error = xfs_attrmulti_attr_remove(inode,
543 attr_name, ops[i].am_flags);
544 break;
545 default:
546 ops[i].am_error = EINVAL;
547 }
548 }
549
550 if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
551 error = XFS_ERROR(EFAULT);
552
553 kfree(attr_name);
554 out_kfree_ops:
555 kfree(ops);
556 out_vn_rele:
557 iput(inode);
558 out:
559 return -error;
560}
561
562STATIC int
563xfs_compat_fssetdm_by_handle(
564 xfs_mount_t *mp,
565 void __user *arg,
566 struct inode *parinode)
567{
568 int error;
569 struct fsdmidata fsd;
570 compat_xfs_fsop_setdm_handlereq_t dmhreq;
571 struct inode *inode;
572
573 if (!capable(CAP_MKNOD))
574 return -XFS_ERROR(EPERM);
575 if (copy_from_user(&dmhreq, arg,
576 sizeof(compat_xfs_fsop_setdm_handlereq_t)))
577 return -XFS_ERROR(EFAULT);
578
579 error = xfs_vget_fsop_handlereq_compat(mp, parinode, &dmhreq.hreq,
580 &inode);
581 if (error)
582 return -error;
583
584 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
585 error = -XFS_ERROR(EPERM);
586 goto out;
587 }
588
589 if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
590 error = -XFS_ERROR(EFAULT);
591 goto out;
592 }
593
594 error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask,
595 fsd.fsd_dmstate);
596
597out:
598 iput(inode);
599 return error;
600}
601
602long
603xfs_file_compat_ioctl(
604 struct file *filp,
605 unsigned cmd,
606 unsigned long p)
376{ 607{
377 struct inode *inode = file->f_path.dentry->d_inode; 608 struct inode *inode = filp->f_path.dentry->d_inode;
378 int error; 609 struct xfs_inode *ip = XFS_I(inode);
610 struct xfs_mount *mp = ip->i_mount;
611 void __user *arg = (void __user *)p;
612 int ioflags = 0;
613 int error;
614
615 if (filp->f_mode & FMODE_NOCMTIME)
616 ioflags |= IO_INVIS;
617
618 xfs_itrace_entry(ip);
379 619
380 switch (cmd) { 620 switch (cmd) {
621 /* No size or alignment issues on any arch */
381 case XFS_IOC_DIOINFO: 622 case XFS_IOC_DIOINFO:
382 case XFS_IOC_FSGEOMETRY: 623 case XFS_IOC_FSGEOMETRY:
383 case XFS_IOC_FSGETXATTR: 624 case XFS_IOC_FSGETXATTR:
@@ -387,48 +628,18 @@ xfs_compat_ioctl(
387 case XFS_IOC_GETBMAP: 628 case XFS_IOC_GETBMAP:
388 case XFS_IOC_GETBMAPA: 629 case XFS_IOC_GETBMAPA:
389 case XFS_IOC_GETBMAPX: 630 case XFS_IOC_GETBMAPX:
390/* not handled
391 case XFS_IOC_FSSETDM_BY_HANDLE:
392 case XFS_IOC_ATTRLIST_BY_HANDLE:
393 case XFS_IOC_ATTRMULTI_BY_HANDLE:
394*/
395 case XFS_IOC_FSCOUNTS: 631 case XFS_IOC_FSCOUNTS:
396 case XFS_IOC_SET_RESBLKS: 632 case XFS_IOC_SET_RESBLKS:
397 case XFS_IOC_GET_RESBLKS: 633 case XFS_IOC_GET_RESBLKS:
398 case XFS_IOC_FSGROWFSDATA:
399 case XFS_IOC_FSGROWFSLOG: 634 case XFS_IOC_FSGROWFSLOG:
400 case XFS_IOC_FSGROWFSRT:
401 case XFS_IOC_FREEZE: 635 case XFS_IOC_FREEZE:
402 case XFS_IOC_THAW: 636 case XFS_IOC_THAW:
403 case XFS_IOC_GOINGDOWN: 637 case XFS_IOC_GOINGDOWN:
404 case XFS_IOC_ERROR_INJECTION: 638 case XFS_IOC_ERROR_INJECTION:
405 case XFS_IOC_ERROR_CLEARALL: 639 case XFS_IOC_ERROR_CLEARALL:
406 break; 640 return xfs_file_ioctl(filp, cmd, p);
407 641#ifndef BROKEN_X86_ALIGNMENT
408 case XFS_IOC32_GETXFLAGS: 642 /* These are handled fine if no alignment issues */
409 case XFS_IOC32_SETXFLAGS:
410 case XFS_IOC32_GETVERSION:
411 cmd = _NATIVE_IOC(cmd, long);
412 break;
413#ifdef BROKEN_X86_ALIGNMENT
414 /* xfs_flock_t has wrong u32 vs u64 alignment */
415 case XFS_IOC_ALLOCSP_32:
416 case XFS_IOC_FREESP_32:
417 case XFS_IOC_ALLOCSP64_32:
418 case XFS_IOC_FREESP64_32:
419 case XFS_IOC_RESVSP_32:
420 case XFS_IOC_UNRESVSP_32:
421 case XFS_IOC_RESVSP64_32:
422 case XFS_IOC_UNRESVSP64_32:
423 arg = xfs_ioctl32_flock(arg);
424 cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
425 break;
426 case XFS_IOC_FSGEOMETRY_V1_32:
427 arg = xfs_ioctl32_geom_v1(arg);
428 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
429 break;
430
431#else /* These are handled fine if no alignment issues */
432 case XFS_IOC_ALLOCSP: 643 case XFS_IOC_ALLOCSP:
433 case XFS_IOC_FREESP: 644 case XFS_IOC_FREESP:
434 case XFS_IOC_RESVSP: 645 case XFS_IOC_RESVSP:
@@ -438,51 +649,97 @@ xfs_compat_ioctl(
438 case XFS_IOC_RESVSP64: 649 case XFS_IOC_RESVSP64:
439 case XFS_IOC_UNRESVSP64: 650 case XFS_IOC_UNRESVSP64:
440 case XFS_IOC_FSGEOMETRY_V1: 651 case XFS_IOC_FSGEOMETRY_V1:
441 break; 652 case XFS_IOC_FSGROWFSDATA:
653 case XFS_IOC_FSGROWFSRT:
654 return xfs_file_ioctl(filp, cmd, p);
655#else
656 case XFS_IOC_ALLOCSP_32:
657 case XFS_IOC_FREESP_32:
658 case XFS_IOC_ALLOCSP64_32:
659 case XFS_IOC_FREESP64_32:
660 case XFS_IOC_RESVSP_32:
661 case XFS_IOC_UNRESVSP_32:
662 case XFS_IOC_RESVSP64_32:
663 case XFS_IOC_UNRESVSP64_32: {
664 struct xfs_flock64 bf;
442 665
443 /* xfs_bstat_t still has wrong u32 vs u64 alignment */ 666 if (xfs_compat_flock64_copyin(&bf, arg))
444 case XFS_IOC_SWAPEXT: 667 return -XFS_ERROR(EFAULT);
445 break; 668 cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
669 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
670 }
671 case XFS_IOC_FSGEOMETRY_V1_32:
672 return xfs_compat_ioc_fsgeometry_v1(mp, arg);
673 case XFS_IOC_FSGROWFSDATA_32: {
674 struct xfs_growfs_data in;
675
676 if (xfs_compat_growfs_data_copyin(&in, arg))
677 return -XFS_ERROR(EFAULT);
678 error = xfs_growfs_data(mp, &in);
679 return -error;
680 }
681 case XFS_IOC_FSGROWFSRT_32: {
682 struct xfs_growfs_rt in;
446 683
684 if (xfs_compat_growfs_rt_copyin(&in, arg))
685 return -XFS_ERROR(EFAULT);
686 error = xfs_growfs_rt(mp, &in);
687 return -error;
688 }
447#endif 689#endif
 690	/* long changes size, but xfs only copies out 32 bits */
691 case XFS_IOC_GETXFLAGS_32:
692 case XFS_IOC_SETXFLAGS_32:
693 case XFS_IOC_GETVERSION_32:
694 cmd = _NATIVE_IOC(cmd, long);
695 return xfs_file_ioctl(filp, cmd, p);
696 case XFS_IOC_SWAPEXT: {
697 struct xfs_swapext sxp;
698 struct compat_xfs_swapext __user *sxu = arg;
699
700 /* Bulk copy in up to the sx_stat field, then copy bstat */
701 if (copy_from_user(&sxp, sxu,
702 offsetof(struct xfs_swapext, sx_stat)) ||
703 xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
704 return -XFS_ERROR(EFAULT);
705 error = xfs_swapext(&sxp);
706 return -error;
707 }
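/*
 * Illustrative note, not part of this patch: sx_version through sx_pad
 * have identical size and offsets in both ABIs, so one bulk
 * copy_from_user() up to offsetof(sx_stat) is safe; only the trailing
 * bstat differs (see compat_xfs_bstat) and needs the field-by-field
 * translation above.
 */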
448 case XFS_IOC_FSBULKSTAT_32: 708 case XFS_IOC_FSBULKSTAT_32:
449 case XFS_IOC_FSBULKSTAT_SINGLE_32: 709 case XFS_IOC_FSBULKSTAT_SINGLE_32:
450 case XFS_IOC_FSINUMBERS_32: 710 case XFS_IOC_FSINUMBERS_32:
451 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq); 711 return xfs_compat_ioc_bulkstat(mp, cmd, arg);
452 return xfs_ioc_bulkstat_compat(XFS_I(inode)->i_mount,
453 cmd, (void __user*)arg);
454 case XFS_IOC_FD_TO_HANDLE_32: 712 case XFS_IOC_FD_TO_HANDLE_32:
455 case XFS_IOC_PATH_TO_HANDLE_32: 713 case XFS_IOC_PATH_TO_HANDLE_32:
456 case XFS_IOC_PATH_TO_FSHANDLE_32: 714 case XFS_IOC_PATH_TO_FSHANDLE_32: {
457 case XFS_IOC_OPEN_BY_HANDLE_32: 715 struct xfs_fsop_handlereq hreq;
458 case XFS_IOC_READLINK_BY_HANDLE_32: 716
459 arg = xfs_ioctl32_fshandle(arg); 717 if (xfs_compat_handlereq_copyin(&hreq, arg))
718 return -XFS_ERROR(EFAULT);
460 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); 719 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
461 break; 720 return xfs_find_handle(cmd, &hreq);
462 default:
463 return -ENOIOCTLCMD;
464 } 721 }
722 case XFS_IOC_OPEN_BY_HANDLE_32: {
723 struct xfs_fsop_handlereq hreq;
465 724
466 error = xfs_ioctl(XFS_I(inode), file, mode, cmd, (void __user *)arg); 725 if (xfs_compat_handlereq_copyin(&hreq, arg))
467 xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); 726 return -XFS_ERROR(EFAULT);
468 727 return xfs_open_by_handle(mp, &hreq, filp, inode);
469 return error; 728 }
470} 729 case XFS_IOC_READLINK_BY_HANDLE_32: {
471 730 struct xfs_fsop_handlereq hreq;
472long
473xfs_file_compat_ioctl(
474 struct file *file,
475 unsigned cmd,
476 unsigned long arg)
477{
478 return xfs_compat_ioctl(0, file, cmd, arg);
479}
480 731
481long 732 if (xfs_compat_handlereq_copyin(&hreq, arg))
482xfs_file_compat_invis_ioctl( 733 return -XFS_ERROR(EFAULT);
483 struct file *file, 734 return xfs_readlink_by_handle(mp, &hreq, inode);
484 unsigned cmd, 735 }
485 unsigned long arg) 736 case XFS_IOC_ATTRLIST_BY_HANDLE_32:
486{ 737 return xfs_compat_attrlist_by_handle(mp, arg, inode);
487 return xfs_compat_ioctl(IO_INVIS, file, cmd, arg); 738 case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
739 return xfs_compat_attrmulti_by_handle(mp, arg, inode);
740 case XFS_IOC_FSSETDM_BY_HANDLE_32:
741 return xfs_compat_fssetdm_by_handle(mp, arg, inode);
742 default:
743 return -XFS_ERROR(ENOIOCTLCMD);
744 }
488} 745}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 02de6e62ee37..1024c4f8ba0d 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -18,7 +18,217 @@
18#ifndef __XFS_IOCTL32_H__ 18#ifndef __XFS_IOCTL32_H__
19#define __XFS_IOCTL32_H__ 19#define __XFS_IOCTL32_H__
20 20
21extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long); 21#include <linux/compat.h>
22extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long); 22
23/*
24 * on 32-bit arches, ioctl argument structures may have different sizes
25 * and/or alignment. We define compat structures which match the
26 * 32-bit sizes/alignments here, and their associated ioctl numbers.
27 *
28 * xfs_ioctl32.c contains routines to copy these structures in and out.
29 */
30
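/*
 * Illustrative note, not part of these headers: _IOW()/_IOWR() encode
 * sizeof(arg) into the command number, so an ioctl built from a compat
 * structure yields a different value from its native counterpart. That
 * is why each 32-bit layout below gets its own XFS_IOC_*_32 definition
 * for the compat dispatch switch.
 */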
31/* stock kernel-level ioctls we support */
32#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS
33#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS
34#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION
35
36/*
37 * On intel, even if sizes match, alignment and/or padding may differ.
38 */
39#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
40#define BROKEN_X86_ALIGNMENT
41#define __compat_packed __attribute__((packed))
42#else
43#define __compat_packed
44#endif
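/*
 * Illustrative example, not part of these headers: on x86_64 a __u64
 * member is 8-byte aligned, so struct { __u32 a; __u64 b; } occupies
 * 16 bytes, while the i386 ABI aligns __u64 to 4 bytes and packs the
 * same struct into 12. __compat_packed makes the 64-bit kernel use the
 * layout that 32-bit userspace actually passed in.
 */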
45
46typedef struct compat_xfs_bstime {
47 compat_time_t tv_sec; /* seconds */
48 __s32 tv_nsec; /* and nanoseconds */
49} compat_xfs_bstime_t;
50
51typedef struct compat_xfs_bstat {
52 __u64 bs_ino; /* inode number */
53 __u16 bs_mode; /* type and mode */
54 __u16 bs_nlink; /* number of links */
55 __u32 bs_uid; /* user id */
56 __u32 bs_gid; /* group id */
57 __u32 bs_rdev; /* device value */
58 __s32 bs_blksize; /* block size */
59 __s64 bs_size; /* file size */
60 compat_xfs_bstime_t bs_atime; /* access time */
61 compat_xfs_bstime_t bs_mtime; /* modify time */
62 compat_xfs_bstime_t bs_ctime; /* inode change time */
63 int64_t bs_blocks; /* number of blocks */
64 __u32 bs_xflags; /* extended flags */
65 __s32 bs_extsize; /* extent size */
66 __s32 bs_extents; /* number of extents */
67 __u32 bs_gen; /* generation count */
68 __u16 bs_projid; /* project id */
69 unsigned char bs_pad[14]; /* pad space, unused */
70 __u32 bs_dmevmask; /* DMIG event mask */
71 __u16 bs_dmstate; /* DMIG state info */
72 __u16 bs_aextents; /* attribute number of extents */
73} __compat_packed compat_xfs_bstat_t;
74
75typedef struct compat_xfs_fsop_bulkreq {
76 compat_uptr_t lastip; /* last inode # pointer */
77 __s32 icount; /* count of entries in buffer */
78 compat_uptr_t ubuffer; /* user buffer for inode desc. */
79 compat_uptr_t ocount; /* output count pointer */
80} compat_xfs_fsop_bulkreq_t;
81
82#define XFS_IOC_FSBULKSTAT_32 \
83 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
84#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
85 _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
86#define XFS_IOC_FSINUMBERS_32 \
87 _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
88
89typedef struct compat_xfs_fsop_handlereq {
90 __u32 fd; /* fd for FD_TO_HANDLE */
91 compat_uptr_t path; /* user pathname */
92 __u32 oflags; /* open flags */
93 compat_uptr_t ihandle; /* user supplied handle */
94 __u32 ihandlen; /* user supplied length */
95 compat_uptr_t ohandle; /* user buffer for handle */
96 compat_uptr_t ohandlen; /* user buffer length */
97} compat_xfs_fsop_handlereq_t;
98
99#define XFS_IOC_PATH_TO_FSHANDLE_32 \
100 _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
101#define XFS_IOC_PATH_TO_HANDLE_32 \
102 _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
103#define XFS_IOC_FD_TO_HANDLE_32 \
104 _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
105#define XFS_IOC_OPEN_BY_HANDLE_32 \
106 _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
107#define XFS_IOC_READLINK_BY_HANDLE_32 \
108 _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
109
110/* The bstat field in the swapext struct needs translation */
111typedef struct compat_xfs_swapext {
112 __int64_t sx_version; /* version */
113 __int64_t sx_fdtarget; /* fd of target file */
114 __int64_t sx_fdtmp; /* fd of tmp file */
115 xfs_off_t sx_offset; /* offset into file */
 116	xfs_off_t	sx_length;	/* length from offset */
117 char sx_pad[16]; /* pad space, unused */
118 compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */
119} __compat_packed compat_xfs_swapext_t;
120
121#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext)
122
123typedef struct compat_xfs_fsop_attrlist_handlereq {
124 struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
125 struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */
126 __u32 flags; /* which namespace to use */
127 __u32 buflen; /* length of buffer supplied */
128 compat_uptr_t buffer; /* returned names */
129} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
130
131/* Note: actually this is read/write */
132#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
133 _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
134
135/* am_opcodes defined in xfs_fs.h */
136typedef struct compat_xfs_attr_multiop {
137 __u32 am_opcode;
138 __s32 am_error;
139 compat_uptr_t am_attrname;
140 compat_uptr_t am_attrvalue;
141 __u32 am_length;
142 __u32 am_flags;
143} compat_xfs_attr_multiop_t;
144
145typedef struct compat_xfs_fsop_attrmulti_handlereq {
146 struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
 147	__u32		opcount;	/* count of following multiop */
148 /* ptr to compat_xfs_attr_multiop */
149 compat_uptr_t ops; /* attr_multi data */
150} compat_xfs_fsop_attrmulti_handlereq_t;
151
152#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
153 _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
154
155typedef struct compat_xfs_fsop_setdm_handlereq {
156 struct compat_xfs_fsop_handlereq hreq; /* handle information */
157 /* ptr to struct fsdmidata */
158 compat_uptr_t data; /* DMAPI data */
159} compat_xfs_fsop_setdm_handlereq_t;
160
161#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
162 _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
163
164#ifdef BROKEN_X86_ALIGNMENT
165/* on ia32 l_start is on a 32-bit boundary */
166typedef struct compat_xfs_flock64 {
167 __s16 l_type;
168 __s16 l_whence;
169 __s64 l_start __attribute__((packed));
170 /* len == 0 means until end of file */
171 __s64 l_len __attribute__((packed));
172 __s32 l_sysid;
173 __u32 l_pid;
174 __s32 l_pad[4]; /* reserve area */
175} compat_xfs_flock64_t;
176
177#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64)
178#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64)
179#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64)
180#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64)
181#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64)
182#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
183#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
184#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
185
186typedef struct compat_xfs_fsop_geom_v1 {
187 __u32 blocksize; /* filesystem (data) block size */
188 __u32 rtextsize; /* realtime extent size */
189 __u32 agblocks; /* fsblocks in an AG */
190 __u32 agcount; /* number of allocation groups */
191 __u32 logblocks; /* fsblocks in the log */
192 __u32 sectsize; /* (data) sector size, bytes */
193 __u32 inodesize; /* inode size in bytes */
194 __u32 imaxpct; /* max allowed inode space(%) */
195 __u64 datablocks; /* fsblocks in data subvolume */
196 __u64 rtblocks; /* fsblocks in realtime subvol */
197 __u64 rtextents; /* rt extents in realtime subvol*/
198 __u64 logstart; /* starting fsblock of the log */
199 unsigned char uuid[16]; /* unique id of the filesystem */
200 __u32 sunit; /* stripe unit, fsblocks */
201 __u32 swidth; /* stripe width, fsblocks */
202 __s32 version; /* structure version */
203 __u32 flags; /* superblock version flags */
204 __u32 logsectsize; /* log sector size, bytes */
205 __u32 rtsectsize; /* realtime sector size, bytes */
206 __u32 dirblocksize; /* directory block size, bytes */
207} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
208
209#define XFS_IOC_FSGEOMETRY_V1_32 \
210 _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
211
212typedef struct compat_xfs_inogrp {
213 __u64 xi_startino; /* starting inode number */
214 __s32 xi_alloccount; /* # bits set in allocmask */
215 __u64 xi_allocmask; /* mask of allocated inodes */
216} __attribute__((packed)) compat_xfs_inogrp_t;
217
218/* These growfs input structures have padding on the end, so must translate */
219typedef struct compat_xfs_growfs_data {
220 __u64 newblocks; /* new data subvol size, fsblocks */
221 __u32 imaxpct; /* new inode space percentage limit */
222} __attribute__((packed)) compat_xfs_growfs_data_t;
223
224typedef struct compat_xfs_growfs_rt {
225 __u64 newblocks; /* new realtime size, fsblocks */
226 __u32 extsize; /* new realtime extent size, fsblocks */
227} __attribute__((packed)) compat_xfs_growfs_rt_t;
228
229#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
230#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt)
231
232#endif /* BROKEN_X86_ALIGNMENT */
23 233
24#endif /* __XFS_IOCTL32_H__ */ 234#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 095d271f3434..7aa53fefc67f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -53,6 +53,7 @@
53#include <linux/namei.h> 53#include <linux/namei.h>
54#include <linux/security.h> 54#include <linux/security.h>
55#include <linux/falloc.h> 55#include <linux/falloc.h>
56#include <linux/fiemap.h>
56 57
57/* 58/*
58 * Bring the atime in the XFS inode uptodate. 59 * Bring the atime in the XFS inode uptodate.
@@ -64,14 +65,14 @@ xfs_synchronize_atime(
64{ 65{
65 struct inode *inode = VFS_I(ip); 66 struct inode *inode = VFS_I(ip);
66 67
67 if (inode) { 68 if (!(inode->i_state & I_CLEAR)) {
68 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; 69 ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
69 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; 70 ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
70 } 71 }
71} 72}
72 73
73/* 74/*
74 * If the linux inode exists, mark it dirty. 75 * If the linux inode is valid, mark it dirty.
75 * Used when commiting a dirty inode into a transaction so that 76 * Used when commiting a dirty inode into a transaction so that
76 * the inode will get written back by the linux code 77 * the inode will get written back by the linux code
77 */ 78 */
@@ -81,7 +82,7 @@ xfs_mark_inode_dirty_sync(
81{ 82{
82 struct inode *inode = VFS_I(ip); 83 struct inode *inode = VFS_I(ip);
83 84
84 if (inode) 85 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
85 mark_inode_dirty_sync(inode); 86 mark_inode_dirty_sync(inode);
86} 87}
87 88
@@ -128,7 +129,7 @@ xfs_ichgtime(
128 if (sync_it) { 129 if (sync_it) {
129 SYNCHRONIZE(); 130 SYNCHRONIZE();
130 ip->i_update_core = 1; 131 ip->i_update_core = 1;
131 mark_inode_dirty_sync(inode); 132 xfs_mark_inode_dirty_sync(ip);
132 } 133 }
133} 134}
134 135
@@ -158,8 +159,6 @@ xfs_init_security(
158 } 159 }
159 160
160 error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); 161 error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
161 if (!error)
162 xfs_iflags_set(ip, XFS_IMODIFIED);
163 162
164 kfree(name); 163 kfree(name);
165 kfree(value); 164 kfree(value);
@@ -260,7 +259,6 @@ xfs_vn_mknod(
260 error = _ACL_INHERIT(inode, mode, default_acl); 259 error = _ACL_INHERIT(inode, mode, default_acl);
261 if (unlikely(error)) 260 if (unlikely(error))
262 goto out_cleanup_inode; 261 goto out_cleanup_inode;
263 xfs_iflags_set(ip, XFS_IMODIFIED);
264 _ACL_FREE(default_acl); 262 _ACL_FREE(default_acl);
265 } 263 }
266 264
@@ -366,21 +364,17 @@ xfs_vn_link(
366 struct inode *dir, 364 struct inode *dir,
367 struct dentry *dentry) 365 struct dentry *dentry)
368{ 366{
369 struct inode *inode; /* inode of guy being linked to */ 367 struct inode *inode = old_dentry->d_inode;
370 struct xfs_name name; 368 struct xfs_name name;
371 int error; 369 int error;
372 370
373 inode = old_dentry->d_inode;
374 xfs_dentry_to_name(&name, dentry); 371 xfs_dentry_to_name(&name, dentry);
375 372
376 igrab(inode);
377 error = xfs_link(XFS_I(dir), XFS_I(inode), &name); 373 error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
378 if (unlikely(error)) { 374 if (unlikely(error))
379 iput(inode);
380 return -error; 375 return -error;
381 }
382 376
383 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); 377 atomic_inc(&inode->i_count);
384 d_instantiate(dentry, inode); 378 d_instantiate(dentry, inode);
385 return 0; 379 return 0;
386} 380}
@@ -601,7 +595,7 @@ xfs_vn_setattr(
601 struct dentry *dentry, 595 struct dentry *dentry,
602 struct iattr *iattr) 596 struct iattr *iattr)
603{ 597{
604 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL); 598 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
605} 599}
606 600
607/* 601/*
@@ -642,7 +636,7 @@ xfs_vn_fallocate(
642 636
643 xfs_ilock(ip, XFS_IOLOCK_EXCL); 637 xfs_ilock(ip, XFS_IOLOCK_EXCL);
644 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 638 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
645 0, NULL, XFS_ATTR_NOLOCK); 639 0, XFS_ATTR_NOLOCK);
646 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 640 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
647 offset + len > i_size_read(inode)) 641 offset + len > i_size_read(inode))
648 new_size = offset + len; 642 new_size = offset + len;
@@ -653,7 +647,7 @@ xfs_vn_fallocate(
653 647
654 iattr.ia_valid = ATTR_SIZE; 648 iattr.ia_valid = ATTR_SIZE;
655 iattr.ia_size = new_size; 649 iattr.ia_size = new_size;
656 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL); 650 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
657 } 651 }
658 652
659 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 653 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -661,6 +655,88 @@ out_error:
661 return error; 655 return error;
662} 656}
663 657
658#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
659
660/*
661 * Call fiemap helper to fill in user data.
662 * Returns positive errors to xfs_getbmap.
663 */
664STATIC int
665xfs_fiemap_format(
666 void **arg,
667 struct getbmapx *bmv,
668 int *full)
669{
670 int error;
671 struct fiemap_extent_info *fieinfo = *arg;
672 u32 fiemap_flags = 0;
673 u64 logical, physical, length;
674
675 /* Do nothing for a hole */
676 if (bmv->bmv_block == -1LL)
677 return 0;
678
679 logical = BBTOB(bmv->bmv_offset);
680 physical = BBTOB(bmv->bmv_block);
681 length = BBTOB(bmv->bmv_length);
682
683 if (bmv->bmv_oflags & BMV_OF_PREALLOC)
684 fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
685 else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
686 fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
687 physical = 0; /* no block yet */
688 }
689 if (bmv->bmv_oflags & BMV_OF_LAST)
690 fiemap_flags |= FIEMAP_EXTENT_LAST;
691
692 error = fiemap_fill_next_extent(fieinfo, logical, physical,
693 length, fiemap_flags);
694 if (error > 0) {
695 error = 0;
696 *full = 1; /* user array now full */
697 }
698
699 return -error;
700}
701
702STATIC int
703xfs_vn_fiemap(
704 struct inode *inode,
705 struct fiemap_extent_info *fieinfo,
706 u64 start,
707 u64 length)
708{
709 xfs_inode_t *ip = XFS_I(inode);
710 struct getbmapx bm;
711 int error;
712
713 error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
714 if (error)
715 return error;
716
717 /* Set up bmap header for xfs internal routine */
718 bm.bmv_offset = BTOBB(start);
719 /* Special case for whole file */
720 if (length == FIEMAP_MAX_OFFSET)
721 bm.bmv_length = -1LL;
722 else
723 bm.bmv_length = BTOBB(length);
724
725 /* our formatter will tell xfs_getbmap when to stop. */
726 bm.bmv_count = MAXEXTNUM;
727 bm.bmv_iflags = BMV_IF_PREALLOC;
728 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
729 bm.bmv_iflags |= BMV_IF_ATTRFORK;
730 if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
731 bm.bmv_iflags |= BMV_IF_DELALLOC;
732
733 error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
734 if (error)
735 return -error;
736
737 return 0;
738}
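/*
 * Illustrative note, not part of this patch: getbmap works in 512-byte
 * "basic blocks" (BBSHIFT == 9), so the BTOBB()/BBTOB() conversions
 * above are simple shifts, with BTOBB() rounding a byte count up to
 * the next whole basic block.
 */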
739
664static const struct inode_operations xfs_inode_operations = { 740static const struct inode_operations xfs_inode_operations = {
665 .permission = xfs_vn_permission, 741 .permission = xfs_vn_permission,
666 .truncate = xfs_vn_truncate, 742 .truncate = xfs_vn_truncate,
@@ -671,6 +747,7 @@ static const struct inode_operations xfs_inode_operations = {
671 .removexattr = generic_removexattr, 747 .removexattr = generic_removexattr,
672 .listxattr = xfs_vn_listxattr, 748 .listxattr = xfs_vn_listxattr,
673 .fallocate = xfs_vn_fallocate, 749 .fallocate = xfs_vn_fallocate,
750 .fiemap = xfs_vn_fiemap,
674}; 751};
675 752
676static const struct inode_operations xfs_dir_inode_operations = { 753static const struct inode_operations xfs_dir_inode_operations = {
@@ -766,12 +843,20 @@ xfs_diflags_to_iflags(
766 * When reading existing inodes from disk this is called directly 843 * When reading existing inodes from disk this is called directly
767 * from xfs_iget, when creating a new inode it is called from 844 * from xfs_iget, when creating a new inode it is called from
768 * xfs_ialloc after setting up the inode. 845 * xfs_ialloc after setting up the inode.
846 *
847 * We are always called with an uninitialised linux inode here.
848 * We need to initialise the necessary fields and take a reference
849 * on it.
769 */ 850 */
770void 851void
771xfs_setup_inode( 852xfs_setup_inode(
772 struct xfs_inode *ip) 853 struct xfs_inode *ip)
773{ 854{
774 struct inode *inode = ip->i_vnode; 855 struct inode *inode = &ip->i_vnode;
856
857 inode->i_ino = ip->i_ino;
858 inode->i_state = I_NEW|I_LOCK;
859 inode_add_to_lists(ip->i_mount->m_super, inode);
775 860
776 inode->i_mode = ip->i_d.di_mode; 861 inode->i_mode = ip->i_d.di_mode;
777 inode->i_nlink = ip->i_d.di_nlink; 862 inode->i_nlink = ip->i_d.di_nlink;
@@ -799,7 +884,6 @@ xfs_setup_inode(
799 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; 884 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
800 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; 885 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
801 xfs_diflags_to_iflags(inode, ip); 886 xfs_diflags_to_iflags(inode, ip);
802 xfs_iflags_clear(ip, XFS_IMODIFIED);
803 887
804 switch (inode->i_mode & S_IFMT) { 888 switch (inode->i_mode & S_IFMT) {
805 case S_IFREG: 889 case S_IFREG:
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 8b1a1e31dc21..ef41c92ce66e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -22,7 +22,6 @@ struct xfs_inode;
22 22
23extern const struct file_operations xfs_file_operations; 23extern const struct file_operations xfs_file_operations;
24extern const struct file_operations xfs_dir_file_operations; 24extern const struct file_operations xfs_dir_file_operations;
25extern const struct file_operations xfs_invis_file_operations;
26 25
27extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); 26extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
28 27
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index cc0f7b3a9795..507492d6dccd 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -21,18 +21,12 @@
21#include <linux/types.h> 21#include <linux/types.h>
22 22
23/* 23/*
24 * Some types are conditional depending on the target system.
25 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. 24 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
26 * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well 25 * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
27 * as requiring XFS_BIG_BLKNOS to be set.
28 */ 26 */
29#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) 27#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64)
30# define XFS_BIG_BLKNOS 1 28# define XFS_BIG_BLKNOS 1
31# if BITS_PER_LONG == 64 29# define XFS_BIG_INUMS 1
32# define XFS_BIG_INUMS 1
33# else
34# define XFS_BIG_INUMS 0
35# endif
36#else 30#else
37# define XFS_BIG_BLKNOS 0 31# define XFS_BIG_BLKNOS 0
38# define XFS_BIG_INUMS 0 32# define XFS_BIG_INUMS 0
@@ -77,6 +71,7 @@
77#include <linux/spinlock.h> 71#include <linux/spinlock.h>
78#include <linux/random.h> 72#include <linux/random.h>
79#include <linux/ctype.h> 73#include <linux/ctype.h>
74#include <linux/writeback.h>
80 75
81#include <asm/page.h> 76#include <asm/page.h>
82#include <asm/div64.h> 77#include <asm/div64.h>
@@ -85,7 +80,6 @@
85#include <asm/byteorder.h> 80#include <asm/byteorder.h>
86#include <asm/unaligned.h> 81#include <asm/unaligned.h>
87 82
88#include <xfs_vfs.h>
89#include <xfs_cred.h> 83#include <xfs_cred.h>
90#include <xfs_vnode.h> 84#include <xfs_vnode.h>
91#include <xfs_stats.h> 85#include <xfs_stats.h>
@@ -107,7 +101,6 @@
107#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 101#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
108#endif 102#endif
109 103
110#define restricted_chown xfs_params.restrict_chown.val
111#define irix_sgid_inherit xfs_params.sgid_inherit.val 104#define irix_sgid_inherit xfs_params.sgid_inherit.val
112#define irix_symlink_mode xfs_params.symlink_mode.val 105#define irix_symlink_mode xfs_params.symlink_mode.val
113#define xfs_panic_mask xfs_params.panic_mask.val 106#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1957e5357d04..7e90daa0d1d1 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -51,7 +51,6 @@
51#include "xfs_vnodeops.h" 51#include "xfs_vnodeops.h"
52 52
53#include <linux/capability.h> 53#include <linux/capability.h>
54#include <linux/mount.h>
55#include <linux/writeback.h> 54#include <linux/writeback.h>
56 55
57 56
@@ -243,7 +242,7 @@ xfs_read(
243 242
244 if (unlikely(ioflags & IO_ISDIRECT)) { 243 if (unlikely(ioflags & IO_ISDIRECT)) {
245 if (inode->i_mapping->nrpages) 244 if (inode->i_mapping->nrpages)
246 ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), 245 ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
247 -1, FI_REMAPF_LOCKED); 246 -1, FI_REMAPF_LOCKED);
248 mutex_unlock(&inode->i_mutex); 247 mutex_unlock(&inode->i_mutex);
249 if (ret) { 248 if (ret) {
@@ -668,15 +667,8 @@ start:
668 if (new_size > xip->i_size) 667 if (new_size > xip->i_size)
669 xip->i_new_size = new_size; 668 xip->i_new_size = new_size;
670 669
671 /* 670 if (likely(!(ioflags & IO_INVIS)))
672 * We're not supposed to change timestamps in readonly-mounted
673 * filesystems. Throw it away if anyone asks us.
674 */
675 if (likely(!(ioflags & IO_INVIS) &&
676 !mnt_want_write(file->f_path.mnt))) {
677 xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 671 xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
678 mnt_drop_write(file->f_path.mnt);
679 }
680 672
681 /* 673 /*
682 * If the offset is beyond the size of the file, we have a couple 674 * If the offset is beyond the size of the file, we have a couple
@@ -715,7 +707,6 @@ start:
715 } 707 }
716 } 708 }
717 709
718retry:
719 /* We can write back this queue in page reclaim */ 710 /* We can write back this queue in page reclaim */
720 current->backing_dev_info = mapping->backing_dev_info; 711 current->backing_dev_info = mapping->backing_dev_info;
721 712
@@ -771,6 +762,17 @@ retry:
771 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) 762 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
772 ret = wait_on_sync_kiocb(iocb); 763 ret = wait_on_sync_kiocb(iocb);
773 764
765 isize = i_size_read(inode);
766 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
767 *offset = isize;
768
769 if (*offset > xip->i_size) {
770 xfs_ilock(xip, XFS_ILOCK_EXCL);
771 if (*offset > xip->i_size)
772 xip->i_size = *offset;
773 xfs_iunlock(xip, XFS_ILOCK_EXCL);
774 }
775
774 if (ret == -ENOSPC && 776 if (ret == -ENOSPC &&
775 DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { 777 DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
776 xfs_iunlock(xip, iolock); 778 xfs_iunlock(xip, iolock);
@@ -784,20 +786,7 @@ retry:
784 xfs_ilock(xip, iolock); 786 xfs_ilock(xip, iolock);
785 if (error) 787 if (error)
786 goto out_unlock_internal; 788 goto out_unlock_internal;
787 pos = xip->i_size; 789 goto start;
788 ret = 0;
789 goto retry;
790 }
791
792 isize = i_size_read(inode);
793 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
794 *offset = isize;
795
796 if (*offset > xip->i_size) {
797 xfs_ilock(xip, XFS_ILOCK_EXCL);
798 if (*offset > xip->i_size)
799 xip->i_size = *offset;
800 xfs_iunlock(xip, XFS_ILOCK_EXCL);
801 } 790 }
802 791
803 error = -ret; 792 error = -ret;
@@ -855,13 +844,7 @@ retry:
855int 844int
856xfs_bdstrat_cb(struct xfs_buf *bp) 845xfs_bdstrat_cb(struct xfs_buf *bp)
857{ 846{
858 xfs_mount_t *mp; 847 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
859
860 mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);
861 if (!XFS_FORCED_SHUTDOWN(mp)) {
862 xfs_buf_iorequest(bp);
863 return 0;
864 } else {
865 xfs_buftrace("XFS__BDSTRAT IOERROR", bp); 848 xfs_buftrace("XFS__BDSTRAT IOERROR", bp);
866 /* 849 /*
867 * Metadata write that didn't get logged but 850 * Metadata write that didn't get logged but
@@ -874,6 +857,9 @@ xfs_bdstrat_cb(struct xfs_buf *bp)
874 else 857 else
875 return (xfs_bioerror(bp)); 858 return (xfs_bioerror(bp));
876 } 859 }
860
861 xfs_buf_iorequest(bp);
862 return 0;
877} 863}
878 864
879/* 865/*
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index 3d5b67c075c7..c3526d445f6a 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -53,11 +53,15 @@ xfs_read_xfsstats(
53 { "icluster", XFSSTAT_END_INODE_CLUSTER }, 53 { "icluster", XFSSTAT_END_INODE_CLUSTER },
54 { "vnodes", XFSSTAT_END_VNODE_OPS }, 54 { "vnodes", XFSSTAT_END_VNODE_OPS },
55 { "buf", XFSSTAT_END_BUF }, 55 { "buf", XFSSTAT_END_BUF },
56 { "abtb2", XFSSTAT_END_ABTB_V2 },
57 { "abtc2", XFSSTAT_END_ABTC_V2 },
58 { "bmbt2", XFSSTAT_END_BMBT_V2 },
59 { "ibt2", XFSSTAT_END_IBT_V2 },
56 }; 60 };
57 61
58 /* Loop over all stats groups */ 62 /* Loop over all stats groups */
59 for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { 63 for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) {
60 len += sprintf(buffer + len, xstats[i].desc); 64 len += sprintf(buffer + len, "%s", xstats[i].desc);
61 /* inner loop does each group */ 65 /* inner loop does each group */
62 while (j < xstats[i].endpoint) { 66 while (j < xstats[i].endpoint) {
63 val = 0; 67 val = 0;
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index e83820febc9f..736854b1ca1a 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -118,6 +118,71 @@ struct xfsstats {
118 __uint32_t xb_page_retries; 118 __uint32_t xb_page_retries;
119 __uint32_t xb_page_found; 119 __uint32_t xb_page_found;
120 __uint32_t xb_get_read; 120 __uint32_t xb_get_read;
121/* Version 2 btree counters */
122#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15)
123 __uint32_t xs_abtb_2_lookup;
124 __uint32_t xs_abtb_2_compare;
125 __uint32_t xs_abtb_2_insrec;
126 __uint32_t xs_abtb_2_delrec;
127 __uint32_t xs_abtb_2_newroot;
128 __uint32_t xs_abtb_2_killroot;
129 __uint32_t xs_abtb_2_increment;
130 __uint32_t xs_abtb_2_decrement;
131 __uint32_t xs_abtb_2_lshift;
132 __uint32_t xs_abtb_2_rshift;
133 __uint32_t xs_abtb_2_split;
134 __uint32_t xs_abtb_2_join;
135 __uint32_t xs_abtb_2_alloc;
136 __uint32_t xs_abtb_2_free;
137 __uint32_t xs_abtb_2_moves;
138#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15)
139 __uint32_t xs_abtc_2_lookup;
140 __uint32_t xs_abtc_2_compare;
141 __uint32_t xs_abtc_2_insrec;
142 __uint32_t xs_abtc_2_delrec;
143 __uint32_t xs_abtc_2_newroot;
144 __uint32_t xs_abtc_2_killroot;
145 __uint32_t xs_abtc_2_increment;
146 __uint32_t xs_abtc_2_decrement;
147 __uint32_t xs_abtc_2_lshift;
148 __uint32_t xs_abtc_2_rshift;
149 __uint32_t xs_abtc_2_split;
150 __uint32_t xs_abtc_2_join;
151 __uint32_t xs_abtc_2_alloc;
152 __uint32_t xs_abtc_2_free;
153 __uint32_t xs_abtc_2_moves;
154#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15)
155 __uint32_t xs_bmbt_2_lookup;
156 __uint32_t xs_bmbt_2_compare;
157 __uint32_t xs_bmbt_2_insrec;
158 __uint32_t xs_bmbt_2_delrec;
159 __uint32_t xs_bmbt_2_newroot;
160 __uint32_t xs_bmbt_2_killroot;
161 __uint32_t xs_bmbt_2_increment;
162 __uint32_t xs_bmbt_2_decrement;
163 __uint32_t xs_bmbt_2_lshift;
164 __uint32_t xs_bmbt_2_rshift;
165 __uint32_t xs_bmbt_2_split;
166 __uint32_t xs_bmbt_2_join;
167 __uint32_t xs_bmbt_2_alloc;
168 __uint32_t xs_bmbt_2_free;
169 __uint32_t xs_bmbt_2_moves;
170#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15)
171 __uint32_t xs_ibt_2_lookup;
172 __uint32_t xs_ibt_2_compare;
173 __uint32_t xs_ibt_2_insrec;
174 __uint32_t xs_ibt_2_delrec;
175 __uint32_t xs_ibt_2_newroot;
176 __uint32_t xs_ibt_2_killroot;
177 __uint32_t xs_ibt_2_increment;
178 __uint32_t xs_ibt_2_decrement;
179 __uint32_t xs_ibt_2_lshift;
180 __uint32_t xs_ibt_2_rshift;
181 __uint32_t xs_ibt_2_split;
182 __uint32_t xs_ibt_2_join;
183 __uint32_t xs_ibt_2_alloc;
184 __uint32_t xs_ibt_2_free;
185 __uint32_t xs_ibt_2_moves;
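/*
 * Illustrative note, not part of this patch: every v2 btree stats group
 * adds the same 15 counters (lookup through moves), so each
 * XFSSTAT_END_* marker is simply the previous endpoint plus 15;
 * xfs_read_xfsstats() prints each group by walking [j, endpoint)
 * against these markers.
 */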
121/* Extra precision counters */ 186/* Extra precision counters */
122 __uint64_t xs_xstrat_bytes; 187 __uint64_t xs_xstrat_bytes;
123 __uint64_t xs_write_bytes; 188 __uint64_t xs_write_bytes;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 37ebe36056eb..36f6cc703ef2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -18,7 +18,6 @@
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_bit.h" 19#include "xfs_bit.h"
20#include "xfs_log.h" 20#include "xfs_log.h"
21#include "xfs_clnt.h"
22#include "xfs_inum.h" 21#include "xfs_inum.h"
23#include "xfs_trans.h" 22#include "xfs_trans.h"
24#include "xfs_sb.h" 23#include "xfs_sb.h"
@@ -36,6 +35,7 @@
36#include "xfs_dinode.h" 35#include "xfs_dinode.h"
37#include "xfs_inode.h" 36#include "xfs_inode.h"
38#include "xfs_btree.h" 37#include "xfs_btree.h"
38#include "xfs_btree_trace.h"
39#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
40#include "xfs_bmap.h" 40#include "xfs_bmap.h"
41#include "xfs_rtalloc.h" 41#include "xfs_rtalloc.h"
@@ -48,7 +48,6 @@
 #include "xfs_buf_item.h"
 #include "xfs_utils.h"
 #include "xfs_vnodeops.h"
-#include "xfs_vfsops.h"
 #include "xfs_version.h"
 #include "xfs_log_priv.h"
 #include "xfs_trans_priv.h"
@@ -58,6 +57,7 @@
 #include "xfs_extfree_item.h"
 #include "xfs_mru_cache.h"
 #include "xfs_inode_item.h"
+#include "xfs_sync.h"
 
 #include <linux/namei.h>
 #include <linux/init.h>
@@ -70,36 +70,9 @@
 
 static struct quotactl_ops xfs_quotactl_operations;
 static struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_vnode_zone;
 static kmem_zone_t *xfs_ioend_zone;
 mempool_t *xfs_ioend_pool;
 
-STATIC struct xfs_mount_args *
-xfs_args_allocate(
-	struct super_block	*sb,
-	int			silent)
-{
-	struct xfs_mount_args	*args;
-
-	args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
-	if (!args)
-		return NULL;
-
-	args->logbufs = args->logbufsize = -1;
-	strncpy(args->fsname, sb->s_id, MAXNAMELEN);
-
-	/* Copy the already-parsed mount(2) flags we're interested in */
-	if (sb->s_flags & MS_DIRSYNC)
-		args->flags |= XFSMNT_DIRSYNC;
-	if (sb->s_flags & MS_SYNCHRONOUS)
-		args->flags |= XFSMNT_WSYNC;
-	if (silent)
-		args->flags |= XFSMNT_QUIET;
-	args->flags |= XFSMNT_32BITINODES;
-
-	return args;
-}
-
 #define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
 #define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
 #define MNTOPT_LOGDEV	"logdev"	/* log device */
@@ -188,26 +161,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
 	return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
 }
 
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Note that this function leaks the various device name allocations on
+ * failure.  The caller takes care of them.
+ */
 STATIC int
 xfs_parseargs(
 	struct xfs_mount	*mp,
 	char			*options,
-	struct xfs_mount_args	*args,
-	int			update)
+	char			**mtpt)
 {
+	struct super_block	*sb = mp->m_super;
 	char			*this_char, *value, *eov;
-	int			dsunit, dswidth, vol_dsunit, vol_dswidth;
-	int			iosize;
+	int			dsunit = 0;
+	int			dswidth = 0;
+	int			iosize = 0;
 	int			dmapi_implies_ikeep = 1;
+	uchar_t			iosizelog = 0;
+
+	/*
+	 * Copy binary VFS mount flags we are interested in.
+	 */
+	if (sb->s_flags & MS_RDONLY)
+		mp->m_flags |= XFS_MOUNT_RDONLY;
+	if (sb->s_flags & MS_DIRSYNC)
+		mp->m_flags |= XFS_MOUNT_DIRSYNC;
+	if (sb->s_flags & MS_SYNCHRONOUS)
+		mp->m_flags |= XFS_MOUNT_WSYNC;
+
+	/*
+	 * Set some default flags that could be cleared by the mount option
+	 * parsing.
+	 */
+	mp->m_flags |= XFS_MOUNT_BARRIER;
+	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
 
-	args->flags |= XFSMNT_BARRIER;
-	args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+	/*
+	 * These can be overridden by the mount option parsing.
+	 */
+	mp->m_logbufs = -1;
+	mp->m_logbsize = -1;
 
 	if (!options)
 		goto done;
 
-	iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0;
-
 	while ((this_char = strsep(&options, ",")) != NULL) {
 		if (!*this_char)
 			continue;
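The rewritten xfs_parseargs() keeps the same comma-separated walk: strsep() peels off one token per iteration and an embedded '=' splits option from value, in place. A standalone userspace sketch of just that walk (glibc strsep(); the option string here is illustrative):

	#include <stdio.h>
	#include <string.h>
	#include <stdlib.h>

	int main(void)
	{
		char *options = strdup("logbufs=8,noalign,logdev=/dev/sdb1");
		char *opts = options;	/* strsep() advances this cursor */
		char *this_char;

		while ((this_char = strsep(&opts, ",")) != NULL) {
			char *value;

			if (!*this_char)
				continue;	/* skip empty ",," segments */
			value = strchr(this_char, '=');
			if (value)
				*value++ = '\0';	/* split "opt=value" */
			printf("option '%s' value '%s'\n",
			       this_char, value ? value : "(none)");
		}
		free(options);
		return 0;
	}
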
@@ -221,7 +222,7 @@ xfs_parseargs(
 					this_char);
 				return EINVAL;
 			}
-			args->logbufs = simple_strtoul(value, &eov, 10);
+			mp->m_logbufs = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
 			if (!value || !*value) {
 				cmn_err(CE_WARN,
@@ -229,7 +230,7 @@ xfs_parseargs(
 					this_char);
 				return EINVAL;
 			}
-			args->logbufsize = suffix_strtoul(value, &eov, 10);
+			mp->m_logbsize = suffix_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
 			if (!value || !*value) {
 				cmn_err(CE_WARN,
@@ -237,7 +238,9 @@ xfs_parseargs(
 					this_char);
 				return EINVAL;
 			}
-			strncpy(args->logname, value, MAXNAMELEN);
+			mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+			if (!mp->m_logname)
+				return ENOMEM;
 		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
 			if (!value || !*value) {
 				cmn_err(CE_WARN,
@@ -245,7 +248,9 @@ xfs_parseargs(
 					this_char);
 				return EINVAL;
 			}
-			strncpy(args->mtpt, value, MAXNAMELEN);
+			*mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+			if (!*mtpt)
+				return ENOMEM;
 		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
 			if (!value || !*value) {
 				cmn_err(CE_WARN,
@@ -253,7 +258,9 @@ xfs_parseargs(
 					this_char);
 				return EINVAL;
 			}
-			strncpy(args->rtname, value, MAXNAMELEN);
+			mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+			if (!mp->m_rtname)
+				return ENOMEM;
 		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
 			if (!value || !*value) {
 				cmn_err(CE_WARN,
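Replacing strncpy() into a fixed MAXNAMELEN buffer with kstrndup() means each device name copy is allocated to fit and is always NUL-terminated, at the cost of a new ENOMEM failure path for the caller to unwind. A rough userspace analogue of that bounded duplication, assuming POSIX.1-2008 strndup():

	#include <string.h>
	#include <stdlib.h>

	/* copy at most max chars, always NUL-terminated; NULL on OOM */
	char *dup_bounded(const char *s, size_t max)
	{
		return strndup(s, max);	/* caller must check for NULL */
	}
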
@@ -262,8 +269,7 @@ xfs_parseargs(
 				return EINVAL;
 			}
 			iosize = simple_strtoul(value, &eov, 10);
-			args->flags |= XFSMNT_IOSIZE;
-			args->iosizelog = (uint8_t) iosize;
+			iosizelog = ffs(iosize) - 1;
 		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
 			if (!value || !*value) {
 				cmn_err(CE_WARN,
@@ -272,8 +278,7 @@ xfs_parseargs(
 				return EINVAL;
 			}
 			iosize = suffix_strtoul(value, &eov, 10);
-			args->flags |= XFSMNT_IOSIZE;
-			args->iosizelog = ffs(iosize) - 1;
+			iosizelog = ffs(iosize) - 1;
 		} else if (!strcmp(this_char, MNTOPT_GRPID) ||
 			   !strcmp(this_char, MNTOPT_BSDGROUPS)) {
 			mp->m_flags |= XFS_MOUNT_GRPID;
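Both iosize paths now reduce to the same expression, iosizelog = ffs(iosize) - 1: for a power-of-two size, the 1-based index of the lowest set bit is exactly its base-2 logarithm. A quick check in plain C:

	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	int main(void)
	{
		int iosize;

		for (iosize = 4096; iosize <= 65536; iosize <<= 1)
			printf("iosize %6d -> log %d\n", iosize, ffs(iosize) - 1);
		return 0;
	}
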
@@ -281,23 +286,25 @@ xfs_parseargs(
 			   !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
 			mp->m_flags &= ~XFS_MOUNT_GRPID;
 		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
-			args->flags |= XFSMNT_WSYNC;
+			mp->m_flags |= XFS_MOUNT_WSYNC;
 		} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
-			args->flags |= XFSMNT_OSYNCISOSYNC;
+			mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
 		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
-			args->flags |= XFSMNT_NORECOVERY;
+			mp->m_flags |= XFS_MOUNT_NORECOVERY;
 		} else if (!strcmp(this_char, MNTOPT_INO64)) {
-			args->flags |= XFSMNT_INO64;
-#if !XFS_BIG_INUMS
+#if XFS_BIG_INUMS
+			mp->m_flags |= XFS_MOUNT_INO64;
+			mp->m_inoadd = XFS_INO64_OFFSET;
+#else
 			cmn_err(CE_WARN,
 				"XFS: %s option not allowed on this system",
 				this_char);
 			return EINVAL;
 #endif
 		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
-			args->flags |= XFSMNT_NOALIGN;
+			mp->m_flags |= XFS_MOUNT_NOALIGN;
 		} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
-			args->flags |= XFSMNT_SWALLOC;
+			mp->m_flags |= XFS_MOUNT_SWALLOC;
 		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
 			if (!value || !*value) {
 				cmn_err(CE_WARN,
@@ -315,7 +322,7 @@ xfs_parseargs(
 			}
 			dswidth = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
-			args->flags &= ~XFSMNT_32BITINODES;
+			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
 #if !XFS_BIG_INUMS
 			cmn_err(CE_WARN,
 				"XFS: %s option not allowed on this system",
@@ -323,56 +330,61 @@ xfs_parseargs(
 			return EINVAL;
 #endif
 		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
-			args->flags |= XFSMNT_NOUUID;
+			mp->m_flags |= XFS_MOUNT_NOUUID;
 		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
-			args->flags |= XFSMNT_BARRIER;
+			mp->m_flags |= XFS_MOUNT_BARRIER;
 		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
-			args->flags &= ~XFSMNT_BARRIER;
+			mp->m_flags &= ~XFS_MOUNT_BARRIER;
 		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
-			args->flags |= XFSMNT_IKEEP;
+			mp->m_flags |= XFS_MOUNT_IKEEP;
 		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
 			dmapi_implies_ikeep = 0;
-			args->flags &= ~XFSMNT_IKEEP;
+			mp->m_flags &= ~XFS_MOUNT_IKEEP;
 		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
-			args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE;
+			mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
 		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
-			args->flags2 |= XFSMNT2_COMPAT_IOSIZE;
+			mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
 		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-			args->flags |= XFSMNT_ATTR2;
+			mp->m_flags |= XFS_MOUNT_ATTR2;
 		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-			args->flags &= ~XFSMNT_ATTR2;
-			args->flags |= XFSMNT_NOATTR2;
+			mp->m_flags &= ~XFS_MOUNT_ATTR2;
+			mp->m_flags |= XFS_MOUNT_NOATTR2;
 		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
-			args->flags2 |= XFSMNT2_FILESTREAMS;
+			mp->m_flags |= XFS_MOUNT_FILESTREAMS;
 		} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
-			args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA);
-			args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA);
+			mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+					  XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+					  XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+					  XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
 		} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
 			   !strcmp(this_char, MNTOPT_UQUOTA) ||
 			   !strcmp(this_char, MNTOPT_USRQUOTA)) {
-			args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF;
+			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+					 XFS_UQUOTA_ENFD);
 		} else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
 			   !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
-			args->flags |= XFSMNT_UQUOTA;
-			args->flags &= ~XFSMNT_UQUOTAENF;
+			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+			mp->m_qflags &= ~XFS_UQUOTA_ENFD;
 		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
 			   !strcmp(this_char, MNTOPT_PRJQUOTA)) {
-			args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF;
+			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+					 XFS_OQUOTA_ENFD);
 		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
-			args->flags |= XFSMNT_PQUOTA;
-			args->flags &= ~XFSMNT_PQUOTAENF;
+			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
 		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
 			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {
-			args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF;
+			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+					 XFS_OQUOTA_ENFD);
 		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
-			args->flags |= XFSMNT_GQUOTA;
-			args->flags &= ~XFSMNT_GQUOTAENF;
+			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
 		} else if (!strcmp(this_char, MNTOPT_DMAPI)) {
-			args->flags |= XFSMNT_DMAPI;
+			mp->m_flags |= XFS_MOUNT_DMAPI;
 		} else if (!strcmp(this_char, MNTOPT_XDSM)) {
-			args->flags |= XFSMNT_DMAPI;
+			mp->m_flags |= XFS_MOUNT_DMAPI;
 		} else if (!strcmp(this_char, MNTOPT_DMI)) {
-			args->flags |= XFSMNT_DMAPI;
+			mp->m_flags |= XFS_MOUNT_DMAPI;
 		} else if (!strcmp(this_char, "ihashsize")) {
 			cmn_err(CE_WARN,
 	"XFS: ihashsize no longer used, option is deprecated.");
@@ -390,27 +402,29 @@ xfs_parseargs(
 		}
 	}
 
-	if (args->flags & XFSMNT_NORECOVERY) {
-		if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) {
-			cmn_err(CE_WARN,
-				"XFS: no-recovery mounts must be read-only.");
-			return EINVAL;
-		}
+	/*
+	 * no recovery flag requires a read-only mount
+	 */
+	if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
+	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+		cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only.");
+		return EINVAL;
 	}
 
-	if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) {
+	if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
 		cmn_err(CE_WARN,
 	"XFS: sunit and swidth options incompatible with the noalign option");
 		return EINVAL;
 	}
 
-	if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) {
+	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
 		cmn_err(CE_WARN,
 			"XFS: cannot mount with both project and group quota");
 		return EINVAL;
 	}
 
-	if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') {
+	if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
 		printk("XFS: %s option needs the mount point option as well\n",
 			MNTOPT_DMAPI);
 		return EINVAL;
@@ -438,27 +452,66 @@ xfs_parseargs(
 	 * Note that if "ikeep" or "noikeep" mount options are
 	 * supplied, then they are honored.
 	 */
-	if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep)
-		args->flags |= XFSMNT_IKEEP;
+	if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
+		mp->m_flags |= XFS_MOUNT_IKEEP;
 
-	if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
+done:
+	if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+		/*
+		 * At this point the superblock has not been read
+		 * in, therefore we do not know the block size.
+		 * Before the mount call ends we will convert
+		 * these to FSBs.
+		 */
 		if (dsunit) {
-			args->sunit = dsunit;
-			args->flags |= XFSMNT_RETERR;
-		} else {
-			args->sunit = vol_dsunit;
+			mp->m_dalign = dsunit;
+			mp->m_flags |= XFS_MOUNT_RETERR;
 		}
-		dswidth ? (args->swidth = dswidth) :
-			  (args->swidth = vol_dswidth);
-	} else {
-		args->sunit = args->swidth = 0;
+
+		if (dswidth)
+			mp->m_swidth = dswidth;
+	}
+
+	if (mp->m_logbufs != -1 &&
+	    mp->m_logbufs != 0 &&
+	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+		cmn_err(CE_WARN,
+			"XFS: invalid logbufs value: %d [not %d-%d]",
+			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+		return XFS_ERROR(EINVAL);
+	}
+	if (mp->m_logbsize != -1 &&
+	    mp->m_logbsize !=  0 &&
+	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+	     !is_power_of_2(mp->m_logbsize))) {
+		cmn_err(CE_WARN,
+			"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+			mp->m_logbsize);
+		return XFS_ERROR(EINVAL);
+	}
+
+	mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+	if (!mp->m_fsname)
+		return ENOMEM;
+	mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+
+	if (iosizelog) {
+		if (iosizelog > XFS_MAX_IO_LOG ||
+		    iosizelog < XFS_MIN_IO_LOG) {
+			cmn_err(CE_WARN,
+			"XFS: invalid log iosize: %d [not %d-%d]",
+				iosizelog, XFS_MIN_IO_LOG,
+				XFS_MAX_IO_LOG);
+			return XFS_ERROR(EINVAL);
+		}
+
+		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+		mp->m_readio_log = iosizelog;
+		mp->m_writeio_log = iosizelog;
 	}
 
-done:
-	if (args->flags & XFSMNT_32BITINODES)
-		mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-	if (args->flags2)
-		args->flags |= XFSMNT_FLAGS2;
 	return 0;
 }
 
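The logbufs/logbsize checks moved here from the deleted xfs_start_flags(); the predicate accepts -1 or 0 as "unset" and otherwise insists on a power of two within the log record limits. The same shape in a standalone sketch, with the limits hard-coded to the usual 16k/256k purely for illustration:

	#include <stdbool.h>
	#include <stdio.h>

	#define MIN_BSIZE	(16 * 1024)
	#define MAX_BSIZE	(256 * 1024)

	static bool is_power_of_2(unsigned long n)
	{
		return n != 0 && (n & (n - 1)) == 0;
	}

	static bool logbsize_valid(long v)
	{
		if (v == -1 || v == 0)
			return true;	/* "use the default" */
		return v >= MIN_BSIZE && v <= MAX_BSIZE && is_power_of_2(v);
	}

	int main(void)
	{
		printf("%d %d %d\n", logbsize_valid(32768),
		       logbsize_valid(20000), logbsize_valid(-1));
		return 0;
	}
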
@@ -704,8 +757,7 @@ xfs_close_devices(
  */
 STATIC int
 xfs_open_devices(
-	struct xfs_mount	*mp,
-	struct xfs_mount_args	*args)
+	struct xfs_mount	*mp)
 {
 	struct block_device	*ddev = mp->m_super->s_bdev;
 	struct block_device	*logdev = NULL, *rtdev = NULL;
@@ -714,14 +766,14 @@ xfs_open_devices(
 	/*
 	 * Open real time and log devices - order is important.
 	 */
-	if (args->logname[0]) {
-		error = xfs_blkdev_get(mp, args->logname, &logdev);
+	if (mp->m_logname) {
+		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
 		if (error)
 			goto out;
 	}
 
-	if (args->rtname[0]) {
-		error = xfs_blkdev_get(mp, args->rtname, &rtdev);
+	if (mp->m_rtname) {
+		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
 		if (error)
 			goto out_close_logdev;
 
@@ -813,18 +865,18 @@ xfs_setup_devices(
  */
 void
 xfsaild_wakeup(
-	xfs_mount_t		*mp,
+	struct xfs_ail		*ailp,
 	xfs_lsn_t		threshold_lsn)
 {
-	mp->m_ail.xa_target = threshold_lsn;
-	wake_up_process(mp->m_ail.xa_task);
+	ailp->xa_target = threshold_lsn;
+	wake_up_process(ailp->xa_task);
 }
 
 int
 xfsaild(
 	void	*data)
 {
-	xfs_mount_t	*mp = (xfs_mount_t *)data;
+	struct xfs_ail	*ailp = data;
 	xfs_lsn_t	last_pushed_lsn = 0;
 	long		tout = 0;
 
@@ -836,11 +888,11 @@ xfsaild(
 		/* swsusp */
 		try_to_freeze();
 
-		ASSERT(mp->m_log);
-		if (XFS_FORCED_SHUTDOWN(mp))
+		ASSERT(ailp->xa_mount->m_log);
+		if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
 			continue;
 
-		tout = xfsaild_push(mp, &last_pushed_lsn);
+		tout = xfsaild_push(ailp, &last_pushed_lsn);
 	}
 
 	return 0;
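xfsaild now carries a struct xfs_ail instead of the whole mount, but the thread lifecycle is unchanged: kthread_run() starts it, the loop freezes and pushes, kthread_stop() ends it. A POSIX-threads caricature of that lifecycle, with all names invented for the sketch:

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>
	#include <unistd.h>

	static atomic_bool should_stop;

	static void *pusher(void *data)
	{
		while (!atomic_load(&should_stop)) {
			/* one push cycle; the AIL code computes a timeout here */
			puts("push");
			usleep(100 * 1000);
		}
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, pusher, NULL);	/* ~ kthread_run() */
		sleep(1);
		atomic_store(&should_stop, 1);		/* ~ kthread_stop() */
		pthread_join(t, NULL);
		return 0;
	}
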
@@ -848,43 +900,82 @@ xfsaild(
 
 int
 xfsaild_start(
-	xfs_mount_t	*mp)
+	struct xfs_ail	*ailp)
 {
-	mp->m_ail.xa_target = 0;
-	mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild");
-	if (IS_ERR(mp->m_ail.xa_task))
-		return -PTR_ERR(mp->m_ail.xa_task);
+	ailp->xa_target = 0;
+	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
+	if (IS_ERR(ailp->xa_task))
+		return -PTR_ERR(ailp->xa_task);
 	return 0;
 }
 
 void
 xfsaild_stop(
-	xfs_mount_t	*mp)
+	struct xfs_ail	*ailp)
 {
-	kthread_stop(mp->m_ail.xa_task);
+	kthread_stop(ailp->xa_task);
 }
 
 
+/* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
 	struct super_block	*sb)
 {
-	return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
+	BUG();
+	return NULL;
 }
 
+/*
+ * Now that the generic code is guaranteed not to be accessing
+ * the linux inode, we can reclaim the inode.
+ */
 STATIC void
 xfs_fs_destroy_inode(
 	struct inode		*inode)
 {
-	kmem_zone_free(xfs_vnode_zone, inode);
+	xfs_inode_t		*ip = XFS_I(inode);
+
+	XFS_STATS_INC(vn_reclaim);
+	if (xfs_reclaim(ip))
+		panic("%s: cannot reclaim 0x%p\n", __func__, inode);
 }
 
+/*
+ * Slab object creation initialisation for the XFS inode.
+ * This covers only the idempotent fields in the XFS inode;
+ * all other fields need to be initialised on allocation
+ * from the slab. This avoids the need to repeatedly initialise
+ * fields in the xfs inode that are left in the initialised state
+ * when freeing the inode.
+ */
 STATIC void
 xfs_fs_inode_init_once(
-	void			*vnode)
+	void			*inode)
 {
-	inode_init_once((struct inode *)vnode);
+	struct xfs_inode	*ip = inode;
+
+	memset(ip, 0, sizeof(struct xfs_inode));
+
+	/* vfs inode */
+	inode_init_once(VFS_I(ip));
+
+	/* xfs inode */
+	atomic_set(&ip->i_iocount, 0);
+	atomic_set(&ip->i_pincount, 0);
+	spin_lock_init(&ip->i_flags_lock);
+	init_waitqueue_head(&ip->i_ipin_wait);
+	/*
+	 * Because we want to use a counting completion, complete
+	 * the flush completion once to allow a single access to
+	 * the flush completion without blocking.
+	 */
+	init_completion(&ip->i_flush);
+	complete(&ip->i_flush);
+
+	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+		     "xfsino", ip->i_ino);
+	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
 }
 
 /*
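Moving xfs_fs_inode_init_once() onto the xfs_inode zone relies on the slab contract: the constructor runs when an object is first created, not on every allocation, so only fields whose values survive free/alloc cycles belong in it. A toy userspace cache making that split visible (illustrative code, not the kernel API):

	#include <stdio.h>
	#include <stdlib.h>

	struct obj {
		int	ctor_runs;	/* idempotent state: set up once */
		int	in_use;		/* per-allocation state: reset each time */
	};

	static struct obj *free_obj;	/* one-slot "free list" */

	static struct obj *cache_alloc(void)
	{
		struct obj *o = free_obj;

		if (o) {
			free_obj = NULL;  /* recycled: ctor does NOT run again */
		} else {
			o = calloc(1, sizeof(*o));
			if (!o)
				return NULL;
			o->ctor_runs++;	  /* constructor work happens here */
		}
		o->in_use = 1;		  /* per-allocation initialisation */
		return o;
	}

	static void cache_free(struct obj *o)
	{
		o->in_use = 0;
		free_obj = o;		  /* keep constructed state around */
	}

	int main(void)
	{
		struct obj *o = cache_alloc();

		if (!o)
			return 1;
		cache_free(o);
		o = cache_alloc();
		printf("ctor ran %d time(s)\n", o->ctor_runs);	/* prints 1 */
		return 0;
	}
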
@@ -898,21 +989,26 @@ xfs_fs_write_inode(
 	struct inode		*inode,
 	int			sync)
 {
+	struct xfs_inode	*ip = XFS_I(inode);
 	int			error = 0;
 	int			flags = 0;
 
-	xfs_itrace_entry(XFS_I(inode));
+	xfs_itrace_entry(ip);
 	if (sync) {
-		filemap_fdatawait(inode->i_mapping);
+		error = xfs_wait_on_pages(ip, 0, -1);
+		if (error)
+			goto out_error;
 		flags |= FLUSH_SYNC;
 	}
-	error = xfs_inode_flush(XFS_I(inode), flags);
+	error = xfs_inode_flush(ip, flags);
+
+out_error:
 	/*
 	 * if we failed to write out the inode then mark
 	 * it dirty again so we'll try again later.
 	 */
 	if (error)
-		mark_inode_dirty_sync(inode);
+		xfs_mark_inode_dirty_sync(ip);
 
 	return -error;
 }
@@ -923,164 +1019,12 @@ xfs_fs_clear_inode(
 {
 	xfs_inode_t		*ip = XFS_I(inode);
 
-	/*
-	 * ip can be null when xfs_iget_core calls xfs_idestroy if we
-	 * find an inode with di_mode == 0 but without IGET_CREATE set.
-	 */
-	if (ip) {
-		xfs_itrace_entry(ip);
-		XFS_STATS_INC(vn_rele);
-		XFS_STATS_INC(vn_remove);
-		XFS_STATS_INC(vn_reclaim);
-		XFS_STATS_DEC(vn_active);
-
-		xfs_inactive(ip);
-		xfs_iflags_clear(ip, XFS_IMODIFIED);
-		if (xfs_reclaim(ip))
-			panic("%s: cannot reclaim 0x%p\n", __func__, inode);
-	}
-
-	ASSERT(XFS_I(inode) == NULL);
-}
+	xfs_itrace_entry(ip);
+	XFS_STATS_INC(vn_rele);
+	XFS_STATS_INC(vn_remove);
+	XFS_STATS_DEC(vn_active);
 
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-	struct xfs_mount *mp,
-	void		*data,
-	void		(*syncer)(struct xfs_mount *, void *))
-{
-	struct bhv_vfs_sync_work *work;
-
-	work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
-	INIT_LIST_HEAD(&work->w_list);
-	work->w_syncer = syncer;
-	work->w_data = data;
-	work->w_mount = mp;
-	spin_lock(&mp->m_sync_lock);
-	list_add_tail(&work->w_list, &mp->m_sync_list);
-	spin_unlock(&mp->m_sync_lock);
-	wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inode_work(
-	struct xfs_mount *mp,
-	void		*arg)
-{
-	struct inode	*inode = arg;
-	filemap_flush(inode->i_mapping);
-	iput(inode);
-}
-
-void
-xfs_flush_inode(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-
-	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
-	delay(msecs_to_jiffies(500));
-}
-
-/*
- * This is the "bigger hammer" version of xfs_flush_inode_work...
- * (IOW, "If at first you don't succeed, use a Bigger Hammer").
- */
-STATIC void
-xfs_flush_device_work(
-	struct xfs_mount *mp,
-	void		*arg)
-{
-	struct inode	*inode = arg;
-	sync_blockdev(mp->m_super->s_bdev);
-	iput(inode);
-}
-
-void
-xfs_flush_device(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-
-	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
-	delay(msecs_to_jiffies(500));
-	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
-}
-
-STATIC void
-xfs_sync_worker(
-	struct xfs_mount *mp,
-	void		*unused)
-{
-	int		error;
-
-	if (!(mp->m_flags & XFS_MOUNT_RDONLY))
-		error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);
-	mp->m_sync_seq++;
-	wake_up(&mp->m_wait_single_sync_task);
-}
-
-STATIC int
-xfssyncd(
-	void			*arg)
-{
-	struct xfs_mount	*mp = arg;
-	long			timeleft;
-	bhv_vfs_sync_work_t	*work, *n;
-	LIST_HEAD		(tmp);
-
-	set_freezable();
-	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-	for (;;) {
-		timeleft = schedule_timeout_interruptible(timeleft);
-		/* swsusp */
-		try_to_freeze();
-		if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-			break;
-
-		spin_lock(&mp->m_sync_lock);
-		/*
-		 * We can get woken by laptop mode, to do a sync -
-		 * that's the (only!) case where the list would be
-		 * empty with time remaining.
-		 */
-		if (!timeleft || list_empty(&mp->m_sync_list)) {
-			if (!timeleft)
-				timeleft = xfs_syncd_centisecs *
-							msecs_to_jiffies(10);
-			INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-			list_add_tail(&mp->m_sync_work.w_list,
-					&mp->m_sync_list);
-		}
-		list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
-			list_move(&work->w_list, &tmp);
-		spin_unlock(&mp->m_sync_lock);
-
-		list_for_each_entry_safe(work, n, &tmp, w_list) {
-			(*work->w_syncer)(mp, work->w_data);
-			list_del(&work->w_list);
-			if (work == &mp->m_sync_work)
-				continue;
-			kmem_free(work);
-		}
-	}
-
-	return 0;
-}
+	xfs_inactive(ip);
+}
 
 STATIC void
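The queue-to-xfssyncd machinery deleted above (it moved into the new xfs_sync.c) is the usual pattern of handing small work items to one dedicated thread, trading a queue for stack depth and deadlock avoidance. A compact userspace analogue with a mutex-protected list; ordering and wakeup details are elided and all names are illustrative:

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct work {
		struct work	*next;
		void		(*fn)(void *);
		void		*data;
	};

	static struct work *queue;
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	static void queue_work(void (*fn)(void *), void *data)
	{
		struct work *w = malloc(sizeof(*w));

		if (!w)
			return;		/* a real queue would report this */
		w->fn = fn;
		w->data = data;
		pthread_mutex_lock(&lock);
		w->next = queue;
		queue = w;
		pthread_mutex_unlock(&lock);
	}

	static void run_pending(void)	/* what the syncd thread body does */
	{
		pthread_mutex_lock(&lock);
		struct work *w = queue;
		queue = NULL;
		pthread_mutex_unlock(&lock);

		while (w) {
			struct work *next = w->next;

			w->fn(w->data);
			free(w);
			w = next;
		}
	}

	static void say(void *data) { puts(data); }

	int main(void)
	{
		queue_work(say, "sync fsdata");
		run_pending();
		return 0;
	}
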
@@ -1099,11 +1043,9 @@ xfs_fs_put_super(
 	struct xfs_mount	*mp = XFS_M(sb);
 	struct xfs_inode	*rip = mp->m_rootip;
 	int			unmount_event_flags = 0;
-	int			error;
 
-	kthread_stop(mp->m_sync_task);
-
-	xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
+	xfs_syncd_stop(mp);
+	xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
 
 #ifdef HAVE_DMAPI
 	if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1128,18 +1070,6 @@ xfs_fs_put_super(
 	xfs_filestream_unmount(mp);
 
 	XFS_bflush(mp->m_ddev_targp);
-	error = xfs_unmount_flush(mp, 0);
-	WARN_ON(error);
-
-	/*
-	 * If we're forcing a shutdown, typically because of a media error,
-	 * we want to make sure we invalidate dirty pages that belong to
-	 * referenced vnodes as well.
-	 */
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
-		ASSERT(error != EFSCORRUPTED);
-	}
 
 	if (mp->m_flags & XFS_MOUNT_DMAPI) {
 		XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
@@ -1161,7 +1091,7 @@ xfs_fs_write_super(
 	struct super_block	*sb)
 {
 	if (!(sb->s_flags & MS_RDONLY))
-		xfs_sync(XFS_M(sb), SYNC_FSDATA);
+		xfs_sync_fsdata(XFS_M(sb), 0);
 	sb->s_dirt = 0;
 }
 
@@ -1172,7 +1102,6 @@ xfs_fs_sync_super(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 	int			error;
-	int			flags;
 
 	/*
 	 * Treat a sync operation like a freeze.  This is to work
@@ -1186,20 +1115,10 @@ xfs_fs_sync_super(
 	 * dirty the Linux inode until after the transaction I/O
 	 * completes.
 	 */
-	if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) {
-		/*
-		 * First stage of freeze - no more writers will make progress
-		 * now we are here, so we flush delwri and delalloc buffers
-		 * here, then wait for all I/O to complete.  Data is frozen at
-		 * that point. Metadata is not frozen, transactions can still
-		 * occur here so don't bother flushing the buftarg (i.e
-		 * SYNC_QUIESCE) because it'll just get dirty again.
-		 */
-		flags = SYNC_DATA_QUIESCE;
-	} else
-		flags = SYNC_FSDATA;
-
-	error = xfs_sync(mp, flags);
+	if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE))
+		error = xfs_quiesce_data(mp);
+	else
+		error = xfs_sync_fsdata(mp, 0);
 	sb->s_dirt = 0;
 
 	if (unlikely(laptop_mode)) {
@@ -1337,9 +1256,8 @@ xfs_fs_remount(
 
 	/* rw -> ro */
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
-		xfs_filestream_flush(mp);
-		xfs_sync(mp, SYNC_DATA_QUIESCE);
-		xfs_attr_quiesce(mp);
+		xfs_quiesce_data(mp);
+		xfs_quiesce_attr(mp);
 		mp->m_flags |= XFS_MOUNT_RDONLY;
 	}
 
@@ -1348,7 +1266,7 @@ xfs_fs_remount(
 
 /*
  * Second stage of a freeze. The data is already frozen so we only
- * need to take care of themetadata. Once that's done write a dummy
+ * need to take care of the metadata. Once that's done write a dummy
  * record to dirty the log in case of a crash while frozen.
  */
 STATIC void
@@ -1357,7 +1275,7 @@ xfs_fs_lockfs(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
-	xfs_attr_quiesce(mp);
+	xfs_quiesce_attr(mp);
 	xfs_fs_log_dummy(mp);
 }
 
@@ -1422,175 +1340,28 @@ xfs_fs_setxquota(
 
 /*
  * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- */
-STATIC int
-xfs_start_flags(
-	struct xfs_mount_args	*ap,
-	struct xfs_mount	*mp)
-{
-	int			error;
-
-	/* Values are in BBs */
-	if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
-		/*
-		 * At this point the superblock has not been read
-		 * in, therefore we do not know the block size.
-		 * Before the mount call ends we will convert
-		 * these to FSBs.
-		 */
-		mp->m_dalign = ap->sunit;
-		mp->m_swidth = ap->swidth;
-	}
-
-	if (ap->logbufs != -1 &&
-	    ap->logbufs != 0 &&
-	    (ap->logbufs < XLOG_MIN_ICLOGS ||
-	     ap->logbufs > XLOG_MAX_ICLOGS)) {
-		cmn_err(CE_WARN,
-			"XFS: invalid logbufs value: %d [not %d-%d]",
-			ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
-		return XFS_ERROR(EINVAL);
-	}
-	mp->m_logbufs = ap->logbufs;
-	if (ap->logbufsize != -1 &&
-	    ap->logbufsize !=  0 &&
-	    (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
-	     ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
-	     !is_power_of_2(ap->logbufsize))) {
-		cmn_err(CE_WARN,
-			"XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
-			ap->logbufsize);
-		return XFS_ERROR(EINVAL);
-	}
-
-	error = ENOMEM;
-
-	mp->m_logbsize = ap->logbufsize;
-	mp->m_fsname_len = strlen(ap->fsname) + 1;
-
-	mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL);
-	if (!mp->m_fsname)
-		goto out;
-
-	if (ap->rtname[0]) {
-		mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL);
-		if (!mp->m_rtname)
-			goto out_free_fsname;
-
-	}
-
-	if (ap->logname[0]) {
-		mp->m_logname = kstrdup(ap->logname, GFP_KERNEL);
-		if (!mp->m_logname)
-			goto out_free_rtname;
-	}
-
-	if (ap->flags & XFSMNT_WSYNC)
-		mp->m_flags |= XFS_MOUNT_WSYNC;
-#if XFS_BIG_INUMS
-	if (ap->flags & XFSMNT_INO64) {
-		mp->m_flags |= XFS_MOUNT_INO64;
-		mp->m_inoadd = XFS_INO64_OFFSET;
-	}
-#endif
-	if (ap->flags & XFSMNT_RETERR)
-		mp->m_flags |= XFS_MOUNT_RETERR;
-	if (ap->flags & XFSMNT_NOALIGN)
-		mp->m_flags |= XFS_MOUNT_NOALIGN;
-	if (ap->flags & XFSMNT_SWALLOC)
-		mp->m_flags |= XFS_MOUNT_SWALLOC;
-	if (ap->flags & XFSMNT_OSYNCISOSYNC)
-		mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
-	if (ap->flags & XFSMNT_32BITINODES)
-		mp->m_flags |= XFS_MOUNT_32BITINODES;
-
-	if (ap->flags & XFSMNT_IOSIZE) {
-		if (ap->iosizelog > XFS_MAX_IO_LOG ||
-		    ap->iosizelog < XFS_MIN_IO_LOG) {
-			cmn_err(CE_WARN,
-		"XFS: invalid log iosize: %d [not %d-%d]",
-				ap->iosizelog, XFS_MIN_IO_LOG,
-				XFS_MAX_IO_LOG);
-			return XFS_ERROR(EINVAL);
-		}
-
-		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-		mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
-	}
-
-	if (ap->flags & XFSMNT_IKEEP)
-		mp->m_flags |= XFS_MOUNT_IKEEP;
-	if (ap->flags & XFSMNT_DIRSYNC)
-		mp->m_flags |= XFS_MOUNT_DIRSYNC;
-	if (ap->flags & XFSMNT_ATTR2)
-		mp->m_flags |= XFS_MOUNT_ATTR2;
-	if (ap->flags & XFSMNT_NOATTR2)
-		mp->m_flags |= XFS_MOUNT_NOATTR2;
-
-	if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
-		mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-
-	/*
-	 * no recovery flag requires a read-only mount
-	 */
-	if (ap->flags & XFSMNT_NORECOVERY) {
-		if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-			cmn_err(CE_WARN,
-	"XFS: tried to mount a FS read-write without recovery!");
-			return XFS_ERROR(EINVAL);
-		}
-		mp->m_flags |= XFS_MOUNT_NORECOVERY;
-	}
-
-	if (ap->flags & XFSMNT_NOUUID)
-		mp->m_flags |= XFS_MOUNT_NOUUID;
-	if (ap->flags & XFSMNT_BARRIER)
-		mp->m_flags |= XFS_MOUNT_BARRIER;
-	else
-		mp->m_flags &= ~XFS_MOUNT_BARRIER;
-
-	if (ap->flags2 & XFSMNT2_FILESTREAMS)
-		mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-
-	if (ap->flags & XFSMNT_DMAPI)
-		mp->m_flags |= XFS_MOUNT_DMAPI;
-	return 0;
-
-
- out_free_rtname:
-	kfree(mp->m_rtname);
- out_free_fsname:
-	kfree(mp->m_fsname);
- out:
-	return error;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
  * Note: the superblock _has_ now been read in.
  */
 STATIC int
 xfs_finish_flags(
-	struct xfs_mount_args	*ap,
 	struct xfs_mount	*mp)
 {
 	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
 
 	/* Fail a mount where the logbuf is smaller than the log stripe */
 	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-		if ((ap->logbufsize <= 0) &&
-		    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
+		if (mp->m_logbsize <= 0 &&
+		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
 			mp->m_logbsize = mp->m_sb.sb_logsunit;
-		} else if (ap->logbufsize > 0 &&
-			   ap->logbufsize < mp->m_sb.sb_logsunit) {
+		} else if (mp->m_logbsize > 0 &&
+			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
 			cmn_err(CE_WARN,
 	"XFS: logbuf size must be greater than or equal to log stripe size");
 			return XFS_ERROR(EINVAL);
 		}
 	} else {
 		/* Fail a mount if the logbuf is larger than 32K */
-		if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
+		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
 			cmn_err(CE_WARN,
 	"XFS: logbuf size for version 1 logs must be 16K or 32K");
 			return XFS_ERROR(EINVAL);
@@ -1602,7 +1373,7 @@ xfs_finish_flags(
 	 * told by noattr2 to turn it off
 	 */
 	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-	    !(ap->flags & XFSMNT_NOATTR2))
+	    !(mp->m_flags & XFS_MOUNT_NOATTR2))
 		mp->m_flags |= XFS_MOUNT_ATTR2;
 
 	/*
@@ -1614,48 +1385,6 @@ xfs_finish_flags(
 		return XFS_ERROR(EROFS);
 	}
 
-	/*
-	 * check for shared mount.
-	 */
-	if (ap->flags & XFSMNT_SHARED) {
-		if (!xfs_sb_version_hasshared(&mp->m_sb))
-			return XFS_ERROR(EINVAL);
-
-		/*
-		 * For IRIX 6.5, shared mounts must have the shared
-		 * version bit set, have the persistent readonly
-		 * field set, must be version 0 and can only be mounted
-		 * read-only.
-		 */
-		if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
-		     (mp->m_sb.sb_shared_vn != 0))
-			return XFS_ERROR(EINVAL);
-
-		mp->m_flags |= XFS_MOUNT_SHARED;
-
-		/*
-		 * Shared XFS V0 can't deal with DMI.  Return EINVAL.
-		 */
-		if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
-			return XFS_ERROR(EINVAL);
-	}
-
-	if (ap->flags & XFSMNT_UQUOTA) {
-		mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-		if (ap->flags & XFSMNT_UQUOTAENF)
-			mp->m_qflags |= XFS_UQUOTA_ENFD;
-	}
-
-	if (ap->flags & XFSMNT_GQUOTA) {
-		mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-		if (ap->flags & XFSMNT_GQUOTAENF)
-			mp->m_qflags |= XFS_OQUOTA_ENFD;
-	} else if (ap->flags & XFSMNT_PQUOTA) {
-		mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-		if (ap->flags & XFSMNT_PQUOTAENF)
-			mp->m_qflags |= XFS_OQUOTA_ENFD;
-	}
-
 	return 0;
 }
 
@@ -1667,19 +1396,14 @@ xfs_fs_fill_super(
 {
 	struct inode		*root;
 	struct xfs_mount	*mp = NULL;
-	struct xfs_mount_args	*args;
 	int			flags = 0, error = ENOMEM;
-
-	args = xfs_args_allocate(sb, silent);
-	if (!args)
-		return -ENOMEM;
+	char			*mtpt = NULL;
 
 	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
 	if (!mp)
-		goto out_free_args;
+		goto out;
 
 	spin_lock_init(&mp->m_sb_lock);
-	mutex_init(&mp->m_ilock);
 	mutex_init(&mp->m_growlock);
 	atomic_set(&mp->m_active_trans, 0);
 	INIT_LIST_HEAD(&mp->m_sync_list);
@@ -1689,12 +1413,9 @@ xfs_fs_fill_super(
 	mp->m_super = sb;
 	sb->s_fs_info = mp;
 
-	if (sb->s_flags & MS_RDONLY)
-		mp->m_flags |= XFS_MOUNT_RDONLY;
-
-	error = xfs_parseargs(mp, (char *)data, args, 0);
+	error = xfs_parseargs(mp, (char *)data, &mtpt);
 	if (error)
-		goto out_free_mp;
+		goto out_free_fsname;
 
 	sb_min_blocksize(sb, BBSIZE);
 	sb->s_xattr = xfs_xattr_handlers;
@@ -1702,33 +1423,28 @@ xfs_fs_fill_super(
 	sb->s_qcop = &xfs_quotactl_operations;
 	sb->s_op = &xfs_super_operations;
 
-	error = xfs_dmops_get(mp, args);
+	error = xfs_dmops_get(mp);
 	if (error)
-		goto out_free_mp;
-	error = xfs_qmops_get(mp, args);
+		goto out_free_fsname;
+	error = xfs_qmops_get(mp);
 	if (error)
 		goto out_put_dmops;
 
-	if (args->flags & XFSMNT_QUIET)
+	if (silent)
 		flags |= XFS_MFSI_QUIET;
 
-	error = xfs_open_devices(mp, args);
+	error = xfs_open_devices(mp);
 	if (error)
 		goto out_put_qmops;
 
 	if (xfs_icsb_init_counters(mp))
 		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
 
-	/*
-	 * Setup flags based on mount(2) options and then the superblock
-	 */
-	error = xfs_start_flags(args, mp);
-	if (error)
-		goto out_free_fsname;
 	error = xfs_readsb(mp, flags);
 	if (error)
-		goto out_free_fsname;
-	error = xfs_finish_flags(args, mp);
+		goto out_destroy_counters;
+
+	error = xfs_finish_flags(mp);
 	if (error)
 		goto out_free_sb;
 
@@ -1747,7 +1463,7 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_filestream_unmount;
 
-	XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
+	XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
 
 	sb->s_dirt = 1;
 	sb->s_magic = XFS_SB_MAGIC;
@@ -1772,35 +1488,31 @@ xfs_fs_fill_super(
 		goto fail_vnrele;
 	}
 
-	mp->m_sync_work.w_syncer = xfs_sync_worker;
-	mp->m_sync_work.w_mount = mp;
-	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
-	if (IS_ERR(mp->m_sync_task)) {
-		error = -PTR_ERR(mp->m_sync_task);
+	error = xfs_syncd_init(mp);
+	if (error)
 		goto fail_vnrele;
-	}
 
-	xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
+	kfree(mtpt);
 
-	kfree(args);
+	xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
 	return 0;
 
  out_filestream_unmount:
 	xfs_filestream_unmount(mp);
  out_free_sb:
 	xfs_freesb(mp);
- out_free_fsname:
-	xfs_free_fsname(mp);
+ out_destroy_counters:
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
  out_put_qmops:
 	xfs_qmops_put(mp);
  out_put_dmops:
 	xfs_dmops_put(mp);
- out_free_mp:
+ out_free_fsname:
+	xfs_free_fsname(mp);
+	kfree(mtpt);
 	kfree(mp);
- out_free_args:
-	kfree(args);
+ out:
 	return -error;
 
  fail_vnrele:
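The reshuffled labels above are the classic kernel "goto ladder" error path: each label releases exactly what was acquired before the failure, in reverse order. The pattern in miniature, with made-up resources:

	#include <stdio.h>
	#include <stdlib.h>

	static int setup(void)
	{
		char *a, *b;
		int error = -1;

		a = malloc(16);
		if (!a)
			goto out;
		b = malloc(16);
		if (!b)
			goto out_free_a;

		puts("both acquired");
		free(b);
		free(a);
		return 0;

	 out_free_a:
		free(a);	/* only what was actually acquired is undone */
	 out:
		return error;
	}

	int main(void)
	{
		return setup() ? 1 : 0;
	}
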
@@ -1820,8 +1532,6 @@ xfs_fs_fill_super(
 	xfs_filestream_unmount(mp);
 
 	XFS_bflush(mp->m_ddev_targp);
-	error = xfs_unmount_flush(mp, 0);
-	WARN_ON(error);
 
 	xfs_unmountfs(mp);
 	goto out_free_sb;
@@ -1882,10 +1592,19 @@ xfs_alloc_trace_bufs(void)
 	if (!xfs_bmap_trace_buf)
 		goto out_free_alloc_trace;
 #endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
+	xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE,
+					     KM_MAYFAIL);
+	if (!xfs_allocbt_trace_buf)
+		goto out_free_bmap_trace;
+
+	xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL);
+	if (!xfs_inobt_trace_buf)
+		goto out_free_allocbt_trace;
+
 	xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
 	if (!xfs_bmbt_trace_buf)
-		goto out_free_bmap_trace;
+		goto out_free_inobt_trace;
 #endif
 #ifdef XFS_ATTR_TRACE
 	xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
@@ -1907,8 +1626,12 @@ xfs_alloc_trace_bufs(void)
 	ktrace_free(xfs_attr_trace_buf);
  out_free_bmbt_trace:
 #endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
 	ktrace_free(xfs_bmbt_trace_buf);
+ out_free_inobt_trace:
+	ktrace_free(xfs_inobt_trace_buf);
+ out_free_allocbt_trace:
+	ktrace_free(xfs_allocbt_trace_buf);
  out_free_bmap_trace:
 #endif
 #ifdef XFS_BMAP_TRACE
@@ -1931,8 +1654,10 @@ xfs_free_trace_bufs(void)
 #ifdef XFS_ATTR_TRACE
 	ktrace_free(xfs_attr_trace_buf);
 #endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
 	ktrace_free(xfs_bmbt_trace_buf);
+	ktrace_free(xfs_inobt_trace_buf);
+	ktrace_free(xfs_allocbt_trace_buf);
 #endif
 #ifdef XFS_BMAP_TRACE
 	ktrace_free(xfs_bmap_trace_buf);
@@ -1945,16 +1670,10 @@ xfs_free_trace_bufs(void)
 STATIC int __init
 xfs_init_zones(void)
 {
-	xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode",
-					KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
-					KM_ZONE_SPREAD,
-					xfs_fs_inode_init_once);
-	if (!xfs_vnode_zone)
-		goto out;
 
 	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
 	if (!xfs_ioend_zone)
-		goto out_destroy_vnode_zone;
+		goto out;
 
 	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
 						  xfs_ioend_zone);
@@ -1970,6 +1689,7 @@ xfs_init_zones(void)
 						"xfs_bmap_free_item");
 	if (!xfs_bmap_free_item_zone)
 		goto out_destroy_log_ticket_zone;
+
 	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
 					    "xfs_btree_cur");
 	if (!xfs_btree_cur_zone)
@@ -2017,8 +1737,8 @@ xfs_init_zones(void)
 
 	xfs_inode_zone =
 		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
-			KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
-			KM_ZONE_SPREAD, NULL);
+			KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
+			xfs_fs_inode_init_once);
 	if (!xfs_inode_zone)
 		goto out_destroy_efi_zone;
 
@@ -2066,8 +1786,6 @@ xfs_init_zones(void)
 	mempool_destroy(xfs_ioend_pool);
  out_destroy_ioend_zone:
 	kmem_zone_destroy(xfs_ioend_zone);
- out_destroy_vnode_zone:
-	kmem_zone_destroy(xfs_vnode_zone);
  out:
 	return -ENOMEM;
 }
@@ -2092,7 +1810,6 @@ xfs_destroy_zones(void)
 	kmem_zone_destroy(xfs_log_ticket_zone);
 	mempool_destroy(xfs_ioend_pool);
 	kmem_zone_destroy(xfs_ioend_zone);
-	kmem_zone_destroy(xfs_vnode_zone);
 
 }
 
@@ -2100,13 +1817,12 @@ STATIC int __init
 init_xfs_fs(void)
 {
 	int			error;
-	static char		message[] __initdata = KERN_INFO \
-		XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";
 
-	printk(message);
+	printk(KERN_INFO XFS_VERSION_STRING " with "
+			 XFS_BUILD_OPTIONS " enabled\n");
 
 	ktrace_init(64);
-	vn_init();
+	xfs_ioend_init();
 	xfs_dir_startup();
 
 	error = xfs_init_zones();
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index fe2ef4e6a0f9..d5d776d4cd67 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -20,24 +20,12 @@
 
 #include <linux/exportfs.h>
 
-#ifdef CONFIG_XFS_DMAPI
-# define vfs_insertdmapi(vfs)	vfs_insertops(vfsp, &xfs_dmops)
-# define vfs_initdmapi()	dmapi_init()
-# define vfs_exitdmapi()	dmapi_uninit()
-#else
-# define vfs_insertdmapi(vfs)	do { } while (0)
-# define vfs_initdmapi()	do { } while (0)
-# define vfs_exitdmapi()	do { } while (0)
-#endif
-
 #ifdef CONFIG_XFS_QUOTA
-# define vfs_insertquota(vfs)	vfs_insertops(vfsp, &xfs_qmops)
 extern void xfs_qm_init(void);
 extern void xfs_qm_exit(void);
 # define vfs_initquota()	xfs_qm_init()
 # define vfs_exitquota()	xfs_qm_exit()
 #else
-# define vfs_insertquota(vfs)	do { } while (0)
 # define vfs_initquota()	do { } while (0)
 # define vfs_exitquota()	do { } while (0)
 #endif
@@ -101,9 +89,6 @@ struct block_device;
 
 extern __uint64_t xfs_max_file_offset(unsigned int);
 
-extern void xfs_flush_inode(struct xfs_inode *);
-extern void xfs_flush_device(struct xfs_inode *);
-
 extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
 
 extern const struct export_operations xfs_export_operations;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
new file mode 100644
index 000000000000..2ed035354c26
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -0,0 +1,762 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_inode.h"
+#include "xfs_dinode.h"
+#include "xfs_error.h"
+#include "xfs_mru_cache.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
+#include "xfs_utils.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
+#include "xfs_rw.h"
+
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
50/*
51 * Sync all the inodes in the given AG according to the
52 * direction given by the flags.
53 */
54STATIC int
55xfs_sync_inodes_ag(
56 xfs_mount_t *mp,
57 int ag,
58 int flags)
59{
60 xfs_perag_t *pag = &mp->m_perag[ag];
61 int nr_found;
62 uint32_t first_index = 0;
63 int error = 0;
64 int last_error = 0;
65 int fflag = XFS_B_ASYNC;
66
67 if (flags & SYNC_DELWRI)
68 fflag = XFS_B_DELWRI;
69 if (flags & SYNC_WAIT)
70 fflag = 0; /* synchronous overrides all */
71
72 do {
73 struct inode *inode;
74 xfs_inode_t *ip = NULL;
75 int lock_flags = XFS_ILOCK_SHARED;
76
77 /*
78 * use a gang lookup to find the next inode in the tree
79 * as the tree is sparse and a gang lookup walks to find
80 * the number of objects requested.
81 */
82 read_lock(&pag->pag_ici_lock);
83 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
84 (void**)&ip, first_index, 1);
85
86 if (!nr_found) {
87 read_unlock(&pag->pag_ici_lock);
88 break;
89 }
90
91 /*
92 * Update the index for the next lookup. Catch overflows
93 * into the next AG range which can occur if we have inodes
94 * in the last block of the AG and we are currently
95 * pointing to the last inode.
96 */
97 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
98 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
99 read_unlock(&pag->pag_ici_lock);
100 break;
101 }
102
103 /* nothing to sync during shutdown */
104 if (XFS_FORCED_SHUTDOWN(mp)) {
105 read_unlock(&pag->pag_ici_lock);
106 return 0;
107 }
108
109 /*
110 * If we can't get a reference on the inode, it must be
111 * in reclaim. Leave it for the reclaim code to flush.
112 */
113 inode = VFS_I(ip);
114 if (!igrab(inode)) {
115 read_unlock(&pag->pag_ici_lock);
116 continue;
117 }
118 read_unlock(&pag->pag_ici_lock);
119
120 /* avoid new or bad inodes */
121 if (is_bad_inode(inode) ||
122 xfs_iflags_test(ip, XFS_INEW)) {
123 IRELE(ip);
124 continue;
125 }
126
127 /*
128 * If we have to flush data or wait for I/O completion
129 * we need to hold the iolock.
130 */
131 if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
132 xfs_ilock(ip, XFS_IOLOCK_SHARED);
133 lock_flags |= XFS_IOLOCK_SHARED;
134 error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
135 if (flags & SYNC_IOWAIT)
136 xfs_ioend_wait(ip);
137 }
138 xfs_ilock(ip, XFS_ILOCK_SHARED);
139
140 if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
141 if (flags & SYNC_WAIT) {
142 xfs_iflock(ip);
143 if (!xfs_inode_clean(ip))
144 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
145 else
146 xfs_ifunlock(ip);
147 } else if (xfs_iflock_nowait(ip)) {
148 if (!xfs_inode_clean(ip))
149 error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
150 else
151 xfs_ifunlock(ip);
152 }
153 }
154 xfs_iput(ip, lock_flags);
155
156 if (error)
157 last_error = error;
158 /*
159 * bail out if the filesystem is corrupted.
160 */
161 if (error == EFSCORRUPTED)
162 return XFS_ERROR(error);
163
164 } while (nr_found);
165
166 return last_error;
167}
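
The gang-lookup walk above is the central idiom of this patch and recurs below and in the quota code. A minimal standalone sketch of it, with an invented demo_item type and lock standing in for the XFS per-AG structures:

        #include <linux/radix-tree.h>
        #include <linux/spinlock.h>

        struct demo_item {
                unsigned long   index;  /* key the item was inserted under */
        };

        static void
        demo_walk(
                struct radix_tree_root  *root,
                rwlock_t                *lock)
        {
                unsigned long   first_index = 0;
                int             nr_found;

                do {
                        struct demo_item        *item;

                        read_lock(lock);
                        /* ask for at most one object at or after first_index */
                        nr_found = radix_tree_gang_lookup(root, (void **)&item,
                                                          first_index, 1);
                        if (!nr_found) {
                                read_unlock(lock);
                                break;
                        }

                        /* advance past this item; stop if the index wraps */
                        first_index = item->index + 1;
                        if (first_index < item->index) {
                                read_unlock(lock);
                                break;
                        }
                        read_unlock(lock);

                        /* ... take a reference and process the item ... */
                } while (nr_found);
        }
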
168
169int
170xfs_sync_inodes(
171 xfs_mount_t *mp,
172 int flags)
173{
174 int error;
175 int last_error;
176 int i;
177 int lflags = XFS_LOG_FORCE;
178
179 if (mp->m_flags & XFS_MOUNT_RDONLY)
180 return 0;
181 error = 0;
182 last_error = 0;
183
184 if (flags & SYNC_WAIT)
185 lflags |= XFS_LOG_SYNC;
186
187 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
188 if (!mp->m_perag[i].pag_ici_init)
189 continue;
190 error = xfs_sync_inodes_ag(mp, i, flags);
191 if (error)
192 last_error = error;
193 if (error == EFSCORRUPTED)
194 break;
195 }
196 if (flags & SYNC_DELWRI)
197 xfs_log_force(mp, 0, lflags);
198
199 return XFS_ERROR(last_error);
200}
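
The flag combinations this function expects can be read straight off its callers; the two calls added in xfs_quiesce_data() below are representative:

        xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);          /* push, don't block */
        xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); /* push and block */
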
201
202STATIC int
203xfs_commit_dummy_trans(
204 struct xfs_mount *mp,
205 uint log_flags)
206{
207 struct xfs_inode *ip = mp->m_rootip;
208 struct xfs_trans *tp;
209 int error;
210
211 /*
212 * Put a dummy transaction in the log to tell recovery
213 * that all others are OK.
214 */
215 tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
216 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
217 if (error) {
218 xfs_trans_cancel(tp, 0);
219 return error;
220 }
221
222 xfs_ilock(ip, XFS_ILOCK_EXCL);
223
224 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
225 xfs_trans_ihold(tp, ip);
226 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
227 /* XXX(hch): ignoring the error here.. */
228 error = xfs_trans_commit(tp, 0);
229
230 xfs_iunlock(ip, XFS_ILOCK_EXCL);
231
232 xfs_log_force(mp, 0, log_flags);
233 return 0;
234}
235
236int
237xfs_sync_fsdata(
238 struct xfs_mount *mp,
239 int flags)
240{
241 struct xfs_buf *bp;
242 struct xfs_buf_log_item *bip;
243 int error = 0;
244
245 /*
246 * If this is xfssyncd() then only sync the superblock if we can
247 * lock it without sleeping and it is not pinned.
248 */
249 if (flags & SYNC_BDFLUSH) {
250 ASSERT(!(flags & SYNC_WAIT));
251
252 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
253 if (!bp)
254 goto out;
255
256 bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
257 if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
258 goto out_brelse;
259 } else {
260 bp = xfs_getsb(mp, 0);
261
262 /*
263 * If the buffer is pinned then push on the log so we won't
264 * get stuck waiting in the write for someone, maybe
265 * ourselves, to flush the log.
266 *
267 * Even though we just pushed the log above, we did not have
268 * the superblock buffer locked at that point so it can
269 * become pinned in between there and here.
270 */
271 if (XFS_BUF_ISPINNED(bp))
272 xfs_log_force(mp, 0, XFS_LOG_FORCE);
273 }
274
275
276 if (flags & SYNC_WAIT)
277 XFS_BUF_UNASYNC(bp);
278 else
279 XFS_BUF_ASYNC(bp);
280
281 return xfs_bwrite(mp, bp);
282
283 out_brelse:
284 xfs_buf_relse(bp);
285 out:
286 return error;
287}
288
289/*
290 * When remounting a filesystem read-only or freezing the filesystem, we have
291 * two phases to execute. This first phase is syncing the data before we
292 * quiesce the filesystem, and the second is flushing all the inodes out after
293 * we've waited for all the transactions created by the first phase to
294 * complete. The second phase ensures that the inodes are written to their
295 * location on disk rather than just existing in transactions in the log. This
296 * means after a quiesce there is no log replay required to write the inodes to
297 * disk (this is the main difference between a sync and a quiesce).
298 */
299/*
300 * First stage of freeze - no writers will make progress now we are here,
301 * so we flush delwri and delalloc buffers here, then wait for all I/O to
302 * complete. Data is frozen at that point. Metadata is not frozen;
303 * transactions can still occur here, so don't bother flushing the
304 * buftarg because it'll just get dirty again.
305 */
306int
307xfs_quiesce_data(
308 struct xfs_mount *mp)
309{
310 int error;
311
312 /* push non-blocking */
313 xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
314 XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
315 xfs_filestream_flush(mp);
316
317 /* push and block */
318 xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
319 XFS_QM_DQSYNC(mp, SYNC_WAIT);
320
321 /* write superblock and hoover up shutdown errors */
322 error = xfs_sync_fsdata(mp, 0);
323
324 /* flush data-only devices */
325 if (mp->m_rtdev_targp)
326 XFS_bflush(mp->m_rtdev_targp);
327
328 return error;
329}
330
331STATIC void
332xfs_quiesce_fs(
333 struct xfs_mount *mp)
334{
335 int count = 0, pincount;
336
337 xfs_flush_buftarg(mp->m_ddev_targp, 0);
338 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
339
340 /*
341 * This loop must run at least twice. The first pass flushes
342 * most of the metadata, but that in turn generates more metadata
343 * (typically directory updates), which must then be flushed and
344 * logged before we can write the unmount record.
345 */
346 do {
347 xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
348 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
349 if (!pincount) {
350 delay(50);
351 count++;
352 }
353 } while (count < 2);
354}
355
356/*
357 * Second stage of a quiesce. The data is already synced, now we have to take
358 * care of the metadata. New transactions are already blocked, so we need to
359 * wait for any remaining transactions to drain out before proceeding.
360 */
361void
362xfs_quiesce_attr(
363 struct xfs_mount *mp)
364{
365 int error = 0;
366
367 /* wait for all modifications to complete */
368 while (atomic_read(&mp->m_active_trans) > 0)
369 delay(100);
370
371 /* flush inodes and push all remaining buffers out to disk */
372 xfs_quiesce_fs(mp);
373
374 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
375
376 /* Push the superblock and write an unmount record */
377 error = xfs_log_sbcount(mp, 1);
378 if (error)
379 xfs_fs_cmn_err(CE_WARN, mp,
380 "xfs_attr_quiesce: failed to log sb changes. "
381 "Frozen image may not be consistent.");
382 xfs_log_unmount_write(mp);
383 xfs_unmountfs_writesb(mp);
384}
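
This file does not show how the two phases are strung together; a hypothetical freeze-path caller (the demo_ name is invented for illustration) would simply run them in order:

        static void
        demo_freeze(
                struct xfs_mount        *mp)
        {
                xfs_quiesce_data(mp);   /* phase 1: sync the data */
                xfs_quiesce_attr(mp);   /* phase 2: drain transactions, flush metadata */
        }
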
385
386/*
387 * Enqueue a work item to be picked up by the vfs xfssyncd thread.
388 * Doing this has two advantages:
389 * - It saves on stack space, which is tight in certain situations
390 * - It can be used (with care) as a mechanism to avoid deadlocks.
391 * Flushing while allocating in a full filesystem requires both.
392 */
393STATIC void
394xfs_syncd_queue_work(
395 struct xfs_mount *mp,
396 void *data,
397 void (*syncer)(struct xfs_mount *, void *))
398{
399 struct bhv_vfs_sync_work *work;
400
401 work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
402 INIT_LIST_HEAD(&work->w_list);
403 work->w_syncer = syncer;
404 work->w_data = data;
405 work->w_mount = mp;
406 spin_lock(&mp->m_sync_lock);
407 list_add_tail(&work->w_list, &mp->m_sync_list);
408 spin_unlock(&mp->m_sync_lock);
409 wake_up_process(mp->m_sync_task);
410}
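
Any deferred flush can ride on this mechanism, not just the inode and device flushes below. A hypothetical user (demo_ names invented) needs only a callback and an enqueue:

        /* runs later in xfssyncd context, where sleeping is fine */
        static void
        demo_work(
                struct xfs_mount        *mp,
                void                    *arg)
        {
                /* ... stack-hungry or deadlock-prone flushing goes here ... */
        }

        static void
        demo_defer(
                struct xfs_mount        *mp)
        {
                xfs_syncd_queue_work(mp, NULL, demo_work);
        }
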
411
412/*
413 * Flush delayed allocate data, attempting to free up reserved space
414 * from existing allocations. At this point a new allocation attempt
415 * has failed with ENOSPC and we are in the process of scratching our
416 * heads, looking about for more room...
417 */
418STATIC void
419xfs_flush_inode_work(
420 struct xfs_mount *mp,
421 void *arg)
422{
423 struct inode *inode = arg;
424 filemap_flush(inode->i_mapping);
425 iput(inode);
426}
427
428void
429xfs_flush_inode(
430 xfs_inode_t *ip)
431{
432 struct inode *inode = VFS_I(ip);
433
434 igrab(inode);
435 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
436 delay(msecs_to_jiffies(500));
437}
438
439/*
440 * This is the "bigger hammer" version of xfs_flush_inode_work...
441 * (IOW, "If at first you don't succeed, use a Bigger Hammer").
442 */
443STATIC void
444xfs_flush_device_work(
445 struct xfs_mount *mp,
446 void *arg)
447{
448 struct inode *inode = arg;
449 sync_blockdev(mp->m_super->s_bdev);
450 iput(inode);
451}
452
453void
454xfs_flush_device(
455 xfs_inode_t *ip)
456{
457 struct inode *inode = VFS_I(ip);
458
459 igrab(inode);
460 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
461 delay(msecs_to_jiffies(500));
462 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
463}
464
465/*
466 * Every sync period we need to unpin all items, reclaim inodes, sync
467 * quota and write out the superblock. We might need to cover the log
468 * to indicate it is idle.
469 */
470STATIC void
471xfs_sync_worker(
472 struct xfs_mount *mp,
473 void *unused)
474{
475 int error;
476
477 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
478 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
479 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
480 /* dgc: errors ignored here */
481 error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
482 error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
483 if (xfs_log_need_covered(mp))
484 error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
485 }
486 mp->m_sync_seq++;
487 wake_up(&mp->m_wait_single_sync_task);
488}
489
490STATIC int
491xfssyncd(
492 void *arg)
493{
494 struct xfs_mount *mp = arg;
495 long timeleft;
496 bhv_vfs_sync_work_t *work, *n;
497 LIST_HEAD (tmp);
498
499 set_freezable();
500 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
501 for (;;) {
502 timeleft = schedule_timeout_interruptible(timeleft);
503 /* swsusp */
504 try_to_freeze();
505 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
506 break;
507
508 spin_lock(&mp->m_sync_lock);
509 /*
510 * We can get woken by laptop mode to do a sync;
511 * that's the (only!) case where the list would be
512 * empty with time remaining.
513 */
514 if (!timeleft || list_empty(&mp->m_sync_list)) {
515 if (!timeleft)
516 timeleft = xfs_syncd_centisecs *
517 msecs_to_jiffies(10);
518 INIT_LIST_HEAD(&mp->m_sync_work.w_list);
519 list_add_tail(&mp->m_sync_work.w_list,
520 &mp->m_sync_list);
521 }
522 list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
523 list_move(&work->w_list, &tmp);
524 spin_unlock(&mp->m_sync_lock);
525
526 list_for_each_entry_safe(work, n, &tmp, w_list) {
527 (*work->w_syncer)(mp, work->w_data);
528 list_del(&work->w_list);
529 if (work == &mp->m_sync_work)
530 continue;
531 kmem_free(work);
532 }
533 }
534
535 return 0;
536}
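
Stripped of the work-list handling, the skeleton of a freezable kthread like this one is worth seeing on its own; this sketch assumes nothing beyond the stock kthread and freezer APIs:

        #include <linux/kthread.h>
        #include <linux/freezer.h>

        static int
        demo_thread(
                void    *arg)
        {
                set_freezable();        /* opt in to the suspend freezer */
                while (!kthread_should_stop()) {
                        schedule_timeout_interruptible(HZ);
                        try_to_freeze();        /* park here across suspend */
                        /* ... periodic work ... */
                }
                return 0;
        }
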
537
538int
539xfs_syncd_init(
540 struct xfs_mount *mp)
541{
542 mp->m_sync_work.w_syncer = xfs_sync_worker;
543 mp->m_sync_work.w_mount = mp;
544 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
545 if (IS_ERR(mp->m_sync_task))
546 return -PTR_ERR(mp->m_sync_task);
547 return 0;
548}
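
Note the sign handling above: kthread_run() returns an ERR_PTR() holding a negative errno, so the negation converts it to the positive error convention XFS uses internally. A caller therefore checks it like any other XFS return:

        error = xfs_syncd_init(mp);
        if (error)
                return error;   /* positive errno, per XFS convention */
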
549
550void
551xfs_syncd_stop(
552 struct xfs_mount *mp)
553{
554 kthread_stop(mp->m_sync_task);
555}
556
557int
558xfs_reclaim_inode(
559 xfs_inode_t *ip,
560 int locked,
561 int sync_mode)
562{
563 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
564
565 /* The hash lock here protects a thread in xfs_iget_core from
566 * racing with us on linking the inode back with a vnode.
567 * Once we have the XFS_IRECLAIM flag set it will not touch
568 * us.
569 */
570 write_lock(&pag->pag_ici_lock);
571 spin_lock(&ip->i_flags_lock);
572 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
573 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
574 spin_unlock(&ip->i_flags_lock);
575 write_unlock(&pag->pag_ici_lock);
576 if (locked) {
577 xfs_ifunlock(ip);
578 xfs_iunlock(ip, XFS_ILOCK_EXCL);
579 }
580 return 1;
581 }
582 __xfs_iflags_set(ip, XFS_IRECLAIM);
583 spin_unlock(&ip->i_flags_lock);
584 write_unlock(&pag->pag_ici_lock);
585 xfs_put_perag(ip->i_mount, pag);
586
587 /*
588 * If the inode is still dirty, then flush it out. If the inode
589 * is not in the AIL, then it will be OK to flush it delwri as
590 * long as xfs_iflush() does not keep any references to the inode.
591 * We leave that decision up to xfs_iflush() since it has the
592 * knowledge of whether it's OK to simply do a delwri flush of
593 * the inode or whether we need to wait until the inode is
594 * pulled from the AIL.
595 * We get the flush lock regardless, though, just to make sure
596 * we don't free it while it is being flushed.
597 */
598 if (!locked) {
599 xfs_ilock(ip, XFS_ILOCK_EXCL);
600 xfs_iflock(ip);
601 }
602
603 /*
604 * In the case of a forced shutdown we rely on xfs_iflush() to
605 * wait for the inode to be unpinned before returning an error.
606 */
607 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
608 /* synchronize with xfs_iflush_done */
609 xfs_iflock(ip);
610 xfs_ifunlock(ip);
611 }
612
613 xfs_iunlock(ip, XFS_ILOCK_EXCL);
614 xfs_ireclaim(ip);
615 return 0;
616}
617
618/*
619 * We set the inode flag atomically with the radix tree tag.
620 * Once we get tag lookups on the radix tree, this inode flag
621 * can go away.
622 */
623void
624xfs_inode_set_reclaim_tag(
625 xfs_inode_t *ip)
626{
627 xfs_mount_t *mp = ip->i_mount;
628 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
629
630 read_lock(&pag->pag_ici_lock);
631 spin_lock(&ip->i_flags_lock);
632 radix_tree_tag_set(&pag->pag_ici_root,
633 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
634 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
635 spin_unlock(&ip->i_flags_lock);
636 read_unlock(&pag->pag_ici_lock);
637 xfs_put_perag(mp, pag);
638}
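
Once the lookup side moves to tag lookups, finding reclaimable inodes no longer means visiting every entry. A minimal sketch of the tag API pair, with generic names rather than the XFS ones:

        #include <linux/radix-tree.h>

        #define DEMO_TAG        0       /* radix trees carry a few tag bits per slot */

        static int
        demo_find_tagged(
                struct radix_tree_root  *root,
                void                    **results,
                int                     max_items)
        {
                /* mark the entry at index 42, then fetch only tagged entries */
                radix_tree_tag_set(root, 42, DEMO_TAG);
                return radix_tree_gang_lookup_tag(root, results, 0,
                                                  max_items, DEMO_TAG);
        }
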
639
640void
641__xfs_inode_clear_reclaim_tag(
642 xfs_mount_t *mp,
643 xfs_perag_t *pag,
644 xfs_inode_t *ip)
645{
646 radix_tree_tag_clear(&pag->pag_ici_root,
647 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
648}
649
650void
651xfs_inode_clear_reclaim_tag(
652 xfs_inode_t *ip)
653{
654 xfs_mount_t *mp = ip->i_mount;
655 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
656
657 read_lock(&pag->pag_ici_lock);
658 spin_lock(&ip->i_flags_lock);
659 __xfs_inode_clear_reclaim_tag(mp, pag, ip);
660 spin_unlock(&ip->i_flags_lock);
661 read_unlock(&pag->pag_ici_lock);
662 xfs_put_perag(mp, pag);
663}
664
665
666STATIC void
667xfs_reclaim_inodes_ag(
668 xfs_mount_t *mp,
669 int ag,
670 int noblock,
671 int mode)
672{
673 xfs_inode_t *ip = NULL;
674 xfs_perag_t *pag = &mp->m_perag[ag];
675 int nr_found;
676 uint32_t first_index;
677 int skipped;
678
679restart:
680 first_index = 0;
681 skipped = 0;
682 do {
683 /*
684 * Use a gang lookup to find the next inode in the tree.
685 * The tree is sparse, and a gang lookup walks it until it
686 * has found the number of objects requested.
687 */
688 read_lock(&pag->pag_ici_lock);
689 nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
690 (void**)&ip, first_index, 1,
691 XFS_ICI_RECLAIM_TAG);
692
693 if (!nr_found) {
694 read_unlock(&pag->pag_ici_lock);
695 break;
696 }
697
698 /*
699 * Update the index for the next lookup. Catch overflows
700 * into the next AG range which can occur if we have inodes
701 * in the last block of the AG and we are currently
702 * pointing to the last inode.
703 */
704 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
705 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
706 read_unlock(&pag->pag_ici_lock);
707 break;
708 }
709
710 /* ignore if already under reclaim */
711 if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
712 read_unlock(&pag->pag_ici_lock);
713 continue;
714 }
715
716 if (noblock) {
717 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
718 read_unlock(&pag->pag_ici_lock);
719 continue;
720 }
721 if (xfs_ipincount(ip) ||
722 !xfs_iflock_nowait(ip)) {
723 xfs_iunlock(ip, XFS_ILOCK_EXCL);
724 read_unlock(&pag->pag_ici_lock);
725 continue;
726 }
727 }
728 read_unlock(&pag->pag_ici_lock);
729
730 /*
731 * xfs_reclaim_inode() returns non-zero if the inode was
732 * already being reclaimed; count it so we retry the walk.
733 */
734 if (xfs_reclaim_inode(ip, noblock, mode))
735 skipped++;
736 } while (nr_found);
737
738 if (skipped) {
739 delay(1);
740 goto restart;
741 }
742 return;
743
744}
745
746int
747xfs_reclaim_inodes(
748 xfs_mount_t *mp,
749 int noblock,
750 int mode)
751{
752 int i;
753
754 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
755 if (!mp->m_perag[i].pag_ici_init)
756 continue;
757 xfs_reclaim_inodes_ag(mp, i, noblock, mode);
758 }
759 return 0;
760}
761
762
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
new file mode 100644
index 000000000000..5f6de1efe1f6
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -0,0 +1,55 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef XFS_SYNC_H
19#define XFS_SYNC_H 1
20
21struct xfs_mount;
22
23typedef struct bhv_vfs_sync_work {
24 struct list_head w_list;
25 struct xfs_mount *w_mount;
26 void *w_data; /* syncer routine argument */
27 void (*w_syncer)(struct xfs_mount *, void *);
28} bhv_vfs_sync_work_t;
29
30#define SYNC_ATTR 0x0001 /* sync attributes */
31#define SYNC_DELWRI 0x0002 /* look at delayed writes */
32#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
33#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
34#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
35
36int xfs_syncd_init(struct xfs_mount *mp);
37void xfs_syncd_stop(struct xfs_mount *mp);
38
39int xfs_sync_inodes(struct xfs_mount *mp, int flags);
40int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
41
42int xfs_quiesce_data(struct xfs_mount *mp);
43void xfs_quiesce_attr(struct xfs_mount *mp);
44
45void xfs_flush_inode(struct xfs_inode *ip);
46void xfs_flush_device(struct xfs_inode *ip);
47
48int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
49int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
50
51void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
52void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
53void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54 struct xfs_inode *ip);
55#endif
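
For a sense of how these entry points combine, the quiesce path in xfs_sync.c above reclaims what it can without blocking before the final flush:

        xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
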
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index 7dacb5bbde3f..916c0ffb6083 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -56,17 +56,6 @@ xfs_stats_clear_proc_handler(
56 56
57static ctl_table xfs_table[] = { 57static ctl_table xfs_table[] = {
58 { 58 {
59 .ctl_name = XFS_RESTRICT_CHOWN,
60 .procname = "restrict_chown",
61 .data = &xfs_params.restrict_chown.val,
62 .maxlen = sizeof(int),
63 .mode = 0644,
64 .proc_handler = &proc_dointvec_minmax,
65 .strategy = &sysctl_intvec,
66 .extra1 = &xfs_params.restrict_chown.min,
67 .extra2 = &xfs_params.restrict_chown.max
68 },
69 {
70 .ctl_name = XFS_SGID_INHERIT, 59 .ctl_name = XFS_SGID_INHERIT,
71 .procname = "irix_sgid_inherit", 60 .procname = "irix_sgid_inherit",
72 .data = &xfs_params.sgid_inherit.val, 61 .data = &xfs_params.sgid_inherit.val,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 4aadb8056c37..b9937d450f8e 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val {
31} xfs_sysctl_val_t; 31} xfs_sysctl_val_t;
32 32
33typedef struct xfs_param { 33typedef struct xfs_param {
34 xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/
35 xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is 34 xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
36 * not a member of parent dir GID. */ 35 * not a member of parent dir GID. */
37 xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ 36 xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
@@ -68,7 +67,7 @@ typedef struct xfs_param {
68enum { 67enum {
69 /* XFS_REFCACHE_SIZE = 1 */ 68 /* XFS_REFCACHE_SIZE = 1 */
70 /* XFS_REFCACHE_PURGE = 2 */ 69 /* XFS_REFCACHE_PURGE = 2 */
71 XFS_RESTRICT_CHOWN = 3, 70 /* XFS_RESTRICT_CHOWN = 3 */
72 XFS_SGID_INHERIT = 4, 71 XFS_SGID_INHERIT = 4,
73 XFS_SYMLINK_MODE = 5, 72 XFS_SYMLINK_MODE = 5,
74 XFS_PANIC_MASK = 6, 73 XFS_PANIC_MASK = 6,
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
deleted file mode 100644
index 7e60c7776b1c..000000000000
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ /dev/null
@@ -1,77 +0,0 @@
1/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_VFS_H__
19#define __XFS_VFS_H__
20
21#include <linux/vfs.h>
22#include "xfs_fs.h"
23
24struct inode;
25
26struct fid;
27struct cred;
28struct seq_file;
29struct super_block;
30struct xfs_inode;
31struct xfs_mount;
32struct xfs_mount_args;
33
34typedef struct kstatfs bhv_statvfs_t;
35
36typedef struct bhv_vfs_sync_work {
37 struct list_head w_list;
38 struct xfs_mount *w_mount;
39 void *w_data; /* syncer routine argument */
40 void (*w_syncer)(struct xfs_mount *, void *);
41} bhv_vfs_sync_work_t;
42
43#define SYNC_ATTR 0x0001 /* sync attributes */
44#define SYNC_CLOSE 0x0002 /* close file system down */
45#define SYNC_DELWRI 0x0004 /* look at delayed writes */
46#define SYNC_WAIT 0x0008 /* wait for i/o to complete */
47#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */
48#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
49#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
50#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
51#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
52
53/*
54 * When remounting a filesystem read-only or freezing the filesystem,
55 * we have two phases to execute. This first phase is syncing the data
56 * before we quiesce the filesystem, and the second is flushing all the
57 * inodes out after we've waited for all the transactions created by
58 * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE
59 * to ensure that the inodes are written to their location on disk
60 * rather than just existing in transactions in the log. This means
61 * after a quiesce there is no log replay required to write the inodes
62 * to disk (this is the main difference between a sync and a quiesce).
63 */
64#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT)
65#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT)
66
67#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
68#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
69#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */
70#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */
71#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
72#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
73
74#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen)
75#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l))
76
77#endif /* __XFS_VFS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
deleted file mode 100644
index b52528bbbfff..000000000000
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ /dev/null
@@ -1,145 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_vnodeops.h"
20#include "xfs_bmap_btree.h"
21#include "xfs_inode.h"
22
23/*
24 * And this gunk is needed for xfs_mount.h
25 */
26#include "xfs_log.h"
27#include "xfs_trans.h"
28#include "xfs_sb.h"
29#include "xfs_dmapi.h"
30#include "xfs_inum.h"
31#include "xfs_ag.h"
32#include "xfs_mount.h"
33
34
35/*
36 * Dedicated vnode inactive/reclaim sync wait queues.
37 * Prime number of hash buckets since address is used as the key.
38 */
39#define NVSYNC 37
40#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
41static wait_queue_head_t vsync[NVSYNC];
42
43void __init
44vn_init(void)
45{
46 int i;
47
48 for (i = 0; i < NVSYNC; i++)
49 init_waitqueue_head(&vsync[i]);
50}
51
52void
53vn_iowait(
54 xfs_inode_t *ip)
55{
56 wait_queue_head_t *wq = vptosync(ip);
57
58 wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
59}
60
61void
62vn_iowake(
63 xfs_inode_t *ip)
64{
65 if (atomic_dec_and_test(&ip->i_iocount))
66 wake_up(vptosync(ip));
67}
68
69/*
70 * Volume managers supporting multiple paths can send back ENODEV when the
71 * final path disappears. In this case continuing to fill the page cache
72 * with dirty data which cannot be written out is evil, so prevent that.
73 */
74void
75vn_ioerror(
76 xfs_inode_t *ip,
77 int error,
78 char *f,
79 int l)
80{
81 if (unlikely(error == -ENODEV))
82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
83}
84
85#ifdef XFS_INODE_TRACE
86
87/*
88 * Reference count of Linux inode if present, -1 if the xfs_inode
89 * has no associated Linux inode.
90 */
91static inline int xfs_icount(struct xfs_inode *ip)
92{
93 struct inode *vp = VFS_I(ip);
94
95 if (vp)
96 return vn_count(vp);
97 return -1;
98}
99
100#define KTRACE_ENTER(ip, vk, s, line, ra) \
101 ktrace_enter( (ip)->i_trace, \
102/* 0 */ (void *)(__psint_t)(vk), \
103/* 1 */ (void *)(s), \
104/* 2 */ (void *)(__psint_t) line, \
105/* 3 */ (void *)(__psint_t)xfs_icount(ip), \
106/* 4 */ (void *)(ra), \
107/* 5 */ NULL, \
108/* 6 */ (void *)(__psint_t)current_cpu(), \
109/* 7 */ (void *)(__psint_t)current_pid(), \
110/* 8 */ (void *)__return_address, \
111/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
112
113/*
114 * Vnode tracing code.
115 */
116void
117_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
118{
119 KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
120}
121
122void
123_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
124{
125 KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
126}
127
128void
129xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
130{
131 KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
132}
133
134void
135_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
136{
137 KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
138}
139
140void
141xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
142{
143 KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
144}
145#endif /* XFS_INODE_TRACE */
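
The vsync array removed here embodies a generally useful trick: hash an object's address onto a small prime-sized array of wait queues so that unrelated objects rarely share one. Reduced to its essentials (demo_ names invented; each queue needs init_waitqueue_head() at startup):

        #include <linux/wait.h>
        #include <asm/atomic.h>

        #define NDEMOQ  37      /* prime, since the address is the hash key */
        static wait_queue_head_t        demo_wq[NDEMOQ];

        #define obj_to_wq(p)    (&demo_wq[(unsigned long)(p) % NDEMOQ])

        static void
        demo_wait_idle(
                void            *obj,
                atomic_t        *count)
        {
                wait_event(*obj_to_wq(obj), atomic_read(count) == 0);
        }

        static void
        demo_put(
                void            *obj,
                atomic_t        *count)
        {
                if (atomic_dec_and_test(count))
                        wake_up(obj_to_wq(obj));
        }
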
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 683ce16210ff..f65983a230d3 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -18,7 +18,10 @@
18#ifndef __XFS_VNODE_H__ 18#ifndef __XFS_VNODE_H__
19#define __XFS_VNODE_H__ 19#define __XFS_VNODE_H__
20 20
21#include "xfs_fs.h"
22
21struct file; 23struct file;
24struct xfs_inode;
22struct xfs_iomap; 25struct xfs_iomap;
23struct attrlist_cursor_kern; 26struct attrlist_cursor_kern;
24 27
@@ -51,40 +54,6 @@ struct attrlist_cursor_kern;
51 Prevent VM access to the pages until 54 Prevent VM access to the pages until
52 the operation completes. */ 55 the operation completes. */
53 56
54
55extern void vn_init(void);
56
57/*
58 * Yeah, these don't take vnode anymore at all, all this should be
59 * cleaned up at some point.
60 */
61extern void vn_iowait(struct xfs_inode *ip);
62extern void vn_iowake(struct xfs_inode *ip);
63extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l);
64
65static inline int vn_count(struct inode *vp)
66{
67 return atomic_read(&vp->i_count);
68}
69
70#define IHOLD(ip) \
71do { \
72 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
73 atomic_inc(&(VFS_I(ip)->i_count)); \
74 xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
75} while (0)
76
77#define IRELE(ip) \
78do { \
79 xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
80 iput(VFS_I(ip)); \
81} while (0)
82
83static inline struct inode *vn_grab(struct inode *vp)
84{
85 return igrab(vp);
86}
87
88/* 57/*
89 * Dealing with bad inodes 58 * Dealing with bad inodes
90 */ 59 */
@@ -121,39 +90,4 @@ static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt)
121 PAGECACHE_TAG_DIRTY) 90 PAGECACHE_TAG_DIRTY)
122 91
123 92
124/*
125 * Tracking vnode activity.
126 */
127#if defined(XFS_INODE_TRACE)
128
129#define INODE_TRACE_SIZE 16 /* number of trace entries */
130#define INODE_KTRACE_ENTRY 1
131#define INODE_KTRACE_EXIT 2
132#define INODE_KTRACE_HOLD 3
133#define INODE_KTRACE_REF 4
134#define INODE_KTRACE_RELE 5
135
136extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
137extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
138extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
139extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
140extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
141#define xfs_itrace_entry(ip) \
142 _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
143#define xfs_itrace_exit(ip) \
144 _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
145#define xfs_itrace_exit_tag(ip, tag) \
146 _xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
147#define xfs_itrace_ref(ip) \
148 _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
149
150#else
151#define xfs_itrace_entry(a)
152#define xfs_itrace_exit(a)
153#define xfs_itrace_exit_tag(a, b)
154#define xfs_itrace_hold(a, b, c, d)
155#define xfs_itrace_ref(a)
156#define xfs_itrace_rele(a, b, c, d)
157#endif
158
159#endif /* __XFS_VNODE_H__ */ 93#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index f2705f2fd43c..591ca6602bfb 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,7 +101,7 @@ xfs_qm_dqinit(
101 if (brandnewdquot) { 101 if (brandnewdquot) {
102 dqp->dq_flnext = dqp->dq_flprev = dqp; 102 dqp->dq_flnext = dqp->dq_flprev = dqp;
103 mutex_init(&dqp->q_qlock); 103 mutex_init(&dqp->q_qlock);
104 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); 104 init_waitqueue_head(&dqp->q_pinwait);
105 105
106 /* 106 /*
107 * Because we want to use a counting completion, complete 107 * Because we want to use a counting completion, complete
@@ -131,7 +131,7 @@ xfs_qm_dqinit(
131 dqp->q_res_bcount = 0; 131 dqp->q_res_bcount = 0;
132 dqp->q_res_icount = 0; 132 dqp->q_res_icount = 0;
133 dqp->q_res_rtbcount = 0; 133 dqp->q_res_rtbcount = 0;
134 dqp->q_pincount = 0; 134 atomic_set(&dqp->q_pincount, 0);
135 dqp->q_hash = NULL; 135 dqp->q_hash = NULL;
136 ASSERT(dqp->dq_flnext == dqp->dq_flprev); 136 ASSERT(dqp->dq_flnext == dqp->dq_flprev);
137 137
@@ -1221,16 +1221,14 @@ xfs_qm_dqflush(
1221 xfs_dqtrace_entry(dqp, "DQFLUSH"); 1221 xfs_dqtrace_entry(dqp, "DQFLUSH");
1222 1222
1223 /* 1223 /*
1224 * If not dirty, nada. 1224 * If not dirty, or it's pinned and we are not supposed to
1225 * block, nada.
1225 */ 1226 */
1226 if (!XFS_DQ_IS_DIRTY(dqp)) { 1227 if (!XFS_DQ_IS_DIRTY(dqp) ||
1228 (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) {
1227 xfs_dqfunlock(dqp); 1229 xfs_dqfunlock(dqp);
1228 return (0); 1230 return 0;
1229 } 1231 }
1230
1231 /*
1232 * Cant flush a pinned dquot. Wait for it.
1233 */
1234 xfs_qm_dqunpin_wait(dqp); 1232 xfs_qm_dqunpin_wait(dqp);
1235 1233
1236 /* 1234 /*
@@ -1274,10 +1272,8 @@ xfs_qm_dqflush(
1274 dqp->dq_flags &= ~(XFS_DQ_DIRTY); 1272 dqp->dq_flags &= ~(XFS_DQ_DIRTY);
1275 mp = dqp->q_mount; 1273 mp = dqp->q_mount;
1276 1274
1277 /* lsn is 64 bits */ 1275 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
1278 spin_lock(&mp->m_ail_lock); 1276 &dqp->q_logitem.qli_item.li_lsn);
1279 dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
1280 spin_unlock(&mp->m_ail_lock);
1281 1277
1282 /* 1278 /*
1283 * Attach an iodone routine so that we can remove this dquot from the 1279 * Attach an iodone routine so that we can remove this dquot from the
@@ -1323,8 +1319,10 @@ xfs_qm_dqflush_done(
1323 xfs_dq_logitem_t *qip) 1319 xfs_dq_logitem_t *qip)
1324{ 1320{
1325 xfs_dquot_t *dqp; 1321 xfs_dquot_t *dqp;
1322 struct xfs_ail *ailp;
1326 1323
1327 dqp = qip->qli_dquot; 1324 dqp = qip->qli_dquot;
1325 ailp = qip->qli_item.li_ailp;
1328 1326
1329 /* 1327 /*
1330 * We only want to pull the item from the AIL if its 1328 * We only want to pull the item from the AIL if its
@@ -1337,15 +1335,12 @@ xfs_qm_dqflush_done(
1337 if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && 1335 if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
1338 qip->qli_item.li_lsn == qip->qli_flush_lsn) { 1336 qip->qli_item.li_lsn == qip->qli_flush_lsn) {
1339 1337
1340 spin_lock(&dqp->q_mount->m_ail_lock); 1338 /* xfs_trans_ail_delete() drops the AIL lock. */
1341 /* 1339 spin_lock(&ailp->xa_lock);
1342 * xfs_trans_delete_ail() drops the AIL lock.
1343 */
1344 if (qip->qli_item.li_lsn == qip->qli_flush_lsn) 1340 if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
1345 xfs_trans_delete_ail(dqp->q_mount, 1341 xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip);
1346 (xfs_log_item_t*)qip);
1347 else 1342 else
1348 spin_unlock(&dqp->q_mount->m_ail_lock); 1343 spin_unlock(&ailp->xa_lock);
1349 } 1344 }
1350 1345
1351 /* 1346 /*
@@ -1375,7 +1370,7 @@ xfs_dqunlock(
1375 mutex_unlock(&(dqp->q_qlock)); 1370 mutex_unlock(&(dqp->q_qlock));
1376 if (dqp->q_logitem.qli_dquot == dqp) { 1371 if (dqp->q_logitem.qli_dquot == dqp) {
1377 /* Once was dqp->q_mount, but might just have been cleared */ 1372 /* Once was dqp->q_mount, but might just have been cleared */
1378 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp, 1373 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
1379 (xfs_log_item_t*)&(dqp->q_logitem)); 1374 (xfs_log_item_t*)&(dqp->q_logitem));
1380 } 1375 }
1381} 1376}
@@ -1489,7 +1484,7 @@ xfs_qm_dqpurge(
1489 "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1484 "xfs_qm_dqpurge: dquot %p flush failed", dqp);
1490 xfs_dqflock(dqp); 1485 xfs_dqflock(dqp);
1491 } 1486 }
1492 ASSERT(dqp->q_pincount == 0); 1487 ASSERT(atomic_read(&dqp->q_pincount) == 0);
1493 ASSERT(XFS_FORCED_SHUTDOWN(mp) || 1488 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1494 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); 1489 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1495 1490
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 8958d0faf8d3..7e455337e2ba 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -83,8 +83,8 @@ typedef struct xfs_dquot {
83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ 83 xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
84 mutex_t q_qlock; /* quota lock */ 84 mutex_t q_qlock; /* quota lock */
85 struct completion q_flush; /* flush completion queue */ 85 struct completion q_flush; /* flush completion queue */
86 uint q_pincount; /* pin count for this dquot */ 86 atomic_t q_pincount; /* dquot pin count */
87 sv_t q_pinwait; /* sync var for pinning */ 87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
88#ifdef XFS_DQUOT_TRACE 88#ifdef XFS_DQUOT_TRACE
89 struct ktrace *q_trace; /* trace header structure */ 89 struct ktrace *q_trace; /* trace header structure */
90#endif 90#endif
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index f028644caa5e..1728f6a7c4f5 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -88,25 +88,22 @@ xfs_qm_dquot_logitem_format(
88 88
89/* 89/*
90 * Increment the pin count of the given dquot. 90 * Increment the pin count of the given dquot.
91 * This value is protected by pinlock spinlock in the xQM structure.
92 */ 91 */
93STATIC void 92STATIC void
94xfs_qm_dquot_logitem_pin( 93xfs_qm_dquot_logitem_pin(
95 xfs_dq_logitem_t *logitem) 94 xfs_dq_logitem_t *logitem)
96{ 95{
97 xfs_dquot_t *dqp; 96 xfs_dquot_t *dqp = logitem->qli_dquot;
98 97
99 dqp = logitem->qli_dquot;
100 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 98 ASSERT(XFS_DQ_IS_LOCKED(dqp));
101 spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); 99 atomic_inc(&dqp->q_pincount);
102 dqp->q_pincount++;
103 spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
104} 100}
105 101
106/* 102/*
107 * Decrement the pin count of the given dquot, and wake up 103 * Decrement the pin count of the given dquot, and wake up
108 * anyone in xfs_dqwait_unpin() if the count goes to 0. The 104 * anyone in xfs_dqwait_unpin() if the count goes to 0. The
109 * dquot must have been previously pinned with a call to xfs_dqpin(). 105 * dquot must have been previously pinned with a call to
106 * xfs_qm_dquot_logitem_pin().
110 */ 107 */
111/* ARGSUSED */ 108/* ARGSUSED */
112STATIC void 109STATIC void
@@ -114,16 +111,11 @@ xfs_qm_dquot_logitem_unpin(
114 xfs_dq_logitem_t *logitem, 111 xfs_dq_logitem_t *logitem,
115 int stale) 112 int stale)
116{ 113{
117 xfs_dquot_t *dqp; 114 xfs_dquot_t *dqp = logitem->qli_dquot;
118 115
119 dqp = logitem->qli_dquot; 116 ASSERT(atomic_read(&dqp->q_pincount) > 0);
120 ASSERT(dqp->q_pincount > 0); 117 if (atomic_dec_and_test(&dqp->q_pincount))
121 spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); 118 wake_up(&dqp->q_pinwait);
122 dqp->q_pincount--;
123 if (dqp->q_pincount == 0) {
124 sv_broadcast(&dqp->q_pinwait);
125 }
126 spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
127} 119}
128 120
129/* ARGSUSED */ 121/* ARGSUSED */
@@ -193,21 +185,14 @@ xfs_qm_dqunpin_wait(
193 xfs_dquot_t *dqp) 185 xfs_dquot_t *dqp)
194{ 186{
195 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 187 ASSERT(XFS_DQ_IS_LOCKED(dqp));
196 if (dqp->q_pincount == 0) { 188 if (atomic_read(&dqp->q_pincount) == 0)
197 return; 189 return;
198 }
199 190
200 /* 191 /*
201 * Give the log a push so we don't wait here too long. 192 * Give the log a push so we don't wait here too long.
202 */ 193 */
203 xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); 194 xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
204 spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); 195 wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
205 if (dqp->q_pincount == 0) {
206 spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock));
207 return;
208 }
209 sv_wait(&(dqp->q_pinwait), PINOD,
210 &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
211} 196}
212 197
213/* 198/*
@@ -310,7 +295,7 @@ xfs_qm_dquot_logitem_trylock(
310 uint retval; 295 uint retval;
311 296
312 dqp = qip->qli_dquot; 297 dqp = qip->qli_dquot;
313 if (dqp->q_pincount > 0) 298 if (atomic_read(&dqp->q_pincount) > 0)
314 return (XFS_ITEM_PINNED); 299 return (XFS_ITEM_PINNED);
315 300
316 if (! xfs_qm_dqlock_nowait(dqp)) 301 if (! xfs_qm_dqlock_nowait(dqp))
@@ -568,14 +553,16 @@ xfs_qm_qoffend_logitem_committed(
568 xfs_lsn_t lsn) 553 xfs_lsn_t lsn)
569{ 554{
570 xfs_qoff_logitem_t *qfs; 555 xfs_qoff_logitem_t *qfs;
556 struct xfs_ail *ailp;
571 557
572 qfs = qfe->qql_start_lip; 558 qfs = qfe->qql_start_lip;
573 spin_lock(&qfs->qql_item.li_mountp->m_ail_lock); 559 ailp = qfs->qql_item.li_ailp;
560 spin_lock(&ailp->xa_lock);
574 /* 561 /*
575 * Delete the qoff-start logitem from the AIL. 562 * Delete the qoff-start logitem from the AIL.
576 * xfs_trans_delete_ail() drops the AIL lock. 563 * xfs_trans_ail_delete() drops the AIL lock.
577 */ 564 */
578 xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); 565 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
579 kmem_free(qfs); 566 kmem_free(qfs);
580 kmem_free(qfe); 567 kmem_free(qfe);
581 return (xfs_lsn_t)-1; 568 return (xfs_lsn_t)-1;
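
The pincount changes in this and the previous hunks all follow one conversion: a spinlock-protected counter plus sv_t becomes an atomic_t plus a wait queue. Boiled down, assuming only the stock atomic and wait APIs:

        #include <linux/wait.h>
        #include <asm/atomic.h>

        static atomic_t demo_pincount = ATOMIC_INIT(0);
        static DECLARE_WAIT_QUEUE_HEAD(demo_pinwait);

        static void demo_pin(void)
        {
                atomic_inc(&demo_pincount);
        }

        static void demo_unpin(void)
        {
                if (atomic_dec_and_test(&demo_pincount))
                        wake_up(&demo_pinwait);
        }

        static void demo_unpin_wait(void)
        {
                wait_event(demo_pinwait, atomic_read(&demo_pincount) == 0);
        }
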
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index df0ffef9775a..6b13960cf318 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -20,7 +20,6 @@
20#include "xfs_bit.h" 20#include "xfs_bit.h"
21#include "xfs_log.h" 21#include "xfs_log.h"
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_clnt.h"
24#include "xfs_trans.h" 23#include "xfs_trans.h"
25#include "xfs_sb.h" 24#include "xfs_sb.h"
26#include "xfs_ag.h" 25#include "xfs_ag.h"
@@ -396,13 +395,10 @@ xfs_qm_mount_quotas(
396/* 395/*
397 * Called from the vfsops layer. 396 * Called from the vfsops layer.
398 */ 397 */
399int 398void
400xfs_qm_unmount_quotas( 399xfs_qm_unmount_quotas(
401 xfs_mount_t *mp) 400 xfs_mount_t *mp)
402{ 401{
403 xfs_inode_t *uqp, *gqp;
404 int error = 0;
405
406 /* 402 /*
407 * Release the dquots that root inode, et al might be holding, 403 * Release the dquots that root inode, et al might be holding,
408 * before we flush quotas and blow away the quotainfo structure. 404 * before we flush quotas and blow away the quotainfo structure.
@@ -415,43 +411,18 @@ xfs_qm_unmount_quotas(
415 xfs_qm_dqdetach(mp->m_rsumip); 411 xfs_qm_dqdetach(mp->m_rsumip);
416 412
417 /* 413 /*
418 * Flush out the quota inodes. 414 * Release the quota inodes.
419 */ 415 */
420 uqp = gqp = NULL;
421 if (mp->m_quotainfo) { 416 if (mp->m_quotainfo) {
422 if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) { 417 if (mp->m_quotainfo->qi_uquotaip) {
423 xfs_ilock(uqp, XFS_ILOCK_EXCL); 418 IRELE(mp->m_quotainfo->qi_uquotaip);
424 xfs_iflock(uqp); 419 mp->m_quotainfo->qi_uquotaip = NULL;
425 error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
426 xfs_iunlock(uqp, XFS_ILOCK_EXCL);
427 if (unlikely(error == EFSCORRUPTED)) {
428 XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
429 XFS_ERRLEVEL_LOW, mp);
430 goto out;
431 }
432 } 420 }
433 if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) { 421 if (mp->m_quotainfo->qi_gquotaip) {
434 xfs_ilock(gqp, XFS_ILOCK_EXCL); 422 IRELE(mp->m_quotainfo->qi_gquotaip);
435 xfs_iflock(gqp); 423 mp->m_quotainfo->qi_gquotaip = NULL;
436 error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
437 xfs_iunlock(gqp, XFS_ILOCK_EXCL);
438 if (unlikely(error == EFSCORRUPTED)) {
439 XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
440 XFS_ERRLEVEL_LOW, mp);
441 goto out;
442 }
443 } 424 }
444 } 425 }
445 if (uqp) {
446 IRELE(uqp);
447 mp->m_quotainfo->qi_uquotaip = NULL;
448 }
449 if (gqp) {
450 IRELE(gqp);
451 mp->m_quotainfo->qi_gquotaip = NULL;
452 }
453out:
454 return XFS_ERROR(error);
455} 426}
456 427
457/* 428/*
@@ -987,14 +958,10 @@ xfs_qm_dqdetach(
987} 958}
988 959
989/* 960/*
990 * This is called by VFS_SYNC and flags arg determines the caller, 961 * This is called to sync quotas. We can be told to use non-blocking
991 * and its motives, as done in xfs_sync. 962 * semantics by either the SYNC_BDFLUSH flag or the absence of the
992 * 963 * SYNC_WAIT flag.
993 * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
994 * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
995 * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
996 */ 964 */
997
998int 965int
999xfs_qm_sync( 966xfs_qm_sync(
1000 xfs_mount_t *mp, 967 xfs_mount_t *mp,
@@ -1137,7 +1104,6 @@ xfs_qm_init_quotainfo(
1137 return error; 1104 return error;
1138 } 1105 }
1139 1106
1140 spin_lock_init(&qinf->qi_pinlock);
1141 xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); 1107 xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
1142 qinf->qi_dqreclaims = 0; 1108 qinf->qi_dqreclaims = 0;
1143 1109
@@ -1234,7 +1200,6 @@ xfs_qm_destroy_quotainfo(
1234 */ 1200 */
1235 xfs_qm_rele_quotafs_ref(mp); 1201 xfs_qm_rele_quotafs_ref(mp);
1236 1202
1237 spinlock_destroy(&qi->qi_pinlock);
1238 xfs_qm_list_destroy(&qi->qi_dqlist); 1203 xfs_qm_list_destroy(&qi->qi_dqlist);
1239 1204
1240 if (qi->qi_uquotaip) { 1205 if (qi->qi_uquotaip) {
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 44f25349e478..ddf09166387c 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -106,7 +106,6 @@ typedef struct xfs_qm {
106typedef struct xfs_quotainfo { 106typedef struct xfs_quotainfo {
107 xfs_inode_t *qi_uquotaip; /* user quota inode */ 107 xfs_inode_t *qi_uquotaip; /* user quota inode */
108 xfs_inode_t *qi_gquotaip; /* group quota inode */ 108 xfs_inode_t *qi_gquotaip; /* group quota inode */
109 spinlock_t qi_pinlock; /* dquot pinning lock */
110 xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ 109 xfs_dqlist_t qi_dqlist; /* all dquots in filesys */
111 int qi_dqreclaims; /* a change here indicates 110 int qi_dqreclaims; /* a change here indicates
112 a removal in the dqlist */ 111 a removal in the dqlist */
@@ -168,7 +167,7 @@ extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
168extern void xfs_qm_mount_quotas(xfs_mount_t *); 167extern void xfs_qm_mount_quotas(xfs_mount_t *);
169extern int xfs_qm_quotacheck(xfs_mount_t *); 168extern int xfs_qm_quotacheck(xfs_mount_t *);
170extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); 169extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
171extern int xfs_qm_unmount_quotas(xfs_mount_t *); 170extern void xfs_qm_unmount_quotas(xfs_mount_t *);
172extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); 171extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
173extern int xfs_qm_sync(xfs_mount_t *, int); 172extern int xfs_qm_sync(xfs_mount_t *, int);
174 173
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index eea2e60b456b..bc6c5cca3e12 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -20,7 +20,6 @@
20#include "xfs_bit.h" 20#include "xfs_bit.h"
21#include "xfs_log.h" 21#include "xfs_log.h"
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_clnt.h"
24#include "xfs_trans.h" 23#include "xfs_trans.h"
25#include "xfs_sb.h" 24#include "xfs_sb.h"
26#include "xfs_ag.h" 25#include "xfs_ag.h"
@@ -51,7 +50,7 @@
51 50
52STATIC void 51STATIC void
53xfs_fill_statvfs_from_dquot( 52xfs_fill_statvfs_from_dquot(
54 bhv_statvfs_t *statp, 53 struct kstatfs *statp,
55 xfs_disk_dquot_t *dp) 54 xfs_disk_dquot_t *dp)
56{ 55{
57 __uint64_t limit; 56 __uint64_t limit;
@@ -88,7 +87,7 @@ xfs_fill_statvfs_from_dquot(
88STATIC void 87STATIC void
89xfs_qm_statvfs( 88xfs_qm_statvfs(
90 xfs_inode_t *ip, 89 xfs_inode_t *ip,
91 bhv_statvfs_t *statp) 90 struct kstatfs *statp)
92{ 91{
93 xfs_mount_t *mp = ip->i_mount; 92 xfs_mount_t *mp = ip->i_mount;
94 xfs_dquot_t *dqp; 93 xfs_dquot_t *dqp;
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 1a3b803dfa55..68139b38aede 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -127,7 +127,7 @@ xfs_qm_quotactl(
127 break; 127 break;
128 128
129 case Q_XQUOTASYNC: 129 case Q_XQUOTASYNC:
130 return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL)); 130 return xfs_sync_inodes(mp, SYNC_DELWRI);
131 131
132 default: 132 default:
133 break; 133 break;
@@ -1022,101 +1022,104 @@ xfs_qm_export_flags(
1022 1022
1023 1023
1024/* 1024/*
1025 * Go thru all the inodes in the file system, releasing their dquots. 1025 * Release all the dquots on the inodes in an AG.
1026 * Note that the mount structure gets modified to indicate that quotas are off
1027 * AFTER this, in the case of quotaoff. This also gets called from
1028 * xfs_rootumount.
1029 */ 1026 */
1030void 1027STATIC void
1031xfs_qm_dqrele_all_inodes( 1028xfs_qm_dqrele_inodes_ag(
1032 struct xfs_mount *mp, 1029 xfs_mount_t *mp,
1033 uint flags) 1030 int ag,
1031 uint flags)
1034{ 1032{
1035 xfs_inode_t *ip, *topino; 1033 xfs_inode_t *ip = NULL;
1036 uint ireclaims; 1034 xfs_perag_t *pag = &mp->m_perag[ag];
1037 struct inode *vp; 1035 int first_index = 0;
1038 boolean_t vnode_refd; 1036 int nr_found;
1039 1037
1040 ASSERT(mp->m_quotainfo);
1041
1042 XFS_MOUNT_ILOCK(mp);
1043again:
1044 ip = mp->m_inodes;
1045 if (ip == NULL) {
1046 XFS_MOUNT_IUNLOCK(mp);
1047 return;
1048 }
1049 do { 1038 do {
1050 /* Skip markers inserted by xfs_sync */ 1039 /*
1051 if (ip->i_mount == NULL) { 1040 * use a gang lookup to find the next inode in the tree
1052 ip = ip->i_mnext; 1041 * as the tree is sparse and a gang lookup walks to find
1053 continue; 1042 * the number of objects requested.
1043 */
1044 read_lock(&pag->pag_ici_lock);
1045 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
1046 (void**)&ip, first_index, 1);
1047
1048 if (!nr_found) {
1049 read_unlock(&pag->pag_ici_lock);
1050 break;
1054 } 1051 }
1055 /* Root inode, rbmip and rsumip have associated blocks */ 1052
1053 /*
1054 * Update the index for the next lookup. Catch overflows
1055 * into the next AG range which can occur if we have inodes
1056 * in the last block of the AG and we are currently
1057 * pointing to the last inode.
1058 */
1059 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
1060 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
1061 read_unlock(&pag->pag_ici_lock);
1062 break;
1063 }
1064
1065 /* skip quota inodes */
1056 if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { 1066 if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
1057 ASSERT(ip->i_udquot == NULL); 1067 ASSERT(ip->i_udquot == NULL);
1058 ASSERT(ip->i_gdquot == NULL); 1068 ASSERT(ip->i_gdquot == NULL);
1059 ip = ip->i_mnext; 1069 read_unlock(&pag->pag_ici_lock);
1060 continue; 1070 continue;
1061 } 1071 }
1062 vp = VFS_I(ip); 1072
1063 if (!vp) { 1073 /*
1064 ASSERT(ip->i_udquot == NULL); 1074 * If we can't get a reference on the inode, it must be
1065 ASSERT(ip->i_gdquot == NULL); 1075 * in reclaim. Leave it for the reclaim code to flush.
1066 ip = ip->i_mnext; 1076 */
1077 if (!igrab(VFS_I(ip))) {
1078 read_unlock(&pag->pag_ici_lock);
1067 continue; 1079 continue;
1068 } 1080 }
1069 vnode_refd = B_FALSE; 1081 read_unlock(&pag->pag_ici_lock);
1070 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { 1082
1071 ireclaims = mp->m_ireclaims; 1083 /* avoid new inodes though we shouldn't find any here */
1072 topino = mp->m_inodes; 1084 if (xfs_iflags_test(ip, XFS_INEW)) {
1073 vp = vn_grab(vp); 1085 IRELE(ip);
1074 if (!vp) 1086 continue;
1075 goto again;
1076
1077 XFS_MOUNT_IUNLOCK(mp);
1078 /* XXX restart limit ? */
1079 xfs_ilock(ip, XFS_ILOCK_EXCL);
1080 vnode_refd = B_TRUE;
1081 } else {
1082 ireclaims = mp->m_ireclaims;
1083 topino = mp->m_inodes;
1084 XFS_MOUNT_IUNLOCK(mp);
1085 } 1087 }
1086 1088
1087 /* 1089 xfs_ilock(ip, XFS_ILOCK_EXCL);
1088 * We don't keep the mountlock across the dqrele() call,
1089 * since it can take a while..
1090 */
1091 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { 1090 if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
1092 xfs_qm_dqrele(ip->i_udquot); 1091 xfs_qm_dqrele(ip->i_udquot);
1093 ip->i_udquot = NULL; 1092 ip->i_udquot = NULL;
1094 } 1093 }
1095 if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { 1094 if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
1095 ip->i_gdquot) {
1096 xfs_qm_dqrele(ip->i_gdquot); 1096 xfs_qm_dqrele(ip->i_gdquot);
1097 ip->i_gdquot = NULL; 1097 ip->i_gdquot = NULL;
1098 } 1098 }
1099 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1099 xfs_iput(ip, XFS_ILOCK_EXCL);
1100 /* 1100
1101 * Wait until we've dropped the ilock and mountlock to 1101 } while (nr_found);
1102 * do the vn_rele. Or be condemned to an eternity in the 1102}
1103 * inactive code in hell. 1103
1104 */ 1104/*
1105 if (vnode_refd) 1105 * Go thru all the inodes in the file system, releasing their dquots.
1106 IRELE(ip); 1106 * Note that the mount structure gets modified to indicate that quotas are off
1107 XFS_MOUNT_ILOCK(mp); 1107 * AFTER this, in the case of quotaoff. This also gets called from
1108 /* 1108 * xfs_rootumount.
1109 * If an inode was inserted or removed, we gotta 1109 */
1110 * start over again. 1110void
1111 */ 1111xfs_qm_dqrele_all_inodes(
1112 if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { 1112 struct xfs_mount *mp,
1113 /* XXX use a sentinel */ 1113 uint flags)
1114 goto again; 1114{
1115 } 1115 int i;
1116 ip = ip->i_mnext;
1117 } while (ip != mp->m_inodes);
1118 1116
1119 XFS_MOUNT_IUNLOCK(mp); 1117 ASSERT(mp->m_quotainfo);
1118 for (i = 0; i < mp->m_sb.sb_agcount; i++) {
1119 if (!mp->m_perag[i].pag_ici_init)
1120 continue;
1121 xfs_qm_dqrele_inodes_ag(mp, i, flags);
1122 }
1120} 1123}
1121 1124
1122/*------------------------------------------------------------------------*/ 1125/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index c27abef7b84f..ae5482965424 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -18,6 +18,13 @@
18#include <xfs.h> 18#include <xfs.h>
19#include "debug.h" 19#include "debug.h"
20 20
21/* xfs_mount.h drags a lot of crap in, sorry.. */
22#include "xfs_sb.h"
23#include "xfs_inum.h"
24#include "xfs_ag.h"
25#include "xfs_dmapi.h"
26#include "xfs_mount.h"
27
21static char message[1024]; /* keep it off the stack */ 28static char message[1024]; /* keep it off the stack */
22static DEFINE_SPINLOCK(xfs_err_lock); 29static DEFINE_SPINLOCK(xfs_err_lock);
23 30
@@ -55,22 +62,42 @@ cmn_err(register int level, char *fmt, ...)
55} 62}
56 63
57void 64void
58icmn_err(register int level, char *fmt, va_list ap) 65xfs_fs_vcmn_err(
66 int level,
67 struct xfs_mount *mp,
68 char *fmt,
69 va_list ap)
59{ 70{
60 ulong flags; 71 unsigned long flags;
61 int len; 72 int len = 0;
62 73
63 level &= XFS_ERR_MASK; 74 level &= XFS_ERR_MASK;
64 if(level > XFS_MAX_ERR_LEVEL) 75 if (level > XFS_MAX_ERR_LEVEL)
65 level = XFS_MAX_ERR_LEVEL; 76 level = XFS_MAX_ERR_LEVEL;
77
66 spin_lock_irqsave(&xfs_err_lock,flags); 78 spin_lock_irqsave(&xfs_err_lock,flags);
67 len = vsnprintf(message, sizeof(message), fmt, ap); 79
80 if (mp) {
81 len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname);
82
83 /*
84 * Skip the printk if we can't print anything useful
85 * due to an over-long device name.
86 */
87 if (len >= sizeof(message))
88 goto out;
89 }
90
91 len = vsnprintf(message + len, sizeof(message) - len, fmt, ap);
68 if (len >= sizeof(message)) 92 if (len >= sizeof(message))
69 len = sizeof(message) - 1; 93 len = sizeof(message) - 1;
70 if (message[len-1] == '\n') 94 if (message[len-1] == '\n')
71 message[len-1] = 0; 95 message[len-1] = 0;
96
72 printk("%s%s\n", err_level[level], message); 97 printk("%s%s\n", err_level[level], message);
98 out:
73 spin_unlock_irqrestore(&xfs_err_lock,flags); 99 spin_unlock_irqrestore(&xfs_err_lock,flags);
100
74 BUG_ON(level == CE_PANIC); 101 BUG_ON(level == CE_PANIC);
75} 102}
76 103
@@ -84,5 +111,5 @@ assfail(char *expr, char *file, int line)
 void
 xfs_hex_dump(void *p, int length)
 {
-	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
+	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
 }
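The new xfs_fs_vcmn_err above builds the message in two stages: an optional
Filesystem "name": prefix, then the formatted body appended into whatever
buffer space remains. A self-contained sketch of that pattern (userspace,
fprintf instead of printk, snprintf instead of the kernel's sprintf; the
function names are illustrative):

	#include <stdarg.h>
	#include <stdio.h>

	static char message[1024];

	/* Prefix a report with an optional filesystem name, then format the rest. */
	static void fs_vreport(const char *fsname, const char *fmt, va_list ap)
	{
		int len = 0;

		if (fsname) {
			len = snprintf(message, sizeof(message),
				       "Filesystem \"%s\": ", fsname);
			if (len >= (int)sizeof(message))
				return;	/* over-long name: nothing useful to print */
		}

		len += vsnprintf(message + len, sizeof(message) - len, fmt, ap);
		if (len >= (int)sizeof(message))
			len = sizeof(message) - 1;
		if (len > 0 && message[len - 1] == '\n')
			message[len - 1] = '\0';

		fprintf(stderr, "%s\n", message);
	}

	static void fs_report(const char *fsname, const char *fmt, ...)
	{
		va_list ap;

		va_start(ap, fmt);
		fs_vreport(fsname, fmt, ap);
		va_end(ap);
	}

	int main(void)
	{
		fs_report("sda1", "metadata I/O error: block 0x%x\n", 0x1234);
		return 0;
	}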
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 75845f950814..6f4fd37c67af 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -27,8 +27,6 @@
 #define CE_ALERT	1	/* alert */
 #define CE_PANIC	0	/* panic */
 
-extern void icmn_err(int, char *, va_list)
-	__attribute__ ((format (printf, 2, 0)));
 extern void cmn_err(int, char *, ...)
 	__attribute__ ((format (printf, 2, 3)));
 extern void assfail(char *expr, char *f, int l);
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index a34ef05489b1..2d494c26717f 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -113,21 +113,16 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
 void
 ktrace_free(ktrace_t *ktp)
 {
-	int		entries_size;
-
 	if (ktp == (ktrace_t *)NULL)
 		return;
 
 	/*
 	 * Special treatment for the Vnode trace buffer.
 	 */
-	if (ktp->kt_nentries == ktrace_zentries) {
+	if (ktp->kt_nentries == ktrace_zentries)
 		kmem_zone_free(ktrace_ent_zone, ktp->kt_entries);
-	} else {
-		entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
-
+	else
 		kmem_free(ktp->kt_entries);
-	}
 
 	kmem_zone_free(ktrace_hdr_zone, ktp);
 }
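The removed entries_size computation was dead code: the allocator records an
allocation's size itself, so the free path needs only the pointer. The same
property holds for plain C, as in this trivial runnable sketch (illustrative,
not kernel code):

	#include <stdlib.h>

	int main(void)
	{
		int *entries = malloc(128 * sizeof(*entries));

		free(entries);	/* no size argument required; the allocator tracked it */
		return 0;
	}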
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 540e4c989825..17254b529c54 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -30,7 +30,7 @@
 #define XFS_ATTR_TRACE 1
 #define XFS_BLI_TRACE 1
 #define XFS_BMAP_TRACE 1
-#define XFS_BMBT_TRACE 1
+#define XFS_BTREE_TRACE 1
 #define XFS_DIR2_TRACE 1
 #define XFS_DQUOT_TRACE 1
 #define XFS_ILOCK_TRACE 1
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 91d69338d3b2..a8cdd73999a4 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -758,7 +758,7 @@ xfs_acl_setmode(
 	if (gap && nomask)
 		iattr.ia_mode |= gap->ae_perm << 3;
 
-	return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred);
+	return xfs_setattr(XFS_I(vp), &iattr, 0);
 }
 
 /*
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 61b292a9fb41..f2e21817a226 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -91,6 +91,8 @@ typedef struct xfs_agf {
 #define	XFS_AGF_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
 #define	XFS_BUF_TO_AGF(bp)	((xfs_agf_t *)XFS_BUF_PTR(bp))
 
+extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
+			xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
 
 /*
  * Size of the unlinked inode hash table in the agi.
@@ -142,6 +144,9 @@ typedef struct xfs_agi {
 #define	XFS_AGI_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
 #define	XFS_BUF_TO_AGI(bp)	((xfs_agi_t *)XFS_BUF_PTR(bp))
 
+extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
+			xfs_agnumber_t agno, struct xfs_buf **bpp);
+
 /*
  * The third a.g. block contains the a.g. freelist, an array
  * of block pointers to blocks owned by the allocation btree code.
@@ -192,17 +197,23 @@ typedef struct xfs_perag
 	xfs_agino_t	pagi_freecount;	/* number of free inodes */
 	xfs_agino_t	pagi_count;	/* number of allocated inodes */
 	int		pagb_count;	/* pagb slots in use */
+	xfs_perag_busy_t *pagb_list;	/* unstable blocks */
 #ifdef __KERNEL__
 	spinlock_t	pagb_lock;	/* lock for pagb_list */
-#endif
-	xfs_perag_busy_t *pagb_list;	/* unstable blocks */
+
 	atomic_t        pagf_fstrms;	/* # of filestreams active in this AG */
 
 	int		pag_ici_init;	/* incore inode cache initialised */
 	rwlock_t	pag_ici_lock;	/* incore inode lock */
 	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
+#endif
 } xfs_perag_t;
 
+/*
+ * tags for inode radix tree
+ */
+#define XFS_ICI_RECLAIM_TAG	0	/* inode is to be reclaimed */
+
 #define	XFS_AG_MAXLEVELS(mp)	((mp)->m_ag_maxlevels)
 #define	XFS_MIN_FREELIST_RAW(bl,cl,mp)	\
 	(MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
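The new XFS_ICI_RECLAIM_TAG marks entries in the per-AG incore inode radix
tree so that reclaim can find candidate inodes without scanning the whole
cache. The fragment below is a generic illustration of the Linux radix-tree
tag API used for this kind of marking, not code from this patch; locking is
elided and the helper names are hypothetical:

	#include <linux/radix-tree.h>

	#define XFS_ICI_RECLAIM_TAG	0

	/* Mark one cached inode (indexed by its AG-relative inode number). */
	static void mark_inode_reclaimable(struct radix_tree_root *ici_root,
					   unsigned long agino)
	{
		radix_tree_tag_set(ici_root, agino, XFS_ICI_RECLAIM_TAG);
	}

	/* Gang lookup by tag visits only marked entries, not the whole tree. */
	static int find_reclaimable_batch(struct radix_tree_root *ici_root,
					  void **batch, unsigned long first,
					  unsigned int nr)
	{
		return radix_tree_gang_lookup_tag(ici_root, batch, first, nr,
						  XFS_ICI_RECLAIM_TAG);
	}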
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 1956f83489f1..028e44e58ea9 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -90,6 +90,92 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
  */
 
 /*
+ * Lookup the record equal to [bno, len] in the btree given by cur.
+ */
+STATIC int				/* error */
+xfs_alloc_lookup_eq(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	int			*stat)	/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+STATIC int				/* error */
+xfs_alloc_lookup_ge(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	int			*stat)	/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+STATIC int				/* error */
+xfs_alloc_lookup_le(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	int			*stat)	/* success/failure */
+{
+	cur->bc_rec.a.ar_startblock = bno;
+	cur->bc_rec.a.ar_blockcount = len;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [bno, len].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int				/* error */
+xfs_alloc_update(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len)	/* length of extent */
+{
+	union xfs_btree_rec	rec;
+
+	rec.alloc.ar_startblock = cpu_to_be32(bno);
+	rec.alloc.ar_blockcount = cpu_to_be32(len);
+	return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+STATIC int				/* error */
+xfs_alloc_get_rec(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		*bno,	/* output: starting block of extent */
+	xfs_extlen_t		*len,	/* output: length of extent */
+	int			*stat)	/* output: success/failure */
+{
+	union xfs_btree_rec	*rec;
+	int			error;
+
+	error = xfs_btree_get_rec(cur, &rec, stat);
+	if (!error && *stat == 1) {
+		*bno = be32_to_cpu(rec->alloc.ar_startblock);
+		*len = be32_to_cpu(rec->alloc.ar_blockcount);
+	}
+	return error;
+}
+
+/*
  * Compute aligned version of the found extent.
  * Takes alignment and min length into account.
  */
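The helpers added above are thin wrappers that stash the search key in the
cursor and defer to the generic xfs_btree_lookup. That layering, a shared
engine plus per-tree key marshalling, can be modelled in plain C; everything
below is an illustrative stand-in, not the kernel API:

	#include <stdio.h>

	/* The generic engine only sees an opaque key slot in the cursor. */
	enum lookup_dir { LOOKUP_EQ, LOOKUP_GE, LOOKUP_LE };

	struct cursor {
		unsigned startblock;	/* key slot, filled by the wrapper */
		unsigned blockcount;
	};

	/* Stand-in for the shared xfs_btree_lookup(): one search routine. */
	static int btree_lookup(struct cursor *cur, enum lookup_dir dir, int *stat)
	{
		printf("lookup %s [%u, %u]\n",
		       dir == LOOKUP_EQ ? "EQ" : dir == LOOKUP_GE ? "GE" : "LE",
		       cur->startblock, cur->blockcount);
		*stat = 1;
		return 0;
	}

	/* Per-tree wrapper: marshal the key, pick the direction. */
	static int alloc_lookup_ge(struct cursor *cur, unsigned bno, unsigned len,
				   int *stat)
	{
		cur->startblock = bno;
		cur->blockcount = len;
		return btree_lookup(cur, LOOKUP_GE, stat);
	}

	int main(void)
	{
		struct cursor cur;
		int stat;

		return alloc_lookup_ge(&cur, 100, 8, &stat);
	}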
@@ -294,21 +380,20 @@ xfs_alloc_fixup_trees(
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(i == 1);
 	}
+
 #ifdef DEBUG
-	{
-		xfs_alloc_block_t	*bnoblock;
-		xfs_alloc_block_t	*cntblock;
-
-		if (bno_cur->bc_nlevels == 1 &&
-		    cnt_cur->bc_nlevels == 1) {
-			bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
-			cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
-			XFS_WANT_CORRUPTED_RETURN(
-				be16_to_cpu(bnoblock->bb_numrecs) ==
-				be16_to_cpu(cntblock->bb_numrecs));
-		}
+	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
+		struct xfs_btree_block	*bnoblock;
+		struct xfs_btree_block	*cntblock;
+
+		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
+		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
+
+		XFS_WANT_CORRUPTED_RETURN(
+			bnoblock->bb_numrecs == cntblock->bb_numrecs);
 	}
 #endif
+
 	/*
 	 * Deal with all four cases: the allocated record is contained
 	 * within the freespace record, so we can have new freespace
@@ -333,7 +418,7 @@ xfs_alloc_fixup_trees(
 		/*
 		 * Delete the entry from the by-size btree.
 		 */
-		if ((error = xfs_alloc_delete(cnt_cur, &i)))
+		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(i == 1);
 		/*
@@ -343,7 +428,7 @@ xfs_alloc_fixup_trees(
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(i == 0);
-		if ((error = xfs_alloc_insert(cnt_cur, &i)))
+		if ((error = xfs_btree_insert(cnt_cur, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(i == 1);
 	}
@@ -351,7 +436,7 @@ xfs_alloc_fixup_trees(
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(i == 0);
-		if ((error = xfs_alloc_insert(cnt_cur, &i)))
+		if ((error = xfs_btree_insert(cnt_cur, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(i == 1);
 	}
@@ -362,7 +447,7 @@ xfs_alloc_fixup_trees(
 		/*
 		 * No remaining freespace, just delete the by-block tree entry.
 		 */
-		if ((error = xfs_alloc_delete(bno_cur, &i)))
+		if ((error = xfs_btree_delete(bno_cur, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(i == 1);
 	} else {
@@ -379,7 +464,7 @@ xfs_alloc_fixup_trees(
 		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(i == 0);
-		if ((error = xfs_alloc_insert(bno_cur, &i)))
+		if ((error = xfs_btree_insert(bno_cur, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(i == 1);
 	}
@@ -640,8 +725,8 @@ xfs_alloc_ag_vextent_exact(
 	/*
 	 * Allocate/initialize a cursor for the by-number freespace btree.
 	 */
-	bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
-		args->agno, XFS_BTNUM_BNO, NULL, 0);
+	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+					  args->agno, XFS_BTNUM_BNO);
 	/*
 	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 	 * Look for the closest free block <= bno, it must contain bno
@@ -696,8 +781,8 @@ xfs_alloc_ag_vextent_exact(
 	 * We are allocating agbno for rlen [agbno .. end]
 	 * Allocate/initialize a cursor for the by-size btree.
 	 */
-	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
-		args->agno, XFS_BTNUM_CNT, NULL, 0);
+	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+					  args->agno, XFS_BTNUM_CNT);
 	ASSERT(args->agbno + args->len <=
 		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
@@ -759,8 +844,8 @@ xfs_alloc_ag_vextent_near(
 	/*
 	 * Get a cursor for the by-size btree.
 	 */
-	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
-		args->agno, XFS_BTNUM_CNT, NULL, 0);
+	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+					  args->agno, XFS_BTNUM_CNT);
 	ltlen = 0;
 	bno_cur_lt = bno_cur_gt = NULL;
 	/*
@@ -818,7 +903,7 @@ xfs_alloc_ag_vextent_near(
 			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 			if (ltlen >= args->minlen)
 				break;
-			if ((error = xfs_alloc_increment(cnt_cur, 0, &i)))
+			if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
 				goto error0;
 		} while (i);
 		ASSERT(ltlen >= args->minlen);
@@ -828,7 +913,7 @@ xfs_alloc_ag_vextent_near(
 		i = cnt_cur->bc_ptrs[0];
 		for (j = 1, blen = 0, bdiff = 0;
 		     !error && j && (blen < args->maxlen || bdiff > 0);
-		     error = xfs_alloc_increment(cnt_cur, 0, &j)) {
+		     error = xfs_btree_increment(cnt_cur, 0, &j)) {
 			/*
 			 * For each entry, decide if it's better than
 			 * the previous best entry.
@@ -886,8 +971,8 @@ xfs_alloc_ag_vextent_near(
 		/*
 		 * Set up a cursor for the by-bno tree.
 		 */
-		bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp,
-			args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0);
+		bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
+			args->agbp, args->agno, XFS_BTNUM_BNO);
 		/*
 		 * Fix up the btree entries.
 		 */
@@ -914,8 +999,8 @@ xfs_alloc_ag_vextent_near(
 	/*
 	 * Allocate and initialize the cursor for the leftward search.
 	 */
-	bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
-		args->agno, XFS_BTNUM_BNO, NULL, 0);
+	bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+					     args->agno, XFS_BTNUM_BNO);
 	/*
 	 * Lookup <= bno to find the leftward search's starting point.
 	 */
@@ -938,7 +1023,7 @@ xfs_alloc_ag_vextent_near(
 	 * Increment the cursor, so we will point at the entry just right
 	 * of the leftward entry if any, or to the leftmost entry.
 	 */
-	if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+	if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
 		goto error0;
 	if (!i) {
 		/*
@@ -961,7 +1046,7 @@ xfs_alloc_ag_vextent_near(
 				args->minlen, &ltbnoa, &ltlena);
 			if (ltlena >= args->minlen)
 				break;
-			if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i)))
+			if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
 				goto error0;
 			if (!i) {
 				xfs_btree_del_cursor(bno_cur_lt,
@@ -977,7 +1062,7 @@ xfs_alloc_ag_vextent_near(
 				args->minlen, &gtbnoa, &gtlena);
 			if (gtlena >= args->minlen)
 				break;
-			if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
+			if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
 				goto error0;
 			if (!i) {
 				xfs_btree_del_cursor(bno_cur_gt,
@@ -1066,7 +1151,7 @@ xfs_alloc_ag_vextent_near(
 				/*
 				 * Fell off the right end.
 				 */
-				if ((error = xfs_alloc_increment(
+				if ((error = xfs_btree_increment(
 						bno_cur_gt, 0, &i)))
 					goto error0;
 				if (!i) {
@@ -1162,7 +1247,7 @@ xfs_alloc_ag_vextent_near(
 				/*
 				 * Fell off the left end.
 				 */
-				if ((error = xfs_alloc_decrement(
+				if ((error = xfs_btree_decrement(
 						bno_cur_lt, 0, &i)))
 					goto error0;
 				if (!i) {
@@ -1267,8 +1352,8 @@ xfs_alloc_ag_vextent_size(
 	/*
 	 * Allocate and initialize a cursor for the by-size btree.
 	 */
-	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
-		args->agno, XFS_BTNUM_CNT, NULL, 0);
+	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+					  args->agno, XFS_BTNUM_CNT);
 	bno_cur = NULL;
 	/*
 	 * Look for an entry >= maxlen+alignment-1 blocks.
@@ -1321,7 +1406,7 @@ xfs_alloc_ag_vextent_size(
 			bestflen = flen;
 			bestfbno = fbno;
 			for (;;) {
-				if ((error = xfs_alloc_decrement(cnt_cur, 0, &i)))
+				if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
 					goto error0;
 				if (i == 0)
 					break;
@@ -1372,8 +1457,8 @@ xfs_alloc_ag_vextent_size(
 	/*
 	 * Allocate and initialize a cursor for the by-block tree.
 	 */
-	bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
-		args->agno, XFS_BTNUM_BNO, NULL, 0);
+	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
+					  args->agno, XFS_BTNUM_BNO);
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
 			rbno, rlen, XFSA_FIXUP_CNT_OK)))
 		goto error0;
@@ -1416,7 +1501,7 @@ xfs_alloc_ag_vextent_small(
 	xfs_extlen_t	flen;
 	int		i;
 
-	if ((error = xfs_alloc_decrement(ccur, 0, &i)))
+	if ((error = xfs_btree_decrement(ccur, 0, &i)))
 		goto error0;
 	if (i) {
 		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
@@ -1515,8 +1600,7 @@ xfs_free_ag_extent(
 	/*
 	 * Allocate and initialize a cursor for the by-block btree.
 	 */
-	bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL,
-		0);
+	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
 	cnt_cur = NULL;
 	/*
 	 * Look for a neighboring block on the left (lower block numbers)
@@ -1549,7 +1633,7 @@ xfs_free_ag_extent(
 	 * Look for a neighboring block on the right (higher block numbers)
 	 * that is contiguous with this space.
 	 */
-	if ((error = xfs_alloc_increment(bno_cur, 0, &haveright)))
+	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
 		goto error0;
 	if (haveright) {
 		/*
@@ -1575,8 +1659,7 @@ xfs_free_ag_extent(
 	/*
 	 * Now allocate and initialize a cursor for the by-size tree.
 	 */
-	cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL,
-		0);
+	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
 	/*
 	 * Have both left and right contiguous neighbors.
 	 * Merge all three into a single free block.
@@ -1588,7 +1671,7 @@ xfs_free_ag_extent(
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-		if ((error = xfs_alloc_delete(cnt_cur, &i)))
+		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 		/*
@@ -1597,19 +1680,19 @@ xfs_free_ag_extent(
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-		if ((error = xfs_alloc_delete(cnt_cur, &i)))
+		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 		/*
 		 * Delete the old by-block entry for the right block.
 		 */
-		if ((error = xfs_alloc_delete(bno_cur, &i)))
+		if ((error = xfs_btree_delete(bno_cur, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 		/*
 		 * Move the by-block cursor back to the left neighbor.
 		 */
-		if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 #ifdef DEBUG
@@ -1648,14 +1731,14 @@ xfs_free_ag_extent(
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-		if ((error = xfs_alloc_delete(cnt_cur, &i)))
+		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 		/*
 		 * Back up the by-block cursor to the left neighbor, and
 		 * update its length.
 		 */
-		if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
+		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 		nbno = ltbno;
@@ -1674,7 +1757,7 @@ xfs_free_ag_extent(
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-		if ((error = xfs_alloc_delete(cnt_cur, &i)))
+		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 		/*
@@ -1693,7 +1776,7 @@ xfs_free_ag_extent(
 	else {
 		nbno = bno;
 		nlen = len;
-		if ((error = xfs_alloc_insert(bno_cur, &i)))
+		if ((error = xfs_btree_insert(bno_cur, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 	}
@@ -1705,7 +1788,7 @@ xfs_free_ag_extent(
 	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
 		goto error0;
 	XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
-	if ((error = xfs_alloc_insert(cnt_cur, &i)))
+	if ((error = xfs_btree_insert(cnt_cur, &i)))
 		goto error0;
 	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -2150,51 +2233,83 @@ xfs_alloc_put_freelist(
  * Read in the allocation group header (free/alloc section).
  */
 int					/* error */
-xfs_alloc_read_agf(
-	xfs_mount_t	*mp,		/* mount point structure */
-	xfs_trans_t	*tp,		/* transaction pointer */
+xfs_read_agf(
+	struct xfs_mount	*mp,	/* mount point structure */
+	struct xfs_trans	*tp,	/* transaction pointer */
 	xfs_agnumber_t		agno,	/* allocation group number */
-	int		flags,		/* XFS_ALLOC_FLAG_... */
-	xfs_buf_t	**bpp)		/* buffer for the ag freelist header */
+	int			flags,	/* XFS_BUF_ */
+	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
 {
-	xfs_agf_t	*agf;		/* ag freelist header */
+	struct xfs_agf	*agf;		/* ag freelist header */
 	int		agf_ok;		/* set if agf is consistent */
-	xfs_buf_t	*bp;		/* return value */
-	xfs_perag_t	*pag;		/* per allocation group data */
 	int		error;
 
 	ASSERT(agno != NULLAGNUMBER);
 	error = xfs_trans_read_buf(
 			mp, tp, mp->m_ddev_targp,
 			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
-			XFS_FSS_TO_BB(mp, 1),
-			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
-			&bp);
+			XFS_FSS_TO_BB(mp, 1), flags, bpp);
 	if (error)
 		return error;
-	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
-	if (!bp) {
-		*bpp = NULL;
+	if (!*bpp)
 		return 0;
-	}
+
+	ASSERT(!XFS_BUF_GETERROR(*bpp));
+	agf = XFS_BUF_TO_AGF(*bpp);
+
 	/*
 	 * Validate the magic number of the agf block.
 	 */
-	agf = XFS_BUF_TO_AGF(bp);
 	agf_ok =
 		be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
 		XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
 		be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
 		be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
 		be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
-		be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
+		be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
+		be32_to_cpu(agf->agf_seqno) == agno;
+	if (xfs_sb_version_haslazysbcount(&mp->m_sb))
+		agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
+				be32_to_cpu(agf->agf_length);
 	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
 			XFS_RANDOM_ALLOC_READ_AGF))) {
 		XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
 				     XFS_ERRLEVEL_LOW, mp, agf);
-		xfs_trans_brelse(tp, bp);
+		xfs_trans_brelse(tp, *bpp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
+
+	XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
+	return 0;
+}
+
+/*
+ * Read in the allocation group header (free/alloc section).
+ */
+int					/* error */
+xfs_alloc_read_agf(
+	struct xfs_mount	*mp,	/* mount point structure */
+	struct xfs_trans	*tp,	/* transaction pointer */
+	xfs_agnumber_t		agno,	/* allocation group number */
+	int			flags,	/* XFS_ALLOC_FLAG_... */
+	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
+{
+	struct xfs_agf		*agf;	/* ag freelist header */
+	struct xfs_perag	*pag;	/* per allocation group data */
+	int			error;
+
+	ASSERT(agno != NULLAGNUMBER);
+
+	error = xfs_read_agf(mp, tp, agno,
+			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0,
+			bpp);
+	if (error)
+		return error;
+	if (!*bpp)
+		return 0;
+	ASSERT(!XFS_BUF_GETERROR(*bpp));
+
+	agf = XFS_BUF_TO_AGF(*bpp);
 	pag = &mp->m_perag[agno];
 	if (!pag->pagf_init) {
 		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
@@ -2213,6 +2328,7 @@ xfs_alloc_read_agf(
 #ifdef DEBUG
 	else if (!XFS_FORCED_SHUTDOWN(mp)) {
 		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
+		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
 		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
 		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
 		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
@@ -2221,8 +2337,6 @@ xfs_alloc_read_agf(
 		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
 	}
 #endif
-	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
-	*bpp = bp;
 	return 0;
 }
 
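The rework above splits the old monolithic xfs_alloc_read_agf into
xfs_read_agf, which does the raw read plus sanity checks on the on-disk AGF,
and a wrapper that additionally primes the in-core per-AG structure on first
use. A minimal userspace model of that layering, with all names illustrative:

	#include <stdbool.h>

	struct agf { unsigned magic, seqno, freeblks, length; };
	struct perag { bool init; unsigned freeblks; };

	#define AGF_MAGIC 0x58414746	/* "XAGF" */

	/* Layer 1: fetch the header and verify its self-consistency. */
	static int read_agf(const struct agf *disk, unsigned agno,
			    const struct agf **out)
	{
		if (disk->magic != AGF_MAGIC ||
		    disk->seqno != agno ||
		    disk->freeblks > disk->length)
			return -1;	/* corrupted */
		*out = disk;
		return 0;
	}

	/* Layer 2: same read, plus one-time in-core per-AG initialisation. */
	static int alloc_read_agf(const struct agf *disk, unsigned agno,
				  struct perag *pag, const struct agf **out)
	{
		int error = read_agf(disk, agno, out);

		if (error)
			return error;
		if (!pag->init) {
			pag->freeblks = (*out)->freeblks;
			pag->init = true;
		}
		return 0;
	}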
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 5aec15d0651e..588172796f7b 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -121,6 +121,19 @@ extern ktrace_t *xfs_alloc_trace_buf;
 #define	XFS_ALLOC_KTRACE_BUSYSEARCH	6
 #endif
 
+void
+xfs_alloc_mark_busy(xfs_trans_t *tp,
+		xfs_agnumber_t agno,
+		xfs_agblock_t bno,
+		xfs_extlen_t len);
+
+void
+xfs_alloc_clear_busy(xfs_trans_t *tp,
+		xfs_agnumber_t ag,
+		int idx);
+
+#endif	/* __KERNEL__ */
+
 /*
  * Compute and fill in value of m_ag_maxlevels.
  */
@@ -196,18 +209,4 @@ xfs_free_extent(
 	xfs_fsblock_t	bno,	/* starting block number of extent */
 	xfs_extlen_t	len);	/* length of extent */
 
-void
-xfs_alloc_mark_busy(xfs_trans_t *tp,
-		xfs_agnumber_t agno,
-		xfs_agblock_t bno,
-		xfs_extlen_t len);
-
-void
-xfs_alloc_clear_busy(xfs_trans_t *tp,
-		xfs_agnumber_t ag,
-		int idx);
-
-
-#endif	/* __KERNEL__ */
-
 #endif	/* __XFS_ALLOC_H__ */
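The two hunks above move the busy-extent prototypes up so they sit inside the
__KERNEL__ guard, keeping kernel-only interfaces out of sight for userspace
consumers of the header. A generic sketch of that header layout (illustrative
names, not this header):

	/*
	 * Shared on-disk definitions stay visible everywhere; prototypes for
	 * kernel-internal helpers stay inside the __KERNEL__ guard so tools
	 * like xfsprogs can include the header without seeing them.
	 */
	#ifndef __EXAMPLE_H__
	#define __EXAMPLE_H__

	struct ondisk_rec { unsigned start, len; };	/* shared with userspace */

	#ifdef __KERNEL__
	void mark_busy(struct ondisk_rec *rec);		/* kernel-internal only */
	#endif	/* __KERNEL__ */

	#endif	/* __EXAMPLE_H__ */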
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3ce2645508ae..733cb75a8c5d 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -35,2177 +35,464 @@
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_btree.h" 37#include "xfs_btree.h"
38#include "xfs_btree_trace.h"
38#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 40#include "xfs_alloc.h"
40#include "xfs_error.h" 41#include "xfs_error.h"
41 42
42/*
43 * Prototypes for internal functions.
44 */
45 43
46STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); 44STATIC struct xfs_btree_cur *
47STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); 45xfs_allocbt_dup_cursor(
48STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); 46 struct xfs_btree_cur *cur)
49STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); 47{
50STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *); 48 return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
51STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); 49 cur->bc_private.a.agbp, cur->bc_private.a.agno,
52STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *); 50 cur->bc_btnum);
53STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, 51}
54 xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
55STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);
56 52
57/* 53STATIC void
58 * Internal functions. 54xfs_allocbt_set_root(
59 */ 55 struct xfs_btree_cur *cur,
56 union xfs_btree_ptr *ptr,
57 int inc)
58{
59 struct xfs_buf *agbp = cur->bc_private.a.agbp;
60 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
61 xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
62 int btnum = cur->bc_btnum;
60 63
61/* 64 ASSERT(ptr->s != 0);
62 * Single level of the xfs_alloc_delete record deletion routine. 65
63 * Delete record pointed to by cur/level. 66 agf->agf_roots[btnum] = ptr->s;
64 * Remove the record from its block then rebalance the tree. 67 be32_add_cpu(&agf->agf_levels[btnum], inc);
65 * Return 0 for error, 1 for done, 2 to go on to the next level. 68 cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc;
66 */ 69
67STATIC int /* error */ 70 xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
68xfs_alloc_delrec( 71}
69 xfs_btree_cur_t *cur, /* btree cursor */ 72
70 int level, /* level removing record from */ 73STATIC int
71 int *stat) /* fail/done/go-on */ 74xfs_allocbt_alloc_block(
75 struct xfs_btree_cur *cur,
76 union xfs_btree_ptr *start,
77 union xfs_btree_ptr *new,
78 int length,
79 int *stat)
72{ 80{
73 xfs_agf_t *agf; /* allocation group freelist header */ 81 int error;
74 xfs_alloc_block_t *block; /* btree block record/key lives in */ 82 xfs_agblock_t bno;
75 xfs_agblock_t bno; /* btree block number */
76 xfs_buf_t *bp; /* buffer for block */
77 int error; /* error return value */
78 int i; /* loop index */
79 xfs_alloc_key_t key; /* kp points here if block is level 0 */
80 xfs_agblock_t lbno; /* left block's block number */
81 xfs_buf_t *lbp; /* left block's buffer pointer */
82 xfs_alloc_block_t *left; /* left btree block */
83 xfs_alloc_key_t *lkp=NULL; /* left block key pointer */
84 xfs_alloc_ptr_t *lpp=NULL; /* left block address pointer */
85 int lrecs=0; /* number of records in left block */
86 xfs_alloc_rec_t *lrp; /* left block record pointer */
87 xfs_mount_t *mp; /* mount structure */
88 int ptr; /* index in btree block for this rec */
89 xfs_agblock_t rbno; /* right block's block number */
90 xfs_buf_t *rbp; /* right block's buffer pointer */
91 xfs_alloc_block_t *right; /* right btree block */
92 xfs_alloc_key_t *rkp; /* right block key pointer */
93 xfs_alloc_ptr_t *rpp; /* right block address pointer */
94 int rrecs=0; /* number of records in right block */
95 int numrecs;
96 xfs_alloc_rec_t *rrp; /* right block record pointer */
97 xfs_btree_cur_t *tcur; /* temporary btree cursor */
98 83
99 /* 84 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
100 * Get the index of the entry being deleted, check for nothing there. 85
101 */ 86 /* Allocate the new block from the freelist. If we can't, give up. */
102 ptr = cur->bc_ptrs[level]; 87 error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
103 if (ptr == 0) { 88 &bno, 1);
104 *stat = 0; 89 if (error) {
105 return 0; 90 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
106 }
107 /*
108 * Get the buffer & block containing the record or key/ptr.
109 */
110 bp = cur->bc_bufs[level];
111 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
112#ifdef DEBUG
113 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
114 return error; 91 return error;
115#endif 92 }
116 /* 93
117 * Fail if we're off the end of the block. 94 if (bno == NULLAGBLOCK) {
118 */ 95 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
119 numrecs = be16_to_cpu(block->bb_numrecs);
120 if (ptr > numrecs) {
121 *stat = 0; 96 *stat = 0;
122 return 0; 97 return 0;
123 } 98 }
124 XFS_STATS_INC(xs_abt_delrec);
125 /*
126 * It's a nonleaf. Excise the key and ptr being deleted, by
127 * sliding the entries past them down one.
128 * Log the changed areas of the block.
129 */
130 if (level > 0) {
131 lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
132 lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
133#ifdef DEBUG
134 for (i = ptr; i < numrecs; i++) {
135 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
136 return error;
137 }
138#endif
139 if (ptr < numrecs) {
140 memmove(&lkp[ptr - 1], &lkp[ptr],
141 (numrecs - ptr) * sizeof(*lkp));
142 memmove(&lpp[ptr - 1], &lpp[ptr],
143 (numrecs - ptr) * sizeof(*lpp));
144 xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1);
145 xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1);
146 }
147 }
148 /*
149 * It's a leaf. Excise the record being deleted, by sliding the
150 * entries past it down one. Log the changed areas of the block.
151 */
152 else {
153 lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
154 if (ptr < numrecs) {
155 memmove(&lrp[ptr - 1], &lrp[ptr],
156 (numrecs - ptr) * sizeof(*lrp));
157 xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1);
158 }
159 /*
160 * If it's the first record in the block, we'll need a key
161 * structure to pass up to the next level (updkey).
162 */
163 if (ptr == 1) {
164 key.ar_startblock = lrp->ar_startblock;
165 key.ar_blockcount = lrp->ar_blockcount;
166 lkp = &key;
167 }
168 }
169 /*
170 * Decrement and log the number of entries in the block.
171 */
172 numrecs--;
173 block->bb_numrecs = cpu_to_be16(numrecs);
174 xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
175 /*
176 * See if the longest free extent in the allocation group was
177 * changed by this operation. True if it's the by-size btree, and
178 * this is the leaf level, and there is no right sibling block,
179 * and this was the last record.
180 */
181 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
182 mp = cur->bc_mp;
183 99
184 if (level == 0 && 100 xfs_trans_agbtree_delta(cur->bc_tp, 1);
185 cur->bc_btnum == XFS_BTNUM_CNT && 101 new->s = cpu_to_be32(bno);
186 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
187 ptr > numrecs) {
188 ASSERT(ptr == numrecs + 1);
189 /*
190 * There are still records in the block. Grab the size
191 * from the last one.
192 */
193 if (numrecs) {
194 rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur);
195 agf->agf_longest = rrp->ar_blockcount;
196 }
197 /*
198 * No free extents left.
199 */
200 else
201 agf->agf_longest = 0;
202 mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
203 be32_to_cpu(agf->agf_longest);
204 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
205 XFS_AGF_LONGEST);
206 }
207 /*
208 * Is this the root level? If so, we're almost done.
209 */
210 if (level == cur->bc_nlevels - 1) {
211 /*
212 * If this is the root level,
213 * and there's only one entry left,
214 * and it's NOT the leaf level,
215 * then we can get rid of this level.
216 */
217 if (numrecs == 1 && level > 0) {
218 /*
219 * lpp is still set to the first pointer in the block.
220 * Make it the new root of the btree.
221 */
222 bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
223 agf->agf_roots[cur->bc_btnum] = *lpp;
224 be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1);
225 mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
226 /*
227 * Put this buffer/block on the ag's freelist.
228 */
229 error = xfs_alloc_put_freelist(cur->bc_tp,
230 cur->bc_private.a.agbp, NULL, bno, 1);
231 if (error)
232 return error;
233 /*
234 * Since blocks move to the free list without the
235 * coordination used in xfs_bmap_finish, we can't allow
236 * block to be available for reallocation and
237 * non-transaction writing (user data) until we know
238 * that the transaction that moved it to the free list
239 * is permanently on disk. We track the blocks by
240 * declaring these blocks as "busy"; the busy list is
241 * maintained on a per-ag basis and each transaction
242 * records which entries should be removed when the
243 * iclog commits to disk. If a busy block is
244 * allocated, the iclog is pushed up to the LSN
245 * that freed the block.
246 */
247 xfs_alloc_mark_busy(cur->bc_tp,
248 be32_to_cpu(agf->agf_seqno), bno, 1);
249 102
250 xfs_trans_agbtree_delta(cur->bc_tp, -1); 103 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
251 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, 104 *stat = 1;
252 XFS_AGF_ROOTS | XFS_AGF_LEVELS); 105 return 0;
253 /* 106}
254 * Update the cursor so there's one fewer level.
255 */
256 xfs_btree_setbuf(cur, level, NULL);
257 cur->bc_nlevels--;
258 } else if (level > 0 &&
259 (error = xfs_alloc_decrement(cur, level, &i)))
260 return error;
261 *stat = 1;
262 return 0;
263 }
264 /*
265 * If we deleted the leftmost entry in the block, update the
266 * key values above us in the tree.
267 */
268 if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
269 return error;
270 /*
271 * If the number of records remaining in the block is at least
272 * the minimum, we're done.
273 */
274 if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
275 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
276 return error;
277 *stat = 1;
278 return 0;
279 }
280 /*
281 * Otherwise, we have to move some records around to keep the
282 * tree balanced. Look at the left and right sibling blocks to
283 * see if we can re-balance by moving only one record.
284 */
285 rbno = be32_to_cpu(block->bb_rightsib);
286 lbno = be32_to_cpu(block->bb_leftsib);
287 bno = NULLAGBLOCK;
288 ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
289 /*
290 * Duplicate the cursor so our btree manipulations here won't
291 * disrupt the next level up.
292 */
293 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
294 return error;
295 /*
296 * If there's a right sibling, see if it's ok to shift an entry
297 * out of it.
298 */
299 if (rbno != NULLAGBLOCK) {
300 /*
301 * Move the temp cursor to the last entry in the next block.
302 * Actually any entry but the first would suffice.
303 */
304 i = xfs_btree_lastrec(tcur, level);
305 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
306 if ((error = xfs_alloc_increment(tcur, level, &i)))
307 goto error0;
308 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
309 i = xfs_btree_lastrec(tcur, level);
310 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
311 /*
312 * Grab a pointer to the block.
313 */
314 rbp = tcur->bc_bufs[level];
315 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
316#ifdef DEBUG
317 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
318 goto error0;
319#endif
320 /*
321 * Grab the current block number, for future use.
322 */
323 bno = be32_to_cpu(right->bb_leftsib);
324 /*
325 * If right block is full enough so that removing one entry
326 * won't make it too empty, and left-shifting an entry out
327 * of right to us works, we're done.
328 */
329 if (be16_to_cpu(right->bb_numrecs) - 1 >=
330 XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
331 if ((error = xfs_alloc_lshift(tcur, level, &i)))
332 goto error0;
333 if (i) {
334 ASSERT(be16_to_cpu(block->bb_numrecs) >=
335 XFS_ALLOC_BLOCK_MINRECS(level, cur));
336 xfs_btree_del_cursor(tcur,
337 XFS_BTREE_NOERROR);
338 if (level > 0 &&
339 (error = xfs_alloc_decrement(cur, level,
340 &i)))
341 return error;
342 *stat = 1;
343 return 0;
344 }
345 }
346 /*
347 * Otherwise, grab the number of records in right for
348 * future reference, and fix up the temp cursor to point
349 * to our block again (last record).
350 */
351 rrecs = be16_to_cpu(right->bb_numrecs);
352 if (lbno != NULLAGBLOCK) {
353 i = xfs_btree_firstrec(tcur, level);
354 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
355 if ((error = xfs_alloc_decrement(tcur, level, &i)))
356 goto error0;
357 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
358 }
359 }
360 /*
361 * If there's a left sibling, see if it's ok to shift an entry
362 * out of it.
363 */
364 if (lbno != NULLAGBLOCK) {
365 /*
366 * Move the temp cursor to the first entry in the
367 * previous block.
368 */
369 i = xfs_btree_firstrec(tcur, level);
370 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
371 if ((error = xfs_alloc_decrement(tcur, level, &i)))
372 goto error0;
373 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
374 xfs_btree_firstrec(tcur, level);
375 /*
376 * Grab a pointer to the block.
377 */
378 lbp = tcur->bc_bufs[level];
379 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
380#ifdef DEBUG
381 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
382 goto error0;
383#endif
384 /*
385 * Grab the current block number, for future use.
386 */
387 bno = be32_to_cpu(left->bb_rightsib);
388 /*
389 * If left block is full enough so that removing one entry
390 * won't make it too empty, and right-shifting an entry out
391 * of left to us works, we're done.
392 */
393 if (be16_to_cpu(left->bb_numrecs) - 1 >=
394 XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
395 if ((error = xfs_alloc_rshift(tcur, level, &i)))
396 goto error0;
397 if (i) {
398 ASSERT(be16_to_cpu(block->bb_numrecs) >=
399 XFS_ALLOC_BLOCK_MINRECS(level, cur));
400 xfs_btree_del_cursor(tcur,
401 XFS_BTREE_NOERROR);
402 if (level == 0)
403 cur->bc_ptrs[0]++;
404 *stat = 1;
405 return 0;
406 }
407 }
408 /*
409 * Otherwise, grab the number of records in right for
410 * future reference.
411 */
412 lrecs = be16_to_cpu(left->bb_numrecs);
413 }
414 /*
415 * Delete the temp cursor, we're done with it.
416 */
417 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
418 /*
419 * If here, we need to do a join to keep the tree balanced.
420 */
421 ASSERT(bno != NULLAGBLOCK);
422 /*
423 * See if we can join with the left neighbor block.
424 */
425 if (lbno != NULLAGBLOCK &&
426 lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
427 /*
428 * Set "right" to be the starting block,
429 * "left" to be the left neighbor.
430 */
431 rbno = bno;
432 right = block;
433 rrecs = be16_to_cpu(right->bb_numrecs);
434 rbp = bp;
435 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
436 cur->bc_private.a.agno, lbno, 0, &lbp,
437 XFS_ALLOC_BTREE_REF)))
438 return error;
439 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
440 lrecs = be16_to_cpu(left->bb_numrecs);
441 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
442 return error;
443 }
444 /*
445 * If that won't work, see if we can join with the right neighbor block.
446 */
447 else if (rbno != NULLAGBLOCK &&
448 rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
449 /*
450 * Set "left" to be the starting block,
451 * "right" to be the right neighbor.
452 */
453 lbno = bno;
454 left = block;
455 lrecs = be16_to_cpu(left->bb_numrecs);
456 lbp = bp;
457 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
458 cur->bc_private.a.agno, rbno, 0, &rbp,
459 XFS_ALLOC_BTREE_REF)))
460 return error;
461 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
462 rrecs = be16_to_cpu(right->bb_numrecs);
463 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
464 return error;
465 }
466 /*
467 * Otherwise, we can't fix the imbalance.
468 * Just return. This is probably a logic error, but it's not fatal.
469 */
470 else {
471 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
472 return error;
473 *stat = 1;
474 return 0;
475 }
476 /*
477 * We're now going to join "left" and "right" by moving all the stuff
478 * in "right" to "left" and deleting "right".
479 */
480 if (level > 0) {
481 /*
482 * It's a non-leaf. Move keys and pointers.
483 */
484 lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur);
485 lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur);
486 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
487 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
488#ifdef DEBUG
489 for (i = 0; i < rrecs; i++) {
490 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
491 return error;
492 }
493#endif
494 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
495 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
496 xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
497 xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
498 } else {
499 /*
500 * It's a leaf. Move records.
501 */
502 lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur);
503 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
504 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
505 xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
506 }
507 /*
508 * If we joined with the left neighbor, set the buffer in the
509 * cursor to the left block, and fix up the index.
510 */
511 if (bp != lbp) {
512 xfs_btree_setbuf(cur, level, lbp);
513 cur->bc_ptrs[level] += lrecs;
514 }
515 /*
516 * If we joined with the right neighbor and there's a level above
517 * us, increment the cursor at that level.
518 */
519 else if (level + 1 < cur->bc_nlevels &&
520 (error = xfs_alloc_increment(cur, level + 1, &i)))
521 return error;
522 /*
523 * Fix up the number of records in the surviving block.
524 */
525 lrecs += rrecs;
526 left->bb_numrecs = cpu_to_be16(lrecs);
527 /*
528 * Fix up the right block pointer in the surviving block, and log it.
529 */
530 left->bb_rightsib = right->bb_rightsib;
531 xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
532 /*
533 * If there is a right sibling now, make it point to the
534 * remaining block.
535 */
536 if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
537 xfs_alloc_block_t *rrblock;
538 xfs_buf_t *rrbp;
539 107
540 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 108STATIC int
541 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, 109xfs_allocbt_free_block(
542 &rrbp, XFS_ALLOC_BTREE_REF))) 110 struct xfs_btree_cur *cur,
543 return error; 111 struct xfs_buf *bp)
544 rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); 112{
545 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) 113 struct xfs_buf *agbp = cur->bc_private.a.agbp;
546 return error; 114 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
547 rrblock->bb_leftsib = cpu_to_be32(lbno); 115 xfs_agblock_t bno;
548 xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); 116 int error;
549 } 117
550 /* 118 bno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(bp));
551 * Free the deleting block by putting it on the freelist. 119 error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
552 */
553 error = xfs_alloc_put_freelist(cur->bc_tp,
554 cur->bc_private.a.agbp, NULL, rbno, 1);
555 if (error) 120 if (error)
556 return error; 121 return error;
122
557 /* 123 /*
558 * Since blocks move to the free list without the coordination 124 * Since blocks move to the free list without the coordination used in
559 * used in xfs_bmap_finish, we can't allow block to be available 125 * xfs_bmap_finish, we can't allow block to be available for
560 * for reallocation and non-transaction writing (user data) 126 * reallocation and non-transaction writing (user data) until we know
561 * until we know that the transaction that moved it to the free 127 * that the transaction that moved it to the free list is permanently
562 * list is permanently on disk. We track the blocks by declaring 128 * on disk. We track the blocks by declaring these blocks as "busy";
563 * these blocks as "busy"; the busy list is maintained on a 129 * the busy list is maintained on a per-ag basis and each transaction
564 * per-ag basis and each transaction records which entries 130 * records which entries should be removed when the iclog commits to
565 * should be removed when the iclog commits to disk. If a 131 * disk. If a busy block is allocated, the iclog is pushed up to the
566 * busy block is allocated, the iclog is pushed up to the
567 * LSN that freed the block. 132 * LSN that freed the block.
568 */ 133 */
569 xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); 134 xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
570 xfs_trans_agbtree_delta(cur->bc_tp, -1); 135 xfs_trans_agbtree_delta(cur->bc_tp, -1);
571
572 /*
573 * Adjust the current level's cursor so that we're left referring
574 * to the right node, after we're done.
575 * If this leaves the ptr value 0 our caller will fix it up.
576 */
577 if (level > 0)
578 cur->bc_ptrs[level]--;
579 /*
580 * Return value means the next level up has something to do.
581 */
582 *stat = 2;
583 return 0; 136 return 0;
584
585error0:
586 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
587 return error;
588} 137}
589 138
-/*
- * Insert one record/level.  Return information to the caller
- * allowing the next level up to proceed if necessary.
- */
-STATIC int				/* error */
-xfs_alloc_insrec(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	int			level,	/* level to insert record at */
-	xfs_agblock_t		*bnop,	/* i/o: block number inserted */
-	xfs_alloc_rec_t		*recp,	/* i/o: record data inserted */
-	xfs_btree_cur_t		**curp,	/* output: new cursor replacing cur */
-	int			*stat)	/* output: success/failure */
-{
-	xfs_agf_t		*agf;	/* allocation group freelist header */
-	xfs_alloc_block_t	*block;	/* btree block record/key lives in */
-	xfs_buf_t		*bp;	/* buffer for block */
-	int			error;	/* error return value */
-	int			i;	/* loop index */
-	xfs_alloc_key_t		key;	/* key value being inserted */
-	xfs_alloc_key_t		*kp;	/* pointer to btree keys */
-	xfs_agblock_t		nbno;	/* block number of allocated block */
-	xfs_btree_cur_t		*ncur;	/* new cursor to be used at next lvl */
-	xfs_alloc_key_t		nkey;	/* new key value, from split */
-	xfs_alloc_rec_t		nrec;	/* new record value, for caller */
-	int			numrecs;
-	int			optr;	/* old ptr value */
-	xfs_alloc_ptr_t		*pp;	/* pointer to btree addresses */
-	int			ptr;	/* index in btree block for this rec */
-	xfs_alloc_rec_t		*rp;	/* pointer to btree records */
-
-	ASSERT(be32_to_cpu(recp->ar_blockcount) > 0);
-
-	/*
-	 * GCC doesn't understand the (arguably complex) control flow in
-	 * this function and complains about uninitialized structure fields
-	 * without this.
-	 */
-	memset(&nrec, 0, sizeof(nrec));
-
-	/*
-	 * If we made it to the root level, allocate a new root block
-	 * and we're done.
-	 */
-	if (level >= cur->bc_nlevels) {
-		XFS_STATS_INC(xs_abt_insrec);
-		if ((error = xfs_alloc_newroot(cur, &i)))
-			return error;
-		*bnop = NULLAGBLOCK;
-		*stat = i;
-		return 0;
-	}
-	/*
-	 * Make a key out of the record data to be inserted, and save it.
-	 */
-	key.ar_startblock = recp->ar_startblock;
-	key.ar_blockcount = recp->ar_blockcount;
-	optr = ptr = cur->bc_ptrs[level];
-	/*
-	 * If we're off the left edge, return failure.
-	 */
-	if (ptr == 0) {
-		*stat = 0;
-		return 0;
-	}
-	XFS_STATS_INC(xs_abt_insrec);
-	/*
-	 * Get pointers to the btree buffer and block.
-	 */
-	bp = cur->bc_bufs[level];
-	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-	numrecs = be16_to_cpu(block->bb_numrecs);
-#ifdef DEBUG
-	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
-		return error;
-	/*
-	 * Check that the new entry is being inserted in the right place.
-	 */
-	if (ptr <= numrecs) {
-		if (level == 0) {
-			rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
-			xfs_btree_check_rec(cur->bc_btnum, recp, rp);
-		} else {
-			kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
-			xfs_btree_check_key(cur->bc_btnum, &key, kp);
-		}
-	}
-#endif
-	nbno = NULLAGBLOCK;
-	ncur = NULL;
-	/*
-	 * If the block is full, we can't insert the new entry until we
-	 * make the block un-full.
-	 */
-	if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
-		/*
-		 * First, try shifting an entry to the right neighbor.
-		 */
-		if ((error = xfs_alloc_rshift(cur, level, &i)))
-			return error;
-		if (i) {
-			/* nothing */
-		}
-		/*
-		 * Next, try shifting an entry to the left neighbor.
-		 */
-		else {
-			if ((error = xfs_alloc_lshift(cur, level, &i)))
-				return error;
-			if (i)
-				optr = ptr = cur->bc_ptrs[level];
-			else {
-				/*
-				 * Next, try splitting the current block in
-				 * half. If this works we have to re-set our
-				 * variables because we could be in a
-				 * different block now.
-				 */
-				if ((error = xfs_alloc_split(cur, level, &nbno,
-						&nkey, &ncur, &i)))
-					return error;
-				if (i) {
-					bp = cur->bc_bufs[level];
-					block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
-					if ((error =
-						xfs_btree_check_sblock(cur,
-							block, level, bp)))
-						return error;
-#endif
-					ptr = cur->bc_ptrs[level];
-					nrec.ar_startblock = nkey.ar_startblock;
-					nrec.ar_blockcount = nkey.ar_blockcount;
-				}
-				/*
-				 * Otherwise the insert fails.
-				 */
-				else {
-					*stat = 0;
-					return 0;
-				}
-			}
-		}
-	}
-	/*
-	 * At this point we know there's room for our new entry in the block
-	 * we're pointing at.
-	 */
-	numrecs = be16_to_cpu(block->bb_numrecs);
-	if (level > 0) {
-		/*
-		 * It's a non-leaf entry. Make a hole for the new data
-		 * in the key and ptr regions of the block.
-		 */
-		kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
-		pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
-#ifdef DEBUG
-		for (i = numrecs; i >= ptr; i--) {
-			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
-				return error;
-		}
-#endif
-		memmove(&kp[ptr], &kp[ptr - 1],
-			(numrecs - ptr + 1) * sizeof(*kp));
-		memmove(&pp[ptr], &pp[ptr - 1],
-			(numrecs - ptr + 1) * sizeof(*pp));
-#ifdef DEBUG
-		if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
-			return error;
-#endif
-		/*
-		 * Now stuff the new data in, bump numrecs and log the new data.
-		 */
-		kp[ptr - 1] = key;
-		pp[ptr - 1] = cpu_to_be32(*bnop);
-		numrecs++;
-		block->bb_numrecs = cpu_to_be16(numrecs);
-		xfs_alloc_log_keys(cur, bp, ptr, numrecs);
-		xfs_alloc_log_ptrs(cur, bp, ptr, numrecs);
-#ifdef DEBUG
-		if (ptr < numrecs)
-			xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
-				kp + ptr);
-#endif
-	} else {
-		/*
-		 * It's a leaf entry. Make a hole for the new record.
-		 */
-		rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
-		memmove(&rp[ptr], &rp[ptr - 1],
-			(numrecs - ptr + 1) * sizeof(*rp));
-		/*
-		 * Now stuff the new record in, bump numrecs
-		 * and log the new data.
-		 */
-		rp[ptr - 1] = *recp;
-		numrecs++;
-		block->bb_numrecs = cpu_to_be16(numrecs);
-		xfs_alloc_log_recs(cur, bp, ptr, numrecs);
-#ifdef DEBUG
-		if (ptr < numrecs)
-			xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
-				rp + ptr);
-#endif
-	}
-	/*
-	 * Log the new number of records in the btree header.
-	 */
-	xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
-	/*
-	 * If we inserted at the start of a block, update the parents' keys.
-	 */
-	if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
-		return error;
-	/*
-	 * Look to see if the longest extent in the allocation group
-	 * needs to be updated.
-	 */
-
-	agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
-	if (level == 0 &&
-	    cur->bc_btnum == XFS_BTNUM_CNT &&
-	    be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
-	    be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
-		/*
-		 * If this is a leaf in the by-size btree and there
-		 * is no right sibling block and this block is bigger
-		 * than the previous longest block, update it.
-		 */
-		agf->agf_longest = recp->ar_blockcount;
-		cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
-			= be32_to_cpu(recp->ar_blockcount);
-		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
-			XFS_AGF_LONGEST);
-	}
-	/*
-	 * Return the new block number, if any.
-	 * If there is one, give back a record value and a cursor too.
-	 */
-	*bnop = nbno;
-	if (nbno != NULLAGBLOCK) {
-		*recp = nrec;
-		*curp = ncur;
-	}
-	*stat = 1;
-	return 0;
-}
+/*
+ * Update the longest extent in the AGF.
+ */
+STATIC void
+xfs_allocbt_update_lastrec(
+	struct xfs_btree_cur	*cur,
+	struct xfs_btree_block	*block,
+	union xfs_btree_rec	*rec,
+	int			ptr,
+	int			reason)
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+	xfs_agnumber_t		seqno = be32_to_cpu(agf->agf_seqno);
+	__be32			len;
+	int			numrecs;
+
+	ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
+
+	switch (reason) {
+	case LASTREC_UPDATE:
+		/*
+		 * If this is the last leaf block and it's the last record,
+		 * then update the size of the longest extent in the AG.
+		 */
+		if (ptr != xfs_btree_get_numrecs(block))
+			return;
+		len = rec->alloc.ar_blockcount;
+		break;
+	case LASTREC_INSREC:
+		if (be32_to_cpu(rec->alloc.ar_blockcount) <=
+		    be32_to_cpu(agf->agf_longest))
+			return;
+		len = rec->alloc.ar_blockcount;
+		break;
+	case LASTREC_DELREC:
+		numrecs = xfs_btree_get_numrecs(block);
+		if (ptr <= numrecs)
+			return;
+		ASSERT(ptr == numrecs + 1);
+
+		if (numrecs) {
+			xfs_alloc_rec_t *rrp;
+
+			rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs);
+			len = rrp->ar_blockcount;
+		} else {
+			len = 0;
+		}
+
+		break;
+	default:
+		ASSERT(0);
+		return;
+	}
+
+	agf->agf_longest = len;
+	cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len);
+	xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST);
+}

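The lastrec hooks are sufficient because the by-count tree orders records by extent length: the final record of the final leaf is, by construction, the largest free extent in the AG. A hedged helper sketch (not part of this patch) of the invariant the code relies on:

/* Hedged sketch: the cntbt invariant behind the LASTREC cases above. */
static inline xfs_extlen_t
ag_longest_from_last_leaf(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*block)	/* rightmost cntbt leaf */
{
	int	numrecs = xfs_btree_get_numrecs(block);

	if (!numrecs)		/* no free space left in this AG */
		return 0;
	return be32_to_cpu(XFS_ALLOC_REC_ADDR(mp, block,
					      numrecs)->ar_blockcount);
}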
-/*
- * Log header fields from a btree block.
- */
-STATIC void
-xfs_alloc_log_block(
-	xfs_trans_t		*tp,	/* transaction pointer */
-	xfs_buf_t		*bp,	/* buffer containing btree block */
-	int			fields)	/* mask of fields: XFS_BB_... */
-{
-	int			first;	/* first byte offset logged */
-	int			last;	/* last byte offset logged */
-	static const short	offsets[] = {	/* table of offsets */
-		offsetof(xfs_alloc_block_t, bb_magic),
-		offsetof(xfs_alloc_block_t, bb_level),
-		offsetof(xfs_alloc_block_t, bb_numrecs),
-		offsetof(xfs_alloc_block_t, bb_leftsib),
-		offsetof(xfs_alloc_block_t, bb_rightsib),
-		sizeof(xfs_alloc_block_t)
-	};
-
-	xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
-	xfs_trans_log_buf(tp, bp, first, last);
-}
+STATIC int
+xfs_allocbt_get_minrecs(
+	struct xfs_btree_cur	*cur,
+	int			level)
+{
+	return cur->bc_mp->m_alloc_mnr[level != 0];
+}
+
+STATIC int
+xfs_allocbt_get_maxrecs(
+	struct xfs_btree_cur	*cur,
+	int			level)
+{
+	return cur->bc_mp->m_alloc_mxr[level != 0];
+}

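m_alloc_mnr[] and m_alloc_mxr[] are precomputed per mount; index 0 covers leaf blocks and index 1 covers node blocks. A sketch of how those arrays are typically filled at mount time (the mount-path code is not part of this hunk, so treat the exact form as an assumption):

/* Hedged sketch of the mount-time precomputation. */
mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); /* leaf */
mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); /* node */
mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;	/* half-full minimum */
mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;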
-/*
- * Log keys from a btree block (nonleaf).
- */
-STATIC void
-xfs_alloc_log_keys(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	xfs_buf_t		*bp,	/* buffer containing btree block */
-	int			kfirst,	/* index of first key to log */
-	int			klast)	/* index of last key to log */
-{
-	xfs_alloc_block_t	*block;	/* btree block to log from */
-	int			first;	/* first byte offset logged */
-	xfs_alloc_key_t		*kp;	/* key pointer in btree block */
-	int			last;	/* last byte offset logged */
-
-	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-	kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
-	first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
-	last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
-	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
-}
+STATIC void
+xfs_allocbt_init_key_from_rec(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	ASSERT(rec->alloc.ar_startblock != 0);
+
+	key->alloc.ar_startblock = rec->alloc.ar_startblock;
+	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
+}

-/*
- * Log block pointer fields from a btree block (nonleaf).
- */
-STATIC void
-xfs_alloc_log_ptrs(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	xfs_buf_t		*bp,	/* buffer containing btree block */
-	int			pfirst,	/* index of first pointer to log */
-	int			plast)	/* index of last pointer to log */
-{
-	xfs_alloc_block_t	*block;	/* btree block to log from */
-	int			first;	/* first byte offset logged */
-	int			last;	/* last byte offset logged */
-	xfs_alloc_ptr_t		*pp;	/* block-pointer pointer in btree blk */
-
-	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-	pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
-	first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
-	last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
-	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
-}
+STATIC void
+xfs_allocbt_init_rec_from_key(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	ASSERT(key->alloc.ar_startblock != 0);
+
+	rec->alloc.ar_startblock = key->alloc.ar_startblock;
+	rec->alloc.ar_blockcount = key->alloc.ar_blockcount;
+}

-/*
- * Log records from a btree block (leaf).
- */
-STATIC void
-xfs_alloc_log_recs(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	xfs_buf_t		*bp,	/* buffer containing btree block */
-	int			rfirst,	/* index of first record to log */
-	int			rlast)	/* index of last record to log */
-{
-	xfs_alloc_block_t	*block;	/* btree block to log from */
-	int			first;	/* first byte offset logged */
-	int			last;	/* last byte offset logged */
-	xfs_alloc_rec_t		*rp;	/* record pointer for btree block */
-
-
-	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-	rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
-#ifdef DEBUG
-	{
-		xfs_agf_t	*agf;
-		xfs_alloc_rec_t	*p;
-
-		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
-		for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
-			ASSERT(be32_to_cpu(p->ar_startblock) +
-			       be32_to_cpu(p->ar_blockcount) <=
-			       be32_to_cpu(agf->agf_length));
-	}
-#endif
-	first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
-	last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
-	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
-}
+STATIC void
+xfs_allocbt_init_rec_from_cur(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_rec	*rec)
+{
+	ASSERT(cur->bc_rec.a.ar_startblock != 0);
+
+	rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
+	rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
+}

-/*
- * Lookup the record.  The cursor is made to point to it, based on dir.
- * Return 0 if can't find any such record, 1 for success.
- */
-STATIC int				/* error */
-xfs_alloc_lookup(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	xfs_lookup_t		dir,	/* <=, ==, or >= */
-	int			*stat)	/* success/failure */
-{
-	xfs_agblock_t		agbno;	/* a.g. relative btree block number */
-	xfs_agnumber_t		agno;	/* allocation group number */
-	xfs_alloc_block_t	*block = NULL;	/* current btree block */
-	int			diff;	/* difference for the current key */
-	int			error;	/* error return value */
-	int			keyno = 0;	/* current key number */
-	int			level;	/* level in the btree */
-	xfs_mount_t		*mp;	/* file system mount point */
-
-	XFS_STATS_INC(xs_abt_lookup);
-	/*
-	 * Get the allocation group header, and the root block number.
-	 */
-	mp = cur->bc_mp;
-
-	{
-		xfs_agf_t	*agf;	/* a.g. freespace header */
-
-		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
-		agno = be32_to_cpu(agf->agf_seqno);
-		agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
-	}
-	/*
-	 * Iterate over each level in the btree, starting at the root.
-	 * For each level above the leaves, find the key we need, based
-	 * on the lookup record, then follow the corresponding block
-	 * pointer down to the next level.
-	 */
-	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
-		xfs_buf_t	*bp;	/* buffer pointer for btree block */
-		xfs_daddr_t	d;	/* disk address of btree block */
-
-		/*
-		 * Get the disk address we're looking for.
-		 */
-		d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-		/*
-		 * If the old buffer at this level is for a different block,
-		 * throw it away, otherwise just use it.
-		 */
-		bp = cur->bc_bufs[level];
-		if (bp && XFS_BUF_ADDR(bp) != d)
-			bp = NULL;
-		if (!bp) {
-			/*
-			 * Need to get a new buffer.  Read it, then
-			 * set it in the cursor, releasing the old one.
-			 */
-			if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
-					agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
-				return error;
-			xfs_btree_setbuf(cur, level, bp);
-			/*
-			 * Point to the btree block, now that we have the buffer
-			 */
-			block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-			if ((error = xfs_btree_check_sblock(cur, block, level,
-					bp)))
-				return error;
-		} else
-			block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-		/*
-		 * If we already had a key match at a higher level, we know
-		 * we need to use the first entry in this block.
-		 */
-		if (diff == 0)
-			keyno = 1;
-		/*
-		 * Otherwise we need to search this block.  Do a binary search.
-		 */
-		else {
-			int		high;	/* high entry number */
-			xfs_alloc_key_t	*kkbase = NULL;	/* base of keys in block */
-			xfs_alloc_rec_t	*krbase = NULL;	/* base of records in block */
-			int		low;	/* low entry number */
-
-			/*
-			 * Get a pointer to keys or records.
-			 */
-			if (level > 0)
-				kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
-			else
-				krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
-			/*
-			 * Set low and high entry numbers, 1-based.
-			 */
-			low = 1;
-			if (!(high = be16_to_cpu(block->bb_numrecs))) {
-				/*
-				 * If the block is empty, the tree must
-				 * be an empty leaf.
-				 */
-				ASSERT(level == 0 && cur->bc_nlevels == 1);
-				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
-				*stat = 0;
-				return 0;
-			}
-			/*
-			 * Binary search the block.
-			 */
-			while (low <= high) {
-				xfs_extlen_t	blockcount;	/* key value */
-				xfs_agblock_t	startblock;	/* key value */
-
-				XFS_STATS_INC(xs_abt_compare);
-				/*
-				 * keyno is average of low and high.
-				 */
-				keyno = (low + high) >> 1;
-				/*
-				 * Get startblock & blockcount.
-				 */
-				if (level > 0) {
-					xfs_alloc_key_t	*kkp;
-
-					kkp = kkbase + keyno - 1;
-					startblock = be32_to_cpu(kkp->ar_startblock);
-					blockcount = be32_to_cpu(kkp->ar_blockcount);
-				} else {
-					xfs_alloc_rec_t	*krp;
-
-					krp = krbase + keyno - 1;
-					startblock = be32_to_cpu(krp->ar_startblock);
-					blockcount = be32_to_cpu(krp->ar_blockcount);
-				}
-				/*
-				 * Compute difference to get next direction.
-				 */
-				if (cur->bc_btnum == XFS_BTNUM_BNO)
-					diff = (int)startblock -
-					       (int)cur->bc_rec.a.ar_startblock;
-				else if (!(diff = (int)blockcount -
-					    (int)cur->bc_rec.a.ar_blockcount))
-					diff = (int)startblock -
-					    (int)cur->bc_rec.a.ar_startblock;
-				/*
-				 * Less than, move right.
-				 */
-				if (diff < 0)
-					low = keyno + 1;
-				/*
-				 * Greater than, move left.
-				 */
-				else if (diff > 0)
-					high = keyno - 1;
-				/*
-				 * Equal, we're done.
-				 */
-				else
-					break;
-			}
-		}
-		/*
-		 * If there are more levels, set up for the next level
-		 * by getting the block number and filling in the cursor.
-		 */
-		if (level > 0) {
-			/*
-			 * If we moved left, need the previous key number,
-			 * unless there isn't one.
-			 */
-			if (diff > 0 && --keyno < 1)
-				keyno = 1;
-			agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
-#ifdef DEBUG
-			if ((error = xfs_btree_check_sptr(cur, agbno, level)))
-				return error;
-#endif
-			cur->bc_ptrs[level] = keyno;
-		}
-	}
-	/*
-	 * Done with the search.
-	 * See if we need to adjust the results.
-	 */
-	if (dir != XFS_LOOKUP_LE && diff < 0) {
-		keyno++;
-		/*
-		 * If ge search and we went off the end of the block, but it's
-		 * not the last block, we're in the wrong block.
-		 */
-		if (dir == XFS_LOOKUP_GE &&
-		    keyno > be16_to_cpu(block->bb_numrecs) &&
-		    be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
-			int	i;
-
-			cur->bc_ptrs[0] = keyno;
-			if ((error = xfs_alloc_increment(cur, 0, &i)))
-				return error;
-			XFS_WANT_CORRUPTED_RETURN(i == 1);
-			*stat = 1;
-			return 0;
-		}
-	}
-	else if (dir == XFS_LOOKUP_LE && diff > 0)
-		keyno--;
-	cur->bc_ptrs[0] = keyno;
-	/*
-	 * Return if we succeeded or not.
-	 */
-	if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
-		*stat = 0;
-	else
-		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
-	return 0;
-}
+STATIC void
+xfs_allocbt_init_ptr_from_cur(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+
+	ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+	ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
+
+	ptr->s = agf->agf_roots[cur->bc_btnum];
+}

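Under the new scheme the hand-rolled lookup removed above is replaced by a single generic xfs_btree_lookup() that binary-searches each level and asks the per-tree key_diff callback which way to move. A condensed sketch of that inner loop, assuming the generic code's xfs_lookup_get_search_key() helper (not part of this hunk):

/* Condensed sketch of the generic per-block binary search. */
while (low <= high) {
	union xfs_btree_key	key;
	union xfs_btree_key	*kp;

	keyno = (low + high) >> 1;
	kp = xfs_lookup_get_search_key(cur, level, keyno, block, &key);
	diff = cur->bc_ops->key_diff(cur, kp);
	if (diff < 0)
		low = keyno + 1;	/* block key < search key: go right */
	else if (diff > 0)
		high = keyno - 1;	/* block key > search key: go left */
	else
		break;			/* exact match */
}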
-/*
- * Move 1 record left from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int				/* error */
-xfs_alloc_lshift(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	int			level,	/* level to shift record on */
-	int			*stat)	/* success/failure */
-{
-	int			error;	/* error return value */
-#ifdef DEBUG
-	int			i;	/* loop index */
-#endif
-	xfs_alloc_key_t		key;	/* key value for leaf level upward */
-	xfs_buf_t		*lbp;	/* buffer for left neighbor block */
-	xfs_alloc_block_t	*left;	/* left neighbor btree block */
-	int			nrec;	/* new number of left block entries */
-	xfs_buf_t		*rbp;	/* buffer for right (current) block */
-	xfs_alloc_block_t	*right;	/* right (current) btree block */
-	xfs_alloc_key_t		*rkp = NULL;	/* key pointer for right block */
-	xfs_alloc_ptr_t		*rpp = NULL;	/* address pointer for right block */
-	xfs_alloc_rec_t		*rrp = NULL;	/* record pointer for right block */
-
-	/*
-	 * Set up variables for this block as "right".
-	 */
-	rbp = cur->bc_bufs[level];
-	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-#ifdef DEBUG
-	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
-		return error;
-#endif
-	/*
-	 * If we've got no left sibling then we can't shift an entry left.
-	 */
-	if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
-		*stat = 0;
-		return 0;
-	}
-	/*
-	 * If the cursor entry is the one that would be moved, don't
-	 * do it... it's too complicated.
-	 */
-	if (cur->bc_ptrs[level] <= 1) {
-		*stat = 0;
-		return 0;
-	}
-	/*
-	 * Set up the left neighbor as "left".
-	 */
-	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-			cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
-			0, &lbp, XFS_ALLOC_BTREE_REF)))
-		return error;
-	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
-		return error;
-	/*
-	 * If it's full, it can't take another entry.
-	 */
-	if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
-		*stat = 0;
-		return 0;
-	}
-	nrec = be16_to_cpu(left->bb_numrecs) + 1;
-	/*
-	 * If non-leaf, copy a key and a ptr to the left block.
-	 */
-	if (level > 0) {
-		xfs_alloc_key_t	*lkp;	/* key pointer for left block */
-		xfs_alloc_ptr_t	*lpp;	/* address pointer for left block */
-
-		lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
-		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
-		*lkp = *rkp;
-		xfs_alloc_log_keys(cur, lbp, nrec, nrec);
-		lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
-		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
-		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
-			return error;
-#endif
-		*lpp = *rpp;
-		xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
-		xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
-	}
-	/*
-	 * If leaf, copy a record to the left block.
-	 */
-	else {
-		xfs_alloc_rec_t	*lrp;	/* record pointer for left block */
-
-		lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
-		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		*lrp = *rrp;
-		xfs_alloc_log_recs(cur, lbp, nrec, nrec);
-		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
-	}
-	/*
-	 * Bump and log left's numrecs, decrement and log right's numrecs.
-	 */
-	be16_add_cpu(&left->bb_numrecs, 1);
-	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
-	be16_add_cpu(&right->bb_numrecs, -1);
-	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
-	/*
-	 * Slide the contents of right down one entry.
-	 */
-	if (level > 0) {
-#ifdef DEBUG
-		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
-			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
-					level)))
-				return error;
-		}
-#endif
-		memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
-		memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
-		xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
-		xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
-	} else {
-		memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
-		xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
-		key.ar_startblock = rrp->ar_startblock;
-		key.ar_blockcount = rrp->ar_blockcount;
-		rkp = &key;
-	}
-	/*
-	 * Update the parent key values of right.
-	 */
-	if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
-		return error;
-	/*
-	 * Slide the cursor value left one.
-	 */
-	cur->bc_ptrs[level]--;
-	*stat = 1;
-	return 0;
-}
+STATIC __int64_t
+xfs_allocbt_key_diff(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*key)
+{
+	xfs_alloc_rec_incore_t	*rec = &cur->bc_rec.a;
+	xfs_alloc_key_t		*kp = &key->alloc;
+	__int64_t		diff;
+
+	if (cur->bc_btnum == XFS_BTNUM_BNO) {
+		return (__int64_t)be32_to_cpu(kp->ar_startblock) -
+				rec->ar_startblock;
+	}
+
+	diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
+	if (diff)
+		return diff;
+
+	return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+}

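key_diff returns block key minus search key, so for the by-count tree the extent length is the primary sort key and the start block breaks ties. A hedged worked example of the comparator's contract (this helper is illustrative, not part of the patch):

/* Hedged example: zero means exact match; the sign picks the
 * search direction in the generic lookup. */
static __int64_t
example_cntbt_diff(struct xfs_btree_cur *cur)
{
	union xfs_btree_key	key;

	cur->bc_rec.a.ar_blockcount = 16;	/* want a 16-block extent */
	cur->bc_rec.a.ar_startblock = 100;	/* ... at agbno 100 */

	key.alloc.ar_blockcount = cpu_to_be32(16);
	key.alloc.ar_startblock = cpu_to_be32(50);

	/* Lengths tie at 16, so the result is 50 - 100 = -50: the block
	 * key sorts before the search key and lookup moves right. */
	return xfs_allocbt_key_diff(cur, &key);
}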
-/*
- * Allocate a new root block, fill it in.
- */
-STATIC int				/* error */
-xfs_alloc_newroot(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	int			*stat)	/* success/failure */
-{
-	int			error;	/* error return value */
-	xfs_agblock_t		lbno;	/* left block number */
-	xfs_buf_t		*lbp;	/* left btree buffer */
-	xfs_alloc_block_t	*left;	/* left btree block */
-	xfs_mount_t		*mp;	/* mount structure */
-	xfs_agblock_t		nbno;	/* new block number */
-	xfs_buf_t		*nbp;	/* new (root) buffer */
-	xfs_alloc_block_t	*new;	/* new (root) btree block */
-	int			nptr;	/* new value for key index, 1 or 2 */
-	xfs_agblock_t		rbno;	/* right block number */
-	xfs_buf_t		*rbp;	/* right btree buffer */
-	xfs_alloc_block_t	*right;	/* right btree block */
-
-	mp = cur->bc_mp;
-
-	ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
-	/*
-	 * Get a buffer from the freelist blocks, for the new root.
-	 */
-	error = xfs_alloc_get_freelist(cur->bc_tp,
-					cur->bc_private.a.agbp, &nbno, 1);
-	if (error)
-		return error;
-	/*
-	 * None available, we fail.
-	 */
-	if (nbno == NULLAGBLOCK) {
-		*stat = 0;
-		return 0;
-	}
-	xfs_trans_agbtree_delta(cur->bc_tp, 1);
-	nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
-		0);
-	new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
-	/*
-	 * Set the root data in the a.g. freespace structure.
-	 */
-	{
-		xfs_agf_t	*agf;	/* a.g. freespace header */
-		xfs_agnumber_t	seqno;
-
-		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
-		agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
-		be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1);
-		seqno = be32_to_cpu(agf->agf_seqno);
-		mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
-		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
-			XFS_AGF_ROOTS | XFS_AGF_LEVELS);
-	}
-	/*
-	 * At the previous root level there are now two blocks: the old
-	 * root, and the new block generated when it was split.
-	 * We don't know which one the cursor is pointing at, so we
-	 * set up variables "left" and "right" for each case.
-	 */
-	lbp = cur->bc_bufs[cur->bc_nlevels - 1];
-	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
-	if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp)))
-		return error;
-#endif
-	if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
-		/*
-		 * Our block is left, pick up the right block.
-		 */
-		lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
-		rbno = be32_to_cpu(left->bb_rightsib);
-		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
-				cur->bc_private.a.agno, rbno, 0, &rbp,
-				XFS_ALLOC_BTREE_REF)))
-			return error;
-		right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-		if ((error = xfs_btree_check_sblock(cur, right,
-				cur->bc_nlevels - 1, rbp)))
-			return error;
-		nptr = 1;
-	} else {
-		/*
-		 * Our block is right, pick up the left block.
-		 */
-		rbp = lbp;
-		right = left;
-		rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
-		lbno = be32_to_cpu(right->bb_leftsib);
-		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
-				cur->bc_private.a.agno, lbno, 0, &lbp,
-				XFS_ALLOC_BTREE_REF)))
-			return error;
-		left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-		if ((error = xfs_btree_check_sblock(cur, left,
-				cur->bc_nlevels - 1, lbp)))
-			return error;
-		nptr = 2;
-	}
-	/*
-	 * Fill in the new block's btree header and log it.
-	 */
-	new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
-	new->bb_level = cpu_to_be16(cur->bc_nlevels);
-	new->bb_numrecs = cpu_to_be16(2);
-	new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
-	new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
-	xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
-	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
-	/*
-	 * Fill in the key data in the new root.
-	 */
-	{
-		xfs_alloc_key_t	*kp;	/* btree key pointer */
-
-		kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
-		if (be16_to_cpu(left->bb_level) > 0) {
-			kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur);
-			kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);
-		} else {
-			xfs_alloc_rec_t	*rp;	/* btree record pointer */
-
-			rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
-			kp[0].ar_startblock = rp->ar_startblock;
-			kp[0].ar_blockcount = rp->ar_blockcount;
-			rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-			kp[1].ar_startblock = rp->ar_startblock;
-			kp[1].ar_blockcount = rp->ar_blockcount;
-		}
-	}
-	xfs_alloc_log_keys(cur, nbp, 1, 2);
-	/*
-	 * Fill in the pointer data in the new root.
-	 */
-	{
-		xfs_alloc_ptr_t	*pp;	/* btree address pointer */
-
-		pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
-		pp[0] = cpu_to_be32(lbno);
-		pp[1] = cpu_to_be32(rbno);
-	}
-	xfs_alloc_log_ptrs(cur, nbp, 1, 2);
-	/*
-	 * Fix up the cursor.
-	 */
-	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
-	cur->bc_ptrs[cur->bc_nlevels] = nptr;
-	cur->bc_nlevels++;
-	*stat = 1;
-	return 0;
-}
+STATIC int
+xfs_allocbt_kill_root(
+	struct xfs_btree_cur	*cur,
+	struct xfs_buf		*bp,
+	int			level,
+	union xfs_btree_ptr	*newroot)
+{
+	int			error;
+
+	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+	XFS_BTREE_STATS_INC(cur, killroot);
+
+	/*
+	 * Update the root pointer, decreasing the level by 1 and then
+	 * free the old root.
+	 */
+	xfs_allocbt_set_root(cur, newroot, -1);
+	error = xfs_allocbt_free_block(cur, bp);
+	if (error) {
+		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+		return error;
+	}
+
+	XFS_BTREE_STATS_INC(cur, free);
+
+	xfs_btree_setbuf(cur, level, NULL);
+	cur->bc_nlevels--;
+
+	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+	return 0;
+}

-/*
- * Move 1 record right from cur/level if possible.
- * Update cur to reflect the new path.
- */
-STATIC int				/* error */
-xfs_alloc_rshift(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	int			level,	/* level to shift record on */
-	int			*stat)	/* success/failure */
-{
-	int			error;	/* error return value */
-	int			i;	/* loop index */
-	xfs_alloc_key_t		key;	/* key value for leaf level upward */
-	xfs_buf_t		*lbp;	/* buffer for left (current) block */
-	xfs_alloc_block_t	*left;	/* left (current) btree block */
-	xfs_buf_t		*rbp;	/* buffer for right neighbor block */
-	xfs_alloc_block_t	*right;	/* right neighbor btree block */
-	xfs_alloc_key_t		*rkp;	/* key pointer for right block */
-	xfs_btree_cur_t		*tcur;	/* temporary cursor */
-
-	/*
-	 * Set up variables for this block as "left".
-	 */
-	lbp = cur->bc_bufs[level];
-	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
-	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
-		return error;
-#endif
-	/*
-	 * If we've got no right sibling then we can't shift an entry right.
-	 */
-	if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
-		*stat = 0;
-		return 0;
-	}
-	/*
-	 * If the cursor entry is the one that would be moved, don't
-	 * do it... it's too complicated.
-	 */
-	if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
-		*stat = 0;
-		return 0;
-	}
-	/*
-	 * Set up the right neighbor as "right".
-	 */
-	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-			cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
-			0, &rbp, XFS_ALLOC_BTREE_REF)))
-		return error;
-	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
-		return error;
-	/*
-	 * If it's full, it can't take another entry.
-	 */
-	if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
-		*stat = 0;
-		return 0;
-	}
-	/*
-	 * Make a hole at the start of the right neighbor block, then
-	 * copy the last left block entry to the hole.
-	 */
-	if (level > 0) {
-		xfs_alloc_key_t	*lkp;	/* key pointer for left block */
-		xfs_alloc_ptr_t	*lpp;	/* address pointer for left block */
-		xfs_alloc_ptr_t	*rpp;	/* address pointer for right block */
-
-		lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
-		lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
-		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
-		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
-		for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
-			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
-				return error;
-		}
-#endif
-		memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
-		memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
-#ifdef DEBUG
-		if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
-			return error;
-#endif
-		*rkp = *lkp;
-		*rpp = *lpp;
-		xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
-		xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
-		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
-	} else {
-		xfs_alloc_rec_t	*lrp;	/* record pointer for left block */
-		xfs_alloc_rec_t	*rrp;	/* record pointer for right block */
-
-		lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
-		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
-		*rrp = *lrp;
-		xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
-		key.ar_startblock = rrp->ar_startblock;
-		key.ar_blockcount = rrp->ar_blockcount;
-		rkp = &key;
-		xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
-	}
-	/*
-	 * Decrement and log left's numrecs, bump and log right's numrecs.
-	 */
-	be16_add_cpu(&left->bb_numrecs, -1);
-	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
-	be16_add_cpu(&right->bb_numrecs, 1);
-	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
-	/*
-	 * Using a temporary cursor, update the parent key values of the
-	 * block on the right.
-	 */
-	if ((error = xfs_btree_dup_cursor(cur, &tcur)))
-		return error;
-	i = xfs_btree_lastrec(tcur, level);
-	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-	if ((error = xfs_alloc_increment(tcur, level, &i)) ||
-	    (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
-		goto error0;
-	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
-	*stat = 1;
-	return 0;
-error0:
-	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
-	return error;
-}
+#ifdef DEBUG
+STATIC int
+xfs_allocbt_keys_inorder(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	if (cur->bc_btnum == XFS_BTNUM_BNO) {
+		return be32_to_cpu(k1->alloc.ar_startblock) <
+		       be32_to_cpu(k2->alloc.ar_startblock);
+	} else {
+		return be32_to_cpu(k1->alloc.ar_blockcount) <
+			be32_to_cpu(k2->alloc.ar_blockcount) ||
+			(k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
+			 be32_to_cpu(k1->alloc.ar_startblock) <
+			 be32_to_cpu(k2->alloc.ar_startblock));
+	}
+}

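With the keys_inorder and recs_inorder hooks, the generic btree code can self-check record ordering after moving entries between blocks in DEBUG builds. A hedged sketch of how such a check might look at a call site (lkp/rkp and lrp/rrp are hypothetical adjacent key and record pointers):

#ifdef DEBUG
	ASSERT(cur->bc_ops->keys_inorder(cur, lkp, rkp));
	ASSERT(cur->bc_ops->recs_inorder(cur, lrp, rrp));
#endif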
-/*
- * Split cur/level block in half.
- * Return new block number and its first record (to be inserted into parent).
- */
-STATIC int				/* error */
-xfs_alloc_split(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	int			level,	/* level to split */
-	xfs_agblock_t		*bnop,	/* output: block number allocated */
-	xfs_alloc_key_t		*keyp,	/* output: first key of new block */
-	xfs_btree_cur_t		**curp,	/* output: new cursor */
-	int			*stat)	/* success/failure */
-{
-	int			error;	/* error return value */
-	int			i;	/* loop index/record number */
-	xfs_agblock_t		lbno;	/* left (current) block number */
-	xfs_buf_t		*lbp;	/* buffer for left block */
-	xfs_alloc_block_t	*left;	/* left (current) btree block */
-	xfs_agblock_t		rbno;	/* right (new) block number */
-	xfs_buf_t		*rbp;	/* buffer for right block */
-	xfs_alloc_block_t	*right;	/* right (new) btree block */
-
-	/*
-	 * Allocate the new block from the freelist.
-	 * If we can't do it, we're toast.  Give up.
-	 */
-	error = xfs_alloc_get_freelist(cur->bc_tp,
-					 cur->bc_private.a.agbp, &rbno, 1);
-	if (error)
-		return error;
-	if (rbno == NULLAGBLOCK) {
-		*stat = 0;
-		return 0;
-	}
-	xfs_trans_agbtree_delta(cur->bc_tp, 1);
-	rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
-		rbno, 0);
-	/*
-	 * Set up the new block as "right".
-	 */
-	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
-	/*
-	 * "Left" is the current (according to the cursor) block.
-	 */
-	lbp = cur->bc_bufs[level];
-	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
-#ifdef DEBUG
-	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
-		return error;
-#endif
-	/*
-	 * Fill in the btree header for the new block.
-	 */
-	right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
-	right->bb_level = left->bb_level;
-	right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
-	/*
-	 * Make sure that if there's an odd number of entries now, that
-	 * each new block will have the same number of entries.
-	 */
-	if ((be16_to_cpu(left->bb_numrecs) & 1) &&
-	    cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
-		be16_add_cpu(&right->bb_numrecs, 1);
-	i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
-	/*
-	 * For non-leaf blocks, copy keys and addresses over to the new block.
-	 */
-	if (level > 0) {
-		xfs_alloc_key_t	*lkp;	/* left btree key pointer */
-		xfs_alloc_ptr_t	*lpp;	/* left btree address pointer */
-		xfs_alloc_key_t	*rkp;	/* right btree key pointer */
-		xfs_alloc_ptr_t	*rpp;	/* right btree address pointer */
-
-		lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
-		lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
-		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
-		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
-#ifdef DEBUG
-		for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
-			if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
-				return error;
-		}
-#endif
-		memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
-		memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
-		xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
-		xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
-		*keyp = *rkp;
-	}
-	/*
-	 * For leaf blocks, copy records over to the new block.
-	 */
-	else {
-		xfs_alloc_rec_t	*lrp;	/* left btree record pointer */
-		xfs_alloc_rec_t	*rrp;	/* right btree record pointer */
-
-		lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
-		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
-		memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
-		xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
-		keyp->ar_startblock = rrp->ar_startblock;
-		keyp->ar_blockcount = rrp->ar_blockcount;
-	}
-	/*
-	 * Find the left block number by looking in the buffer.
-	 * Adjust numrecs, sibling pointers.
-	 */
-	lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
-	be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
-	right->bb_rightsib = left->bb_rightsib;
-	left->bb_rightsib = cpu_to_be32(rbno);
-	right->bb_leftsib = cpu_to_be32(lbno);
-	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
-	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
-	/*
-	 * If there's a block to the new block's right, make that block
-	 * point back to right instead of to left.
-	 */
-	if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
-		xfs_alloc_block_t	*rrblock;	/* rr btree block */
-		xfs_buf_t		*rrbp;	/* buffer for rrblock */
-
-		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-				cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0,
-				&rrbp, XFS_ALLOC_BTREE_REF)))
-			return error;
-		rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
-		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
-			return error;
-		rrblock->bb_leftsib = cpu_to_be32(rbno);
-		xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
-	}
-	/*
-	 * If the cursor is really in the right block, move it there.
-	 * If it's just pointing past the last entry in left, then we'll
-	 * insert there, so don't change anything in that case.
-	 */
-	if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
-		xfs_btree_setbuf(cur, level, rbp);
-		cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
-	}
-	/*
-	 * If there are more levels, we'll need another cursor which refers to
-	 * the right block, no matter where this cursor was.
-	 */
-	if (level + 1 < cur->bc_nlevels) {
-		if ((error = xfs_btree_dup_cursor(cur, curp)))
-			return error;
-		(*curp)->bc_ptrs[level + 1]++;
-	}
-	*bnop = rbno;
-	*stat = 1;
-	return 0;
-}
+STATIC int
+xfs_allocbt_recs_inorder(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_rec	*r1,
+	union xfs_btree_rec	*r2)
+{
+	if (cur->bc_btnum == XFS_BTNUM_BNO) {
+		return be32_to_cpu(r1->alloc.ar_startblock) +
+			be32_to_cpu(r1->alloc.ar_blockcount) <=
+			be32_to_cpu(r2->alloc.ar_startblock);
+	} else {
+		return be32_to_cpu(r1->alloc.ar_blockcount) <
+			be32_to_cpu(r2->alloc.ar_blockcount) ||
+			(r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
+			 be32_to_cpu(r1->alloc.ar_startblock) <
+			 be32_to_cpu(r2->alloc.ar_startblock));
+	}
+}
+#endif	/* DEBUG */
+
+#ifdef XFS_BTREE_TRACE
+ktrace_t	*xfs_allocbt_trace_buf;
+
+STATIC void
+xfs_allocbt_trace_enter(
+	struct xfs_btree_cur	*cur,
+	const char		*func,
+	char			*s,
+	int			type,
+	int			line,
+	__psunsigned_t		a0,
+	__psunsigned_t		a1,
+	__psunsigned_t		a2,
+	__psunsigned_t		a3,
+	__psunsigned_t		a4,
+	__psunsigned_t		a5,
+	__psunsigned_t		a6,
+	__psunsigned_t		a7,
+	__psunsigned_t		a8,
+	__psunsigned_t		a9,
+	__psunsigned_t		a10)
+{
+	ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
+		(void *)func, (void *)s, NULL, (void *)cur,
+		(void *)a0, (void *)a1, (void *)a2, (void *)a3,
+		(void *)a4, (void *)a5, (void *)a6, (void *)a7,
+		(void *)a8, (void *)a9, (void *)a10);
+}

-/*
- * Update keys at all levels from here to the root along the cursor's path.
- */
-STATIC int				/* error */
-xfs_alloc_updkey(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	xfs_alloc_key_t		*keyp,	/* new key value to update to */
-	int			level)	/* starting level for update */
-{
-	int			ptr;	/* index of key in block */
-
-	/*
-	 * Go up the tree from this level toward the root.
-	 * At each level, update the key value to the value input.
-	 * Stop when we reach a level where the cursor isn't pointing
-	 * at the first entry in the block.
-	 */
-	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
-		xfs_alloc_block_t	*block;	/* btree block */
-		xfs_buf_t		*bp;	/* buffer for block */
-#ifdef DEBUG
-		int			error;	/* error return value */
-#endif
-		xfs_alloc_key_t		*kp;	/* ptr to btree block keys */
-
-		bp = cur->bc_bufs[level];
-		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
-		if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
-			return error;
-#endif
-		ptr = cur->bc_ptrs[level];
-		kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
-		*kp = *keyp;
-		xfs_alloc_log_keys(cur, bp, ptr, ptr);
-	}
-	return 0;
-}
+STATIC void
+xfs_allocbt_trace_cursor(
+	struct xfs_btree_cur	*cur,
+	__uint32_t		*s0,
+	__uint64_t		*l0,
+	__uint64_t		*l1)
+{
+	*s0 = cur->bc_private.a.agno;
+	*l0 = cur->bc_rec.a.ar_startblock;
+	*l1 = cur->bc_rec.a.ar_blockcount;
+}

-/*
- * Externally visible routines.
- */
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int					/* error */
-xfs_alloc_decrement(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	int			level,	/* level in btree, 0 is leaf */
-	int			*stat)	/* success/failure */
-{
-	xfs_alloc_block_t	*block;	/* btree block */
-	int			error;	/* error return value */
-	int			lev;	/* btree level */
-
-	ASSERT(level < cur->bc_nlevels);
-	/*
-	 * Read-ahead to the left at this level.
-	 */
-	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
-	/*
-	 * Decrement the ptr at this level.  If we're still in the block
-	 * then we're done.
-	 */
-	if (--cur->bc_ptrs[level] > 0) {
-		*stat = 1;
-		return 0;
-	}
-	/*
-	 * Get a pointer to the btree block.
-	 */
-	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
-#ifdef DEBUG
-	if ((error = xfs_btree_check_sblock(cur, block, level,
-			cur->bc_bufs[level])))
-		return error;
-#endif
-	/*
-	 * If we just went off the left edge of the tree, return failure.
-	 */
-	if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
-		*stat = 0;
-		return 0;
-	}
-	/*
-	 * March up the tree decrementing pointers.
-	 * Stop when we don't go off the left edge of a block.
-	 */
-	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
-		if (--cur->bc_ptrs[lev] > 0)
-			break;
-		/*
-		 * Read-ahead the left block, we're going to read it
-		 * in the next loop.
-		 */
-		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
-	}
-	/*
-	 * If we went off the root then we are seriously confused.
-	 */
-	ASSERT(lev < cur->bc_nlevels);
-	/*
-	 * Now walk back down the tree, fixing up the cursor's buffer
-	 * pointers and key numbers.
-	 */
-	for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
-		xfs_agblock_t	agbno;	/* block number of btree block */
-		xfs_buf_t	*bp;	/* buffer pointer for block */
-
-		agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
-		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-				cur->bc_private.a.agno, agbno, 0, &bp,
-				XFS_ALLOC_BTREE_REF)))
-			return error;
-		lev--;
-		xfs_btree_setbuf(cur, lev, bp);
-		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
-			return error;
-		cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
-	}
-	*stat = 1;
-	return 0;
-}
+STATIC void
+xfs_allocbt_trace_key(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*key,
+	__uint64_t		*l0,
+	__uint64_t		*l1)
+{
+	*l0 = be32_to_cpu(key->alloc.ar_startblock);
+	*l1 = be32_to_cpu(key->alloc.ar_blockcount);
+}

-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-int					/* error */
-xfs_alloc_delete(
-	xfs_btree_cur_t	*cur,		/* btree cursor */
-	int		*stat)		/* success/failure */
-{
-	int		error;		/* error return value */
-	int		i;		/* result code */
-	int		level;		/* btree level */
-
-	/*
-	 * Go up the tree, starting at leaf level.
-	 * If 2 is returned then a join was done; go to the next level.
-	 * Otherwise we are done.
-	 */
-	for (level = 0, i = 2; i == 2; level++) {
-		if ((error = xfs_alloc_delrec(cur, level, &i)))
-			return error;
-	}
-	if (i == 0) {
-		for (level = 1; level < cur->bc_nlevels; level++) {
-			if (cur->bc_ptrs[level] == 0) {
-				if ((error = xfs_alloc_decrement(cur, level, &i)))
-					return error;
-				break;
-			}
-		}
-	}
-	*stat = i;
-	return 0;
-}
+STATIC void
+xfs_allocbt_trace_record(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_rec	*rec,
+	__uint64_t		*l0,
+	__uint64_t		*l1,
+	__uint64_t		*l2)
+{
+	*l0 = be32_to_cpu(rec->alloc.ar_startblock);
+	*l1 = be32_to_cpu(rec->alloc.ar_blockcount);
+	*l2 = 0;
+}
+#endif /* XFS_BTREE_TRACE */

-/*
- * Get the data from the pointed-to record.
- */
-int					/* error */
-xfs_alloc_get_rec(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	xfs_agblock_t		*bno,	/* output: starting block of extent */
-	xfs_extlen_t		*len,	/* output: length of extent */
-	int			*stat)	/* output: success/failure */
-{
-	xfs_alloc_block_t	*block;	/* btree block */
-#ifdef DEBUG
-	int			error;	/* error return value */
-#endif
-	int			ptr;	/* record number */
-
-	ptr = cur->bc_ptrs[0];
-	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
-#ifdef DEBUG
-	if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
-		return error;
-#endif
-	/*
-	 * Off the right end or left end, return failure.
-	 */
-	if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
-		*stat = 0;
-		return 0;
-	}
-	/*
-	 * Point to the record and extract its data.
-	 */
-	{
-		xfs_alloc_rec_t	*rec;	/* record data */
-
-		rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
-		*bno = be32_to_cpu(rec->ar_startblock);
-		*len = be32_to_cpu(rec->ar_blockcount);
-	}
-	*stat = 1;
-	return 0;
-}
+static const struct xfs_btree_ops xfs_allocbt_ops = {
+	.rec_len		= sizeof(xfs_alloc_rec_t),
+	.key_len		= sizeof(xfs_alloc_key_t),
+
+	.dup_cursor		= xfs_allocbt_dup_cursor,
+	.set_root		= xfs_allocbt_set_root,
+	.kill_root		= xfs_allocbt_kill_root,
+	.alloc_block		= xfs_allocbt_alloc_block,
+	.free_block		= xfs_allocbt_free_block,
+	.update_lastrec		= xfs_allocbt_update_lastrec,
+	.get_minrecs		= xfs_allocbt_get_minrecs,
+	.get_maxrecs		= xfs_allocbt_get_maxrecs,
+	.init_key_from_rec	= xfs_allocbt_init_key_from_rec,
+	.init_rec_from_key	= xfs_allocbt_init_rec_from_key,
+	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
+	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
+	.key_diff		= xfs_allocbt_key_diff,
+
+#ifdef DEBUG
+	.keys_inorder		= xfs_allocbt_keys_inorder,
+	.recs_inorder		= xfs_allocbt_recs_inorder,
+#endif
+
+#ifdef XFS_BTREE_TRACE
+	.trace_enter		= xfs_allocbt_trace_enter,
+	.trace_cursor		= xfs_allocbt_trace_cursor,
+	.trace_key		= xfs_allocbt_trace_key,
+	.trace_record		= xfs_allocbt_trace_record,
+#endif
+};

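Once the ops table is wired up, the generic btree code is parameterized entirely through cur->bc_ops rather than per-tree macros. A hedged sketch of the dispatch pattern this enables:

/* Sketch: generic code asks the ops table for geometry instead of
 * using the old XFS_ALLOC_BLOCK_MAXRECS() macro. */
if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
	/* block is full: try shifting into a sibling, then split */
}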
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-int					/* error */
-xfs_alloc_increment(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	int			level,	/* level in btree, 0 is leaf */
-	int			*stat)	/* success/failure */
-{
-	xfs_alloc_block_t	*block;	/* btree block */
-	xfs_buf_t		*bp;	/* tree block buffer */
-	int			error;	/* error return value */
-	int			lev;	/* btree level */
-
-	ASSERT(level < cur->bc_nlevels);
-	/*
-	 * Read-ahead to the right at this level.
-	 */
-	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
-	/*
-	 * Get a pointer to the btree block.
-	 */
-	bp = cur->bc_bufs[level];
-	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
-	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
-		return error;
-#endif
-	/*
-	 * Increment the ptr at this level.  If we're still in the block
-	 * then we're done.
-	 */
-	if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
-		*stat = 1;
-		return 0;
-	}
-	/*
-	 * If we just went off the right edge of the tree, return failure.
-	 */
-	if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
-		*stat = 0;
-		return 0;
-	}
-	/*
-	 * March up the tree incrementing pointers.
-	 * Stop when we don't go off the right edge of a block.
-	 */
-	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
-		bp = cur->bc_bufs[lev];
-		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-#ifdef DEBUG
-		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
-			return error;
-#endif
-		if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
-			break;
-		/*
-		 * Read-ahead the right block, we're going to read it
-		 * in the next loop.
-		 */
-		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
-	}
-	/*
-	 * If we went off the root then we are seriously confused.
-	 */
-	ASSERT(lev < cur->bc_nlevels);
-	/*
-	 * Now walk back down the tree, fixing up the cursor's buffer
-	 * pointers and key numbers.
-	 */
-	for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-			lev > level; ) {
-		xfs_agblock_t	agbno;	/* block number of btree block */
-
-		agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
-		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
-				cur->bc_private.a.agno, agbno, 0, &bp,
-				XFS_ALLOC_BTREE_REF)))
-			return error;
-		lev--;
-		xfs_btree_setbuf(cur, lev, bp);
-		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
-		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
-			return error;
-		cur->bc_ptrs[lev] = 1;
-	}
-	*stat = 1;
-	return 0;
-}
-
-/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
- */
-int					/* error */
-xfs_alloc_insert(
-	xfs_btree_cur_t	*cur,		/* btree cursor */
-	int		*stat)		/* success/failure */
-{
-	int		error;		/* error return value */
-	int		i;		/* result value, 0 for failure */
-	int		level;		/* current level number in btree */
-	xfs_agblock_t	nbno;		/* new block number (split result) */
-	xfs_btree_cur_t	*ncur;		/* new cursor (split result) */
-	xfs_alloc_rec_t	nrec;		/* record being inserted this level */
-	xfs_btree_cur_t	*pcur;		/* previous level's cursor */
-
-	level = 0;
-	nbno = NULLAGBLOCK;
-	nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
-	nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
-	ncur = NULL;
-	pcur = cur;
-	/*
-	 * Loop going up the tree, starting at the leaf level.
-	 * Stop when we don't get a split block, that must mean that
-	 * the insert is finished with this level.
-	 */
-	do {
-		/*
-		 * Insert nrec/nbno into this level of the tree.
-		 * Note if we fail, nbno will be null.
-		 */
-		if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
-				&i))) {
-			if (pcur != cur)
-				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
-			return error;
-		}
-		/*
-		 * See if the cursor we just used is trash.
-		 * Can't trash the caller's cursor, but otherwise we should
-		 * if ncur is a new cursor or we're about to be done.
-		 */
-		if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
-			cur->bc_nlevels = pcur->bc_nlevels;
-			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
-		}
-		/*
-		 * If we got a new cursor, switch to it.
-		 */
-		if (ncur) {
-			pcur = ncur;
-			ncur = NULL;
-		}
-	} while (nbno != NULLAGBLOCK);
-	*stat = i;
-	return 0;
-}
-
-/*
- * Lookup the record equal to [bno, len] in the btree given by cur.
- */
-int					/* error */
-xfs_alloc_lookup_eq(
-	xfs_btree_cur_t	*cur,	/* btree cursor */
-	xfs_agblock_t	bno,	/* starting block of extent */
-	xfs_extlen_t	len,	/* length of extent */
-	int		*stat)	/* success/failure */
-{
-	cur->bc_rec.a.ar_startblock = bno;
-	cur->bc_rec.a.ar_blockcount = len;
-	return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
-}
-
-/*
- * Lookup the first record greater than or equal to [bno, len]
- * in the btree given by cur.
- */
-int					/* error */
-xfs_alloc_lookup_ge(
-	xfs_btree_cur_t	*cur,	/* btree cursor */
-	xfs_agblock_t	bno,	/* starting block of extent */
-	xfs_extlen_t	len,	/* length of extent */
-	int		*stat)	/* success/failure */
-{
-	cur->bc_rec.a.ar_startblock = bno;
-	cur->bc_rec.a.ar_blockcount = len;
-	return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
-}
-
-/*
- * Lookup the first record less than or equal to [bno, len]
- * in the btree given by cur.
- */
-int					/* error */
-xfs_alloc_lookup_le(
-	xfs_btree_cur_t	*cur,	/* btree cursor */
-	xfs_agblock_t	bno,	/* starting block of extent */
-	xfs_extlen_t	len,	/* length of extent */
-	int		*stat)	/* success/failure */
-{
-	cur->bc_rec.a.ar_startblock = bno;
-	cur->bc_rec.a.ar_blockcount = len;
-	return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
-}
+/*
+ * Allocate a new allocation btree cursor.
+ */
+struct xfs_btree_cur *			/* new alloc btree cursor */
+xfs_allocbt_init_cursor(
+	struct xfs_mount	*mp,	/* file system mount point */
+	struct xfs_trans	*tp,	/* transaction pointer */
+	struct xfs_buf		*agbp,	/* buffer for agf structure */
+	xfs_agnumber_t		agno,	/* allocation group number */
+	xfs_btnum_t		btnum)	/* btree identifier */
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
+	struct xfs_btree_cur	*cur;
+
+	ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
+
+	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+	cur->bc_tp = tp;
+	cur->bc_mp = mp;
+	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
+	cur->bc_btnum = btnum;
+	cur->bc_blocklog = mp->m_sb.sb_blocklog;
+
+	cur->bc_ops = &xfs_allocbt_ops;
+	if (btnum == XFS_BTNUM_CNT)
+		cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
+
+	cur->bc_private.a.agbp = agbp;
+	cur->bc_private.a.agno = agno;
+
+	return cur;
+}

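With the type-specific lookup wrappers removed, callers go through the generic xfs_btree_lookup() instead. A hedged caller-side usage sketch (exact wrapper names may differ from the final tree):

/* Hedged usage sketch: find the first by-count record >= minlen. */
struct xfs_btree_cur	*cur;
int			stat, error;

cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
cur->bc_rec.a.ar_startblock = 0;
cur->bc_rec.a.ar_blockcount = minlen;
error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, &stat);	/* generic lookup */
/* ... consume the record if stat != 0 ... */
xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);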
2143/* 484/*
2144 * Update the record referred to by cur, to the value given by [bno, len]. 485 * Calculate number of records in an alloc btree block.
2145 * This either works (return 0) or gets an EFSCORRUPTED error.
2146 */ 486 */
2147int /* error */ 487int
2148xfs_alloc_update( 488xfs_allocbt_maxrecs(
2149 xfs_btree_cur_t *cur, /* btree cursor */ 489 struct xfs_mount *mp,
2150 xfs_agblock_t bno, /* starting block of extent */ 490 int blocklen,
2151 xfs_extlen_t len) /* length of extent */ 491 int leaf)
2152{ 492{
2153 xfs_alloc_block_t *block; /* btree block to update */ 493 blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
2154 int error; /* error return value */
2155 int ptr; /* current record number (updating) */
2156 494
2157 ASSERT(len > 0); 495 if (leaf)
2158 /* 496 return blocklen / sizeof(xfs_alloc_rec_t);
2159 * Pick up the a.g. freelist struct and the current block. 497 return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
2160 */
2161 block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
2162#ifdef DEBUG
2163 if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
2164 return error;
2165#endif
2166 /*
2167 * Get the address of the rec to be updated.
2168 */
2169 ptr = cur->bc_ptrs[0];
2170 {
2171 xfs_alloc_rec_t *rp; /* pointer to updated record */
2172
2173 rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
2174 /*
2175 * Fill in the new contents and log them.
2176 */
2177 rp->ar_startblock = cpu_to_be32(bno);
2178 rp->ar_blockcount = cpu_to_be32(len);
2179 xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
2180 }
2181 /*
2182 * If it's the by-size btree and it's the last leaf block and
2183 * it's the last record... then update the size of the longest
2184 * extent in the a.g., which we cache in the a.g. freelist header.
2185 */
2186 if (cur->bc_btnum == XFS_BTNUM_CNT &&
2187 be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
2188 ptr == be16_to_cpu(block->bb_numrecs)) {
2189 xfs_agf_t *agf; /* a.g. freespace header */
2190 xfs_agnumber_t seqno;
2191
2192 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
2193 seqno = be32_to_cpu(agf->agf_seqno);
2194 cur->bc_mp->m_perag[seqno].pagf_longest = len;
2195 agf->agf_longest = cpu_to_be32(len);
2196 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
2197 XFS_AGF_LONGEST);
2198 }
2199 /*
2200 * Updating first record in leaf. Pass new key value up to our parent.
2201 */
2202 if (ptr == 1) {
2203 xfs_alloc_key_t key; /* key containing [bno, len] */
2204
2205 key.ar_startblock = cpu_to_be32(bno);
2206 key.ar_blockcount = cpu_to_be32(len);
2207 if ((error = xfs_alloc_updkey(cur, &key, 1)))
2208 return error;
2209 }
2210 return 0;
2211} 498}
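Aside (editorial, not part of the patch): the arithmetic in the new xfs_allocbt_maxrecs() is easy to sanity-check in isolation. A minimal userspace sketch, with the header and field sizes hard-coded as assumptions (a 16-byte short-form header, two 4-byte fields per record/key, one 4-byte pointer), not taken from this diff:

    #include <stdio.h>

    /* Assumed sizes for illustration only. */
    #define BLOCK_HDR_LEN	16	/* short-form btree block header */
    #define REC_SIZE		8	/* ar_startblock + ar_blockcount */
    #define KEY_SIZE		8
    #define PTR_SIZE		4

    static int maxrecs(int blocklen, int leaf)
    {
    	blocklen -= BLOCK_HDR_LEN;		/* space after the header */
    	if (leaf)
    		return blocklen / REC_SIZE;	/* leaf: records only */
    	return blocklen / (KEY_SIZE + PTR_SIZE); /* node: key/ptr pairs */
    }

    int main(void)
    {
    	/* A 4096-byte block holds 510 leaf records or 340 key/pointer
    	 * pairs under these assumptions. */
    	printf("leaf: %d, node: %d\n", maxrecs(4096, 1), maxrecs(4096, 0));
    	return 0;
    }

Values of this kind are what the mount code caches in m_alloc_mxr[] and friends.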
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index 5bd1a2c8bd07..a6caa0022c9b 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -24,7 +24,6 @@
 
 struct xfs_buf;
 struct xfs_btree_cur;
-struct xfs_btree_sblock;
 struct xfs_mount;
 
 /*
@@ -50,16 +49,6 @@ typedef struct xfs_alloc_rec_incore {
 
 /* btree pointer type */
 typedef __be32 xfs_alloc_ptr_t;
-/* btree block header type */
-typedef struct xfs_btree_sblock xfs_alloc_block_t;
-
-#define XFS_BUF_TO_ALLOC_BLOCK(bp)	((xfs_alloc_block_t *)XFS_BUF_PTR(bp))
-
-/*
- * Real block structures have a size equal to the disk block size.
- */
-#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0])
-#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0])
 
 /*
  * Minimum and maximum blocksize and sectorsize.
@@ -83,73 +72,39 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t;
 #define XFS_CNT_BLOCK(mp)	((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1))
 
 /*
- * Record, key, and pointer address macros for btree blocks.
- */
-#define XFS_ALLOC_REC_ADDR(bb,i,cur) \
-	XFS_BTREE_REC_ADDR(xfs_alloc, bb, i)
-
-#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \
-	XFS_BTREE_KEY_ADDR(xfs_alloc, bb, i)
-
-#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \
-	XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_alloc_decrement(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Get the data from the pointed-to record.
- */
-extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno,
-		xfs_extlen_t *len, int *stat);
-
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_alloc_increment(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
- */
-extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Lookup the record equal to [bno, len] in the btree given by cur.
- */
-extern int xfs_alloc_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
-		xfs_extlen_t len, int *stat);
-
-/*
- * Lookup the first record greater than or equal to [bno, len]
- * in the btree given by cur.
- */
-extern int xfs_alloc_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno,
-		xfs_extlen_t len, int *stat);
-
-/*
- * Lookup the first record less than or equal to [bno, len]
- * in the btree given by cur.
- */
-extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
-		xfs_extlen_t len, int *stat);
-
-/*
- * Update the record referred to by cur, to the value given by [bno, len].
- * This either works (return 0) or gets an EFSCORRUPTED error.
- */
-extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno,
-		xfs_extlen_t len);
+ * Btree block header size depends on a superblock flag.
+ *
+ * (not quite yet, but soon)
+ */
+#define XFS_ALLOC_BLOCK_LEN(mp)	XFS_BTREE_SBLOCK_LEN
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_ALLOC_REC_ADDR(mp, block, index) \
+	((xfs_alloc_rec_t *) \
+		((char *)(block) + \
+		 XFS_ALLOC_BLOCK_LEN(mp) + \
+		 (((index) - 1) * sizeof(xfs_alloc_rec_t))))
+
+#define XFS_ALLOC_KEY_ADDR(mp, block, index) \
+	((xfs_alloc_key_t *) \
+		((char *)(block) + \
+		 XFS_ALLOC_BLOCK_LEN(mp) + \
+		 ((index) - 1) * sizeof(xfs_alloc_key_t)))
+
+#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \
+	((xfs_alloc_ptr_t *) \
+		((char *)(block) + \
+		 XFS_ALLOC_BLOCK_LEN(mp) + \
+		 (maxrecs) * sizeof(xfs_alloc_key_t) + \
+		 ((index) - 1) * sizeof(xfs_alloc_ptr_t)))
+
+extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,
+		struct xfs_trans *, struct xfs_buf *,
+		xfs_agnumber_t, xfs_btnum_t);
+extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
 
 #endif	/* __XFS_ALLOC_BTREE_H__ */
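Aside (editorial): the replacement REC/KEY/PTR_ADDR macros encode one layout rule: records (leaf) or keys-then-pointers (node) start immediately after the block header, indices are 1-based, and the pointer array begins after maxrecs keys. A hedged standalone rendering of that pointer arithmetic, with all sizes assumed rather than taken from the patch:

    #include <stdio.h>
    #include <stdint.h>

    #define BLOCK_HDR_LEN	16	/* assumed short-form header size */

    typedef struct { uint32_t startblock, blockcount; } rec_t; /* 8 bytes */
    typedef rec_t key_t2;			/* keys share the record shape */
    typedef uint32_t ptr_t;			/* 4 bytes */

    /* Indices are 1-based, as in the kernel macros. */
    static rec_t *rec_addr(void *block, int idx)
    {
    	return (rec_t *)((char *)block + BLOCK_HDR_LEN +
    			 (idx - 1) * sizeof(rec_t));
    }

    static ptr_t *ptr_addr(void *block, int idx, int maxrecs)
    {
    	/* the pointer array starts after maxrecs keys */
    	return (ptr_t *)((char *)block + BLOCK_HDR_LEN +
    			 maxrecs * sizeof(key_t2) + (idx - 1) * sizeof(ptr_t));
    }

    int main(void)
    {
    	char block[4096];

    	printf("rec 1 at offset %ld\n",
    	       (long)((char *)rec_addr(block, 1) - block));
    	printf("ptr 1 at offset %ld (maxrecs = 340)\n",
    	       (long)((char *)ptr_addr(block, 1, 340) - block));
    	return 0;
    }

Passing maxrecs explicitly is what lets these macros work without a cursor, which is why they are also usable from userspace tools.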
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 0b3b5efe848c..53d5e70d1360 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -41,21 +41,36 @@
 #endif
 
 #ifdef XFS_NATIVE_HOST
-#define cpu_to_be16(val)	((__be16)(val))
-#define cpu_to_be32(val)	((__be32)(val))
-#define cpu_to_be64(val)	((__be64)(val))
-#define be16_to_cpu(val)	((__uint16_t)(val))
-#define be32_to_cpu(val)	((__uint32_t)(val))
-#define be64_to_cpu(val)	((__uint64_t)(val))
+#define cpu_to_be16(val)	((__force __be16)(__u16)(val))
+#define cpu_to_be32(val)	((__force __be32)(__u32)(val))
+#define cpu_to_be64(val)	((__force __be64)(__u64)(val))
+#define be16_to_cpu(val)	((__force __u16)(__be16)(val))
+#define be32_to_cpu(val)	((__force __u32)(__be32)(val))
+#define be64_to_cpu(val)	((__force __u64)(__be64)(val))
 #else
-#define cpu_to_be16(val)	(__swab16((__uint16_t)(val)))
-#define cpu_to_be32(val)	(__swab32((__uint32_t)(val)))
-#define cpu_to_be64(val)	(__swab64((__uint64_t)(val)))
-#define be16_to_cpu(val)	(__swab16((__be16)(val)))
-#define be32_to_cpu(val)	(__swab32((__be32)(val)))
-#define be64_to_cpu(val)	(__swab64((__be64)(val)))
+#define cpu_to_be16(val)	((__force __be16)__swab16((__u16)(val)))
+#define cpu_to_be32(val)	((__force __be32)__swab32((__u32)(val)))
+#define cpu_to_be64(val)	((__force __be64)__swab64((__u64)(val)))
+#define be16_to_cpu(val)	(__swab16((__force __u16)(__be16)(val)))
+#define be32_to_cpu(val)	(__swab32((__force __u32)(__be32)(val)))
+#define be64_to_cpu(val)	(__swab64((__force __u64)(__be64)(val)))
 #endif
 
+static inline void be16_add_cpu(__be16 *a, __s16 b)
+{
+	*a = cpu_to_be16(be16_to_cpu(*a) + b);
+}
+
+static inline void be32_add_cpu(__be32 *a, __s32 b)
+{
+	*a = cpu_to_be32(be32_to_cpu(*a) + b);
+}
+
+static inline void be64_add_cpu(__be64 *a, __s64 b)
+{
+	*a = cpu_to_be64(be64_to_cpu(*a) + b);
+}
+
 #endif	/* __KERNEL__ */
 
 /* do we need conversion? */
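Aside (editorial): the new beXX_add_cpu() helpers fold the common swap-add-swap sequence on big-endian on-disk fields into one call. A userspace sketch of the same idea, assuming glibc's <endian.h> htobe32/be32toh in place of the kernel's cpu_to_be32/be32_to_cpu:

    #include <stdio.h>
    #include <stdint.h>
    #include <endian.h>	/* htobe32/be32toh: userspace stand-ins */

    static void be32_add_cpu(uint32_t *a, int32_t b)
    {
    	/* decode, adjust, re-encode in big-endian disk order */
    	*a = htobe32(be32toh(*a) + b);
    }

    int main(void)
    {
    	uint32_t disk_numrecs = htobe32(41);

    	be32_add_cpu(&disk_numrecs, 1);		/* e.g. one record inserted */
    	printf("%u\n", be32toh(disk_numrecs));	/* prints 42 */
    	return 0;
    }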
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h
index 8e0e463dae2d..bca7b243c319 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/xfs_bit.h
@@ -61,8 +61,7 @@ static inline int xfs_highbit64(__uint64_t v)
 /* Get low bit set out of 32-bit argument, -1 if none set */
 static inline int xfs_lowbit32(__uint32_t v)
 {
-	unsigned long	t = v;
-	return (v) ? find_first_bit(&t, 32) : -1;
+	return ffs(v) - 1;
 }
 
 /* Get low bit set out of 64-bit argument, -1 if none set */
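Aside (editorial): the simplification relies on ffs() being 1-based and returning 0 for a zero argument, so ffs(v) - 1 is exactly the old "lowest set bit or -1" contract. Userspace ffs() from <strings.h> has the same semantics:

    #include <stdio.h>
    #include <strings.h>	/* ffs(): 1-based lowest set bit, 0 if none */

    static int lowbit32(unsigned int v)
    {
    	return ffs(v) - 1;	/* 0-based lowest set bit, or -1 for v == 0 */
    }

    int main(void)
    {
    	printf("%d %d %d\n", lowbit32(0), lowbit32(1), lowbit32(0x50));
    	/* prints: -1 0 4 */
    	return 0;
    }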
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index a1aab9275d5a..138308e70d14 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -393,8 +393,8 @@ xfs_bmap_count_leaves(
 
 STATIC void
 xfs_bmap_disk_count_leaves(
-	xfs_extnum_t		idx,
-	xfs_bmbt_block_t	*block,
+	struct xfs_mount	*mp,
+	struct xfs_btree_block	*block,
 	int			numrecs,
 	int			*count);
 
@@ -402,6 +402,53 @@ xfs_bmap_disk_count_leaves(
  * Bmap internal routines.
  */
 
+STATIC int				/* error */
+xfs_bmbt_lookup_eq(
+	struct xfs_btree_cur	*cur,
+	xfs_fileoff_t		off,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	int			*stat)	/* success/failure */
+{
+	cur->bc_rec.b.br_startoff = off;
+	cur->bc_rec.b.br_startblock = bno;
+	cur->bc_rec.b.br_blockcount = len;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
+STATIC int				/* error */
+xfs_bmbt_lookup_ge(
+	struct xfs_btree_cur	*cur,
+	xfs_fileoff_t		off,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	int			*stat)	/* success/failure */
+{
+	cur->bc_rec.b.br_startoff = off;
+	cur->bc_rec.b.br_startblock = bno;
+	cur->bc_rec.b.br_blockcount = len;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [off, bno, len, state].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int
+xfs_bmbt_update(
+	struct xfs_btree_cur	*cur,
+	xfs_fileoff_t		off,
+	xfs_fsblock_t		bno,
+	xfs_filblks_t		len,
+	xfs_exntst_t		state)
+{
+	union xfs_btree_rec	rec;
+
+	xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
+	return xfs_btree_update(cur, &rec);
+}
+
 /*
  * Called from xfs_bmap_add_attrfork to handle btree format files.
  */
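Aside (editorial): the pattern in these wrappers, stage the search key in the cursor's bc_rec and then defer to the generic btree op, is what lets the later hunks replace xfs_bmbt_* calls with xfs_btree_* ones. A toy mock of that shape (names and types hypothetical, not kernel API):

    #include <stdio.h>

    /* Toy cursor: wrappers stage the key, the generic op consumes it. */
    struct bmbt_rec { unsigned long long off, bno, len; };
    struct btree_cur { struct bmbt_rec b; };

    static int generic_lookup(struct btree_cur *cur, int *stat)
    {
    	/* stand-in for the generic lookup; pretend the record exists */
    	printf("lookup [off=%llu bno=%llu len=%llu]\n",
    	       cur->b.off, cur->b.bno, cur->b.len);
    	*stat = 1;
    	return 0;
    }

    static int lookup_eq(struct btree_cur *cur, unsigned long long off,
    		     unsigned long long bno, unsigned long long len,
    		     int *stat)
    {
    	cur->b.off = off;	/* stage the search key... */
    	cur->b.bno = bno;
    	cur->b.len = len;
    	return generic_lookup(cur, stat);	/* ...defer to generic op */
    }

    int main(void)
    {
    	struct btree_cur cur;
    	int stat;

    	if (!lookup_eq(&cur, 10, 1000, 8, &stat) && stat == 1)
    		puts("found: caller may now update/delete at the cursor");
    	return 0;
    }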
@@ -422,15 +469,14 @@ xfs_bmap_add_attrfork_btree(
 	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
 		*flags |= XFS_ILOG_DBROOT;
 	else {
-		cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
-			XFS_DATA_FORK);
+		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
 		cur->bc_private.b.flist = flist;
 		cur->bc_private.b.firstblock = *firstblock;
 		if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
 			goto error0;
 		/* must be at least one entry */
 		XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
-		if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
+		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 			goto error0;
 		if (stat == 0) {
 			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
@@ -818,10 +864,10 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_blockcount, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_delete(cur, &i)))
+			if ((error = xfs_btree_delete(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -931,7 +977,7 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
-			if ((error = xfs_bmbt_insert(cur, &i)))
+			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
@@ -1007,7 +1053,7 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
-			if ((error = xfs_bmbt_insert(cur, &i)))
+			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
@@ -1097,7 +1143,7 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
-			if ((error = xfs_bmbt_insert(cur, &i)))
+			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
@@ -1152,7 +1198,7 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
-			if ((error = xfs_bmbt_insert(cur, &i)))
+			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
@@ -1379,16 +1425,16 @@ xfs_bmap_add_extent_unwritten_real(
 					RIGHT.br_blockcount, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_delete(cur, &i)))
+			if ((error = xfs_btree_delete(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_delete(cur, &i)))
+			if ((error = xfs_btree_delete(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1428,10 +1474,10 @@ xfs_bmap_add_extent_unwritten_real(
 					&i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_delete(cur, &i)))
+			if ((error = xfs_btree_delete(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
@@ -1471,10 +1517,10 @@ xfs_bmap_add_extent_unwritten_real(
 					RIGHT.br_blockcount, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_delete(cur, &i)))
+			if ((error = xfs_btree_delete(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
@@ -1557,7 +1603,7 @@ xfs_bmap_add_extent_unwritten_real(
 				PREV.br_blockcount - new->br_blockcount,
 				oldext)))
 				goto done;
-			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			if (xfs_bmbt_update(cur, LEFT.br_startoff,
 				LEFT.br_startblock,
@@ -1605,7 +1651,7 @@ xfs_bmap_add_extent_unwritten_real(
 				oldext)))
 				goto done;
 			cur->bc_rec.b = *new;
-			if ((error = xfs_bmbt_insert(cur, &i)))
+			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
@@ -1647,7 +1693,7 @@ xfs_bmap_add_extent_unwritten_real(
 				PREV.br_blockcount - new->br_blockcount,
 				oldext)))
 				goto done;
-			if ((error = xfs_bmbt_increment(cur, 0, &i)))
+			if ((error = xfs_btree_increment(cur, 0, &i)))
 				goto done;
 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
 				new->br_startblock,
@@ -1695,7 +1741,7 @@ xfs_bmap_add_extent_unwritten_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
-			if ((error = xfs_bmbt_insert(cur, &i)))
+			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
@@ -1743,7 +1789,7 @@ xfs_bmap_add_extent_unwritten_real(
 			cur->bc_rec.b = PREV;
 			cur->bc_rec.b.br_blockcount =
 				new->br_startoff - PREV.br_startoff;
-			if ((error = xfs_bmbt_insert(cur, &i)))
+			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			/*
@@ -1758,7 +1804,7 @@ xfs_bmap_add_extent_unwritten_real(
 			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			/* new middle extent - newext */
 			cur->bc_rec.b.br_state = new->br_state;
-			if ((error = xfs_bmbt_insert(cur, &i)))
+			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
@@ -2106,10 +2152,10 @@ xfs_bmap_add_extent_hole_real(
 					right.br_blockcount, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_delete(cur, &i)))
+			if ((error = xfs_btree_delete(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
-			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
+			if ((error = xfs_btree_decrement(cur, 0, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 			if ((error = xfs_bmbt_update(cur, left.br_startoff,
@@ -2218,7 +2264,7 @@ xfs_bmap_add_extent_hole_real(
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
 			cur->bc_rec.b.br_state = new->br_state;
-			if ((error = xfs_bmbt_insert(cur, &i)))
+			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
 			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		}
@@ -2996,24 +3042,24 @@ xfs_bmap_btree_to_extents(
 	int			whichfork)  /* data or attr fork */
 {
 	/* REFERENCED */
-	xfs_bmbt_block_t	*cblock;/* child btree block */
+	struct xfs_btree_block	*cblock;/* child btree block */
 	xfs_fsblock_t		cbno;	/* child block number */
 	xfs_buf_t		*cbp;	/* child block's buffer */
 	int			error;	/* error return value */
 	xfs_ifork_t		*ifp;	/* inode fork data */
 	xfs_mount_t		*mp;	/* mount point structure */
 	__be64			*pp;	/* ptr to block address */
-	xfs_bmbt_block_t	*rblock;/* root btree block */
+	struct xfs_btree_block	*rblock;/* root btree block */
 
+	mp = ip->i_mount;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
 	rblock = ifp->if_broot;
 	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
-	ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1);
-	mp = ip->i_mount;
-	pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
+	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
+	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
 	cbno = be64_to_cpu(*pp);
 	*logflagsp = 0;
 #ifdef DEBUG
@@ -3023,8 +3069,8 @@ xfs_bmap_btree_to_extents(
 	if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
 			XFS_BMAP_BTREE_REF)))
 		return error;
-	cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
-	if ((error = xfs_btree_check_lblock(cur, cblock, 0, cbp)))
+	cblock = XFS_BUF_TO_BLOCK(cbp);
+	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
 		return error;
 	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
 	ip->i_d.di_nblocks--;
@@ -3170,7 +3216,7 @@ xfs_bmap_del_extent(
 			flags |= XFS_ILOG_FEXT(whichfork);
 			break;
 		}
-		if ((error = xfs_bmbt_delete(cur, &i)))
+		if ((error = xfs_btree_delete(cur, &i)))
 			goto done;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
 		break;
@@ -3254,10 +3300,10 @@ xfs_bmap_del_extent(
 				got.br_startblock, temp,
 				got.br_state)))
 				goto done;
-			if ((error = xfs_bmbt_increment(cur, 0, &i)))
+			if ((error = xfs_btree_increment(cur, 0, &i)))
 				goto done;
 			cur->bc_rec.b = new;
-			error = xfs_bmbt_insert(cur, &i);
+			error = xfs_btree_insert(cur, &i);
 			if (error && error != ENOSPC)
 				goto done;
 			/*
@@ -3404,11 +3450,11 @@ xfs_bmap_extents_to_btree(
 	int			*logflagsp, /* inode logging flags */
 	int			whichfork) /* data or attr fork */
 {
-	xfs_bmbt_block_t	*ablock;  /* allocated (child) bt block */
+	struct xfs_btree_block	*ablock;  /* allocated (child) bt block */
 	xfs_buf_t		*abp;	/* buffer for ablock */
 	xfs_alloc_arg_t		args;	/* allocation arguments */
 	xfs_bmbt_rec_t		*arp;	/* child record pointer */
-	xfs_bmbt_block_t	*block;	/* btree root block */
+	struct xfs_btree_block	*block;	/* btree root block */
 	xfs_btree_cur_t		*cur;	/* bmap btree cursor */
 	xfs_bmbt_rec_host_t	*ep;	/* extent record pointer */
 	int			error;	/* error return value */
@@ -3428,6 +3474,7 @@ xfs_bmap_extents_to_btree(
 	 */
 	xfs_iroot_realloc(ip, 1, whichfork);
 	ifp->if_flags |= XFS_IFBROOT;
+
 	/*
 	 * Fill in the root.
 	 */
@@ -3435,14 +3482,14 @@ xfs_bmap_extents_to_btree(
 	block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
 	block->bb_level = cpu_to_be16(1);
 	block->bb_numrecs = cpu_to_be16(1);
-	block->bb_leftsib = cpu_to_be64(NULLDFSBNO);
-	block->bb_rightsib = cpu_to_be64(NULLDFSBNO);
+	block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+	block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+
 	/*
 	 * Need a cursor.  Can't allocate until bb_level is filled in.
 	 */
 	mp = ip->i_mount;
-	cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
-		whichfork);
+	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 	cur->bc_private.b.firstblock = *firstblock;
 	cur->bc_private.b.flist = flist;
 	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
@@ -3489,12 +3536,12 @@ xfs_bmap_extents_to_btree(
 	/*
 	 * Fill in the child block.
 	 */
-	ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
+	ablock = XFS_BUF_TO_BLOCK(abp);
 	ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
 	ablock->bb_level = 0;
-	ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO);
-	ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
-	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+	ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+	ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
 	for (cnt = i = 0; i < nextents; i++) {
 		ep = xfs_iext_get_ext(ifp, i);
@@ -3505,21 +3552,24 @@ xfs_bmap_extents_to_btree(
 		}
 	}
 	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
-	ablock->bb_numrecs = cpu_to_be16(cnt);
+	xfs_btree_set_numrecs(ablock, cnt);
+
 	/*
 	 * Fill in the root key and pointer.
 	 */
-	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
-	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
+	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
+	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
 	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
-	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
+	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
+						be16_to_cpu(block->bb_level)));
 	*pp = cpu_to_be64(args.fsbno);
+
 	/*
 	 * Do all this logging at the end so that
 	 * the root is at the right level.
 	 */
-	xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS);
-	xfs_bmbt_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
+	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
+	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
 	ASSERT(*curp == NULL);
 	*curp = cur;
 	*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
@@ -4176,7 +4226,7 @@ xfs_bmap_compute_maxlevels(
 		maxleafents = MAXAEXTNUM;
 		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
 	}
-	maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
+	maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0);
 	minleafrecs = mp->m_bmap_dmnr[0];
 	minnoderecs = mp->m_bmap_dmnr[1];
 	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
@@ -4242,9 +4292,15 @@ xfs_bmap_finish(
 	 * We have a new transaction, so we should return committed=1,
 	 * even though we're returning an error.
 	 */
-	if (error) {
+	if (error)
 		return error;
-	}
+
+	/*
+	 * transaction commit worked ok so we can drop the extra ticket
+	 * reference that we gained in xfs_trans_dup()
+	 */
+	xfs_log_ticket_put(ntp->t_ticket);
+
 	if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
 			logcount)))
 		return error;
@@ -4474,6 +4530,22 @@ xfs_bmap_one_block(
 	return rval;
 }
 
+STATIC int
+xfs_bmap_sanity_check(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	int			level)
+{
+	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
+
+	if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
+	    be16_to_cpu(block->bb_level) != level ||
+	    be16_to_cpu(block->bb_numrecs) == 0 ||
+	    be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
+		return 0;
+	return 1;
+}
+
 /*
  * Read in the extents to if_extents.
  * All inode fields are set up by caller, we just traverse the btree
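Aside (editorial): xfs_bmap_sanity_check() takes over from the old XFS_BMAP_SANITY_CHECK macro; note how the level != 0 subscript selects the leaf ([0]) or node ([1]) record limit. A condensed standalone version of that bounds test (the limits are assumptions, and the bb_level comparison is folded into the level argument here):

    #include <stdio.h>

    /* Assumed limits: max records in a leaf ([0]) or node ([1]) block. */
    static const int bmap_dmxr[2] = { 254, 170 };

    static int sanity_check(unsigned int magic, int level, int numrecs)
    {
    	if (magic != 0x424d4150 ||	/* on-disk "BMAP" magic */
    	    numrecs == 0 ||
    	    numrecs > bmap_dmxr[level != 0])
    		return 0;
    	return 1;
    }

    int main(void)
    {
    	/* 200 records fit a leaf (<= 254) but overflow a node (> 170) */
    	printf("%d %d\n", sanity_check(0x424d4150, 0, 200),
    			  sanity_check(0x424d4150, 1, 200));
    	return 0;
    }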
@@ -4486,7 +4558,7 @@ xfs_bmap_read_extents(
 	xfs_inode_t		*ip,	/* incore inode */
 	int			whichfork) /* data or attr fork */
 {
-	xfs_bmbt_block_t	*block;	/* current btree block */
+	struct xfs_btree_block	*block;	/* current btree block */
 	xfs_fsblock_t		bno;	/* block # of "block" */
 	xfs_buf_t		*bp;	/* buffer for "block" */
 	int			error;	/* error return value */
@@ -4510,7 +4582,7 @@ xfs_bmap_read_extents(
 	 */
 	level = be16_to_cpu(block->bb_level);
 	ASSERT(level > 0);
-	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 	bno = be64_to_cpu(*pp);
 	ASSERT(bno != NULLDFSBNO);
 	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -4523,13 +4595,13 @@ xfs_bmap_read_extents(
 		if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
 				XFS_BMAP_BTREE_REF)))
 			return error;
-		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		block = XFS_BUF_TO_BLOCK(bp);
 		XFS_WANT_CORRUPTED_GOTO(
-			XFS_BMAP_SANITY_CHECK(mp, block, level),
+			xfs_bmap_sanity_check(mp, bp, level),
 			error0);
 		if (level == 0)
 			break;
-		pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
 		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
 		xfs_trans_brelse(tp, bp);
@@ -4549,7 +4621,7 @@ xfs_bmap_read_extents(
 		xfs_extnum_t	start;
 
 
-		num_recs = be16_to_cpu(block->bb_numrecs);
+		num_recs = xfs_btree_get_numrecs(block);
 		if (unlikely(i + num_recs > room)) {
 			ASSERT(i + num_recs <= room);
 			xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
@@ -4561,18 +4633,18 @@ xfs_bmap_read_extents(
 			goto error0;
 		}
 		XFS_WANT_CORRUPTED_GOTO(
-			XFS_BMAP_SANITY_CHECK(mp, block, 0),
+			xfs_bmap_sanity_check(mp, bp, 0),
 			error0);
 		/*
 		 * Read-ahead the next leaf block, if any.
 		 */
-		nextbno = be64_to_cpu(block->bb_rightsib);
+		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 		if (nextbno != NULLFSBLOCK)
 			xfs_btree_reada_bufl(mp, nextbno, 1);
 		/*
 		 * Copy records into the extent records.
 		 */
-		frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
+		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
 		start = i;
 		for (j = 0; j < num_recs; j++, i++, frp++) {
 			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
@@ -4603,7 +4675,7 @@ xfs_bmap_read_extents(
 		if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
 				XFS_BMAP_BTREE_REF)))
 			return error;
-		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		block = XFS_BUF_TO_BLOCK(bp);
 	}
 	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
 	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
@@ -5029,8 +5101,7 @@ xfs_bmapi(
 			if (abno == NULLFSBLOCK)
 				break;
 			if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
-				cur = xfs_btree_init_cursor(mp,
-					tp, NULL, 0, XFS_BTNUM_BMAP,
-					ip, whichfork);
+				cur = xfs_bmbt_init_cursor(mp, tp,
+					ip, whichfork);
 				cur->bc_private.b.firstblock =
 					*firstblock;
@@ -5147,9 +5218,8 @@ xfs_bmapi(
 			 */
 			ASSERT(mval->br_blockcount <= len);
 			if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
-				cur = xfs_btree_init_cursor(mp,
-					tp, NULL, 0, XFS_BTNUM_BMAP,
-					ip, whichfork);
+				cur = xfs_bmbt_init_cursor(mp,
+					tp, ip, whichfork);
 				cur->bc_private.b.firstblock =
 					*firstblock;
 				cur->bc_private.b.flist = flist;
@@ -5440,8 +5510,7 @@ xfs_bunmapi(
 	logflags = 0;
 	if (ifp->if_flags & XFS_IFBROOT) {
 		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
-		cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
-			whichfork);
+		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = *firstblock;
 		cur->bc_private.b.flist = flist;
 		cur->bc_private.b.flags = 0;
@@ -5742,14 +5811,17 @@ error0:
 STATIC int
 xfs_getbmapx_fix_eof_hole(
 	xfs_inode_t		*ip,		/* xfs incore inode pointer */
-	struct getbmap		*out,		/* output structure */
+	struct getbmapx		*out,		/* output structure */
 	int			prealloced,	/* this is a file with
 						 * preallocated data space */
 	__int64_t		end,		/* last block requested */
 	xfs_fsblock_t		startblock)
 {
 	__int64_t		fixlen;
 	xfs_mount_t		*mp;		/* file system mount point */
+	xfs_ifork_t		*ifp;		/* inode fork pointer */
+	xfs_extnum_t		lastx;		/* last extent pointer */
+	xfs_fileoff_t		fileblock;
 
 	if (startblock == HOLESTARTBLOCK) {
 		mp = ip->i_mount;
@@ -5763,21 +5835,33 @@ xfs_getbmapx_fix_eof_hole(
 			out->bmv_length = fixlen;
 		}
 	} else {
-		out->bmv_block = XFS_FSB_TO_DB(ip, startblock);
+		if (startblock == DELAYSTARTBLOCK)
+			out->bmv_block = -2;
+		else
+			out->bmv_block = XFS_FSB_TO_DB(ip, startblock);
+		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
+		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+		if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
+		   (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
+			out->bmv_oflags |= BMV_OF_LAST;
 	}
 
 	return 1;
 }
 
 /*
- * Fcntl interface to xfs_bmapi.
+ * Get inode's extents as described in bmv, and format for output.
+ * Calls formatter to fill the user's buffer until all extents
+ * are mapped, until the passed-in bmv->bmv_count slots have
+ * been filled, or until the formatter short-circuits the loop,
+ * if it is tracking filled-in extents on its own.
  */
 int						/* error code */
 xfs_getbmap(
 	xfs_inode_t		*ip,
-	struct getbmap		*bmv,		/* user bmap structure */
-	void			__user *ap,	/* pointer to user's array */
-	int			interface)	/* interface flags */
+	struct getbmapx		*bmv,		/* user bmap structure */
+	xfs_bmap_format_t	formatter,	/* format to user */
+	void			*arg)		/* formatter arg */
 {
 	__int64_t		bmvend;		/* last block requested */
 	int			error;		/* return value */
@@ -5790,19 +5874,17 @@ xfs_getbmap(
 	int			nexleft;	/* # of user extents left */
 	int			subnex;		/* # of bmapi's can do */
 	int			nmap;		/* number of map entries */
-	struct getbmap		out;		/* output structure */
+	struct getbmapx		out;		/* output structure */
 	int			whichfork;	/* data or attr fork */
 	int			prealloced;	/* this is a file with
 						 * preallocated data space */
-	int			sh_unwritten;	/* true, if unwritten */
-						/* extents listed separately */
+	int			iflags;		/* interface flags */
 	int			bmapi_flags;	/* flags for xfs_bmapi */
-	__int32_t		oflags;		/* getbmapx bmv_oflags field */
 
 	mp = ip->i_mount;
+	iflags = bmv->bmv_iflags;
 
-	whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
-	sh_unwritten = (interface & BMV_IF_PREALLOC) != 0;
+	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
 
 	/* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not
 	 * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ
@@ -5817,7 +5899,7 @@ xfs_getbmap(
 	 * could misinterpret holes in a DMAPI file as true holes,
 	 * when in fact they may represent offline user data.
 	 */
-	if ((interface & BMV_IF_NO_DMAPI_READ) == 0 &&
+	if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 &&
 	    DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
 	    whichfork == XFS_DATA_FORK) {
 		error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
@@ -5873,8 +5955,9 @@ xfs_getbmap(
 
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
-	if (whichfork == XFS_DATA_FORK &&
-		(ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) {
+	if (((iflags & BMV_IF_DELALLOC) == 0) &&
+	    (whichfork == XFS_DATA_FORK) &&
+	    (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) {
 		/* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
 		error = xfs_flush_pages(ip, (xfs_off_t)0,
 					-1, 0, FI_REMAPF);
@@ -5884,7 +5967,8 @@ xfs_getbmap(
 		}
 	}
 
-	ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
+	ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) ||
+	       ip->i_delayed_blks == 0);
 
 	lock = xfs_ilock_map_shared(ip);
 
@@ -5896,7 +5980,7 @@ xfs_getbmap(
 	nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
 
 	bmapi_flags = XFS_BMAPI_AFLAG(whichfork) |
-			((sh_unwritten) ? 0 : XFS_BMAPI_IGSTATE);
+			((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE);
 
 	/*
 	 * Allocate enough space to handle "subnex" maps at a time.
@@ -5906,9 +5990,12 @@ xfs_getbmap(
 
 	bmv->bmv_entries = 0;
 
-	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0) {
-		error = 0;
-		goto unlock_and_return;
+	if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) {
+		if (((iflags & BMV_IF_DELALLOC) == 0) ||
+		    whichfork == XFS_ATTR_FORK) {
+			error = 0;
+			goto unlock_and_return;
+		}
 	}
 
 	nexleft = nex;
@@ -5924,52 +6011,40 @@ xfs_getbmap(
 		ASSERT(nmap <= subnex);
 
 		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
-			nexleft--;
-			oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ?
-					BMV_OF_PREALLOC : 0;
+			out.bmv_oflags = 0;
+			if (map[i].br_state == XFS_EXT_UNWRITTEN)
+				out.bmv_oflags |= BMV_OF_PREALLOC;
+			else if (map[i].br_startblock == DELAYSTARTBLOCK)
+				out.bmv_oflags |= BMV_OF_DELALLOC;
 			out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff);
 			out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
+			out.bmv_unused1 = out.bmv_unused2 = 0;
+			ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
+			      (map[i].br_startblock != DELAYSTARTBLOCK));
 			if (map[i].br_startblock == HOLESTARTBLOCK &&
 			    whichfork == XFS_ATTR_FORK) {
 				/* came to the end of attribute fork */
+				out.bmv_oflags |= BMV_OF_LAST;
 				goto unlock_and_return;
 			} else {
+				int full = 0;	/* user array is full */
+
 				if (!xfs_getbmapx_fix_eof_hole(ip, &out,
 							prealloced, bmvend,
 							map[i].br_startblock)) {
 					goto unlock_and_return;
 				}
 
-				/* return either getbmap/getbmapx structure. */
-				if (interface & BMV_IF_EXTENDED) {
-					struct	getbmapx	outx;
-
-					GETBMAP_CONVERT(out,outx);
-					outx.bmv_oflags = oflags;
-					outx.bmv_unused1 = outx.bmv_unused2 = 0;
-					if (copy_to_user(ap, &outx,
-							sizeof(outx))) {
-						error = XFS_ERROR(EFAULT);
-						goto unlock_and_return;
-					}
-				} else {
-					if (copy_to_user(ap, &out,
-							sizeof(out))) {
-						error = XFS_ERROR(EFAULT);
-						goto unlock_and_return;
-					}
-				}
+				/* format results & advance arg */
+				error = formatter(&arg, &out, &full);
+				if (error || full)
+					goto unlock_and_return;
+				nexleft--;
 				bmv->bmv_offset =
 					out.bmv_offset + out.bmv_length;
 				bmv->bmv_length = MAX((__int64_t)0,
 					(__int64_t)(bmvend - bmv->bmv_offset));
 				bmv->bmv_entries++;
-				ap = (interface & BMV_IF_EXTENDED) ?
-						(void __user *)
-						((struct getbmapx __user *)ap + 1) :
-						(void __user *)
-						((struct getbmap __user *)ap + 1);
 			}
 		}
 	} while (nmap && nexleft && bmv->bmv_length);
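Aside (editorial): after this rewrite xfs_getbmap() is transport-agnostic; the caller supplies a formatter invoked as formatter(&arg, &out, &full) per mapping, which may stop the walk by setting *full. A hypothetical in-memory formatter matching that calling convention (the struct is a trimmed mirror of the fields filled above; the kernel's real formatters use copy_to_user() and the full getbmapx layout):

    #include <stdio.h>
    #include <stddef.h>

    /* Hypothetical, trimmed mirror of the fields the loop above fills. */
    struct getbmapx {
    	long long bmv_offset, bmv_block, bmv_length;
    	int bmv_oflags, bmv_unused1, bmv_unused2;
    };

    struct dstbuf { struct getbmapx *next, *end; };

    /* Same shape as the formatter(&arg, &out, &full) call above. */
    static int mem_formatter(void **ap, struct getbmapx *out, int *full)
    {
    	struct dstbuf *dst = *ap;

    	if (dst->next == dst->end) {
    		*full = 1;	/* user array exhausted: stop the walk */
    		return 0;
    	}
    	*dst->next++ = *out;	/* the kernel would copy_to_user() here */
    	return 0;
    }

    int main(void)
    {
    	struct getbmapx slots[2], out = { .bmv_offset = 0, .bmv_length = 8 };
    	struct dstbuf dst = { slots, slots + 2 };
    	void *arg = &dst;
    	int full = 0;

    	while (!full && !mem_formatter(&arg, &out, &full))
    		out.bmv_offset += out.bmv_length;
    	printf("filled %d slots\n", (int)(dst.next - slots));	/* 2 */
    	return 0;
    }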
@@ -6131,7 +6206,7 @@ xfs_bmap_get_bp(
 
 void
 xfs_check_block(
-	xfs_bmbt_block_t	*block,
+	struct xfs_btree_block	*block,
 	xfs_mount_t		*mp,
 	int			root,
 	short			sz)
@@ -6143,36 +6218,29 @@ xfs_check_block(
 	ASSERT(be16_to_cpu(block->bb_level) > 0);
 
 	prevp = NULL;
-	for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) {
+	for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
 		dmxr = mp->m_bmap_dmxr[0];
-
-		if (root) {
-			keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz);
-		} else {
-			keyp = XFS_BTREE_KEY_ADDR(xfs_bmbt, block, i);
-		}
+		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
 
 		if (prevp) {
-			xfs_btree_check_key(XFS_BTNUM_BMAP, prevp, keyp);
+			ASSERT(be64_to_cpu(prevp->br_startoff) <
+			       be64_to_cpu(keyp->br_startoff));
 		}
 		prevp = keyp;
 
 		/*
 		 * Compare the block numbers to see if there are dups.
 		 */
+		if (root)
+			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
+		else
+			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
 
-		if (root) {
-			pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz);
-		} else {
-			pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr);
-		}
 		for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
-			if (root) {
-				thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz);
-			} else {
-				thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j,
-							    dmxr);
-			}
+			if (root)
+				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
+			else
+				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
 			if (*thispa == *pp) {
 				cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
 					__func__, j, i,
@@ -6195,7 +6263,7 @@ xfs_bmap_check_leaf_extents(
 	xfs_inode_t		*ip,		/* incore inode pointer */
 	int			whichfork)	/* data or attr fork */
 {
-	xfs_bmbt_block_t	*block;	/* current btree block */
+	struct xfs_btree_block	*block;	/* current btree block */
 	xfs_fsblock_t		bno;	/* block # of "block" */
 	xfs_buf_t		*bp;	/* buffer for "block" */
 	int			error;	/* error return value */
@@ -6223,7 +6291,7 @@ xfs_bmap_check_leaf_extents(
 	level = be16_to_cpu(block->bb_level);
 	ASSERT(level > 0);
 	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
-	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 	bno = be64_to_cpu(*pp);
 
 	ASSERT(bno != NULLDFSBNO);
@@ -6245,9 +6313,9 @@ xfs_bmap_check_leaf_extents(
 		if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
 				XFS_BMAP_BTREE_REF)))
 			goto error_norelse;
-		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		block = XFS_BUF_TO_BLOCK(bp);
 		XFS_WANT_CORRUPTED_GOTO(
-			XFS_BMAP_SANITY_CHECK(mp, block, level),
+			xfs_bmap_sanity_check(mp, bp, level),
 			error0);
 		if (level == 0)
 			break;
@@ -6258,7 +6326,7 @@ xfs_bmap_check_leaf_extents(
 		 */
 
 		xfs_check_block(block, mp, 0, 0);
-		pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
 		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
 		if (bp_release) {
@@ -6280,13 +6348,13 @@ xfs_bmap_check_leaf_extents(
 		xfs_extnum_t	num_recs;
 
 
-		num_recs = be16_to_cpu(block->bb_numrecs);
+		num_recs = xfs_btree_get_numrecs(block);
 
 		/*
 		 * Read-ahead the next leaf block, if any.
 		 */
 
-		nextbno = be64_to_cpu(block->bb_rightsib);
+		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 
 		/*
 		 * Check all the extents to make sure they are OK.
@@ -6294,13 +6362,17 @@ xfs_bmap_check_leaf_extents(
 		 * conform with the first entry in this one.
 		 */
 
-		ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
+		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
 		if (i) {
-			xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep);
+			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
+			       xfs_bmbt_disk_get_blockcount(&last) <=
+			       xfs_bmbt_disk_get_startoff(ep));
 		}
 		for (j = 1; j < num_recs; j++) {
-			nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1);
-			xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp);
+			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
+			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
+			       xfs_bmbt_disk_get_blockcount(ep) <=
+			       xfs_bmbt_disk_get_startoff(nextp));
 			ep = nextp;
 		}
 
@@ -6326,7 +6398,7 @@ xfs_bmap_check_leaf_extents(
 		if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
 				XFS_BMAP_BTREE_REF)))
 			goto error_norelse;
-		block = XFS_BUF_TO_BMBT_BLOCK(bp);
+		block = XFS_BUF_TO_BLOCK(bp);
 	}
 	if (bp_release) {
 		bp_release = 0;
@@ -6356,7 +6428,7 @@ xfs_bmap_count_blocks(
 	int			whichfork,	/* data or attr fork */
 	int			*count)		/* out: count of blocks */
 {
-	xfs_bmbt_block_t	*block;	/* current btree block */
+	struct xfs_btree_block	*block;	/* current btree block */
 	xfs_fsblock_t		bno;	/* block # of "block" */
 	xfs_ifork_t		*ifp;	/* fork structure */
 	int			level;	/* btree level, for checking */
@@ -6379,7 +6451,7 @@ xfs_bmap_count_blocks(
 		block = ifp->if_broot;
 		level = be16_to_cpu(block->bb_level);
 		ASSERT(level > 0);
-		pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
+		pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
 		bno = be64_to_cpu(*pp);
 		ASSERT(bno != NULLDFSBNO);
 		ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
@@ -6413,29 +6485,29 @@ xfs_bmap_count_tree(
 	__be64			*pp;
 	xfs_fsblock_t		bno = blockno;
 	xfs_fsblock_t		nextbno;
-	xfs_bmbt_block_t	*block, *nextblock;
+	struct xfs_btree_block	*block, *nextblock;
 	int			numrecs;
 
 	if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
 		return error;
 	*count += 1;
-	block = XFS_BUF_TO_BMBT_BLOCK(bp);
+	block = XFS_BUF_TO_BLOCK(bp);
 
 	if (--level) {
 		/* Not at node above leafs, count this level of nodes */
-		nextbno = be64_to_cpu(block->bb_rightsib);
+		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 		while (nextbno != NULLFSBLOCK) {
 			if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
 				0, &nbp, XFS_BMAP_BTREE_REF)))
 				return error;
 			*count += 1;
-			nextblock = XFS_BUF_TO_BMBT_BLOCK(nbp);
-			nextbno = be64_to_cpu(nextblock->bb_rightsib);
+			nextblock = XFS_BUF_TO_BLOCK(nbp);
+			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
 			xfs_trans_brelse(tp, nbp);
 		}
 
 		/* Dive to the next level */
-		pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
 		if (unlikely((error =
 			xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
@@ -6448,9 +6520,9 @@ xfs_bmap_count_tree(
 	} else {
 		/* count all level 1 nodes and their leaves */
 		for (;;) {
-			nextbno = be64_to_cpu(block->bb_rightsib);
+			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
 			numrecs = be16_to_cpu(block->bb_numrecs);
-			xfs_bmap_disk_count_leaves(0, block, numrecs, count);
+			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
 			xfs_trans_brelse(tp, bp);
 			if (nextbno == NULLFSBLOCK)
 				break;
@@ -6459,7 +6531,7 @@ xfs_bmap_count_tree(
 				XFS_BMAP_BTREE_REF)))
 				return error;
 			*count += 1;
-			block = XFS_BUF_TO_BMBT_BLOCK(bp);
+			block = XFS_BUF_TO_BLOCK(bp);
 		}
 	}
 	return 0;
@@ -6489,8 +6561,8 @@ xfs_bmap_count_leaves(
  */
 STATIC void
 xfs_bmap_disk_count_leaves(
-	xfs_extnum_t		idx,
-	xfs_bmbt_block_t	*block,
+	struct xfs_mount	*mp,
+	struct xfs_btree_block	*block,
 	int			numrecs,
 	int			*count)
 {
@@ -6498,7 +6570,7 @@ xfs_bmap_disk_count_leaves(
 	xfs_bmbt_rec_t	*frp;
 
 	for (b = 1; b <= numrecs; b++) {
-		frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b);
+		frp = XFS_BMBT_REC_ADDR(mp, block, b);
 		*count += xfs_bmbt_disk_get_blockcount(frp);
 	}
 }
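
Taken together, xfs_bmap_count_tree and xfs_bmap_disk_count_leaves descend to the leaf level and then walk the right-sibling chain, summing the blockcount of every record. A user-space sketch of that walk over a hypothetical in-memory leaf chain (the kernel reads each block through xfs_btree_read_bufl instead):

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical in-memory model of one leaf level of the tree. */
    struct leaf {
            int             nrecs;
            uint64_t        *blockcounts;   /* per-record block counts */
            struct leaf     *rightsib;      /* NULL at the end of the level */
    };

    /* Sum the blocks mapped by every record in every leaf. */
    static uint64_t count_leaf_blocks(const struct leaf *lf)
    {
            uint64_t count = 0;

            for (; lf != NULL; lf = lf->rightsib)
                    for (int i = 0; i < lf->nrecs; i++)
                            count += lf->blockcounts[i];
            return count;
    }
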
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 9f3e3a836d15..284571c05ed0 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -137,9 +137,7 @@ typedef struct xfs_bmalloca {
 	char			conv;	/* overwriting unwritten extents */
 } xfs_bmalloca_t;
 
-#ifdef __KERNEL__
-
-#if defined(XFS_BMAP_TRACE)
+#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE)
 /*
  * Trace operations for bmap extent tracing
  */
@@ -163,9 +161,12 @@ xfs_bmap_trace_exlist(
 	int			whichfork);	/* data or attr fork */
 #define	XFS_BMAP_TRACE_EXLIST(ip,c,w)	\
 	xfs_bmap_trace_exlist(__func__,ip,c,w)
-#else
+
+#else	/* __KERNEL__ && XFS_BMAP_TRACE */
+
 #define	XFS_BMAP_TRACE_EXLIST(ip,c,w)
-#endif
+
+#endif	/* __KERNEL__ && XFS_BMAP_TRACE */
 
 /*
  * Convert inode from non-attributed to attributed.
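
The header now folds the nested __KERNEL__ and XFS_BMAP_TRACE guards into a single condition; the macro still follows the usual compile-out pattern, expanding to a real call in traced kernel builds and to nothing otherwise. A generic sketch of that pattern (TRACE_FOO and do_trace are illustrative names, not XFS symbols):

    #if defined(__KERNEL__) && defined(XFS_BMAP_TRACE)
    /* Traced build: the macro forwards to a real tracing function. */
    void do_trace(const char *func, int arg);
    #define TRACE_FOO(arg)  do_trace(__func__, (arg))
    #else
    /* Untraced build: the macro compiles away entirely. */
    #define TRACE_FOO(arg)  ((void)0)
    #endif

    static int compute(int x)
    {
            TRACE_FOO(x);   /* zero cost unless tracing is compiled in */
            return x * 2;
    }
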
@@ -206,20 +207,6 @@ xfs_bmap_compute_maxlevels(
 	int		whichfork);	/* data or attr fork */
 
 /*
- * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
- * caller.  Frees all the extents that need freeing, which must be done
- * last due to locking considerations.
- *
- * Return 1 if the given transaction was committed and a new one allocated,
- * and 0 otherwise.
- */
-int						/* error */
-xfs_bmap_finish(
-	struct xfs_trans	**tp,		/* transaction pointer addr */
-	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
-	int			*committed);	/* xact committed or not */
-
-/*
  * Returns the file-relative block number of the first unused block in the file.
  * This is the lowest-address hole if the file has holes, else the first block
  * past the end of file.
@@ -344,14 +331,43 @@ xfs_bunmapi(
 	int		*done);		/* set if not done yet */
 
 /*
- * Fcntl interface to xfs_bmapi.
+ * Check an extent list, which has just been read, for
+ * any bit in the extent flag field.
+ */
+int
+xfs_check_nostate_extents(
+	struct xfs_ifork	*ifp,
+	xfs_extnum_t		idx,
+	xfs_extnum_t		num);
+
+#ifdef __KERNEL__
+
+/*
+ * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
+ * caller.  Frees all the extents that need freeing, which must be done
+ * last due to locking considerations.
+ *
+ * Return 1 if the given transaction was committed and a new one allocated,
+ * and 0 otherwise.
+ */
+int						/* error */
+xfs_bmap_finish(
+	struct xfs_trans	**tp,		/* transaction pointer addr */
+	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
+	int			*committed);	/* xact committed or not */
+
+/* bmap to userspace formatter - copy to user & advance pointer */
+typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
+
+/*
+ * Get inode's extents as described in bmv, and format for output.
  */
 int						/* error code */
 xfs_getbmap(
 	xfs_inode_t		*ip,
-	struct getbmap		*bmv,	/* user bmap structure */
-	void			__user *ap,	/* pointer to user's array */
-	int			iflags);	/* interface flags */
+	struct getbmapx		*bmv,	/* user bmap structure */
+	xfs_bmap_format_t	formatter, /* format to user */
+	void			*arg);	/* formatter arg */
 
 /*
  * Check if the endoff is outside the last extent. If so the caller will grow
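
The xfs_getbmap signature change replaces the raw user pointer with an xfs_bmap_format_t callback, so the extent walker no longer needs to know how records reach their destination. A hedged sketch of what one formatter might look like (the struct layout is a minimal stand-in for the real getbmapx, and an in-kernel formatter would use copy_to_user() rather than memcpy):

    #include <string.h>

    /* Minimal stand-in for the real struct getbmapx from the XFS headers. */
    struct getbmapx {
            long long bmv_offset;   /* file offset of segment */
            long long bmv_block;    /* starting block of segment */
            long long bmv_length;   /* length of segment */
    };

    /* Matches the new callback shape:
     * typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); */
    static int my_format_one(void **ap, struct getbmapx *bmv, int *full)
    {
            /* Copy one record out and advance the destination cursor. */
            memcpy(*ap, bmv, sizeof(*bmv));
            *ap = (char *)*ap + sizeof(*bmv);
            *full = 0;      /* set to 1 to stop the extent walk early */
            return 0;       /* 0 on success, negative errno on failure */
    }
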
@@ -375,16 +391,6 @@ xfs_bmap_count_blocks(
 	int			*count);
 
 /*
- * Check an extent list, which has just been read, for
- * any bit in the extent flag field.
- */
-int
-xfs_check_nostate_extents(
-	struct xfs_ifork	*ifp,
-	xfs_extnum_t		idx,
-	xfs_extnum_t		num);
-
-/*
  * Search the extent records for the entry containing block bno.
  * If bno lies in a hole, point to the next entry.  If bno lies
  * past eof, *eofp will be set, and *prevp will contain the last
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 23efad29a5cd..8f1ec73725d3 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -37,1406 +37,13 @@
 #include "xfs_inode_item.h"
 #include "xfs_alloc.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_ialloc.h"
 #include "xfs_itable.h"
 #include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
 
46#if defined(XFS_BMBT_TRACE)
47ktrace_t *xfs_bmbt_trace_buf;
48#endif
49
50/*
51 * Prototypes for internal btree functions.
52 */
53
54
55STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *);
56STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
57STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
58STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *);
59STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *);
60STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *,
61 __uint64_t *, xfs_btree_cur_t **, int *);
62STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int);
63
64
65#if defined(XFS_BMBT_TRACE)
66
67static char ARGS[] = "args";
68static char ENTRY[] = "entry";
69static char ERROR[] = "error";
70#undef EXIT
71static char EXIT[] = "exit";
72
73/*
74 * Add a trace buffer entry for the arguments given to the routine,
75 * generic form.
76 */
77STATIC void
78xfs_bmbt_trace_enter(
79 const char *func,
80 xfs_btree_cur_t *cur,
81 char *s,
82 int type,
83 int line,
84 __psunsigned_t a0,
85 __psunsigned_t a1,
86 __psunsigned_t a2,
87 __psunsigned_t a3,
88 __psunsigned_t a4,
89 __psunsigned_t a5,
90 __psunsigned_t a6,
91 __psunsigned_t a7,
92 __psunsigned_t a8,
93 __psunsigned_t a9,
94 __psunsigned_t a10)
95{
96 xfs_inode_t *ip;
97 int whichfork;
98
99 ip = cur->bc_private.b.ip;
100 whichfork = cur->bc_private.b.whichfork;
101 ktrace_enter(xfs_bmbt_trace_buf,
102 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
103 (void *)func, (void *)s, (void *)ip, (void *)cur,
104 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
105 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
106 (void *)a8, (void *)a9, (void *)a10);
107 ASSERT(ip->i_btrace);
108 ktrace_enter(ip->i_btrace,
109 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
110 (void *)func, (void *)s, (void *)ip, (void *)cur,
111 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
112 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
113 (void *)a8, (void *)a9, (void *)a10);
114}
115/*
116 * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
117 */
118STATIC void
119xfs_bmbt_trace_argbi(
120 const char *func,
121 xfs_btree_cur_t *cur,
122 xfs_buf_t *b,
123 int i,
124 int line)
125{
126 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBI, line,
127 (__psunsigned_t)b, i, 0, 0,
128 0, 0, 0, 0,
129 0, 0, 0);
130}
131
132/*
133 * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
134 */
135STATIC void
136xfs_bmbt_trace_argbii(
137 const char *func,
138 xfs_btree_cur_t *cur,
139 xfs_buf_t *b,
140 int i0,
141 int i1,
142 int line)
143{
144 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBII, line,
145 (__psunsigned_t)b, i0, i1, 0,
146 0, 0, 0, 0,
147 0, 0, 0);
148}
149
150/*
151 * Add a trace buffer entry for arguments, for 3 block-length args
152 * and an integer arg.
153 */
154STATIC void
155xfs_bmbt_trace_argfffi(
156 const char *func,
157 xfs_btree_cur_t *cur,
158 xfs_dfiloff_t o,
159 xfs_dfsbno_t b,
160 xfs_dfilblks_t i,
161 int j,
162 int line)
163{
164 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGFFFI, line,
165 o >> 32, (int)o, b >> 32, (int)b,
166 i >> 32, (int)i, (int)j, 0,
167 0, 0, 0);
168}
169
170/*
171 * Add a trace buffer entry for arguments, for one integer arg.
172 */
173STATIC void
174xfs_bmbt_trace_argi(
175 const char *func,
176 xfs_btree_cur_t *cur,
177 int i,
178 int line)
179{
180 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGI, line,
181 i, 0, 0, 0,
182 0, 0, 0, 0,
183 0, 0, 0);
184}
185
186/*
187 * Add a trace buffer entry for arguments, for int, fsblock, key.
188 */
189STATIC void
190xfs_bmbt_trace_argifk(
191 const char *func,
192 xfs_btree_cur_t *cur,
193 int i,
194 xfs_fsblock_t f,
195 xfs_dfiloff_t o,
196 int line)
197{
198 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
199 i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32,
200 (int)o, 0, 0, 0,
201 0, 0, 0);
202}
203
204/*
205 * Add a trace buffer entry for arguments, for int, fsblock, rec.
206 */
207STATIC void
208xfs_bmbt_trace_argifr(
209 const char *func,
210 xfs_btree_cur_t *cur,
211 int i,
212 xfs_fsblock_t f,
213 xfs_bmbt_rec_t *r,
214 int line)
215{
216 xfs_dfsbno_t b;
217 xfs_dfilblks_t c;
218 xfs_dfsbno_t d;
219 xfs_dfiloff_t o;
220 xfs_bmbt_irec_t s;
221
222 d = (xfs_dfsbno_t)f;
223 xfs_bmbt_disk_get_all(r, &s);
224 o = (xfs_dfiloff_t)s.br_startoff;
225 b = (xfs_dfsbno_t)s.br_startblock;
226 c = s.br_blockcount;
227 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFR, line,
228 i, d >> 32, (int)d, o >> 32,
229 (int)o, b >> 32, (int)b, c >> 32,
230 (int)c, 0, 0);
231}
232
233/*
234 * Add a trace buffer entry for arguments, for int, key.
235 */
236STATIC void
237xfs_bmbt_trace_argik(
238 const char *func,
239 xfs_btree_cur_t *cur,
240 int i,
241 xfs_bmbt_key_t *k,
242 int line)
243{
244 xfs_dfiloff_t o;
245
246 o = be64_to_cpu(k->br_startoff);
247 xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line,
248 i, o >> 32, (int)o, 0,
249 0, 0, 0, 0,
250 0, 0, 0);
251}
252
253/*
254 * Add a trace buffer entry for the cursor/operation.
255 */
256STATIC void
257xfs_bmbt_trace_cursor(
258 const char *func,
259 xfs_btree_cur_t *cur,
260 char *s,
261 int line)
262{
263 xfs_bmbt_rec_host_t r;
264
265 xfs_bmbt_set_all(&r, &cur->bc_rec.b);
266 xfs_bmbt_trace_enter(func, cur, s, XFS_BMBT_KTRACE_CUR, line,
267 (cur->bc_nlevels << 24) | (cur->bc_private.b.flags << 16) |
268 cur->bc_private.b.allocated,
269 r.l0 >> 32, (int)r.l0,
270 r.l1 >> 32, (int)r.l1,
271 (unsigned long)cur->bc_bufs[0], (unsigned long)cur->bc_bufs[1],
272 (unsigned long)cur->bc_bufs[2], (unsigned long)cur->bc_bufs[3],
273 (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
274 (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
275}
276
277#define XFS_BMBT_TRACE_ARGBI(c,b,i) \
278 xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__)
279#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \
280 xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__)
281#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \
282 xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
283#define XFS_BMBT_TRACE_ARGI(c,i) \
284 xfs_bmbt_trace_argi(__func__, c, i, __LINE__)
285#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \
286 xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__)
287#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \
288 xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__)
289#define XFS_BMBT_TRACE_ARGIK(c,i,k) \
290 xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__)
291#define XFS_BMBT_TRACE_CURSOR(c,s) \
292 xfs_bmbt_trace_cursor(__func__, c, s, __LINE__)
293#else
294#define XFS_BMBT_TRACE_ARGBI(c,b,i)
295#define XFS_BMBT_TRACE_ARGBII(c,b,i,j)
296#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j)
297#define XFS_BMBT_TRACE_ARGI(c,i)
298#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s)
299#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r)
300#define XFS_BMBT_TRACE_ARGIK(c,i,k)
301#define XFS_BMBT_TRACE_CURSOR(c,s)
302#endif /* XFS_BMBT_TRACE */
303
304
305/*
306 * Internal functions.
307 */
308
309/*
310 * Delete record pointed to by cur/level.
311 */
312STATIC int /* error */
313xfs_bmbt_delrec(
314 xfs_btree_cur_t *cur,
315 int level,
316 int *stat) /* success/failure */
317{
318 xfs_bmbt_block_t *block; /* bmap btree block */
319 xfs_fsblock_t bno; /* fs-relative block number */
320 xfs_buf_t *bp; /* buffer for block */
321 int error; /* error return value */
322 int i; /* loop counter */
323 int j; /* temp state */
324 xfs_bmbt_key_t key; /* bmap btree key */
325 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
326 xfs_fsblock_t lbno; /* left sibling block number */
327 xfs_buf_t *lbp; /* left buffer pointer */
328 xfs_bmbt_block_t *left; /* left btree block */
329 xfs_bmbt_key_t *lkp; /* left btree key */
330 xfs_bmbt_ptr_t *lpp; /* left address pointer */
331 int lrecs=0; /* left record count */
332 xfs_bmbt_rec_t *lrp; /* left record pointer */
333 xfs_mount_t *mp; /* file system mount point */
334 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
335 int ptr; /* key/record index */
336 xfs_fsblock_t rbno; /* right sibling block number */
337 xfs_buf_t *rbp; /* right buffer pointer */
338 xfs_bmbt_block_t *right; /* right btree block */
339 xfs_bmbt_key_t *rkp; /* right btree key */
340 xfs_bmbt_rec_t *rp; /* pointer to bmap btree rec */
341 xfs_bmbt_ptr_t *rpp; /* right address pointer */
342 xfs_bmbt_block_t *rrblock; /* right-right btree block */
343 xfs_buf_t *rrbp; /* right-right buffer pointer */
344 int rrecs=0; /* right record count */
345 xfs_bmbt_rec_t *rrp; /* right record pointer */
346 xfs_btree_cur_t *tcur; /* temporary btree cursor */
347 int numrecs; /* temporary numrec count */
348 int numlrecs, numrrecs;
349
350 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
351 XFS_BMBT_TRACE_ARGI(cur, level);
352 ptr = cur->bc_ptrs[level];
353 tcur = NULL;
354 if (ptr == 0) {
355 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
356 *stat = 0;
357 return 0;
358 }
359 block = xfs_bmbt_get_block(cur, level, &bp);
360 numrecs = be16_to_cpu(block->bb_numrecs);
361#ifdef DEBUG
362 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
363 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
364 goto error0;
365 }
366#endif
367 if (ptr > numrecs) {
368 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
369 *stat = 0;
370 return 0;
371 }
372 XFS_STATS_INC(xs_bmbt_delrec);
373 if (level > 0) {
374 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
375 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
376#ifdef DEBUG
377 for (i = ptr; i < numrecs; i++) {
378 if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
379 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
380 goto error0;
381 }
382 }
383#endif
384 if (ptr < numrecs) {
385 memmove(&kp[ptr - 1], &kp[ptr],
386 (numrecs - ptr) * sizeof(*kp));
387 memmove(&pp[ptr - 1], &pp[ptr],
388 (numrecs - ptr) * sizeof(*pp));
389 xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs - 1);
390 xfs_bmbt_log_keys(cur, bp, ptr, numrecs - 1);
391 }
392 } else {
393 rp = XFS_BMAP_REC_IADDR(block, 1, cur);
394 if (ptr < numrecs) {
395 memmove(&rp[ptr - 1], &rp[ptr],
396 (numrecs - ptr) * sizeof(*rp));
397 xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1);
398 }
399 if (ptr == 1) {
400 key.br_startoff =
401 cpu_to_be64(xfs_bmbt_disk_get_startoff(rp));
402 kp = &key;
403 }
404 }
405 numrecs--;
406 block->bb_numrecs = cpu_to_be16(numrecs);
407 xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
408 /*
409 * We're at the root level.
410 * First, shrink the root block in-memory.
411 * Try to get rid of the next level down.
412 * If we can't then there's nothing left to do.
413 */
414 if (level == cur->bc_nlevels - 1) {
415 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
416 cur->bc_private.b.whichfork);
417 if ((error = xfs_bmbt_killroot(cur))) {
418 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
419 goto error0;
420 }
421 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
422 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
423 goto error0;
424 }
425 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
426 *stat = 1;
427 return 0;
428 }
429 if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) {
430 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
431 goto error0;
432 }
433 if (numrecs >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
434 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) {
435 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
436 goto error0;
437 }
438 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
439 *stat = 1;
440 return 0;
441 }
442 rbno = be64_to_cpu(block->bb_rightsib);
443 lbno = be64_to_cpu(block->bb_leftsib);
444 /*
445 * One child of root, need to get a chance to copy its contents
446 * into the root and delete it. Can't go up to next level,
447 * there's nothing to delete there.
448 */
449 if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK &&
450 level == cur->bc_nlevels - 2) {
451 if ((error = xfs_bmbt_killroot(cur))) {
452 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
453 goto error0;
454 }
455 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
456 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
457 goto error0;
458 }
459 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
460 *stat = 1;
461 return 0;
462 }
463 ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK);
464 if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
465 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
466 goto error0;
467 }
468 bno = NULLFSBLOCK;
469 if (rbno != NULLFSBLOCK) {
470 i = xfs_btree_lastrec(tcur, level);
471 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
472 if ((error = xfs_bmbt_increment(tcur, level, &i))) {
473 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
474 goto error0;
475 }
476 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
477 i = xfs_btree_lastrec(tcur, level);
478 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
479 rbp = tcur->bc_bufs[level];
480 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
481#ifdef DEBUG
482 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
483 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
484 goto error0;
485 }
486#endif
487 bno = be64_to_cpu(right->bb_leftsib);
488 if (be16_to_cpu(right->bb_numrecs) - 1 >=
489 XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
490 if ((error = xfs_bmbt_lshift(tcur, level, &i))) {
491 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
492 goto error0;
493 }
494 if (i) {
495 ASSERT(be16_to_cpu(block->bb_numrecs) >=
496 XFS_BMAP_BLOCK_IMINRECS(level, tcur));
497 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
498 tcur = NULL;
499 if (level > 0) {
500 if ((error = xfs_bmbt_decrement(cur,
501 level, &i))) {
502 XFS_BMBT_TRACE_CURSOR(cur,
503 ERROR);
504 goto error0;
505 }
506 }
507 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
508 *stat = 1;
509 return 0;
510 }
511 }
512 rrecs = be16_to_cpu(right->bb_numrecs);
513 if (lbno != NULLFSBLOCK) {
514 i = xfs_btree_firstrec(tcur, level);
515 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
516 if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
517 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
518 goto error0;
519 }
520 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
521 }
522 }
523 if (lbno != NULLFSBLOCK) {
524 i = xfs_btree_firstrec(tcur, level);
525 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
526 /*
527 * decrement to last in block
528 */
529 if ((error = xfs_bmbt_decrement(tcur, level, &i))) {
530 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
531 goto error0;
532 }
533 i = xfs_btree_firstrec(tcur, level);
534 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
535 lbp = tcur->bc_bufs[level];
536 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
537#ifdef DEBUG
538 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
539 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
540 goto error0;
541 }
542#endif
543 bno = be64_to_cpu(left->bb_rightsib);
544 if (be16_to_cpu(left->bb_numrecs) - 1 >=
545 XFS_BMAP_BLOCK_IMINRECS(level, cur)) {
546 if ((error = xfs_bmbt_rshift(tcur, level, &i))) {
547 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
548 goto error0;
549 }
550 if (i) {
551 ASSERT(be16_to_cpu(block->bb_numrecs) >=
552 XFS_BMAP_BLOCK_IMINRECS(level, tcur));
553 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
554 tcur = NULL;
555 if (level == 0)
556 cur->bc_ptrs[0]++;
557 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
558 *stat = 1;
559 return 0;
560 }
561 }
562 lrecs = be16_to_cpu(left->bb_numrecs);
563 }
564 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
565 tcur = NULL;
566 mp = cur->bc_mp;
567 ASSERT(bno != NULLFSBLOCK);
568 if (lbno != NULLFSBLOCK &&
569 lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
570 rbno = bno;
571 right = block;
572 rbp = bp;
573 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp,
574 XFS_BMAP_BTREE_REF))) {
575 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
576 goto error0;
577 }
578 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
579 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
580 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
581 goto error0;
582 }
583 } else if (rbno != NULLFSBLOCK &&
584 rrecs + be16_to_cpu(block->bb_numrecs) <=
585 XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
586 lbno = bno;
587 left = block;
588 lbp = bp;
589 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, rbno, 0, &rbp,
590 XFS_BMAP_BTREE_REF))) {
591 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
592 goto error0;
593 }
594 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
595 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
596 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
597 goto error0;
598 }
599 lrecs = be16_to_cpu(left->bb_numrecs);
600 } else {
601 if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) {
602 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
603 goto error0;
604 }
605 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
606 *stat = 1;
607 return 0;
608 }
609 numlrecs = be16_to_cpu(left->bb_numrecs);
610 numrrecs = be16_to_cpu(right->bb_numrecs);
611 if (level > 0) {
612 lkp = XFS_BMAP_KEY_IADDR(left, numlrecs + 1, cur);
613 lpp = XFS_BMAP_PTR_IADDR(left, numlrecs + 1, cur);
614 rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
615 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
616#ifdef DEBUG
617 for (i = 0; i < numrrecs; i++) {
618 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
619 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
620 goto error0;
621 }
622 }
623#endif
624 memcpy(lkp, rkp, numrrecs * sizeof(*lkp));
625 memcpy(lpp, rpp, numrrecs * sizeof(*lpp));
626 xfs_bmbt_log_keys(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
627 xfs_bmbt_log_ptrs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
628 } else {
629 lrp = XFS_BMAP_REC_IADDR(left, numlrecs + 1, cur);
630 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
631 memcpy(lrp, rrp, numrrecs * sizeof(*lrp));
632 xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs);
633 }
634 be16_add_cpu(&left->bb_numrecs, numrrecs);
635 left->bb_rightsib = right->bb_rightsib;
636 xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS);
637 if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) {
638 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp,
639 be64_to_cpu(left->bb_rightsib),
640 0, &rrbp, XFS_BMAP_BTREE_REF))) {
641 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
642 goto error0;
643 }
644 rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
645 if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
646 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
647 goto error0;
648 }
649 rrblock->bb_leftsib = cpu_to_be64(lbno);
650 xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
651 }
652 xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1,
653 cur->bc_private.b.flist, mp);
654 cur->bc_private.b.ip->i_d.di_nblocks--;
655 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
656 XFS_TRANS_MOD_DQUOT_BYINO(mp, cur->bc_tp, cur->bc_private.b.ip,
657 XFS_TRANS_DQ_BCOUNT, -1L);
658 xfs_trans_binval(cur->bc_tp, rbp);
659 if (bp != lbp) {
660 cur->bc_bufs[level] = lbp;
661 cur->bc_ptrs[level] += lrecs;
662 cur->bc_ra[level] = 0;
663 } else if ((error = xfs_bmbt_increment(cur, level + 1, &i))) {
664 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
665 goto error0;
666 }
667 if (level > 0)
668 cur->bc_ptrs[level]--;
669 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
670 *stat = 2;
671 return 0;
672
673error0:
674 if (tcur)
675 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
676 return error;
677}
678
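
The rebalance policy buried in xfs_bmbt_delrec above (and preserved in the generic btree delete that replaces it) is easier to see in isolation: after removing a record, a block below its minimum first tries to borrow one record from a sibling, and merges with a sibling only when the combined record counts fit in a single block. A compact sketch of that decision, with hypothetical names:

    /* Hypothetical summary of the rebalance choice in a btree delete. */
    enum rebalance { BORROW_LEFT, BORROW_RIGHT, MERGE_LEFT, MERGE_RIGHT, NONE };

    static enum rebalance
    pick_rebalance(int nrecs, int minrecs, int maxrecs,
                   int lrecs /* -1 if no left sibling */,
                   int rrecs /* -1 if no right sibling */)
    {
            if (nrecs >= minrecs)
                    return NONE;            /* still legal, nothing to do */
            if (rrecs - 1 >= minrecs)
                    return BORROW_RIGHT;    /* lshift: pull one from right */
            if (lrecs - 1 >= minrecs)
                    return BORROW_LEFT;     /* rshift: pull one from left */
            if (lrecs >= 0 && lrecs + nrecs <= maxrecs)
                    return MERGE_LEFT;      /* combine into left sibling */
            if (rrecs >= 0 && rrecs + nrecs <= maxrecs)
                    return MERGE_RIGHT;     /* combine into right sibling */
            return NONE;
    }
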
679/*
680 * Insert one record/level. Return information to the caller
681 * allowing the next level up to proceed if necessary.
682 */
683STATIC int /* error */
684xfs_bmbt_insrec(
685 xfs_btree_cur_t *cur,
686 int level,
687 xfs_fsblock_t *bnop,
688 xfs_bmbt_rec_t *recp,
689 xfs_btree_cur_t **curp,
690 int *stat) /* no-go/done/continue */
691{
692 xfs_bmbt_block_t *block; /* bmap btree block */
693 xfs_buf_t *bp; /* buffer for block */
694 int error; /* error return value */
695 int i; /* loop index */
696 xfs_bmbt_key_t key; /* bmap btree key */
697 xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */
698 int logflags; /* inode logging flags */
699 xfs_fsblock_t nbno; /* new block number */
700 struct xfs_btree_cur *ncur; /* new btree cursor */
701 __uint64_t startoff; /* new btree key value */
702 xfs_bmbt_rec_t nrec; /* new record count */
703 int optr; /* old key/record index */
704 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
705 int ptr; /* key/record index */
706 xfs_bmbt_rec_t *rp=NULL; /* pointer to bmap btree rec */
707 int numrecs;
708
709 ASSERT(level < cur->bc_nlevels);
710 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
711 XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp);
712 ncur = NULL;
713 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp));
714 optr = ptr = cur->bc_ptrs[level];
715 if (ptr == 0) {
716 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
717 *stat = 0;
718 return 0;
719 }
720 XFS_STATS_INC(xs_bmbt_insrec);
721 block = xfs_bmbt_get_block(cur, level, &bp);
722 numrecs = be16_to_cpu(block->bb_numrecs);
723#ifdef DEBUG
724 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
725 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
726 return error;
727 }
728 if (ptr <= numrecs) {
729 if (level == 0) {
730 rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
731 xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp);
732 } else {
733 kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
734 xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp);
735 }
736 }
737#endif
738 nbno = NULLFSBLOCK;
739 if (numrecs == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
740 if (numrecs < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
741 /*
742 * A root block, that can be made bigger.
743 */
744 xfs_iroot_realloc(cur->bc_private.b.ip, 1,
745 cur->bc_private.b.whichfork);
746 block = xfs_bmbt_get_block(cur, level, &bp);
747 } else if (level == cur->bc_nlevels - 1) {
748 if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) ||
749 *stat == 0) {
750 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
751 return error;
752 }
753 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
754 logflags);
755 block = xfs_bmbt_get_block(cur, level, &bp);
756 } else {
757 if ((error = xfs_bmbt_rshift(cur, level, &i))) {
758 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
759 return error;
760 }
761 if (i) {
762 /* nothing */
763 } else {
764 if ((error = xfs_bmbt_lshift(cur, level, &i))) {
765 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
766 return error;
767 }
768 if (i) {
769 optr = ptr = cur->bc_ptrs[level];
770 } else {
771 if ((error = xfs_bmbt_split(cur, level,
772 &nbno, &startoff, &ncur,
773 &i))) {
774 XFS_BMBT_TRACE_CURSOR(cur,
775 ERROR);
776 return error;
777 }
778 if (i) {
779 block = xfs_bmbt_get_block(
780 cur, level, &bp);
781#ifdef DEBUG
782 if ((error =
783 xfs_btree_check_lblock(cur,
784 block, level, bp))) {
785 XFS_BMBT_TRACE_CURSOR(
786 cur, ERROR);
787 return error;
788 }
789#endif
790 ptr = cur->bc_ptrs[level];
791 xfs_bmbt_disk_set_allf(&nrec,
792 startoff, 0, 0,
793 XFS_EXT_NORM);
794 } else {
795 XFS_BMBT_TRACE_CURSOR(cur,
796 EXIT);
797 *stat = 0;
798 return 0;
799 }
800 }
801 }
802 }
803 }
804 numrecs = be16_to_cpu(block->bb_numrecs);
805 if (level > 0) {
806 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
807 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
808#ifdef DEBUG
809 for (i = numrecs; i >= ptr; i--) {
810 if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1],
811 level))) {
812 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
813 return error;
814 }
815 }
816#endif
817 memmove(&kp[ptr], &kp[ptr - 1],
818 (numrecs - ptr + 1) * sizeof(*kp));
819 memmove(&pp[ptr], &pp[ptr - 1],
820 (numrecs - ptr + 1) * sizeof(*pp));
821#ifdef DEBUG
822 if ((error = xfs_btree_check_lptr(cur, *bnop, level))) {
823 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
824 return error;
825 }
826#endif
827 kp[ptr - 1] = key;
828 pp[ptr - 1] = cpu_to_be64(*bnop);
829 numrecs++;
830 block->bb_numrecs = cpu_to_be16(numrecs);
831 xfs_bmbt_log_keys(cur, bp, ptr, numrecs);
832 xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs);
833 } else {
834 rp = XFS_BMAP_REC_IADDR(block, 1, cur);
835 memmove(&rp[ptr], &rp[ptr - 1],
836 (numrecs - ptr + 1) * sizeof(*rp));
837 rp[ptr - 1] = *recp;
838 numrecs++;
839 block->bb_numrecs = cpu_to_be16(numrecs);
840 xfs_bmbt_log_recs(cur, bp, ptr, numrecs);
841 }
842 xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS);
843#ifdef DEBUG
844 if (ptr < numrecs) {
845 if (level == 0)
846 xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1,
847 rp + ptr);
848 else
849 xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1,
850 kp + ptr);
851 }
852#endif
853 if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) {
854 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
855 return error;
856 }
857 *bnop = nbno;
858 if (nbno != NULLFSBLOCK) {
859 *recp = nrec;
860 *curp = ncur;
861 }
862 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
863 *stat = 1;
864 return 0;
865}
866
867STATIC int
868xfs_bmbt_killroot(
869 xfs_btree_cur_t *cur)
870{
871 xfs_bmbt_block_t *block;
872 xfs_bmbt_block_t *cblock;
873 xfs_buf_t *cbp;
874 xfs_bmbt_key_t *ckp;
875 xfs_bmbt_ptr_t *cpp;
876#ifdef DEBUG
877 int error;
878#endif
879 int i;
880 xfs_bmbt_key_t *kp;
881 xfs_inode_t *ip;
882 xfs_ifork_t *ifp;
883 int level;
884 xfs_bmbt_ptr_t *pp;
885
886 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
887 level = cur->bc_nlevels - 1;
888 ASSERT(level >= 1);
889 /*
 890	 * Don't deal with the case where the root block needs to be a leaf.
891 * We're just going to turn the thing back into extents anyway.
892 */
893 if (level == 1) {
894 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
895 return 0;
896 }
897 block = xfs_bmbt_get_block(cur, level, &cbp);
898 /*
899 * Give up if the root has multiple children.
900 */
901 if (be16_to_cpu(block->bb_numrecs) != 1) {
902 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
903 return 0;
904 }
905 /*
906 * Only do this if the next level will fit.
907 * Then the data must be copied up to the inode,
908 * instead of freeing the root you free the next level.
909 */
910 cbp = cur->bc_bufs[level - 1];
911 cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
912 if (be16_to_cpu(cblock->bb_numrecs) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) {
913 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
914 return 0;
915 }
916 ASSERT(be64_to_cpu(cblock->bb_leftsib) == NULLDFSBNO);
917 ASSERT(be64_to_cpu(cblock->bb_rightsib) == NULLDFSBNO);
918 ip = cur->bc_private.b.ip;
919 ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork);
920 ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) ==
921 XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes));
922 i = (int)(be16_to_cpu(cblock->bb_numrecs) - XFS_BMAP_BLOCK_IMAXRECS(level, cur));
923 if (i) {
924 xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork);
925 block = ifp->if_broot;
926 }
927 be16_add_cpu(&block->bb_numrecs, i);
928 ASSERT(block->bb_numrecs == cblock->bb_numrecs);
929 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
930 ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
931 memcpy(kp, ckp, be16_to_cpu(block->bb_numrecs) * sizeof(*kp));
932 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
933 cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
934#ifdef DEBUG
935 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
936 if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) {
937 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
938 return error;
939 }
940 }
941#endif
942 memcpy(pp, cpp, be16_to_cpu(block->bb_numrecs) * sizeof(*pp));
943 xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1,
944 cur->bc_private.b.flist, cur->bc_mp);
945 ip->i_d.di_nblocks--;
946 XFS_TRANS_MOD_DQUOT_BYINO(cur->bc_mp, cur->bc_tp, ip,
947 XFS_TRANS_DQ_BCOUNT, -1L);
948 xfs_trans_binval(cur->bc_tp, cbp);
949 cur->bc_bufs[level - 1] = NULL;
950 be16_add_cpu(&block->bb_level, -1);
951 xfs_trans_log_inode(cur->bc_tp, ip,
952 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
953 cur->bc_nlevels--;
954 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
955 return 0;
956}
957
958/*
959 * Log key values from the btree block.
960 */
961STATIC void
962xfs_bmbt_log_keys(
963 xfs_btree_cur_t *cur,
964 xfs_buf_t *bp,
965 int kfirst,
966 int klast)
967{
968 xfs_trans_t *tp;
969
970 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
971 XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast);
972 tp = cur->bc_tp;
973 if (bp) {
974 xfs_bmbt_block_t *block;
975 int first;
976 xfs_bmbt_key_t *kp;
977 int last;
978
979 block = XFS_BUF_TO_BMBT_BLOCK(bp);
980 kp = XFS_BMAP_KEY_DADDR(block, 1, cur);
981 first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
982 last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
983 xfs_trans_log_buf(tp, bp, first, last);
984 } else {
985 xfs_inode_t *ip;
986
987 ip = cur->bc_private.b.ip;
988 xfs_trans_log_inode(tp, ip,
989 XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
990 }
991 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
992}
993
994/*
995 * Log pointer values from the btree block.
996 */
997STATIC void
998xfs_bmbt_log_ptrs(
999 xfs_btree_cur_t *cur,
1000 xfs_buf_t *bp,
1001 int pfirst,
1002 int plast)
1003{
1004 xfs_trans_t *tp;
1005
1006 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1007 XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast);
1008 tp = cur->bc_tp;
1009 if (bp) {
1010 xfs_bmbt_block_t *block;
1011 int first;
1012 int last;
1013 xfs_bmbt_ptr_t *pp;
1014
1015 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1016 pp = XFS_BMAP_PTR_DADDR(block, 1, cur);
1017 first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
1018 last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
1019 xfs_trans_log_buf(tp, bp, first, last);
1020 } else {
1021 xfs_inode_t *ip;
1022
1023 ip = cur->bc_private.b.ip;
1024 xfs_trans_log_inode(tp, ip,
1025 XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
1026 }
1027 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1028}
1029
1030/*
1031 * Lookup the record. The cursor is made to point to it, based on dir.
1032 */
1033STATIC int /* error */
1034xfs_bmbt_lookup(
1035 xfs_btree_cur_t *cur,
1036 xfs_lookup_t dir,
1037 int *stat) /* success/failure */
1038{
1039 xfs_bmbt_block_t *block=NULL;
1040 xfs_buf_t *bp;
1041 xfs_daddr_t d;
1042 xfs_sfiloff_t diff;
1043 int error; /* error return value */
1044 xfs_fsblock_t fsbno=0;
1045 int high;
1046 int i;
1047 int keyno=0;
1048 xfs_bmbt_key_t *kkbase=NULL;
1049 xfs_bmbt_key_t *kkp;
1050 xfs_bmbt_rec_t *krbase=NULL;
1051 xfs_bmbt_rec_t *krp;
1052 int level;
1053 int low;
1054 xfs_mount_t *mp;
1055 xfs_bmbt_ptr_t *pp;
1056 xfs_bmbt_irec_t *rp;
1057 xfs_fileoff_t startoff;
1058 xfs_trans_t *tp;
1059
1060 XFS_STATS_INC(xs_bmbt_lookup);
1061 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1062 XFS_BMBT_TRACE_ARGI(cur, (int)dir);
1063 tp = cur->bc_tp;
1064 mp = cur->bc_mp;
1065 rp = &cur->bc_rec.b;
1066 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1067 if (level < cur->bc_nlevels - 1) {
1068 d = XFS_FSB_TO_DADDR(mp, fsbno);
1069 bp = cur->bc_bufs[level];
1070 if (bp && XFS_BUF_ADDR(bp) != d)
1071 bp = NULL;
1072 if (!bp) {
1073 if ((error = xfs_btree_read_bufl(mp, tp, fsbno,
1074 0, &bp, XFS_BMAP_BTREE_REF))) {
1075 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1076 return error;
1077 }
1078 xfs_btree_setbuf(cur, level, bp);
1079 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1080 if ((error = xfs_btree_check_lblock(cur, block,
1081 level, bp))) {
1082 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1083 return error;
1084 }
1085 } else
1086 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1087 } else
1088 block = xfs_bmbt_get_block(cur, level, &bp);
1089 if (diff == 0)
1090 keyno = 1;
1091 else {
1092 if (level > 0)
1093 kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur);
1094 else
1095 krbase = XFS_BMAP_REC_IADDR(block, 1, cur);
1096 low = 1;
1097 if (!(high = be16_to_cpu(block->bb_numrecs))) {
1098 ASSERT(level == 0);
1099 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1100 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1101 *stat = 0;
1102 return 0;
1103 }
1104 while (low <= high) {
1105 XFS_STATS_INC(xs_bmbt_compare);
1106 keyno = (low + high) >> 1;
1107 if (level > 0) {
1108 kkp = kkbase + keyno - 1;
1109 startoff = be64_to_cpu(kkp->br_startoff);
1110 } else {
1111 krp = krbase + keyno - 1;
1112 startoff = xfs_bmbt_disk_get_startoff(krp);
1113 }
1114 diff = (xfs_sfiloff_t)
1115 (startoff - rp->br_startoff);
1116 if (diff < 0)
1117 low = keyno + 1;
1118 else if (diff > 0)
1119 high = keyno - 1;
1120 else
1121 break;
1122 }
1123 }
1124 if (level > 0) {
1125 if (diff > 0 && --keyno < 1)
1126 keyno = 1;
1127 pp = XFS_BMAP_PTR_IADDR(block, keyno, cur);
1128 fsbno = be64_to_cpu(*pp);
1129#ifdef DEBUG
1130 if ((error = xfs_btree_check_lptr(cur, fsbno, level))) {
1131 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1132 return error;
1133 }
1134#endif
1135 cur->bc_ptrs[level] = keyno;
1136 }
1137 }
1138 if (dir != XFS_LOOKUP_LE && diff < 0) {
1139 keyno++;
1140 /*
1141 * If ge search and we went off the end of the block, but it's
1142 * not the last block, we're in the wrong block.
1143 */
1144 if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) &&
1145 be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) {
1146 cur->bc_ptrs[0] = keyno;
1147 if ((error = xfs_bmbt_increment(cur, 0, &i))) {
1148 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1149 return error;
1150 }
1151 XFS_WANT_CORRUPTED_RETURN(i == 1);
1152 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1153 *stat = 1;
1154 return 0;
1155 }
1156 }
1157 else if (dir == XFS_LOOKUP_LE && diff > 0)
1158 keyno--;
1159 cur->bc_ptrs[0] = keyno;
1160 if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) {
1161 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1162 *stat = 0;
1163 } else {
1164 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1165 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1166 }
1167 return 0;
1168}
1169
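
The inner loop of xfs_bmbt_lookup above (also removed here in favor of the generic xfs_btree_lookup) is a plain binary search over the keys of one block, keeping the 1-based keyno convention. A stand-alone sketch of that search:

    #include <stdint.h>

    /*
     * Binary search over 1-based keys in one btree block.
     * Returns the 1-based index of an exact match, or of the last key
     * less than 'want' (0 if every key is greater), mirroring an LE lookup.
     */
    static int lookup_le(const uint64_t *keys /* keys[1..nkeys] */,
                         int nkeys, uint64_t want)
    {
            int low = 1, high = nkeys;

            while (low <= high) {
                    int keyno = (low + high) >> 1;

                    if (keys[keyno] < want)
                            low = keyno + 1;
                    else if (keys[keyno] > want)
                            high = keyno - 1;
                    else
                            return keyno;   /* exact match */
            }
            /* No exact match: 'high' is the last key < want. */
            return high;
    }
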
1170/*
1171 * Move 1 record left from cur/level if possible.
1172 * Update cur to reflect the new path.
1173 */
1174STATIC int /* error */
1175xfs_bmbt_lshift(
1176 xfs_btree_cur_t *cur,
1177 int level,
1178 int *stat) /* success/failure */
1179{
1180 int error; /* error return value */
1181#ifdef DEBUG
1182 int i; /* loop counter */
1183#endif
1184 xfs_bmbt_key_t key; /* bmap btree key */
1185 xfs_buf_t *lbp; /* left buffer pointer */
1186 xfs_bmbt_block_t *left; /* left btree block */
1187 xfs_bmbt_key_t *lkp=NULL; /* left btree key */
1188 xfs_bmbt_ptr_t *lpp; /* left address pointer */
1189 int lrecs; /* left record count */
1190 xfs_bmbt_rec_t *lrp=NULL; /* left record pointer */
1191 xfs_mount_t *mp; /* file system mount point */
1192 xfs_buf_t *rbp; /* right buffer pointer */
1193 xfs_bmbt_block_t *right; /* right btree block */
1194 xfs_bmbt_key_t *rkp=NULL; /* right btree key */
1195 xfs_bmbt_ptr_t *rpp=NULL; /* right address pointer */
1196 xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */
1197 int rrecs; /* right record count */
1198
1199 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1200 XFS_BMBT_TRACE_ARGI(cur, level);
1201 if (level == cur->bc_nlevels - 1) {
1202 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1203 *stat = 0;
1204 return 0;
1205 }
1206 rbp = cur->bc_bufs[level];
1207 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
1208#ifdef DEBUG
1209 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
1210 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1211 return error;
1212 }
1213#endif
1214 if (be64_to_cpu(right->bb_leftsib) == NULLDFSBNO) {
1215 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1216 *stat = 0;
1217 return 0;
1218 }
1219 if (cur->bc_ptrs[level] <= 1) {
1220 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1221 *stat = 0;
1222 return 0;
1223 }
1224 mp = cur->bc_mp;
1225 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(right->bb_leftsib), 0,
1226 &lbp, XFS_BMAP_BTREE_REF))) {
1227 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1228 return error;
1229 }
1230 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1231 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
1232 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1233 return error;
1234 }
1235 if (be16_to_cpu(left->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
1236 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1237 *stat = 0;
1238 return 0;
1239 }
1240 lrecs = be16_to_cpu(left->bb_numrecs) + 1;
1241 if (level > 0) {
1242 lkp = XFS_BMAP_KEY_IADDR(left, lrecs, cur);
1243 rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
1244 *lkp = *rkp;
1245 xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs);
1246 lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur);
1247 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
1248#ifdef DEBUG
1249 if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) {
1250 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1251 return error;
1252 }
1253#endif
1254 *lpp = *rpp;
1255 xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs);
1256 } else {
1257 lrp = XFS_BMAP_REC_IADDR(left, lrecs, cur);
1258 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
1259 *lrp = *rrp;
1260 xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs);
1261 }
1262 left->bb_numrecs = cpu_to_be16(lrecs);
1263 xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
1264#ifdef DEBUG
1265 if (level > 0)
1266 xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp);
1267 else
1268 xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp);
1269#endif
1270 rrecs = be16_to_cpu(right->bb_numrecs) - 1;
1271 right->bb_numrecs = cpu_to_be16(rrecs);
1272 xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
1273 if (level > 0) {
1274#ifdef DEBUG
1275 for (i = 0; i < rrecs; i++) {
1276 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1],
1277 level))) {
1278 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1279 return error;
1280 }
1281 }
1282#endif
1283 memmove(rkp, rkp + 1, rrecs * sizeof(*rkp));
1284 memmove(rpp, rpp + 1, rrecs * sizeof(*rpp));
1285 xfs_bmbt_log_keys(cur, rbp, 1, rrecs);
1286 xfs_bmbt_log_ptrs(cur, rbp, 1, rrecs);
1287 } else {
1288 memmove(rrp, rrp + 1, rrecs * sizeof(*rrp));
1289 xfs_bmbt_log_recs(cur, rbp, 1, rrecs);
1290 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
1291 rkp = &key;
1292 }
1293 if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) {
1294 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1295 return error;
1296 }
1297 cur->bc_ptrs[level]--;
1298 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1299 *stat = 1;
1300 return 0;
1301}
1302
1303/*
1304 * Move 1 record right from cur/level if possible.
1305 * Update cur to reflect the new path.
1306 */
1307STATIC int /* error */
1308xfs_bmbt_rshift(
1309 xfs_btree_cur_t *cur,
1310 int level,
1311 int *stat) /* success/failure */
1312{
1313 int error; /* error return value */
1314 int i; /* loop counter */
1315 xfs_bmbt_key_t key; /* bmap btree key */
1316 xfs_buf_t *lbp; /* left buffer pointer */
1317 xfs_bmbt_block_t *left; /* left btree block */
1318 xfs_bmbt_key_t *lkp; /* left btree key */
1319 xfs_bmbt_ptr_t *lpp; /* left address pointer */
1320 xfs_bmbt_rec_t *lrp; /* left record pointer */
1321 xfs_mount_t *mp; /* file system mount point */
1322 xfs_buf_t *rbp; /* right buffer pointer */
1323 xfs_bmbt_block_t *right; /* right btree block */
1324 xfs_bmbt_key_t *rkp; /* right btree key */
1325 xfs_bmbt_ptr_t *rpp; /* right address pointer */
1326 xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */
1327 struct xfs_btree_cur *tcur; /* temporary btree cursor */
1328
1329 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1330 XFS_BMBT_TRACE_ARGI(cur, level);
1331 if (level == cur->bc_nlevels - 1) {
1332 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1333 *stat = 0;
1334 return 0;
1335 }
1336 lbp = cur->bc_bufs[level];
1337 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1338#ifdef DEBUG
1339 if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) {
1340 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1341 return error;
1342 }
1343#endif
1344 if (be64_to_cpu(left->bb_rightsib) == NULLDFSBNO) {
1345 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1346 *stat = 0;
1347 return 0;
1348 }
1349 if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1350 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1351 *stat = 0;
1352 return 0;
1353 }
1354 mp = cur->bc_mp;
1355 if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(left->bb_rightsib), 0,
1356 &rbp, XFS_BMAP_BTREE_REF))) {
1357 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1358 return error;
1359 }
1360 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
1361 if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) {
1362 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1363 return error;
1364 }
1365 if (be16_to_cpu(right->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) {
1366 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1367 *stat = 0;
1368 return 0;
1369 }
1370 if (level > 0) {
1371 lkp = XFS_BMAP_KEY_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1372 lpp = XFS_BMAP_PTR_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1373 rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
1374 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
1375#ifdef DEBUG
1376 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1377 if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) {
1378 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1379 return error;
1380 }
1381 }
1382#endif
1383 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1384 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1385#ifdef DEBUG
1386 if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) {
1387 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1388 return error;
1389 }
1390#endif
1391 *rkp = *lkp;
1392 *rpp = *lpp;
1393 xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1394 xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1395 } else {
1396 lrp = XFS_BMAP_REC_IADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1397 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
1398 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1399 *rrp = *lrp;
1400 xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1401 key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp));
1402 rkp = &key;
1403 }
1404 be16_add_cpu(&left->bb_numrecs, -1);
1405 xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS);
1406 be16_add_cpu(&right->bb_numrecs, 1);
1407#ifdef DEBUG
1408 if (level > 0)
1409 xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1);
1410 else
1411 xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1);
1412#endif
1413 xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS);
1414 if ((error = xfs_btree_dup_cursor(cur, &tcur))) {
1415 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1416 return error;
1417 }
1418 i = xfs_btree_lastrec(tcur, level);
1419 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1420 if ((error = xfs_bmbt_increment(tcur, level, &i))) {
1421 XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
1422 goto error1;
1423 }
1424 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1425 if ((error = xfs_bmbt_updkey(tcur, rkp, level + 1))) {
1426 XFS_BMBT_TRACE_CURSOR(tcur, ERROR);
1427 goto error1;
1428 }
1429 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1430 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1431 *stat = 1;
1432 return 0;
1433error0:
1434 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1435error1:
1436 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1437 return error;
1438}
1439
 /*
  * Determine the extent state.
  */
@@ -1453,229 +60,15 @@ xfs_extent_state(
 	return XFS_EXT_NORM;
 }
 
1456
1457/*
1458 * Split cur/level block in half.
1459 * Return new block number and its first record (to be inserted into parent).
1460 */
1461STATIC int /* error */
1462xfs_bmbt_split(
1463 xfs_btree_cur_t *cur,
1464 int level,
1465 xfs_fsblock_t *bnop,
1466 __uint64_t *startoff,
1467 xfs_btree_cur_t **curp,
1468 int *stat) /* success/failure */
1469{
1470 xfs_alloc_arg_t args; /* block allocation args */
1471 int error; /* error return value */
1472 int i; /* loop counter */
1473 xfs_fsblock_t lbno; /* left sibling block number */
1474 xfs_buf_t *lbp; /* left buffer pointer */
1475 xfs_bmbt_block_t *left; /* left btree block */
1476 xfs_bmbt_key_t *lkp; /* left btree key */
1477 xfs_bmbt_ptr_t *lpp; /* left address pointer */
1478 xfs_bmbt_rec_t *lrp; /* left record pointer */
1479 xfs_buf_t *rbp; /* right buffer pointer */
1480 xfs_bmbt_block_t *right; /* right btree block */
1481 xfs_bmbt_key_t *rkp; /* right btree key */
1482 xfs_bmbt_ptr_t *rpp; /* right address pointer */
1483 xfs_bmbt_block_t *rrblock; /* right-right btree block */
1484 xfs_buf_t *rrbp; /* right-right buffer pointer */
1485 xfs_bmbt_rec_t *rrp; /* right record pointer */
1486
1487 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1488 XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff);
1489 args.tp = cur->bc_tp;
1490 args.mp = cur->bc_mp;
1491 lbp = cur->bc_bufs[level];
1492 lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
1493 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1494 args.fsbno = cur->bc_private.b.firstblock;
1495 args.firstblock = args.fsbno;
1496 args.minleft = 0;
1497 if (args.fsbno == NULLFSBLOCK) {
1498 args.fsbno = lbno;
1499 args.type = XFS_ALLOCTYPE_START_BNO;
1500 /*
1501 * Make sure there is sufficient room left in the AG to
1502 * complete a full tree split for an extent insert. If
1503 * we are converting the middle part of an extent then
1504 * we may need space for two tree splits.
1505 *
1506 * We are relying on the caller to make the correct block
1507 * reservation for this operation to succeed. If the
1508 * reservation amount is insufficient then we may fail a
1509 * block allocation here and corrupt the filesystem.
1510 */
1511 args.minleft = xfs_trans_get_block_res(args.tp);
1512 } else if (cur->bc_private.b.flist->xbf_low)
1513 args.type = XFS_ALLOCTYPE_START_BNO;
1514 else
1515 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1516 args.mod = args.alignment = args.total = args.isfl =
1517 args.userdata = args.minalignslop = 0;
1518 args.minlen = args.maxlen = args.prod = 1;
1519 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
1520 if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
1521 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1522 return XFS_ERROR(ENOSPC);
1523 }
1524 if ((error = xfs_alloc_vextent(&args))) {
1525 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1526 return error;
1527 }
1528 if (args.fsbno == NULLFSBLOCK && args.minleft) {
1529 /*
1530 * Could not find an AG with enough free space to satisfy
1531 * a full btree split. Try again without minleft and if
1532 * successful activate the lowspace algorithm.
1533 */
1534 args.fsbno = 0;
1535 args.type = XFS_ALLOCTYPE_FIRST_AG;
1536 args.minleft = 0;
1537 if ((error = xfs_alloc_vextent(&args))) {
1538 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1539 return error;
1540 }
1541 cur->bc_private.b.flist->xbf_low = 1;
1542 }
1543 if (args.fsbno == NULLFSBLOCK) {
1544 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1545 *stat = 0;
1546 return 0;
1547 }
1548 ASSERT(args.len == 1);
1549 cur->bc_private.b.firstblock = args.fsbno;
1550 cur->bc_private.b.allocated++;
1551 cur->bc_private.b.ip->i_d.di_nblocks++;
1552 xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
1553 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
1554 XFS_TRANS_DQ_BCOUNT, 1L);
1555 rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0);
1556 right = XFS_BUF_TO_BMBT_BLOCK(rbp);
1557#ifdef DEBUG
1558 if ((error = xfs_btree_check_lblock(cur, left, level, rbp))) {
1559 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1560 return error;
1561 }
1562#endif
1563 right->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
1564 right->bb_level = left->bb_level;
1565 right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1566 if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1567 cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1568 be16_add_cpu(&right->bb_numrecs, 1);
1569 i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1570 if (level > 0) {
1571 lkp = XFS_BMAP_KEY_IADDR(left, i, cur);
1572 lpp = XFS_BMAP_PTR_IADDR(left, i, cur);
1573 rkp = XFS_BMAP_KEY_IADDR(right, 1, cur);
1574 rpp = XFS_BMAP_PTR_IADDR(right, 1, cur);
1575#ifdef DEBUG
1576 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1577 if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) {
1578 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1579 return error;
1580 }
1581 }
1582#endif
1583 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1584 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1585 xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1586 xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1587 *startoff = be64_to_cpu(rkp->br_startoff);
1588 } else {
1589 lrp = XFS_BMAP_REC_IADDR(left, i, cur);
1590 rrp = XFS_BMAP_REC_IADDR(right, 1, cur);
1591 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1592 xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1593 *startoff = xfs_bmbt_disk_get_startoff(rrp);
1594 }
1595 be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1596 right->bb_rightsib = left->bb_rightsib;
1597 left->bb_rightsib = cpu_to_be64(args.fsbno);
1598 right->bb_leftsib = cpu_to_be64(lbno);
1599 xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS);
1600 xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1601 if (be64_to_cpu(right->bb_rightsib) != NULLDFSBNO) {
1602 if ((error = xfs_btree_read_bufl(args.mp, args.tp,
1603 be64_to_cpu(right->bb_rightsib), 0, &rrbp,
1604 XFS_BMAP_BTREE_REF))) {
1605 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1606 return error;
1607 }
1608 rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp);
1609 if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) {
1610 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1611 return error;
1612 }
1613 rrblock->bb_leftsib = cpu_to_be64(args.fsbno);
1614 xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB);
1615 }
1616 if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1617 xfs_btree_setbuf(cur, level, rbp);
1618 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1619 }
1620 if (level + 1 < cur->bc_nlevels) {
1621 if ((error = xfs_btree_dup_cursor(cur, curp))) {
1622 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1623 return error;
1624 }
1625 (*curp)->bc_ptrs[level + 1]++;
1626 }
1627 *bnop = args.fsbno;
1628 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1629 *stat = 1;
1630 return 0;
1631}
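
The split bookkeeping above is easy to lose among the be16 conversions, so here is the same arithmetic in isolation: the new right block takes half the records, an odd leftover goes right when the insertion point stays in the (now smaller) left block, and the cursor migrates right when its slot falls past the left block. A standalone sketch with plain ints; the variable names are invented for illustration:

#include <stdio.h>

int main(void)
{
	int left_nrecs = 7;	/* records in the block being split */
	int cursor = 6;		/* 1-based insertion slot, as in bc_ptrs */
	int right_nrecs;

	right_nrecs = left_nrecs / 2;
	/*
	 * Odd count: send the spare record right when the insertion
	 * point stays in the left block, so that block keeps slack.
	 */
	if ((left_nrecs & 1) && cursor <= right_nrecs + 1)
		right_nrecs++;
	left_nrecs -= right_nrecs;

	/* Slot past the shrunken left block? It is now in the right one. */
	if (cursor > left_nrecs + 1)
		cursor -= left_nrecs;

	printf("left=%d right=%d cursor=%d\n",
	       left_nrecs, right_nrecs, cursor);
	return 0;
}
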
1632
1633
1634/*
1635 * Update keys for the record.
1636 */
1637STATIC int
1638xfs_bmbt_updkey(
1639 xfs_btree_cur_t *cur,
1640 xfs_bmbt_key_t *keyp, /* on-disk format */
1641 int level)
1642{
1643 xfs_bmbt_block_t *block;
1644 xfs_buf_t *bp;
1645#ifdef DEBUG
1646 int error;
1647#endif
1648 xfs_bmbt_key_t *kp;
1649 int ptr;
1650
1651 ASSERT(level >= 1);
1652 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1653 XFS_BMBT_TRACE_ARGIK(cur, level, keyp);
1654 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1655 block = xfs_bmbt_get_block(cur, level, &bp);
1656#ifdef DEBUG
1657 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
1658 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1659 return error;
1660 }
1661#endif
1662 ptr = cur->bc_ptrs[level];
1663 kp = XFS_BMAP_KEY_IADDR(block, ptr, cur);
1664 *kp = *keyp;
1665 xfs_bmbt_log_keys(cur, bp, ptr, ptr);
1666 }
1667 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1668 return 0;
1669}
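
xfs_bmbt_updkey() stops climbing as soon as ptr != 1: only the leftmost entry of a block contributes the key stored in its parent, so a key change propagates upward exactly while the cursor sits in slot 1 at each level. A minimal userspace model of that loop (toy arrays; levels >= 1 hold keys, as in the real tree):

#include <stdio.h>

#define NLEVELS 3
#define NKEYS   4

/* keys[level][slot]: slot 0's key is mirrored in the parent block */
static long keys[NLEVELS][NKEYS] = {
	{ 10, 20, 30, 40 },	/* level 0 (leaves) */
	{ 10, 50,  0,  0 },
	{ 10,  0,  0,  0 },	/* root */
};

/* ptrs[level]: 1-based cursor position at each level */
static int ptrs[NLEVELS] = { 1, 1, 1 };

static void updkey(long newkey, int level)
{
	int ptr;

	/* Walk up only while the entry is leftmost in its block. */
	for (ptr = 1; ptr == 1 && level < NLEVELS; level++) {
		ptr = ptrs[level];
		keys[level][ptr - 1] = newkey;
	}
}

int main(void)
{
	updkey(5, 1);	/* leftmost entry changed: the root key follows */
	printf("root key is now %ld\n", keys[NLEVELS - 1][0]);
	return 0;
}
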
1670
1671/* 63/*
1672 * Convert on-disk form of btree root to in-memory form. 64 * Convert on-disk form of btree root to in-memory form.
1673 */ 65 */
1674void 66void
1675xfs_bmdr_to_bmbt( 67xfs_bmdr_to_bmbt(
68 struct xfs_mount *mp,
1676 xfs_bmdr_block_t *dblock, 69 xfs_bmdr_block_t *dblock,
1677 int dblocklen, 70 int dblocklen,
1678 xfs_bmbt_block_t *rblock, 71 struct xfs_btree_block *rblock,
1679 int rblocklen) 72 int rblocklen)
1680{ 73{
1681 int dmxr; 74 int dmxr;
@@ -1688,129 +81,19 @@ xfs_bmdr_to_bmbt(
1688 rblock->bb_level = dblock->bb_level; 81 rblock->bb_level = dblock->bb_level;
1689 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 82 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
1690 rblock->bb_numrecs = dblock->bb_numrecs; 83 rblock->bb_numrecs = dblock->bb_numrecs;
1691 rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); 84 rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
1692 rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); 85 rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
1693 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); 86 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
1694 fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); 87 fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
1695 tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); 88 tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
1696 fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); 89 fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
1697 tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); 90 tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
1698 dmxr = be16_to_cpu(dblock->bb_numrecs); 91 dmxr = be16_to_cpu(dblock->bb_numrecs);
1699 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 92 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
1700 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 93 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
1701} 94}
1702 95
1703/* 96/*
1704 * Decrement cursor by one record at the level.
1705 * For nonzero levels the leaf-ward information is untouched.
1706 */
1707int /* error */
1708xfs_bmbt_decrement(
1709 xfs_btree_cur_t *cur,
1710 int level,
1711 int *stat) /* success/failure */
1712{
1713 xfs_bmbt_block_t *block;
1714 xfs_buf_t *bp;
1715 int error; /* error return value */
1716 xfs_fsblock_t fsbno;
1717 int lev;
1718 xfs_mount_t *mp;
1719 xfs_trans_t *tp;
1720
1721 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1722 XFS_BMBT_TRACE_ARGI(cur, level);
1723 ASSERT(level < cur->bc_nlevels);
1724 if (level < cur->bc_nlevels - 1)
1725 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1726 if (--cur->bc_ptrs[level] > 0) {
1727 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1728 *stat = 1;
1729 return 0;
1730 }
1731 block = xfs_bmbt_get_block(cur, level, &bp);
1732#ifdef DEBUG
1733 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
1734 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1735 return error;
1736 }
1737#endif
1738 if (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO) {
1739 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1740 *stat = 0;
1741 return 0;
1742 }
1743 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1744 if (--cur->bc_ptrs[lev] > 0)
1745 break;
1746 if (lev < cur->bc_nlevels - 1)
1747 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1748 }
1749 if (lev == cur->bc_nlevels) {
1750 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1751 *stat = 0;
1752 return 0;
1753 }
1754 tp = cur->bc_tp;
1755 mp = cur->bc_mp;
1756 for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
1757 fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
1758 if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
1759 XFS_BMAP_BTREE_REF))) {
1760 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1761 return error;
1762 }
1763 lev--;
1764 xfs_btree_setbuf(cur, lev, bp);
1765 block = XFS_BUF_TO_BMBT_BLOCK(bp);
1766 if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
1767 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1768 return error;
1769 }
1770 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1771 }
1772 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1773 *stat = 1;
1774 return 0;
1775}
1776
1777/*
1778 * Delete the record pointed to by cur.
1779 */
1780int /* error */
1781xfs_bmbt_delete(
1782 xfs_btree_cur_t *cur,
1783 int *stat) /* success/failure */
1784{
1785 int error; /* error return value */
1786 int i;
1787 int level;
1788
1789 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1790 for (level = 0, i = 2; i == 2; level++) {
1791 if ((error = xfs_bmbt_delrec(cur, level, &i))) {
1792 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1793 return error;
1794 }
1795 }
1796 if (i == 0) {
1797 for (level = 1; level < cur->bc_nlevels; level++) {
1798 if (cur->bc_ptrs[level] == 0) {
1799 if ((error = xfs_bmbt_decrement(cur, level,
1800 &i))) {
1801 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1802 return error;
1803 }
1804 break;
1805 }
1806 }
1807 }
1808 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1809 *stat = i;
1810 return 0;
1811}
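
The odd-looking loop condition i == 2 depends on xfs_bmbt_delrec()'s status convention, which this patch does not show; a reasonable reading is that 2 signals "block merged, so the parent level needs a delete too", while 0/1 end the loop. A toy restatement of that control flow, with an invented delrec() status function:

#include <stdio.h>

/* Hypothetical status function: levels 0 and 1 merge, level 2 is done. */
static int delrec(int level)
{
	return level < 2 ? 2 : 1;
}

int main(void)
{
	int level, i;

	for (level = 0, i = 2; i == 2; level++)
		i = delrec(level);
	printf("stopped after level %d, status %d\n", level - 1, i);
	return 0;
}
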
1812
1813/*
1814 * Convert a compressed bmap extent record to an uncompressed form. 97 * Convert a compressed bmap extent record to an uncompressed form.
1815 * This code must be in sync with the routines xfs_bmbt_get_startoff, 98 * This code must be in sync with the routines xfs_bmbt_get_startoff,
1816 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. 99 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
@@ -1864,31 +147,6 @@ xfs_bmbt_get_all(
1864} 147}
1865 148
1866/* 149/*
1867 * Get the block pointer for the given level of the cursor.
1868 * Fill in the buffer pointer, if applicable.
1869 */
1870xfs_bmbt_block_t *
1871xfs_bmbt_get_block(
1872 xfs_btree_cur_t *cur,
1873 int level,
1874 xfs_buf_t **bpp)
1875{
1876 xfs_ifork_t *ifp;
1877 xfs_bmbt_block_t *rval;
1878
1879 if (level < cur->bc_nlevels - 1) {
1880 *bpp = cur->bc_bufs[level];
1881 rval = XFS_BUF_TO_BMBT_BLOCK(*bpp);
1882 } else {
1883 *bpp = NULL;
1884 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
1885 cur->bc_private.b.whichfork);
1886 rval = ifp->if_broot;
1887 }
1888 return rval;
1889}
1890
1891/*
1892 * Extract the blockcount field from an in memory bmap extent record. 150 * Extract the blockcount field from an in memory bmap extent record.
1893 */ 151 */
1894xfs_filblks_t 152xfs_filblks_t
@@ -1950,7 +208,8 @@ xfs_bmbt_disk_get_all(
1950 xfs_bmbt_rec_t *r, 208 xfs_bmbt_rec_t *r,
1951 xfs_bmbt_irec_t *s) 209 xfs_bmbt_irec_t *s)
1952{ 210{
1953 __xfs_bmbt_get_all(be64_to_cpu(r->l0), be64_to_cpu(r->l1), s); 211 __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
212 get_unaligned_be64(&r->l1), s);
1954} 213}
1955 214
1956/* 215/*
@@ -1974,348 +233,6 @@ xfs_bmbt_disk_get_startoff(
1974 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; 233 XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
1975} 234}
1976 235
1977/*
1978 * Increment cursor by one record at the level.
1979 * For nonzero levels the leaf-ward information is untouched.
1980 */
1981int /* error */
1982xfs_bmbt_increment(
1983 xfs_btree_cur_t *cur,
1984 int level,
1985 int *stat) /* success/failure */
1986{
1987 xfs_bmbt_block_t *block;
1988 xfs_buf_t *bp;
1989 int error; /* error return value */
1990 xfs_fsblock_t fsbno;
1991 int lev;
1992 xfs_mount_t *mp;
1993 xfs_trans_t *tp;
1994
1995 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
1996 XFS_BMBT_TRACE_ARGI(cur, level);
1997 ASSERT(level < cur->bc_nlevels);
1998 if (level < cur->bc_nlevels - 1)
1999 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
2000 block = xfs_bmbt_get_block(cur, level, &bp);
2001#ifdef DEBUG
2002 if ((error = xfs_btree_check_lblock(cur, block, level, bp))) {
2003 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2004 return error;
2005 }
2006#endif
2007 if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
2008 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2009 *stat = 1;
2010 return 0;
2011 }
2012 if (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO) {
2013 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2014 *stat = 0;
2015 return 0;
2016 }
2017 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
2018 block = xfs_bmbt_get_block(cur, lev, &bp);
2019#ifdef DEBUG
2020 if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
2021 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2022 return error;
2023 }
2024#endif
2025 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
2026 break;
2027 if (lev < cur->bc_nlevels - 1)
2028 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
2029 }
2030 if (lev == cur->bc_nlevels) {
2031 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2032 *stat = 0;
2033 return 0;
2034 }
2035 tp = cur->bc_tp;
2036 mp = cur->bc_mp;
2037 for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) {
2038 fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur));
2039 if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
2040 XFS_BMAP_BTREE_REF))) {
2041 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2042 return error;
2043 }
2044 lev--;
2045 xfs_btree_setbuf(cur, lev, bp);
2046 block = XFS_BUF_TO_BMBT_BLOCK(bp);
2047 if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) {
2048 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2049 return error;
2050 }
2051 cur->bc_ptrs[lev] = 1;
2052 }
2053 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2054 *stat = 1;
2055 return 0;
2056}
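
Stripped of buffer reads and DEBUG checks, xfs_bmbt_increment() is a classic multi-level cursor walk: bump the pointer at the current level; if it runs off the block, climb until some ancestor can advance, then descend the new path resetting every lower pointer to the first slot. A self-contained sketch of just that walk (toy arrays stand in for btree blocks):

#include <stdio.h>

#define NLEVELS 3

static int ptrs[NLEVELS];                 /* 1-based slot per level */
static int numrecs[NLEVELS] = {4, 4, 2};  /* entries per block, per level */

/* Returns 1 on success, 0 when the cursor is already at the far right. */
static int increment(int level)
{
	int lev;

	if (++ptrs[level] <= numrecs[level])
		return 1;
	/* Climb until some ancestor still has entries to the right. */
	for (lev = level + 1; lev < NLEVELS; lev++)
		if (++ptrs[lev] <= numrecs[lev])
			break;
	if (lev == NLEVELS)
		return 0;		/* ran off the right edge of the tree */
	/* Descend the new path, starting at slot 1 in each child. */
	for (lev--; lev >= level; lev--)
		ptrs[lev] = 1;
	return 1;
}

int main(void)
{
	for (int i = 0; i < NLEVELS; i++)
		ptrs[i] = numrecs[i];	/* park at the rightmost leaf... */
	ptrs[NLEVELS - 1] = 1;		/* ...under the first root entry */
	printf("advance: %d (ptrs %d/%d/%d)\n", increment(0),
	       ptrs[0], ptrs[1], ptrs[2]);
	return 0;
}
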
2057
2058/*
2059 * Insert the current record at the point referenced by cur.
2060 *
2061 * A multi-level split of the tree on insert will invalidate the original
2062 * cursor. All callers of this function should assume that the cursor is
2063 * no longer valid and revalidate it.
2064 */
2065int /* error */
2066xfs_bmbt_insert(
2067 xfs_btree_cur_t *cur,
2068 int *stat) /* success/failure */
2069{
2070 int error; /* error return value */
2071 int i;
2072 int level;
2073 xfs_fsblock_t nbno;
2074 xfs_btree_cur_t *ncur;
2075 xfs_bmbt_rec_t nrec;
2076 xfs_btree_cur_t *pcur;
2077
2078 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2079 level = 0;
2080 nbno = NULLFSBLOCK;
2081 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
2082 ncur = NULL;
2083 pcur = cur;
2084 do {
2085 if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur,
2086 &i))) {
2087 if (pcur != cur)
2088 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2089 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2090 return error;
2091 }
2092 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2093 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
2094 cur->bc_nlevels = pcur->bc_nlevels;
2095 cur->bc_private.b.allocated +=
2096 pcur->bc_private.b.allocated;
2097 pcur->bc_private.b.allocated = 0;
2098 ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) ||
2099 XFS_IS_REALTIME_INODE(cur->bc_private.b.ip));
2100 cur->bc_private.b.firstblock =
2101 pcur->bc_private.b.firstblock;
2102 ASSERT(cur->bc_private.b.flist ==
2103 pcur->bc_private.b.flist);
2104 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2105 }
2106 if (ncur) {
2107 pcur = ncur;
2108 ncur = NULL;
2109 }
2110 } while (nbno != NULLFSBLOCK);
2111 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2112 *stat = i;
2113 return 0;
2114error0:
2115 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2116 return error;
2117}
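
The insert loop replaces recursion with iteration: one xfs_bmbt_insrec() call per level, where a split hands back a new block (nbno, plus possibly a new cursor) that must then be inserted one level up, until some level absorbs the insert. A compact model of that control flow, with a hypothetical insrec() that reports a split by returning the promoted key and 0 otherwise:

#include <stdio.h>

#define MAXLEVELS 4

/*
 * Hypothetical per-level insert: returns the key promoted to the
 * parent when the block split, 0 when the insert fit in place.
 */
static long insrec(int level, long key)
{
	return level < 2 ? key + 1000 : 0;	/* pretend levels 0-1 split */
}

int main(void)
{
	long pending = 42;	/* models nbno/nrec: work for this level */
	int level = 0;

	do {
		pending = insrec(level++, pending);
	} while (pending != 0 && level < MAXLEVELS);
	printf("insert absorbed at level %d\n", level - 1);
	return 0;
}
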
2118
2119/*
2120 * Log fields from the btree block header.
2121 */
2122void
2123xfs_bmbt_log_block(
2124 xfs_btree_cur_t *cur,
2125 xfs_buf_t *bp,
2126 int fields)
2127{
2128 int first;
2129 int last;
2130 xfs_trans_t *tp;
2131 static const short offsets[] = {
2132 offsetof(xfs_bmbt_block_t, bb_magic),
2133 offsetof(xfs_bmbt_block_t, bb_level),
2134 offsetof(xfs_bmbt_block_t, bb_numrecs),
2135 offsetof(xfs_bmbt_block_t, bb_leftsib),
2136 offsetof(xfs_bmbt_block_t, bb_rightsib),
2137 sizeof(xfs_bmbt_block_t)
2138 };
2139
2140 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2141 XFS_BMBT_TRACE_ARGBI(cur, bp, fields);
2142 tp = cur->bc_tp;
2143 if (bp) {
2144 xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first,
2145 &last);
2146 xfs_trans_log_buf(tp, bp, first, last);
2147 } else
2148 xfs_trans_log_inode(tp, cur->bc_private.b.ip,
2149 XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
2150 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2151}
2152
2153/*
2154 * Log record values from the btree block.
2155 */
2156void
2157xfs_bmbt_log_recs(
2158 xfs_btree_cur_t *cur,
2159 xfs_buf_t *bp,
2160 int rfirst,
2161 int rlast)
2162{
2163 xfs_bmbt_block_t *block;
2164 int first;
2165 int last;
2166 xfs_bmbt_rec_t *rp;
2167 xfs_trans_t *tp;
2168
2169 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2170 XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast);
2171 ASSERT(bp);
2172 tp = cur->bc_tp;
2173 block = XFS_BUF_TO_BMBT_BLOCK(bp);
2174 rp = XFS_BMAP_REC_DADDR(block, 1, cur);
2175 first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
2176 last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
2177 xfs_trans_log_buf(tp, bp, first, last);
2178 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2179}
2180
2181int /* error */
2182xfs_bmbt_lookup_eq(
2183 xfs_btree_cur_t *cur,
2184 xfs_fileoff_t off,
2185 xfs_fsblock_t bno,
2186 xfs_filblks_t len,
2187 int *stat) /* success/failure */
2188{
2189 cur->bc_rec.b.br_startoff = off;
2190 cur->bc_rec.b.br_startblock = bno;
2191 cur->bc_rec.b.br_blockcount = len;
2192 return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat);
2193}
2194
2195int /* error */
2196xfs_bmbt_lookup_ge(
2197 xfs_btree_cur_t *cur,
2198 xfs_fileoff_t off,
2199 xfs_fsblock_t bno,
2200 xfs_filblks_t len,
2201 int *stat) /* success/failure */
2202{
2203 cur->bc_rec.b.br_startoff = off;
2204 cur->bc_rec.b.br_startblock = bno;
2205 cur->bc_rec.b.br_blockcount = len;
2206 return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat);
2207}
2208
2209/*
2210 * Give the bmap btree a new root block. Copy the old broot contents
2211 * down into a real block and make the broot point to it.
2212 */
2213int /* error */
2214xfs_bmbt_newroot(
2215 xfs_btree_cur_t *cur, /* btree cursor */
2216 int *logflags, /* logging flags for inode */
2217 int *stat) /* return status - 0 fail */
2218{
2219 xfs_alloc_arg_t args; /* allocation arguments */
2220 xfs_bmbt_block_t *block; /* bmap btree block */
2221 xfs_buf_t *bp; /* buffer for block */
2222 xfs_bmbt_block_t *cblock; /* child btree block */
2223 xfs_bmbt_key_t *ckp; /* child key pointer */
2224 xfs_bmbt_ptr_t *cpp; /* child ptr pointer */
2225 int error; /* error return code */
2226#ifdef DEBUG
2227 int i; /* loop counter */
2228#endif
2229 xfs_bmbt_key_t *kp; /* pointer to bmap btree key */
2230 int level; /* btree level */
2231 xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */
2232
2233 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2234 level = cur->bc_nlevels - 1;
2235 block = xfs_bmbt_get_block(cur, level, &bp);
2236 /*
2237 * Copy the root into a real block.
2238 */
2239 args.mp = cur->bc_mp;
2240 pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
2241 args.tp = cur->bc_tp;
2242 args.fsbno = cur->bc_private.b.firstblock;
2243 args.mod = args.minleft = args.alignment = args.total = args.isfl =
2244 args.userdata = args.minalignslop = 0;
2245 args.minlen = args.maxlen = args.prod = 1;
2246 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
2247 args.firstblock = args.fsbno;
2248 if (args.fsbno == NULLFSBLOCK) {
2249#ifdef DEBUG
2250 if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) {
2251 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2252 return error;
2253 }
2254#endif
2255 args.fsbno = be64_to_cpu(*pp);
2256 args.type = XFS_ALLOCTYPE_START_BNO;
2257 } else if (cur->bc_private.b.flist->xbf_low)
2258 args.type = XFS_ALLOCTYPE_START_BNO;
2259 else
2260 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2261 if ((error = xfs_alloc_vextent(&args))) {
2262 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2263 return error;
2264 }
2265 if (args.fsbno == NULLFSBLOCK) {
2266 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2267 *stat = 0;
2268 return 0;
2269 }
2270 ASSERT(args.len == 1);
2271 cur->bc_private.b.firstblock = args.fsbno;
2272 cur->bc_private.b.allocated++;
2273 cur->bc_private.b.ip->i_d.di_nblocks++;
2274 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
2275 XFS_TRANS_DQ_BCOUNT, 1L);
2276 bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0);
2277 cblock = XFS_BUF_TO_BMBT_BLOCK(bp);
2278 *cblock = *block;
2279 be16_add_cpu(&block->bb_level, 1);
2280 block->bb_numrecs = cpu_to_be16(1);
2281 cur->bc_nlevels++;
2282 cur->bc_ptrs[level + 1] = 1;
2283 kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
2284 ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur);
2285 memcpy(ckp, kp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*kp));
2286 cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur);
2287#ifdef DEBUG
2288 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2289 if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) {
2290 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2291 return error;
2292 }
2293 }
2294#endif
2295 memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp));
2296#ifdef DEBUG
2297 if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) {
2298 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2299 return error;
2300 }
2301#endif
2302 *pp = cpu_to_be64(args.fsbno);
2303 xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs),
2304 cur->bc_private.b.whichfork);
2305 xfs_btree_setbuf(cur, level, bp);
2306 /*
2307 * Do all this logging at the end so that
2308 * the root is at the right level.
2309 */
2310 xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS);
2311 xfs_bmbt_log_keys(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
2312 xfs_bmbt_log_ptrs(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs));
2313 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2314 *logflags |=
2315 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
2316 *stat = 1;
2317 return 0;
2318}
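
Growing a root-in-inode btree follows the pattern visible above: allocate a real block, copy the whole old root into it (*cblock = *block), then rewrite the root as a single key/pointer referring to the new child and bump its level. A toy illustration with an invented struct, not the on-disk layout:

#include <stdio.h>
#include <string.h>

struct toy_block {
	int  level;          /* height of this block in the tree */
	int  numrecs;        /* keys/ptrs in use */
	long keys[8];
	long ptrs[8];
};

int main(void)
{
	struct toy_block root = { .level = 1, .numrecs = 8 };
	struct toy_block child;

	for (int i = 0; i < 8; i++) {
		root.keys[i] = i * 100;
		root.ptrs[i] = 5000 + i;
	}

	/* Copy the root into a real block (cf. *cblock = *block). */
	memcpy(&child, &root, sizeof(child));

	/* The root now holds one entry pointing at the new child. */
	root.level++;
	root.numrecs = 1;
	root.ptrs[0] = 9999;	/* models args.fsbno of the new block */

	printf("root level %d, child numrecs %d\n",
	       root.level, child.numrecs);
	return 0;
}
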
2319 236
2320/* 237/*
2321 * Set all the fields in a bmap extent record from the arguments. 238 * Set all the fields in a bmap extent record from the arguments.
@@ -2512,7 +429,8 @@ xfs_bmbt_set_state(
2512 */ 429 */
2513void 430void
2514xfs_bmbt_to_bmdr( 431xfs_bmbt_to_bmdr(
2515 xfs_bmbt_block_t *rblock, 432 struct xfs_mount *mp,
433 struct xfs_btree_block *rblock,
2516 int rblocklen, 434 int rblocklen,
2517 xfs_bmdr_block_t *dblock, 435 xfs_bmdr_block_t *dblock,
2518 int dblocklen) 436 int dblocklen)
@@ -2524,67 +442,22 @@ xfs_bmbt_to_bmdr(
2524 __be64 *tpp; 442 __be64 *tpp;
2525 443
2526 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); 444 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
2527 ASSERT(be64_to_cpu(rblock->bb_leftsib) == NULLDFSBNO); 445 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO);
2528 ASSERT(be64_to_cpu(rblock->bb_rightsib) == NULLDFSBNO); 446 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO);
2529 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 447 ASSERT(be16_to_cpu(rblock->bb_level) > 0);
2530 dblock->bb_level = rblock->bb_level; 448 dblock->bb_level = rblock->bb_level;
2531 dblock->bb_numrecs = rblock->bb_numrecs; 449 dblock->bb_numrecs = rblock->bb_numrecs;
2532 dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); 450 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
2533 fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); 451 fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
2534 tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); 452 tkp = XFS_BMDR_KEY_ADDR(dblock, 1);
2535 fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); 453 fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen);
2536 tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); 454 tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr);
2537 dmxr = be16_to_cpu(dblock->bb_numrecs); 455 dmxr = be16_to_cpu(dblock->bb_numrecs);
2538 memcpy(tkp, fkp, sizeof(*fkp) * dmxr); 456 memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
2539 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 457 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
2540} 458}
2541 459
2542/* 460/*
2543 * Update the record to the passed values.
2544 */
2545int
2546xfs_bmbt_update(
2547 xfs_btree_cur_t *cur,
2548 xfs_fileoff_t off,
2549 xfs_fsblock_t bno,
2550 xfs_filblks_t len,
2551 xfs_exntst_t state)
2552{
2553 xfs_bmbt_block_t *block;
2554 xfs_buf_t *bp;
2555 int error;
2556 xfs_bmbt_key_t key;
2557 int ptr;
2558 xfs_bmbt_rec_t *rp;
2559
2560 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2561 XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno,
2562 (xfs_dfilblks_t)len, (int)state);
2563 block = xfs_bmbt_get_block(cur, 0, &bp);
2564#ifdef DEBUG
2565 if ((error = xfs_btree_check_lblock(cur, block, 0, bp))) {
2566 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2567 return error;
2568 }
2569#endif
2570 ptr = cur->bc_ptrs[0];
2571 rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
2572 xfs_bmbt_disk_set_allf(rp, off, bno, len, state);
2573 xfs_bmbt_log_recs(cur, bp, ptr, ptr);
2574 if (ptr > 1) {
2575 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2576 return 0;
2577 }
2578 key.br_startoff = cpu_to_be64(off);
2579 if ((error = xfs_bmbt_updkey(cur, &key, 1))) {
2580 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2581 return error;
2582 }
2583 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2584 return 0;
2585}
2586
2587/*
2588 * Check extent records, which have just been read, for 461 * Check extent records, which have just been read, for
2589 * any bit in the extent flag field. ASSERT on debug 462 * any bit in the extent flag field. ASSERT on debug
2590 * kernels, as this condition should not occur. 463 * kernels, as this condition should not occur.
@@ -2608,3 +481,451 @@ xfs_check_nostate_extents(
2608 } 481 }
2609 return 0; 482 return 0;
2610} 483}
484
485
486STATIC struct xfs_btree_cur *
487xfs_bmbt_dup_cursor(
488 struct xfs_btree_cur *cur)
489{
490 struct xfs_btree_cur *new;
491
492 new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
493 cur->bc_private.b.ip, cur->bc_private.b.whichfork);
494
495 /*
496 * Copy the firstblock, flist, and flags values,
497 * since init cursor doesn't get them.
498 */
499 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
500 new->bc_private.b.flist = cur->bc_private.b.flist;
501 new->bc_private.b.flags = cur->bc_private.b.flags;
502
503 return new;
504}
505
506STATIC void
507xfs_bmbt_update_cursor(
508 struct xfs_btree_cur *src,
509 struct xfs_btree_cur *dst)
510{
511 ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) ||
512 (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME));
513 ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist);
514
515 dst->bc_private.b.allocated += src->bc_private.b.allocated;
516 dst->bc_private.b.firstblock = src->bc_private.b.firstblock;
517
518 src->bc_private.b.allocated = 0;
519}
520
521STATIC int
522xfs_bmbt_alloc_block(
523 struct xfs_btree_cur *cur,
524 union xfs_btree_ptr *start,
525 union xfs_btree_ptr *new,
526 int length,
527 int *stat)
528{
529 xfs_alloc_arg_t args; /* block allocation args */
530 int error; /* error return value */
531
532 memset(&args, 0, sizeof(args));
533 args.tp = cur->bc_tp;
534 args.mp = cur->bc_mp;
535 args.fsbno = cur->bc_private.b.firstblock;
536 args.firstblock = args.fsbno;
537
538 if (args.fsbno == NULLFSBLOCK) {
539 args.fsbno = be64_to_cpu(start->l);
540 args.type = XFS_ALLOCTYPE_START_BNO;
541 /*
542 * Make sure there is sufficient room left in the AG to
543 * complete a full tree split for an extent insert. If
544 * we are converting the middle part of an extent then
545 * we may need space for two tree splits.
546 *
547 * We are relying on the caller to make the correct block
548 * reservation for this operation to succeed. If the
549 * reservation amount is insufficient then we may fail a
550 * block allocation here and corrupt the filesystem.
551 */
552 args.minleft = xfs_trans_get_block_res(args.tp);
553 } else if (cur->bc_private.b.flist->xbf_low) {
554 args.type = XFS_ALLOCTYPE_START_BNO;
555 } else {
556 args.type = XFS_ALLOCTYPE_NEAR_BNO;
557 }
558
559 args.minlen = args.maxlen = args.prod = 1;
560 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
561 if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
562 error = XFS_ERROR(ENOSPC);
563 goto error0;
564 }
565 error = xfs_alloc_vextent(&args);
566 if (error)
567 goto error0;
568
569 if (args.fsbno == NULLFSBLOCK && args.minleft) {
570 /*
571 * Could not find an AG with enough free space to satisfy
572 * a full btree split. Try again without minleft and, if
573 * successful, activate the lowspace algorithm.
574 */
575 args.fsbno = 0;
576 args.type = XFS_ALLOCTYPE_FIRST_AG;
577 args.minleft = 0;
578 error = xfs_alloc_vextent(&args);
579 if (error)
580 goto error0;
581 cur->bc_private.b.flist->xbf_low = 1;
582 }
583 if (args.fsbno == NULLFSBLOCK) {
584 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
585 *stat = 0;
586 return 0;
587 }
588 ASSERT(args.len == 1);
589 cur->bc_private.b.firstblock = args.fsbno;
590 cur->bc_private.b.allocated++;
591 cur->bc_private.b.ip->i_d.di_nblocks++;
592 xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
593 XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
594 XFS_TRANS_DQ_BCOUNT, 1L);
595
596 new->l = cpu_to_be64(args.fsbno);
597
598 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
599 *stat = 1;
600 return 0;
601
602 error0:
603 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
604 return error;
605}
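
The allocation strategy here is two-pass: first ask for a block while reserving minleft blocks of AG headroom for a worst-case tree split, and only if that fails retry with no reservation and XFS_ALLOCTYPE_FIRST_AG, flipping the xbf_low flag so later allocations know the filesystem is nearly full. A userspace sketch of the same flow, with a hypothetical alloc_extent() standing in for xfs_alloc_vextent():

#include <stdio.h>
#include <stdint.h>

#define NULLFSBLOCK ((uint64_t)-1)

struct alloc_args {            /* stand-in for xfs_alloc_arg_t */
	uint64_t fsbno;        /* result block, NULLFSBLOCK on failure */
	int      minleft;      /* blocks that must remain free in the AG */
	int      first_ag;     /* stand-in for XFS_ALLOCTYPE_FIRST_AG */
};

/* Hypothetical allocator backed by an AG with ag_free free blocks. */
static void alloc_extent(struct alloc_args *args, int ag_free)
{
	/* Honor minleft: refuse if the AG would drop below the reserve. */
	if (ag_free - 1 < args->minleft)
		args->fsbno = NULLFSBLOCK;
	else
		args->fsbno = 1234;	/* any block number */
}

int main(void)
{
	struct alloc_args args = { .minleft = 8 };
	int lowspace = 0;	/* models cur->bc_private.b.flist->xbf_low */

	alloc_extent(&args, 5);	/* only 5 free blocks: first pass fails */
	if (args.fsbno == NULLFSBLOCK && args.minleft) {
		/* Retry without the reservation, like the code above. */
		args.minleft = 0;
		args.first_ag = 1;
		alloc_extent(&args, 5);
		if (args.fsbno != NULLFSBLOCK)
			lowspace = 1;	/* activate lowspace algorithm */
	}
	printf("block %llu, lowspace=%d\n",
	       (unsigned long long)args.fsbno, lowspace);
	return 0;
}
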
606
607STATIC int
608xfs_bmbt_free_block(
609 struct xfs_btree_cur *cur,
610 struct xfs_buf *bp)
611{
612 struct xfs_mount *mp = cur->bc_mp;
613 struct xfs_inode *ip = cur->bc_private.b.ip;
614 struct xfs_trans *tp = cur->bc_tp;
615 xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp));
616
617 xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp);
618 ip->i_d.di_nblocks--;
619
620 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
621 XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
622 xfs_trans_binval(tp, bp);
623 return 0;
624}
625
626STATIC int
627xfs_bmbt_get_minrecs(
628 struct xfs_btree_cur *cur,
629 int level)
630{
631 if (level == cur->bc_nlevels - 1) {
632 struct xfs_ifork *ifp;
633
634 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
635 cur->bc_private.b.whichfork);
636
637 return xfs_bmbt_maxrecs(cur->bc_mp,
638 ifp->if_broot_bytes, level == 0) / 2;
639 }
640
641 return cur->bc_mp->m_bmap_dmnr[level != 0];
642}
643
644int
645xfs_bmbt_get_maxrecs(
646 struct xfs_btree_cur *cur,
647 int level)
648{
649 if (level == cur->bc_nlevels - 1) {
650 struct xfs_ifork *ifp;
651
652 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip,
653 cur->bc_private.b.whichfork);
654
655 return xfs_bmbt_maxrecs(cur->bc_mp,
656 ifp->if_broot_bytes, level == 0);
657 }
658
659 return cur->bc_mp->m_bmap_dmxr[level != 0];
660
661}
662
663/*
664 * Get the maximum number of records we could store in the on-disk format.
665 *
666 * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
667 * for the root node this checks the available space in the dinode fork
668 * so that we can resize the in-memory buffer to match it. After a
669 * resize to the maximum size this function returns the same value
670 * as xfs_bmbt_get_maxrecs for the root node, too.
671 */
672STATIC int
673xfs_bmbt_get_dmaxrecs(
674 struct xfs_btree_cur *cur,
675 int level)
676{
677 if (level != cur->bc_nlevels - 1)
678 return cur->bc_mp->m_bmap_dmxr[level != 0];
679 return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize,
680 level == 0);
681}
682
683STATIC void
684xfs_bmbt_init_key_from_rec(
685 union xfs_btree_key *key,
686 union xfs_btree_rec *rec)
687{
688 key->bmbt.br_startoff =
689 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt));
690}
691
692STATIC void
693xfs_bmbt_init_rec_from_key(
694 union xfs_btree_key *key,
695 union xfs_btree_rec *rec)
696{
697 ASSERT(key->bmbt.br_startoff != 0);
698
699 xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff),
700 0, 0, XFS_EXT_NORM);
701}
702
703STATIC void
704xfs_bmbt_init_rec_from_cur(
705 struct xfs_btree_cur *cur,
706 union xfs_btree_rec *rec)
707{
708 xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
709}
710
711STATIC void
712xfs_bmbt_init_ptr_from_cur(
713 struct xfs_btree_cur *cur,
714 union xfs_btree_ptr *ptr)
715{
716 ptr->l = 0;
717}
718
719STATIC __int64_t
720xfs_bmbt_key_diff(
721 struct xfs_btree_cur *cur,
722 union xfs_btree_key *key)
723{
724 return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) -
725 cur->bc_rec.b.br_startoff;
726}
727
728#ifdef DEBUG
729STATIC int
730xfs_bmbt_keys_inorder(
731 struct xfs_btree_cur *cur,
732 union xfs_btree_key *k1,
733 union xfs_btree_key *k2)
734{
735 return be64_to_cpu(k1->bmbt.br_startoff) <
736 be64_to_cpu(k2->bmbt.br_startoff);
737}
738
739STATIC int
740xfs_bmbt_recs_inorder(
741 struct xfs_btree_cur *cur,
742 union xfs_btree_rec *r1,
743 union xfs_btree_rec *r2)
744{
745 return xfs_bmbt_disk_get_startoff(&r1->bmbt) +
746 xfs_bmbt_disk_get_blockcount(&r1->bmbt) <=
747 xfs_bmbt_disk_get_startoff(&r2->bmbt);
748}
749#endif /* DEBUG */
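
The two DEBUG hooks state the bmbt ordering invariants that the generic code verifies: keys strictly increase, and consecutive records may touch but never overlap (startoff + blockcount of one extent must not exceed the next startoff). The same predicate in standalone form, with a toy extent struct:

#include <stdio.h>
#include <stdint.h>

struct ext { uint64_t startoff, blockcount; };

static int recs_inorder(const struct ext *r1, const struct ext *r2)
{
	/* Mirrors xfs_bmbt_recs_inorder: extents may touch, not overlap. */
	return r1->startoff + r1->blockcount <= r2->startoff;
}

int main(void)
{
	struct ext a = { 0, 10 }, b = { 10, 4 }, c = { 12, 4 };

	printf("a,b ok=%d  b,c ok=%d\n",
	       recs_inorder(&a, &b), recs_inorder(&b, &c));
	return 0;
}
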
750
751#ifdef XFS_BTREE_TRACE
752ktrace_t *xfs_bmbt_trace_buf;
753
754STATIC void
755xfs_bmbt_trace_enter(
756 struct xfs_btree_cur *cur,
757 const char *func,
758 char *s,
759 int type,
760 int line,
761 __psunsigned_t a0,
762 __psunsigned_t a1,
763 __psunsigned_t a2,
764 __psunsigned_t a3,
765 __psunsigned_t a4,
766 __psunsigned_t a5,
767 __psunsigned_t a6,
768 __psunsigned_t a7,
769 __psunsigned_t a8,
770 __psunsigned_t a9,
771 __psunsigned_t a10)
772{
773 struct xfs_inode *ip = cur->bc_private.b.ip;
774 int whichfork = cur->bc_private.b.whichfork;
775
776 ktrace_enter(xfs_bmbt_trace_buf,
777 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
778 (void *)func, (void *)s, (void *)ip, (void *)cur,
779 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
780 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
781 (void *)a8, (void *)a9, (void *)a10);
782 ktrace_enter(ip->i_btrace,
783 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
784 (void *)func, (void *)s, (void *)ip, (void *)cur,
785 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
786 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
787 (void *)a8, (void *)a9, (void *)a10);
788}
789
790STATIC void
791xfs_bmbt_trace_cursor(
792 struct xfs_btree_cur *cur,
793 __uint32_t *s0,
794 __uint64_t *l0,
795 __uint64_t *l1)
796{
797 struct xfs_bmbt_rec_host r;
798
799 xfs_bmbt_set_all(&r, &cur->bc_rec.b);
800
801 *s0 = (cur->bc_nlevels << 24) |
802 (cur->bc_private.b.flags << 16) |
803 cur->bc_private.b.allocated;
804 *l0 = r.l0;
805 *l1 = r.l1;
806}
807
808STATIC void
809xfs_bmbt_trace_key(
810 struct xfs_btree_cur *cur,
811 union xfs_btree_key *key,
812 __uint64_t *l0,
813 __uint64_t *l1)
814{
815 *l0 = be64_to_cpu(key->bmbt.br_startoff);
816 *l1 = 0;
817}
818
819STATIC void
820xfs_bmbt_trace_record(
821 struct xfs_btree_cur *cur,
822 union xfs_btree_rec *rec,
823 __uint64_t *l0,
824 __uint64_t *l1,
825 __uint64_t *l2)
826{
827 struct xfs_bmbt_irec irec;
828
829 xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
830 *l0 = irec.br_startoff;
831 *l1 = irec.br_startblock;
832 *l2 = irec.br_blockcount;
833}
834#endif /* XFS_BTREE_TRACE */
835
836static const struct xfs_btree_ops xfs_bmbt_ops = {
837 .rec_len = sizeof(xfs_bmbt_rec_t),
838 .key_len = sizeof(xfs_bmbt_key_t),
839
840 .dup_cursor = xfs_bmbt_dup_cursor,
841 .update_cursor = xfs_bmbt_update_cursor,
842 .alloc_block = xfs_bmbt_alloc_block,
843 .free_block = xfs_bmbt_free_block,
844 .get_maxrecs = xfs_bmbt_get_maxrecs,
845 .get_minrecs = xfs_bmbt_get_minrecs,
846 .get_dmaxrecs = xfs_bmbt_get_dmaxrecs,
847 .init_key_from_rec = xfs_bmbt_init_key_from_rec,
848 .init_rec_from_key = xfs_bmbt_init_rec_from_key,
849 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
850 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
851 .key_diff = xfs_bmbt_key_diff,
852
853#ifdef DEBUG
854 .keys_inorder = xfs_bmbt_keys_inorder,
855 .recs_inorder = xfs_bmbt_recs_inorder,
856#endif
857
858#ifdef XFS_BTREE_TRACE
859 .trace_enter = xfs_bmbt_trace_enter,
860 .trace_cursor = xfs_bmbt_trace_cursor,
861 .trace_key = xfs_bmbt_trace_key,
862 .trace_record = xfs_bmbt_trace_record,
863#endif
864};
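
This ops table is the heart of the refactoring in this patch: generic code in xfs_btree.c no longer switches on bc_btnum but calls through cur->bc_ops (see the new get_maxrecs call in xfs_btree_check_lblock below). A stripped-down model of the pattern, using invented toy_* names rather than the kernel types:

#include <stdio.h>

struct toy_cur;

struct toy_btree_ops {
	int (*get_maxrecs)(struct toy_cur *cur, int level);
};

struct toy_cur {
	const struct toy_btree_ops *ops;
	int blocklen;
};

/* One concrete btree type provides its own geometry... */
static int bmbt_maxrecs(struct toy_cur *cur, int level)
{
	int recsz = level == 0 ? 16 : 24;	/* leaf rec vs key+ptr */

	return cur->blocklen / recsz;
}

static const struct toy_btree_ops bmbt_ops = {
	.get_maxrecs = bmbt_maxrecs,
};

/* ...and generic code dispatches without knowing which type it is. */
static int generic_check_numrecs(struct toy_cur *cur, int level, int numrecs)
{
	return numrecs <= cur->ops->get_maxrecs(cur, level);
}

int main(void)
{
	struct toy_cur cur = { .ops = &bmbt_ops, .blocklen = 4096 - 24 };

	printf("ok=%d\n", generic_check_numrecs(&cur, 0, 200));
	return 0;
}
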
865
866/*
867 * Allocate a new bmap btree cursor.
868 */
869struct xfs_btree_cur * /* new bmap btree cursor */
870xfs_bmbt_init_cursor(
871 struct xfs_mount *mp, /* file system mount point */
872 struct xfs_trans *tp, /* transaction pointer */
873 struct xfs_inode *ip, /* inode owning the btree */
874 int whichfork) /* data or attr fork */
875{
876 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
877 struct xfs_btree_cur *cur;
878
879 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
880
881 cur->bc_tp = tp;
882 cur->bc_mp = mp;
883 cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
884 cur->bc_btnum = XFS_BTNUM_BMAP;
885 cur->bc_blocklog = mp->m_sb.sb_blocklog;
886
887 cur->bc_ops = &xfs_bmbt_ops;
888 cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
889
890 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
891 cur->bc_private.b.ip = ip;
892 cur->bc_private.b.firstblock = NULLFSBLOCK;
893 cur->bc_private.b.flist = NULL;
894 cur->bc_private.b.allocated = 0;
895 cur->bc_private.b.flags = 0;
896 cur->bc_private.b.whichfork = whichfork;
897
898 return cur;
899}
900
901/*
902 * Calculate number of records in a bmap btree block.
903 */
904int
905xfs_bmbt_maxrecs(
906 struct xfs_mount *mp,
907 int blocklen,
908 int leaf)
909{
910 blocklen -= XFS_BMBT_BLOCK_LEN(mp);
911
912 if (leaf)
913 return blocklen / sizeof(xfs_bmbt_rec_t);
914 return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
915}
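
Concrete numbers make the geometry easier to check. Assuming a 4096-byte block, the pre-CRC long-format header of 24 bytes (4-byte magic, two 2-byte fields, two 8-byte sibling pointers), 16-byte records, and 8-byte keys and pointers, a quick calculation reproduces what this function would return; the sizes are inferred from the structures in this patch, so treat them as illustrative:

#include <stdio.h>

int main(void)
{
	int blocklen = 4096;
	int hdr = 4 + 2 + 2 + 8 + 8;	/* magic, level, numrecs, siblings */
	int rec = 16, key = 8, ptr = 8;

	blocklen -= hdr;
	printf("leaf maxrecs: %d\n", blocklen / rec);		/* 254 */
	printf("node maxrecs: %d\n", blocklen / (key + ptr));	/* 254 */
	return 0;
}
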
916
917/*
918 * Calculate number of records in a bmap btree inode root.
919 */
920int
921xfs_bmdr_maxrecs(
922 struct xfs_mount *mp,
923 int blocklen,
924 int leaf)
925{
926 blocklen -= sizeof(xfs_bmdr_block_t);
927
928 if (leaf)
929 return blocklen / sizeof(xfs_bmdr_rec_t);
930 return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
931}
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index cd0d4b4bb816..a4555abb6622 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -21,9 +21,10 @@
21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ 21#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */
22 22
23struct xfs_btree_cur; 23struct xfs_btree_cur;
24struct xfs_btree_lblock; 24struct xfs_btree_block;
25struct xfs_mount; 25struct xfs_mount;
26struct xfs_inode; 26struct xfs_inode;
27struct xfs_trans;
27 28
28/* 29/*
29 * Bmap root header, on-disk form only. 30 * Bmap root header, on-disk form only.
@@ -145,71 +146,60 @@ typedef struct xfs_bmbt_key {
145/* btree pointer type */ 146/* btree pointer type */
146typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; 147typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
147 148
148/* btree block header type */ 149/*
149typedef struct xfs_btree_lblock xfs_bmbt_block_t; 150 * Btree block header size depends on a superblock flag.
150 151 *
151#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) 152 * (not quite yet, but soon)
152 153 */
153#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize) 154#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN
154#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \ 155
155 ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \ 156#define XFS_BMBT_REC_ADDR(mp, block, index) \
156 (cur)->bc_private.b.whichfork)->if_broot_bytes) 157 ((xfs_bmbt_rec_t *) \
157 158 ((char *)(block) + \
158#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \ 159 XFS_BMBT_BLOCK_LEN(mp) + \
159 (((lev) == (cur)->bc_nlevels - 1 ? \ 160 ((index) - 1) * sizeof(xfs_bmbt_rec_t)))
160 XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \ 161
161 xfs_bmdr, (lev) == 0) : \ 162#define XFS_BMBT_KEY_ADDR(mp, block, index) \
162 ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) 163 ((xfs_bmbt_key_t *) \
163#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \ 164 ((char *)(block) + \
164 (((lev) == (cur)->bc_nlevels - 1 ? \ 165 XFS_BMBT_BLOCK_LEN(mp) + \
165 XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ 166 ((index) - 1) * sizeof(xfs_bmbt_key_t)))
166 xfs_bmbt, (lev) == 0) : \ 167
167 ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) 168#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \
168 169 ((xfs_bmbt_ptr_t *) \
169#define XFS_BMAP_BLOCK_DMINRECS(lev,cur) \ 170 ((char *)(block) + \
170 (((lev) == (cur)->bc_nlevels - 1 ? \ 171 XFS_BMBT_BLOCK_LEN(mp) + \
171 XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur),\ 172 (maxrecs) * sizeof(xfs_bmbt_key_t) + \
172 xfs_bmdr, (lev) == 0) : \ 173 ((index) - 1) * sizeof(xfs_bmbt_ptr_t)))
173 ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) 174
174#define XFS_BMAP_BLOCK_IMINRECS(lev,cur) \ 175#define XFS_BMDR_REC_ADDR(block, index) \
175 (((lev) == (cur)->bc_nlevels - 1 ? \ 176 ((xfs_bmdr_rec_t *) \
176 XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ 177 ((char *)(block) + \
177 xfs_bmbt, (lev) == 0) : \ 178 sizeof(struct xfs_bmdr_block) + \
178 ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) 179 ((index) - 1) * sizeof(xfs_bmdr_rec_t)))
179 180
180#define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) 181#define XFS_BMDR_KEY_ADDR(block, index) \
181 182 ((xfs_bmdr_key_t *) \
182#define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) 183 ((char *)(block) + \
183 184 sizeof(struct xfs_bmdr_block) + \
184#define XFS_BMAP_KEY_DADDR(bb,i,cur) \ 185 ((index) - 1) * sizeof(xfs_bmdr_key_t)))
185 (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) 186
186 187#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \
187#define XFS_BMAP_KEY_IADDR(bb,i,cur) \ 188 ((xfs_bmdr_ptr_t *) \
188 (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) 189 ((char *)(block) + \
189 190 sizeof(struct xfs_bmdr_block) + \
190#define XFS_BMAP_PTR_DADDR(bb,i,cur) \ 191 (maxrecs) * sizeof(xfs_bmdr_key_t) + \
191 (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ 192 ((index) - 1) * sizeof(xfs_bmdr_ptr_t)))
192 be16_to_cpu((bb)->bb_level), cur)))
193#define XFS_BMAP_PTR_IADDR(bb,i,cur) \
194 (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
195 be16_to_cpu((bb)->bb_level), cur)))
196 193
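
The new *_ADDR macros above are plain 1-based pointer arithmetic: records and keys begin right after the block header, and pointers begin after maxrecs keys. A standalone check of the byte offsets, under the same assumed sizes as the maxrecs example in xfs_bmap_btree.c:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	size_t hdr = 24, keysz = 8, ptrsz = 8;
	int maxrecs = 254, index = 3;

	/* XFS_BMBT_KEY_ADDR(mp, block, 3), as a byte offset */
	size_t key_off = hdr + (index - 1) * keysz;
	/* XFS_BMBT_PTR_ADDR(mp, block, 3, maxrecs) */
	size_t ptr_off = hdr + maxrecs * keysz + (index - 1) * ptrsz;

	printf("key 3 at +%zu, ptr 3 at +%zu\n", key_off, ptr_off);
	return 0;
}
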
197/* 194/*
198 * These are to be used when we know the size of the block and 195 * These are to be used when we know the size of the block and
199 * we don't have a cursor. 196 * we don't have a cursor.
200 */ 197 */
201#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \ 198#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
202 (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i)) 199 XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
203#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
204 (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i))
205#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
206 (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
207
208#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs)
209#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
210 200
211#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ 201#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
212 (int)(sizeof(xfs_bmbt_block_t) + \ 202 (int)(XFS_BTREE_LBLOCK_LEN + \
213 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) 203 ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
214 204
215#define XFS_BMAP_BROOT_SPACE(bb) \ 205#define XFS_BMAP_BROOT_SPACE(bb) \
@@ -223,42 +213,12 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
223 */ 213 */
224#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)]) 214#define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)])
225 215
226#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \
227 (be32_to_cpu((bb)->bb_magic) == XFS_BMAP_MAGIC && \
228 be16_to_cpu((bb)->bb_level) == level && \
229 be16_to_cpu((bb)->bb_numrecs) > 0 && \
230 be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0])
231
232
233#ifdef __KERNEL__
234
235#if defined(XFS_BMBT_TRACE)
236/*
237 * Trace buffer entry types.
238 */
239#define XFS_BMBT_KTRACE_ARGBI 1
240#define XFS_BMBT_KTRACE_ARGBII 2
241#define XFS_BMBT_KTRACE_ARGFFFI 3
242#define XFS_BMBT_KTRACE_ARGI 4
243#define XFS_BMBT_KTRACE_ARGIFK 5
244#define XFS_BMBT_KTRACE_ARGIFR 6
245#define XFS_BMBT_KTRACE_ARGIK 7
246#define XFS_BMBT_KTRACE_CUR 8
247
248#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
249#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
250extern ktrace_t *xfs_bmbt_trace_buf;
251#endif
252
253/* 216/*
254 * Prototypes for xfs_bmap.c to call. 217 * Prototypes for xfs_bmap.c to call.
255 */ 218 */
256extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int); 219extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int,
257extern int xfs_bmbt_decrement(struct xfs_btree_cur *, int, int *); 220 struct xfs_btree_block *, int);
258extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *);
259extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); 221extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
260extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur,
261 int, struct xfs_buf **bpp);
262extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); 222extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
263extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); 223extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r);
264extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); 224extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r);
@@ -268,22 +228,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
268extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); 228extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
269extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); 229extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
270 230
271extern int xfs_bmbt_increment(struct xfs_btree_cur *, int, int *);
272extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *);
273extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
274extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int,
275 int);
276extern int xfs_bmbt_lookup_eq(struct xfs_btree_cur *, xfs_fileoff_t,
277 xfs_fsblock_t, xfs_filblks_t, int *);
278extern int xfs_bmbt_lookup_ge(struct xfs_btree_cur *, xfs_fileoff_t,
279 xfs_fsblock_t, xfs_filblks_t, int *);
280
281/*
282 * Give the bmap btree a new root block. Copy the old broot contents
283 * down into a real block and make the broot point to it.
284 */
285extern int xfs_bmbt_newroot(struct xfs_btree_cur *cur, int *lflags, int *stat);
286
287extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); 231extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
288extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, 232extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o,
289 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); 233 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
@@ -296,10 +240,15 @@ extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
296extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, 240extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o,
297 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); 241 xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v);
298 242
299extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); 243extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,
300extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t, 244 xfs_bmdr_block_t *, int);
301 xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t); 245
246extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
247extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
248extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
249
250extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
251 struct xfs_trans *, struct xfs_inode *, int);
302 252
303#endif /* __KERNEL__ */
304 253
305#endif /* __XFS_BMAP_BTREE_H__ */ 254#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index cc593a84c345..7ed59267420d 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -34,7 +34,9 @@
34#include "xfs_attr_sf.h" 34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_inode_item.h"
37#include "xfs_btree.h" 38#include "xfs_btree.h"
39#include "xfs_btree_trace.h"
38#include "xfs_ialloc.h" 40#include "xfs_ialloc.h"
39#include "xfs_error.h" 41#include "xfs_error.h"
40 42
@@ -50,135 +52,33 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
50 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC 52 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
51}; 53};
52 54
53/*
54 * Checking routine: return maxrecs for the block.
55 */
56STATIC int /* number of records fitting in block */
57xfs_btree_maxrecs(
58 xfs_btree_cur_t *cur, /* btree cursor */
59 xfs_btree_block_t *block) /* generic btree block pointer */
60{
61 switch (cur->bc_btnum) {
62 case XFS_BTNUM_BNO:
63 case XFS_BTNUM_CNT:
64 return (int)XFS_ALLOC_BLOCK_MAXRECS(
65 be16_to_cpu(block->bb_h.bb_level), cur);
66 case XFS_BTNUM_BMAP:
67 return (int)XFS_BMAP_BLOCK_IMAXRECS(
68 be16_to_cpu(block->bb_h.bb_level), cur);
69 case XFS_BTNUM_INO:
70 return (int)XFS_INOBT_BLOCK_MAXRECS(
71 be16_to_cpu(block->bb_h.bb_level), cur);
72 default:
73 ASSERT(0);
74 return 0;
75 }
76}
77
78/*
79 * External routines.
80 */
81
82#ifdef DEBUG
83/*
84 * Debug routine: check that block header is ok.
85 */
86void
87xfs_btree_check_block(
88 xfs_btree_cur_t *cur, /* btree cursor */
89 xfs_btree_block_t *block, /* generic btree block pointer */
90 int level, /* level of the btree block */
91 xfs_buf_t *bp) /* buffer containing block, if any */
92{
93 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
94 xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
95 bp);
96 else
97 xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
98 bp);
99}
100
101/*
102 * Debug routine: check that keys are in the right order.
103 */
104void
105xfs_btree_check_key(
106 xfs_btnum_t btnum, /* btree identifier */
107 void *ak1, /* pointer to left (lower) key */
108 void *ak2) /* pointer to right (higher) key */
109{
110 switch (btnum) {
111 case XFS_BTNUM_BNO: {
112 xfs_alloc_key_t *k1;
113 xfs_alloc_key_t *k2;
114
115 k1 = ak1;
116 k2 = ak2;
117 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
118 break;
119 }
120 case XFS_BTNUM_CNT: {
121 xfs_alloc_key_t *k1;
122 xfs_alloc_key_t *k2;
123
124 k1 = ak1;
125 k2 = ak2;
126 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
127 (k1->ar_blockcount == k2->ar_blockcount &&
128 be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
129 break;
130 }
131 case XFS_BTNUM_BMAP: {
132 xfs_bmbt_key_t *k1;
133 xfs_bmbt_key_t *k2;
134
135 k1 = ak1;
136 k2 = ak2;
137 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
138 break;
139 }
140 case XFS_BTNUM_INO: {
141 xfs_inobt_key_t *k1;
142 xfs_inobt_key_t *k2;
143
144 k1 = ak1;
145 k2 = ak2;
146 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
147 break;
148 }
149 default:
150 ASSERT(0);
151 }
152}
153#endif /* DEBUG */
154 55
155/* 56STATIC int /* error (0 or EFSCORRUPTED) */
156 * Checking routine: check that long form block header is ok.
157 */
158/* ARGSUSED */
159int /* error (0 or EFSCORRUPTED) */
160xfs_btree_check_lblock( 57xfs_btree_check_lblock(
161 xfs_btree_cur_t *cur, /* btree cursor */ 58 struct xfs_btree_cur *cur, /* btree cursor */
162 xfs_btree_lblock_t *block, /* btree long form block pointer */ 59 struct xfs_btree_block *block, /* btree long form block pointer */
163 int level, /* level of the btree block */ 60 int level, /* level of the btree block */
164 xfs_buf_t *bp) /* buffer for block, if any */ 61 struct xfs_buf *bp) /* buffer for block, if any */
165{ 62{
166 int lblock_ok; /* block passes checks */ 63 int lblock_ok; /* block passes checks */
167 xfs_mount_t *mp; /* file system mount point */ 64 struct xfs_mount *mp; /* file system mount point */
168 65
169 mp = cur->bc_mp; 66 mp = cur->bc_mp;
170 lblock_ok = 67 lblock_ok =
171 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 68 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
172 be16_to_cpu(block->bb_level) == level && 69 be16_to_cpu(block->bb_level) == level &&
173 be16_to_cpu(block->bb_numrecs) <= 70 be16_to_cpu(block->bb_numrecs) <=
174 xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && 71 cur->bc_ops->get_maxrecs(cur, level) &&
175 block->bb_leftsib && 72 block->bb_u.l.bb_leftsib &&
176 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || 73 (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
177 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && 74 XFS_FSB_SANITY_CHECK(mp,
178 block->bb_rightsib && 75 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
179 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO || 76 block->bb_u.l.bb_rightsib &&
180 XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib))); 77 (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
181 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK, 78 XFS_FSB_SANITY_CHECK(mp,
79 be64_to_cpu(block->bb_u.l.bb_rightsib)));
80 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
81 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
182 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 82 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
183 if (bp) 83 if (bp)
184 xfs_buftrace("LBTREE ERROR", bp); 84 xfs_buftrace("LBTREE ERROR", bp);
@@ -189,98 +89,15 @@ xfs_btree_check_lblock(
189 return 0; 89 return 0;
190} 90}
191 91
192/* 92STATIC int /* error (0 or EFSCORRUPTED) */
193 * Checking routine: check that (long) pointer is ok.
194 */
195int /* error (0 or EFSCORRUPTED) */
196xfs_btree_check_lptr(
197 xfs_btree_cur_t *cur, /* btree cursor */
198 xfs_dfsbno_t ptr, /* btree block disk address */
199 int level) /* btree block level */
200{
201 xfs_mount_t *mp; /* file system mount point */
202
203 mp = cur->bc_mp;
204 XFS_WANT_CORRUPTED_RETURN(
205 level > 0 &&
206 ptr != NULLDFSBNO &&
207 XFS_FSB_SANITY_CHECK(mp, ptr));
208 return 0;
209}
210
211#ifdef DEBUG
212/*
213 * Debug routine: check that records are in the right order.
214 */
215void
216xfs_btree_check_rec(
217 xfs_btnum_t btnum, /* btree identifier */
218 void *ar1, /* pointer to left (lower) record */
219 void *ar2) /* pointer to right (higher) record */
220{
221 switch (btnum) {
222 case XFS_BTNUM_BNO: {
223 xfs_alloc_rec_t *r1;
224 xfs_alloc_rec_t *r2;
225
226 r1 = ar1;
227 r2 = ar2;
228 ASSERT(be32_to_cpu(r1->ar_startblock) +
229 be32_to_cpu(r1->ar_blockcount) <=
230 be32_to_cpu(r2->ar_startblock));
231 break;
232 }
233 case XFS_BTNUM_CNT: {
234 xfs_alloc_rec_t *r1;
235 xfs_alloc_rec_t *r2;
236
237 r1 = ar1;
238 r2 = ar2;
239 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
240 (r1->ar_blockcount == r2->ar_blockcount &&
241 be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
242 break;
243 }
244 case XFS_BTNUM_BMAP: {
245 xfs_bmbt_rec_t *r1;
246 xfs_bmbt_rec_t *r2;
247
248 r1 = ar1;
249 r2 = ar2;
250 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
251 xfs_bmbt_disk_get_blockcount(r1) <=
252 xfs_bmbt_disk_get_startoff(r2));
253 break;
254 }
255 case XFS_BTNUM_INO: {
256 xfs_inobt_rec_t *r1;
257 xfs_inobt_rec_t *r2;
258
259 r1 = ar1;
260 r2 = ar2;
261 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
262 be32_to_cpu(r2->ir_startino));
263 break;
264 }
265 default:
266 ASSERT(0);
267 }
268}
269#endif /* DEBUG */
270
271/*
272 * Checking routine: check that block header is ok.
273 */
274/* ARGSUSED */
275int /* error (0 or EFSCORRUPTED) */
276xfs_btree_check_sblock( 93xfs_btree_check_sblock(
277 xfs_btree_cur_t *cur, /* btree cursor */ 94 struct xfs_btree_cur *cur, /* btree cursor */
278 xfs_btree_sblock_t *block, /* btree short form block pointer */ 95 struct xfs_btree_block *block, /* btree short form block pointer */
279 int level, /* level of the btree block */ 96 int level, /* level of the btree block */
280 xfs_buf_t *bp) /* buffer containing block */ 97 struct xfs_buf *bp) /* buffer containing block */
281{ 98{
282 xfs_buf_t *agbp; /* buffer for ag. freespace struct */ 99 struct xfs_buf *agbp; /* buffer for ag. freespace struct */
283 xfs_agf_t *agf; /* ag. freespace structure */ 100 struct xfs_agf *agf; /* ag. freespace structure */
284 xfs_agblock_t agflen; /* native ag. freespace length */ 101 xfs_agblock_t agflen; /* native ag. freespace length */
285 int sblock_ok; /* block passes checks */ 102 int sblock_ok; /* block passes checks */
286 103
@@ -291,13 +108,13 @@ xfs_btree_check_sblock(
291 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && 108 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
292 be16_to_cpu(block->bb_level) == level && 109 be16_to_cpu(block->bb_level) == level &&
293 be16_to_cpu(block->bb_numrecs) <= 110 be16_to_cpu(block->bb_numrecs) <=
294 xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && 111 cur->bc_ops->get_maxrecs(cur, level) &&
295 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK || 112 (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
296 be32_to_cpu(block->bb_leftsib) < agflen) && 113 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
297 block->bb_leftsib && 114 block->bb_u.s.bb_leftsib &&
298 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK || 115 (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
299 be32_to_cpu(block->bb_rightsib) < agflen) && 116 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
300 block->bb_rightsib; 117 block->bb_u.s.bb_rightsib;
301 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, 118 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
302 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 119 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
303 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
@@ -311,27 +128,78 @@ xfs_btree_check_sblock(
 }

 /*
- * Checking routine: check that (short) pointer is ok.
+ * Debug routine: check that block header is ok.
+ */
+int
+xfs_btree_check_block(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	struct xfs_btree_block	*block,	/* generic btree block pointer */
+	int			level,	/* level of the btree block */
+	struct xfs_buf		*bp)	/* buffer containing block, if any */
+{
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		return xfs_btree_check_lblock(cur, block, level, bp);
+	else
+		return xfs_btree_check_sblock(cur, block, level, bp);
+}
+
+/*
+ * Check that (long) pointer is ok.
  */
 int					/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_lptr(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_dfsbno_t		bno,	/* btree block disk address */
+	int			level)	/* btree block level */
+{
+	XFS_WANT_CORRUPTED_RETURN(
+		level > 0 &&
+		bno != NULLDFSBNO &&
+		XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
+	return 0;
+}
+
+#ifdef DEBUG
+/*
+ * Check that (short) pointer is ok.
+ */
+STATIC int				/* error (0 or EFSCORRUPTED) */
 xfs_btree_check_sptr(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
-	xfs_agblock_t		ptr,	/* btree block disk address */
-	int			level)	/* btree block level */
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* btree block disk address */
+	int			level)	/* btree block level */
 {
-	xfs_buf_t		*agbp;	/* buffer for ag. freespace struct */
-	xfs_agf_t		*agf;	/* ag. freespace structure */
+	xfs_agblock_t		agblocks = cur->bc_mp->m_sb.sb_agblocks;

-	agbp = cur->bc_private.a.agbp;
-	agf = XFS_BUF_TO_AGF(agbp);
 	XFS_WANT_CORRUPTED_RETURN(
 		level > 0 &&
-		ptr != NULLAGBLOCK && ptr != 0 &&
-		ptr < be32_to_cpu(agf->agf_length));
+		bno != NULLAGBLOCK &&
+		bno != 0 &&
+		bno < agblocks);
 	return 0;
 }

 /*
+ * Check that block ptr is ok.
+ */
+STATIC int				/* error (0 or EFSCORRUPTED) */
+xfs_btree_check_ptr(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	union xfs_btree_ptr	*ptr,	/* btree block disk address */
+	int			index,	/* offset from ptr to check */
+	int			level)	/* btree block level */
+{
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+		return xfs_btree_check_lptr(cur,
+				be64_to_cpu((&ptr->l)[index]), level);
+	} else {
+		return xfs_btree_check_sptr(cur,
+				be32_to_cpu((&ptr->s)[index]), level);
+	}
+}
+#endif
+
+/*
  * Delete the btree cursor.
  */
 void
@@ -387,16 +255,17 @@ xfs_btree_dup_cursor(

 	tp = cur->bc_tp;
 	mp = cur->bc_mp;
+
 	/*
 	 * Allocate a new cursor like the old one.
 	 */
-	new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
-		cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
-		cur->bc_private.b.whichfork);
+	new = cur->bc_ops->dup_cursor(cur);
+
 	/*
 	 * Copy the record currently in the cursor.
 	 */
 	new->bc_rec = cur->bc_rec;
+
 	/*
 	 * For each level current, re-get the buffer and copy the ptr value.
 	 */
@@ -416,46 +285,174 @@ xfs_btree_dup_cursor(
 	} else
 		new->bc_bufs[i] = NULL;
 	}
-	/*
-	 * For bmap btrees, copy the firstblock, flist, and flags values,
-	 * since init cursor doesn't get them.
-	 */
-	if (new->bc_btnum == XFS_BTNUM_BMAP) {
-		new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
-		new->bc_private.b.flist = cur->bc_private.b.flist;
-		new->bc_private.b.flags = cur->bc_private.b.flags;
-	}
 	*ncur = new;
 	return 0;
 }

 /*
293 * XFS btree block layout and addressing:
294 *
295 * There are two types of blocks in the btree: leaf and non-leaf blocks.
296 *
297 * A leaf block starts with a header, followed by records containing
298 * the values. A non-leaf block also starts with the same header, and
299 * then first contains lookup keys followed by an equal number of pointers
300 * to the btree blocks at the previous level.
301 *
302 * +--------+-------+-------+-------+-------+-------+-------+
303 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
304 * +--------+-------+-------+-------+-------+-------+-------+
305 *
306 * +--------+-------+-------+-------+-------+-------+-------+
307 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
308 * +--------+-------+-------+-------+-------+-------+-------+
309 *
310 * The header is called struct xfs_btree_block for reasons better left unknown
311 * and comes in different versions for short (32bit) and long (64bit) block
312 * pointers. The record and key structures are defined by the btree instances
313 * and opaque to the btree core. The block pointers are simple disk endian
314 * integers, available in a short (32bit) and long (64bit) variant.
315 *
316 * The helpers below calculate the offset of a given record, key or pointer
317 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
318 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
319 * inside the btree block is done using indices starting at one, not zero!
320 */
321
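As a concrete check of the addressing rules above, here is a minimal standalone sketch of the same offset arithmetic. All sizes are illustrative stand-ins for bc_ops->key_len, bc_ops->rec_len and get_maxrecs() (a 16-byte short-form header, 8-byte keys, 4-byte short pointers, 16 entries per block); only the 1-based indexing and the key-then-pointer layout mirror the helpers that follow.

#include <stdio.h>
#include <stddef.h>

#define HDR_LEN		16	/* assumed short-form block header size */
#define KEY_LEN		8	/* assumed per-btree key size */
#define PTR_LEN		4	/* short (32-bit) block pointer */
#define MAXRECS		16	/* assumed get_maxrecs() result */

/* offset of the n-th key; indices are 1-based, as in the kernel */
static size_t key_offset(int n)
{
	return HDR_LEN + (size_t)(n - 1) * KEY_LEN;
}

/* pointers live after MAXRECS keys, again 1-based */
static size_t ptr_offset(int n)
{
	return HDR_LEN + (size_t)MAXRECS * KEY_LEN +
	       (size_t)(n - 1) * PTR_LEN;
}

int main(void)
{
	/* key 1 starts right after the header: offset 16 */
	printf("key 1 at %zu\n", key_offset(1));
	/* ptr 1 starts after the 16 keys: 16 + 16*8 = 144 */
	printf("ptr 1 at %zu\n", ptr_offset(1));
	return 0;
}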
322/*
323 * Return size of the btree block header for this btree instance.
324 */
325static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
326{
327 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
328 XFS_BTREE_LBLOCK_LEN :
329 XFS_BTREE_SBLOCK_LEN;
330}
331
332/*
333 * Return size of btree block pointers for this btree instance.
334 */
335static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
336{
337 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
338 sizeof(__be64) : sizeof(__be32);
339}
340
341/*
342 * Calculate offset of the n-th record in a btree block.
343 */
344STATIC size_t
345xfs_btree_rec_offset(
346 struct xfs_btree_cur *cur,
347 int n)
348{
349 return xfs_btree_block_len(cur) +
350 (n - 1) * cur->bc_ops->rec_len;
351}
352
353/*
354 * Calculate offset of the n-th key in a btree block.
355 */
356STATIC size_t
357xfs_btree_key_offset(
358 struct xfs_btree_cur *cur,
359 int n)
360{
361 return xfs_btree_block_len(cur) +
362 (n - 1) * cur->bc_ops->key_len;
363}
364
365/*
366 * Calculate offset of the n-th block pointer in a btree block.
367 */
368STATIC size_t
369xfs_btree_ptr_offset(
370 struct xfs_btree_cur *cur,
371 int n,
372 int level)
373{
374 return xfs_btree_block_len(cur) +
375 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
376 (n - 1) * xfs_btree_ptr_len(cur);
377}
378
379/*
380 * Return a pointer to the n-th record in the btree block.
381 */
382STATIC union xfs_btree_rec *
383xfs_btree_rec_addr(
384 struct xfs_btree_cur *cur,
385 int n,
386 struct xfs_btree_block *block)
387{
388 return (union xfs_btree_rec *)
389 ((char *)block + xfs_btree_rec_offset(cur, n));
390}
391
392/*
393 * Return a pointer to the n-th key in the btree block.
394 */
395STATIC union xfs_btree_key *
396xfs_btree_key_addr(
397 struct xfs_btree_cur *cur,
398 int n,
399 struct xfs_btree_block *block)
400{
401 return (union xfs_btree_key *)
402 ((char *)block + xfs_btree_key_offset(cur, n));
403}
404
405/*
406 * Return a pointer to the n-th block pointer in the btree block.
407 */
408STATIC union xfs_btree_ptr *
409xfs_btree_ptr_addr(
410 struct xfs_btree_cur *cur,
411 int n,
412 struct xfs_btree_block *block)
413{
414 int level = xfs_btree_get_level(block);
415
416 ASSERT(block->bb_level != 0);
417
418 return (union xfs_btree_ptr *)
419 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
420}
421
422/*
423 * Get the root block, which is stored in the inode.
424 *
425 * For now this btree implementation assumes the btree root is always
426 * stored in the if_broot field of an inode fork.
427 */
428STATIC struct xfs_btree_block *
429xfs_btree_get_iroot(
430 struct xfs_btree_cur *cur)
431{
432 struct xfs_ifork *ifp;
433
434 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
435 return (struct xfs_btree_block *)ifp->if_broot;
436}
437
+/*
  * Retrieve the block pointer from the cursor at the given level.
- * This may be a bmap btree root or from a buffer.
+ * This may be an inode btree root or from a buffer.
  */
-STATIC xfs_btree_block_t *		/* generic btree block pointer */
+STATIC struct xfs_btree_block *		/* generic btree block pointer */
 xfs_btree_get_block(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
+	struct xfs_btree_cur	*cur,	/* btree cursor */
 	int			level,	/* level in btree */
-	xfs_buf_t		**bpp)	/* buffer containing the block */
+	struct xfs_buf		**bpp)	/* buffer containing the block */
 {
-	xfs_btree_block_t	*block;	/* return value */
-	xfs_buf_t		*bp;	/* return buffer */
-	xfs_ifork_t		*ifp;	/* inode fork pointer */
-	int			whichfork; /* data or attr fork */
-
-	if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
-		whichfork = cur->bc_private.b.whichfork;
-		ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
-		block = (xfs_btree_block_t *)ifp->if_broot;
-		bp = NULL;
-	} else {
-		bp = cur->bc_bufs[level];
-		block = XFS_BUF_TO_BLOCK(bp);
+	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+	    (level == cur->bc_nlevels - 1)) {
+		*bpp = NULL;
+		return xfs_btree_get_iroot(cur);
 	}
-	ASSERT(block != NULL);
-	*bpp = bp;
-	return block;
+
+	*bpp = cur->bc_bufs[level];
+	return XFS_BUF_TO_BLOCK(*bpp);
 }

 /*
@@ -505,97 +502,6 @@ xfs_btree_get_bufs(
 }

 /*
- * Allocate a new btree cursor.
- * The cursor is either for allocation (A) or bmap (B) or inodes (I).
- */
-xfs_btree_cur_t *			/* new btree cursor */
-xfs_btree_init_cursor(
-	xfs_mount_t	*mp,		/* file system mount point */
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_buf_t	*agbp,		/* (A only) buffer for agf structure */
-					/* (I only) buffer for agi structure */
-	xfs_agnumber_t	agno,		/* (AI only) allocation group number */
-	xfs_btnum_t	btnum,		/* btree identifier */
-	xfs_inode_t	*ip,		/* (B only) inode owning the btree */
-	int		whichfork)	/* (B only) data or attr fork */
-{
-	xfs_agf_t	*agf;		/* (A) allocation group freespace */
-	xfs_agi_t	*agi;		/* (I) allocation group inodespace */
-	xfs_btree_cur_t	*cur;		/* return value */
-	xfs_ifork_t	*ifp;		/* (I) inode fork pointer */
-	int		nlevels=0;	/* number of levels in the btree */
-
-	ASSERT(xfs_btree_cur_zone != NULL);
-	/*
-	 * Allocate a new cursor.
-	 */
-	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
-	/*
-	 * Deduce the number of btree levels from the arguments.
-	 */
-	switch (btnum) {
-	case XFS_BTNUM_BNO:
-	case XFS_BTNUM_CNT:
-		agf = XFS_BUF_TO_AGF(agbp);
-		nlevels = be32_to_cpu(agf->agf_levels[btnum]);
-		break;
-	case XFS_BTNUM_BMAP:
-		ifp = XFS_IFORK_PTR(ip, whichfork);
-		nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
-		break;
-	case XFS_BTNUM_INO:
-		agi = XFS_BUF_TO_AGI(agbp);
-		nlevels = be32_to_cpu(agi->agi_level);
-		break;
-	default:
-		ASSERT(0);
-	}
-	/*
-	 * Fill in the common fields.
-	 */
-	cur->bc_tp = tp;
-	cur->bc_mp = mp;
-	cur->bc_nlevels = nlevels;
-	cur->bc_btnum = btnum;
-	cur->bc_blocklog = mp->m_sb.sb_blocklog;
-	/*
-	 * Fill in private fields.
-	 */
-	switch (btnum) {
-	case XFS_BTNUM_BNO:
-	case XFS_BTNUM_CNT:
-		/*
-		 * Allocation btree fields.
-		 */
-		cur->bc_private.a.agbp = agbp;
-		cur->bc_private.a.agno = agno;
-		break;
-	case XFS_BTNUM_INO:
-		/*
-		 * Inode allocation btree fields.
-		 */
-		cur->bc_private.a.agbp = agbp;
-		cur->bc_private.a.agno = agno;
-		break;
-	case XFS_BTNUM_BMAP:
-		/*
-		 * Bmap btree fields.
-		 */
-		cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
-		cur->bc_private.b.ip = ip;
-		cur->bc_private.b.firstblock = NULLFSBLOCK;
-		cur->bc_private.b.flist = NULL;
-		cur->bc_private.b.allocated = 0;
-		cur->bc_private.b.flags = 0;
-		cur->bc_private.b.whichfork = whichfork;
-		break;
-	default:
-		ASSERT(0);
-	}
-	return cur;
-}
-
-/*
  * Check for the cursor referring to the last block at the given level.
  */
 int					/* 1=is last block, 0=not last block */
@@ -603,12 +509,12 @@ xfs_btree_islastblock(
 	xfs_btree_cur_t		*cur,	/* btree cursor */
 	int			level)	/* level to check */
 {
-	xfs_btree_block_t	*block;	/* generic btree block pointer */
+	struct xfs_btree_block	*block;	/* generic btree block pointer */
 	xfs_buf_t		*bp;	/* buffer containing block */

 	block = xfs_btree_get_block(cur, level, &bp);
 	xfs_btree_check_block(cur, block, level, bp);
-	if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 		return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
 	else
 		return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
@@ -618,12 +524,12 @@ xfs_btree_islastblock(
  * Change the cursor to point to the first record at the given level.
  * Other levels are unaffected.
  */
-int					/* success=1, failure=0 */
+STATIC int				/* success=1, failure=0 */
 xfs_btree_firstrec(
 	xfs_btree_cur_t		*cur,	/* btree cursor */
 	int			level)	/* level to change */
 {
-	xfs_btree_block_t	*block;	/* generic btree block pointer */
+	struct xfs_btree_block	*block;	/* generic btree block pointer */
 	xfs_buf_t		*bp;	/* buffer containing block */

 	/*
@@ -634,7 +540,7 @@ xfs_btree_firstrec(
 	/*
 	 * It's empty, there is no such record.
 	 */
-	if (!block->bb_h.bb_numrecs)
+	if (!block->bb_numrecs)
 		return 0;
 	/*
 	 * Set the ptr value to 1, that's the first record/key.
@@ -647,12 +553,12 @@ xfs_btree_firstrec(
  * Change the cursor to point to the last record in the current block
  * at the given level.  Other levels are unaffected.
  */
-int					/* success=1, failure=0 */
+STATIC int				/* success=1, failure=0 */
 xfs_btree_lastrec(
 	xfs_btree_cur_t		*cur,	/* btree cursor */
 	int			level)	/* level to change */
 {
-	xfs_btree_block_t	*block;	/* generic btree block pointer */
+	struct xfs_btree_block	*block;	/* generic btree block pointer */
 	xfs_buf_t		*bp;	/* buffer containing block */

 	/*
@@ -663,12 +569,12 @@ xfs_btree_lastrec(
 	/*
 	 * It's empty, there is no such record.
 	 */
-	if (!block->bb_h.bb_numrecs)
+	if (!block->bb_numrecs)
 		return 0;
 	/*
 	 * Set the ptr value to numrecs, that's the last record/key.
 	 */
-	cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs);
+	cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
 	return 1;
 }

@@ -817,66 +723,84 @@ xfs_btree_reada_bufs(
 	xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
 }

+STATIC int
+xfs_btree_readahead_lblock(
+	struct xfs_btree_cur	*cur,
+	int			lr,
+	struct xfs_btree_block	*block)
+{
+	int			rval = 0;
+	xfs_fsblock_t		left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+	xfs_fsblock_t		right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+
+	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
+		xfs_btree_reada_bufl(cur->bc_mp, left, 1);
+		rval++;
+	}
+
+	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
+		xfs_btree_reada_bufl(cur->bc_mp, right, 1);
+		rval++;
+	}
+
+	return rval;
+}
+
+STATIC int
+xfs_btree_readahead_sblock(
+	struct xfs_btree_cur	*cur,
+	int			lr,
+	struct xfs_btree_block	*block)
+{
+	int			rval = 0;
+	xfs_agblock_t		left = be32_to_cpu(block->bb_u.s.bb_leftsib);
+	xfs_agblock_t		right = be32_to_cpu(block->bb_u.s.bb_rightsib);
+
+	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
+		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+				     left, 1);
+		rval++;
+	}
+
+	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
+		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
+				     right, 1);
+		rval++;
+	}
+
+	return rval;
+}
+
 /*
  * Read-ahead btree blocks, at the given level.
  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
  */
-int
-xfs_btree_readahead_core(
-	xfs_btree_cur_t		*cur,	/* btree cursor */
+STATIC int
+xfs_btree_readahead(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
 	int			lev,	/* level in btree */
 	int			lr)	/* left/right bits */
 {
-	xfs_alloc_block_t	*a;
-	xfs_bmbt_block_t	*b;
-	xfs_inobt_block_t	*i;
-	int			rval = 0;
+	struct xfs_btree_block	*block;
+
+	/*
+	 * No readahead needed if we are at the root level and the
+	 * btree root is stored in the inode.
+	 */
+	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+	    (lev == cur->bc_nlevels - 1))
+		return 0;
+
+	if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
+		return 0;

-	ASSERT(cur->bc_bufs[lev] != NULL);
 	cur->bc_ra[lev] |= lr;
-	switch (cur->bc_btnum) {
-	case XFS_BTNUM_BNO:
-	case XFS_BTNUM_CNT:
-		a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
-		if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) {
-			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-				be32_to_cpu(a->bb_leftsib), 1);
-			rval++;
-		}
-		if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) {
-			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-				be32_to_cpu(a->bb_rightsib), 1);
-			rval++;
-		}
-		break;
-	case XFS_BTNUM_BMAP:
-		b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
-		if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) {
-			xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1);
-			rval++;
-		}
-		if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) {
-			xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1);
-			rval++;
-		}
-		break;
-	case XFS_BTNUM_INO:
-		i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
-		if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
-			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-				be32_to_cpu(i->bb_leftsib), 1);
-			rval++;
-		}
-		if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
-			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-				be32_to_cpu(i->bb_rightsib), 1);
-			rval++;
-		}
-		break;
-	default:
-		ASSERT(0);
-	}
-	return rval;
+	block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
+
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		return xfs_btree_readahead_lblock(cur, lr, block);
+	return xfs_btree_readahead_sblock(cur, lr, block);
 }

 /*
@@ -889,7 +813,7 @@ xfs_btree_setbuf(
 	int			lev,	/* level in btree */
 	xfs_buf_t		*bp)	/* new buffer to set */
 {
-	xfs_btree_block_t	*b;	/* btree block */
+	struct xfs_btree_block	*b;	/* btree block */
 	xfs_buf_t		*obp;	/* old buffer pointer */

 	obp = cur->bc_bufs[lev];
@@ -900,7 +824,7 @@ xfs_btree_setbuf(
 	if (!bp)
 		return;
 	b = XFS_BUF_TO_BLOCK(bp);
-	if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
 		if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
 		if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
@@ -912,3 +836,2855 @@ xfs_btree_setbuf(
 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 	}
 }
839
840STATIC int
841xfs_btree_ptr_is_null(
842 struct xfs_btree_cur *cur,
843 union xfs_btree_ptr *ptr)
844{
845 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
846 return be64_to_cpu(ptr->l) == NULLFSBLOCK;
847 else
848 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
849}
850
851STATIC void
852xfs_btree_set_ptr_null(
853 struct xfs_btree_cur *cur,
854 union xfs_btree_ptr *ptr)
855{
856 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
857 ptr->l = cpu_to_be64(NULLFSBLOCK);
858 else
859 ptr->s = cpu_to_be32(NULLAGBLOCK);
860}
861
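The short/long duality runs through all of these helpers: one union covers both on-disk pointer sizes and a per-tree flag picks the variant. A standalone sketch of that pattern (names prefixed toy_ are made up; the big-endian conversion the kernel performs is elided, though the null sentinels mirror NULLFSBLOCK/NULLAGBLOCK):

#include <stdint.h>
#include <stdio.h>

#define TOY_LONG_PTRS	(1 << 0)	/* stand-in for XFS_BTREE_LONG_PTRS */
#define TOY_NULLFSBLOCK	UINT64_MAX	/* stand-in for NULLFSBLOCK */
#define TOY_NULLAGBLOCK	UINT32_MAX	/* stand-in for NULLAGBLOCK */

union toy_btree_ptr {
	uint32_t	s;	/* short: AG-relative block number */
	uint64_t	l;	/* long: absolute filesystem block */
};

static int toy_ptr_is_null(unsigned flags, union toy_btree_ptr *ptr)
{
	if (flags & TOY_LONG_PTRS)
		return ptr->l == TOY_NULLFSBLOCK;
	return ptr->s == TOY_NULLAGBLOCK;
}

int main(void)
{
	union toy_btree_ptr p;

	p.l = TOY_NULLFSBLOCK;
	printf("long null?  %d\n", toy_ptr_is_null(TOY_LONG_PTRS, &p));
	p.s = 42;
	printf("short null? %d\n", toy_ptr_is_null(0, &p));
	return 0;
}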
862/*
863 * Get/set/init sibling pointers
864 */
865STATIC void
866xfs_btree_get_sibling(
867 struct xfs_btree_cur *cur,
868 struct xfs_btree_block *block,
869 union xfs_btree_ptr *ptr,
870 int lr)
871{
872 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
873
874 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
875 if (lr == XFS_BB_RIGHTSIB)
876 ptr->l = block->bb_u.l.bb_rightsib;
877 else
878 ptr->l = block->bb_u.l.bb_leftsib;
879 } else {
880 if (lr == XFS_BB_RIGHTSIB)
881 ptr->s = block->bb_u.s.bb_rightsib;
882 else
883 ptr->s = block->bb_u.s.bb_leftsib;
884 }
885}
886
887STATIC void
888xfs_btree_set_sibling(
889 struct xfs_btree_cur *cur,
890 struct xfs_btree_block *block,
891 union xfs_btree_ptr *ptr,
892 int lr)
893{
894 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
895
896 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
897 if (lr == XFS_BB_RIGHTSIB)
898 block->bb_u.l.bb_rightsib = ptr->l;
899 else
900 block->bb_u.l.bb_leftsib = ptr->l;
901 } else {
902 if (lr == XFS_BB_RIGHTSIB)
903 block->bb_u.s.bb_rightsib = ptr->s;
904 else
905 block->bb_u.s.bb_leftsib = ptr->s;
906 }
907}
908
909STATIC void
910xfs_btree_init_block(
911 struct xfs_btree_cur *cur,
912 int level,
913 int numrecs,
914 struct xfs_btree_block *new) /* new block */
915{
916 new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
917 new->bb_level = cpu_to_be16(level);
918 new->bb_numrecs = cpu_to_be16(numrecs);
919
920 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
921 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
922 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
923 } else {
924 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
925 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
926 }
927}
928
929/*
930 * Return true if ptr is the last record in the btree and
931 * we need to track updates to this record. The decision
932 * will be further refined in the update_lastrec method.
933 */
934STATIC int
935xfs_btree_is_lastrec(
936 struct xfs_btree_cur *cur,
937 struct xfs_btree_block *block,
938 int level)
939{
940 union xfs_btree_ptr ptr;
941
942 if (level > 0)
943 return 0;
944 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
945 return 0;
946
947 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
948 if (!xfs_btree_ptr_is_null(cur, &ptr))
949 return 0;
950 return 1;
951}
952
953STATIC void
954xfs_btree_buf_to_ptr(
955 struct xfs_btree_cur *cur,
956 struct xfs_buf *bp,
957 union xfs_btree_ptr *ptr)
958{
959 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
960 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
961 XFS_BUF_ADDR(bp)));
962 else {
963 ptr->s = cpu_to_be32(XFS_DADDR_TO_AGBNO(cur->bc_mp,
964 XFS_BUF_ADDR(bp)));
965 }
966}
967
968STATIC xfs_daddr_t
969xfs_btree_ptr_to_daddr(
970 struct xfs_btree_cur *cur,
971 union xfs_btree_ptr *ptr)
972{
973 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
974 ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
975
976 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
977 } else {
978 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
979 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
980
981 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
982 be32_to_cpu(ptr->s));
983 }
984}
985
986STATIC void
987xfs_btree_set_refs(
988 struct xfs_btree_cur *cur,
989 struct xfs_buf *bp)
990{
991 switch (cur->bc_btnum) {
992 case XFS_BTNUM_BNO:
993 case XFS_BTNUM_CNT:
994 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
995 break;
996 case XFS_BTNUM_INO:
997 XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
998 break;
999 case XFS_BTNUM_BMAP:
1000 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1001 break;
1002 default:
1003 ASSERT(0);
1004 }
1005}
1006
1007STATIC int
1008xfs_btree_get_buf_block(
1009 struct xfs_btree_cur *cur,
1010 union xfs_btree_ptr *ptr,
1011 int flags,
1012 struct xfs_btree_block **block,
1013 struct xfs_buf **bpp)
1014{
1015 struct xfs_mount *mp = cur->bc_mp;
1016 xfs_daddr_t d;
1017
1018 /* need to sort out how callers deal with failures first */
1019 ASSERT(!(flags & XFS_BUF_TRYLOCK));
1020
1021 d = xfs_btree_ptr_to_daddr(cur, ptr);
1022 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1023 mp->m_bsize, flags);
1024
1025 ASSERT(*bpp);
1026 ASSERT(!XFS_BUF_GETERROR(*bpp));
1027
1028 *block = XFS_BUF_TO_BLOCK(*bpp);
1029 return 0;
1030}
1031
1032/*
1033 * Read in the buffer at the given ptr and return the buffer and
1034 * the block pointer within the buffer.
1035 */
1036STATIC int
1037xfs_btree_read_buf_block(
1038 struct xfs_btree_cur *cur,
1039 union xfs_btree_ptr *ptr,
1040 int level,
1041 int flags,
1042 struct xfs_btree_block **block,
1043 struct xfs_buf **bpp)
1044{
1045 struct xfs_mount *mp = cur->bc_mp;
1046 xfs_daddr_t d;
1047 int error;
1048
1049 /* need to sort out how callers deal with failures first */
1050 ASSERT(!(flags & XFS_BUF_TRYLOCK));
1051
1052 d = xfs_btree_ptr_to_daddr(cur, ptr);
1053 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1054 mp->m_bsize, flags, bpp);
1055 if (error)
1056 return error;
1057
1058 ASSERT(*bpp != NULL);
1059 ASSERT(!XFS_BUF_GETERROR(*bpp));
1060
1061 xfs_btree_set_refs(cur, *bpp);
1062 *block = XFS_BUF_TO_BLOCK(*bpp);
1063
1064 error = xfs_btree_check_block(cur, *block, level, *bpp);
1065 if (error)
1066 xfs_trans_brelse(cur->bc_tp, *bpp);
1067 return error;
1068}
1069
1070/*
1071 * Copy keys from one btree block to another.
1072 */
1073STATIC void
1074xfs_btree_copy_keys(
1075 struct xfs_btree_cur *cur,
1076 union xfs_btree_key *dst_key,
1077 union xfs_btree_key *src_key,
1078 int numkeys)
1079{
1080 ASSERT(numkeys >= 0);
1081 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1082}
1083
1084/*
1085 * Copy records from one btree block to another.
1086 */
1087STATIC void
1088xfs_btree_copy_recs(
1089 struct xfs_btree_cur *cur,
1090 union xfs_btree_rec *dst_rec,
1091 union xfs_btree_rec *src_rec,
1092 int numrecs)
1093{
1094 ASSERT(numrecs >= 0);
1095 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1096}
1097
1098/*
1099 * Copy block pointers from one btree block to another.
1100 */
1101STATIC void
1102xfs_btree_copy_ptrs(
1103 struct xfs_btree_cur *cur,
1104 union xfs_btree_ptr *dst_ptr,
1105 union xfs_btree_ptr *src_ptr,
1106 int numptrs)
1107{
1108 ASSERT(numptrs >= 0);
1109 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1110}
1111
1112/*
1113 * Shift keys one index left/right inside a single btree block.
1114 */
1115STATIC void
1116xfs_btree_shift_keys(
1117 struct xfs_btree_cur *cur,
1118 union xfs_btree_key *key,
1119 int dir,
1120 int numkeys)
1121{
1122 char *dst_key;
1123
1124 ASSERT(numkeys >= 0);
1125 ASSERT(dir == 1 || dir == -1);
1126
1127 dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1128 memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1129}
1130
1131/*
1132 * Shift records one index left/right inside a single btree block.
1133 */
1134STATIC void
1135xfs_btree_shift_recs(
1136 struct xfs_btree_cur *cur,
1137 union xfs_btree_rec *rec,
1138 int dir,
1139 int numrecs)
1140{
1141 char *dst_rec;
1142
1143 ASSERT(numrecs >= 0);
1144 ASSERT(dir == 1 || dir == -1);
1145
1146 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1147 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1148}
1149
1150/*
1151 * Shift block pointers one index left/right inside a single btree block.
1152 */
1153STATIC void
1154xfs_btree_shift_ptrs(
1155 struct xfs_btree_cur *cur,
1156 union xfs_btree_ptr *ptr,
1157 int dir,
1158 int numptrs)
1159{
1160 char *dst_ptr;
1161
1162 ASSERT(numptrs >= 0);
1163 ASSERT(dir == 1 || dir == -1);
1164
1165 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1166 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1167}
1168
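Each of the three shift helpers above reduces to one overlap-safe memmove from the given entry. A standalone sketch of the same idea, with a made-up fixed entry size standing in for bc_ops->rec_len / key_len:

#include <stdio.h>
#include <string.h>

#define ENTRY_LEN 4	/* assumed fixed entry size */

static void toy_shift(char *entry, int dir, int nentries)
{
	/* dir is +1 (right) or -1 (left), as in the kernel helpers */
	memmove(entry + dir * ENTRY_LEN, entry, (size_t)nentries * ENTRY_LEN);
}

int main(void)
{
	char block[6][ENTRY_LEN] = { "a", "b", "c", "d", "e", "" };

	/* open a hole at entry 0 by shifting five entries right */
	toy_shift(block[0], 1, 5);
	memcpy(block[0], "new", ENTRY_LEN);

	for (int i = 0; i < 6; i++)
		printf("%s ", block[i]);	/* new a b c d e */
	printf("\n");
	return 0;
}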
1169/*
1170 * Log key values from the btree block.
1171 */
1172STATIC void
1173xfs_btree_log_keys(
1174 struct xfs_btree_cur *cur,
1175 struct xfs_buf *bp,
1176 int first,
1177 int last)
1178{
1179 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1180 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1181
1182 if (bp) {
1183 xfs_trans_log_buf(cur->bc_tp, bp,
1184 xfs_btree_key_offset(cur, first),
1185 xfs_btree_key_offset(cur, last + 1) - 1);
1186 } else {
1187 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1188 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1189 }
1190
1191 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1192}
1193
1194/*
1195 * Log record values from the btree block.
1196 */
1197void
1198xfs_btree_log_recs(
1199 struct xfs_btree_cur *cur,
1200 struct xfs_buf *bp,
1201 int first,
1202 int last)
1203{
1204 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1205 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1206
1207 xfs_trans_log_buf(cur->bc_tp, bp,
1208 xfs_btree_rec_offset(cur, first),
1209 xfs_btree_rec_offset(cur, last + 1) - 1);
1210
1211 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1212}
1213
1214/*
1215 * Log block pointer fields from a btree block (nonleaf).
1216 */
1217STATIC void
1218xfs_btree_log_ptrs(
1219 struct xfs_btree_cur *cur, /* btree cursor */
1220 struct xfs_buf *bp, /* buffer containing btree block */
1221 int first, /* index of first pointer to log */
1222 int last) /* index of last pointer to log */
1223{
1224 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1225 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1226
1227 if (bp) {
1228 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1229 int level = xfs_btree_get_level(block);
1230
1231 xfs_trans_log_buf(cur->bc_tp, bp,
1232 xfs_btree_ptr_offset(cur, first, level),
1233 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1234 } else {
1235 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1236 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1237 }
1238
1239 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1240}
1241
1242/*
1243 * Log fields from a btree block header.
1244 */
1245void
1246xfs_btree_log_block(
1247 struct xfs_btree_cur *cur, /* btree cursor */
1248 struct xfs_buf *bp, /* buffer containing btree block */
1249 int fields) /* mask of fields: XFS_BB_... */
1250{
1251 int first; /* first byte offset logged */
1252 int last; /* last byte offset logged */
1253 static const short soffsets[] = { /* table of offsets (short) */
1254 offsetof(struct xfs_btree_block, bb_magic),
1255 offsetof(struct xfs_btree_block, bb_level),
1256 offsetof(struct xfs_btree_block, bb_numrecs),
1257 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1258 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1259 XFS_BTREE_SBLOCK_LEN
1260 };
1261 static const short loffsets[] = { /* table of offsets (long) */
1262 offsetof(struct xfs_btree_block, bb_magic),
1263 offsetof(struct xfs_btree_block, bb_level),
1264 offsetof(struct xfs_btree_block, bb_numrecs),
1265 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1266 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1267 XFS_BTREE_LBLOCK_LEN
1268 };
1269
1270 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1271 XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1272
1273 if (bp) {
1274 xfs_btree_offsets(fields,
1275 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1276 loffsets : soffsets,
1277 XFS_BB_NUM_BITS, &first, &last);
1278 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1279 } else {
1280 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1281 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1282 }
1283
1284 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1285}
1286
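The offsets tables above drive a small trick: a bitmask of dirty header fields is mapped to a single contiguous byte range to log, using one table entry per field plus a final total-length entry. A standalone sketch of that mapping (the kernel does this in xfs_btree_offsets; the offsets below mirror the short-form header layout but should be read as illustrative):

#include <stdio.h>

static const short offsets[] = { 0, 4, 6, 8, 12, 16 };	/* + total length */
#define NBITS 5						/* real fields */

static void mask_to_range(int fields, int *first, int *last)
{
	int i;

	*first = -1;
	*last = -1;
	for (i = 0; i < NBITS; i++) {
		if (!(fields & (1 << i)))
			continue;
		if (*first < 0)
			*first = offsets[i];
		*last = offsets[i + 1] - 1;	/* up to the next field */
	}
}

int main(void)
{
	int first, last;

	/* log numrecs (bit 2) and both sibling pointers (bits 3, 4) */
	mask_to_range((1 << 2) | (1 << 3) | (1 << 4), &first, &last);
	printf("log bytes %d..%d\n", first, last);	/* 6..15 */
	return 0;
}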
1287/*
1288 * Increment cursor by one record at the level.
1289 * For nonzero levels the leaf-ward information is untouched.
1290 */
1291int /* error */
1292xfs_btree_increment(
1293 struct xfs_btree_cur *cur,
1294 int level,
1295 int *stat) /* success/failure */
1296{
1297 struct xfs_btree_block *block;
1298 union xfs_btree_ptr ptr;
1299 struct xfs_buf *bp;
1300 int error; /* error return value */
1301 int lev;
1302
1303 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1304 XFS_BTREE_TRACE_ARGI(cur, level);
1305
1306 ASSERT(level < cur->bc_nlevels);
1307
1308 /* Read-ahead to the right at this level. */
1309 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1310
1311 /* Get a pointer to the btree block. */
1312 block = xfs_btree_get_block(cur, level, &bp);
1313
1314#ifdef DEBUG
1315 error = xfs_btree_check_block(cur, block, level, bp);
1316 if (error)
1317 goto error0;
1318#endif
1319
1320 /* We're done if we remain in the block after the increment. */
1321 if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1322 goto out1;
1323
1324 /* Fail if we just went off the right edge of the tree. */
1325 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1326 if (xfs_btree_ptr_is_null(cur, &ptr))
1327 goto out0;
1328
1329 XFS_BTREE_STATS_INC(cur, increment);
1330
1331 /*
1332 * March up the tree incrementing pointers.
1333 * Stop when we don't go off the right edge of a block.
1334 */
1335 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1336 block = xfs_btree_get_block(cur, lev, &bp);
1337
1338#ifdef DEBUG
1339 error = xfs_btree_check_block(cur, block, lev, bp);
1340 if (error)
1341 goto error0;
1342#endif
1343
1344 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1345 break;
1346
1347 /* Read-ahead the right block for the next loop. */
1348 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1349 }
1350
1351 /*
1352 * If we went off the root then we are either seriously
1353 * confused or have the tree root in an inode.
1354 */
1355 if (lev == cur->bc_nlevels) {
1356 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1357 goto out0;
1358 ASSERT(0);
1359 error = EFSCORRUPTED;
1360 goto error0;
1361 }
1362 ASSERT(lev < cur->bc_nlevels);
1363
1364 /*
1365 * Now walk back down the tree, fixing up the cursor's buffer
1366 * pointers and key numbers.
1367 */
1368 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1369 union xfs_btree_ptr *ptrp;
1370
1371 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1372 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1373 0, &block, &bp);
1374 if (error)
1375 goto error0;
1376
1377 xfs_btree_setbuf(cur, lev, bp);
1378 cur->bc_ptrs[lev] = 1;
1379 }
1380out1:
1381 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1382 *stat = 1;
1383 return 0;
1384
1385out0:
1386 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1387 *stat = 0;
1388 return 0;
1389
1390error0:
1391 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1392 return error;
1393}
1394
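The increment algorithm is easiest to see on a toy model: per-level 1-based pointers, a carry up when a block is exhausted, then a walk back down that points each lower level at its first entry. A standalone sketch under simplifying assumptions (fully populated tree, no buffers, siblings or readahead):

#include <stdio.h>

#define NLEVELS 3

/* records per block at each level of a toy, fully populated tree */
static const int numrecs[NLEVELS] = { 4, 3, 2 };	/* leaf .. root */
static int ptrs[NLEVELS] = { 1, 1, 1 };			/* 1-based, like bc_ptrs */

/* returns 1 on success, 0 when we run off the right edge of the tree */
static int toy_increment(void)
{
	int lev;

	/* march up until an increment stays inside its block */
	for (lev = 0; lev < NLEVELS; lev++) {
		if (++ptrs[lev] <= numrecs[lev])
			break;
	}
	if (lev == NLEVELS)
		return 0;	/* off the root: no more records */

	/* walk back down, pointing at the first entry of each child */
	for (lev--; lev >= 0; lev--)
		ptrs[lev] = 1;
	return 1;
}

int main(void)
{
	int n = 1;

	do {
		printf("record %d: ptrs = {%d,%d,%d}\n",
		       n++, ptrs[0], ptrs[1], ptrs[2]);
	} while (toy_increment());
	return 0;
}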
1395/*
1396 * Decrement cursor by one record at the level.
1397 * For nonzero levels the leaf-ward information is untouched.
1398 */
1399int /* error */
1400xfs_btree_decrement(
1401 struct xfs_btree_cur *cur,
1402 int level,
1403 int *stat) /* success/failure */
1404{
1405 struct xfs_btree_block *block;
1406 xfs_buf_t *bp;
1407 int error; /* error return value */
1408 int lev;
1409 union xfs_btree_ptr ptr;
1410
1411 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1412 XFS_BTREE_TRACE_ARGI(cur, level);
1413
1414 ASSERT(level < cur->bc_nlevels);
1415
1416 /* Read-ahead to the left at this level. */
1417 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1418
1419 /* We're done if we remain in the block after the decrement. */
1420 if (--cur->bc_ptrs[level] > 0)
1421 goto out1;
1422
1423 /* Get a pointer to the btree block. */
1424 block = xfs_btree_get_block(cur, level, &bp);
1425
1426#ifdef DEBUG
1427 error = xfs_btree_check_block(cur, block, level, bp);
1428 if (error)
1429 goto error0;
1430#endif
1431
1432 /* Fail if we just went off the left edge of the tree. */
1433 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1434 if (xfs_btree_ptr_is_null(cur, &ptr))
1435 goto out0;
1436
1437 XFS_BTREE_STATS_INC(cur, decrement);
1438
1439 /*
1440 * March up the tree decrementing pointers.
1441 * Stop when we don't go off the left edge of a block.
1442 */
1443 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1444 if (--cur->bc_ptrs[lev] > 0)
1445 break;
1446 /* Read-ahead the left block for the next loop. */
1447 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1448 }
1449
1450 /*
1451 * If we went off the root then we are either seriously
1452 * confused or have the tree root in an inode.
1453 */
1454 if (lev == cur->bc_nlevels) {
1455 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1456 goto out0;
1457 ASSERT(0);
1458 error = EFSCORRUPTED;
1459 goto error0;
1460 }
1461 ASSERT(lev < cur->bc_nlevels);
1462
1463 /*
1464 * Now walk back down the tree, fixing up the cursor's buffer
1465 * pointers and key numbers.
1466 */
1467 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1468 union xfs_btree_ptr *ptrp;
1469
1470 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1471 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1472 0, &block, &bp);
1473 if (error)
1474 goto error0;
1475 xfs_btree_setbuf(cur, lev, bp);
1476 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1477 }
1478out1:
1479 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1480 *stat = 1;
1481 return 0;
1482
1483out0:
1484 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1485 *stat = 0;
1486 return 0;
1487
1488error0:
1489 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1490 return error;
1491}
1492
1493STATIC int
1494xfs_btree_lookup_get_block(
1495 struct xfs_btree_cur *cur, /* btree cursor */
1496 int level, /* level in the btree */
1497 union xfs_btree_ptr *pp, /* ptr to btree block */
1498 struct xfs_btree_block **blkp) /* return btree block */
1499{
1500 struct xfs_buf *bp; /* buffer pointer for btree block */
1501 int error = 0;
1502
1503 /* special case the root block if in an inode */
1504 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1505 (level == cur->bc_nlevels - 1)) {
1506 *blkp = xfs_btree_get_iroot(cur);
1507 return 0;
1508 }
1509
1510 /*
1511 * If the old buffer at this level is for the disk address we are
1512 * looking for, re-use it.
1513 *
1514 * Otherwise throw it away and get a new one.
1515 */
1516 bp = cur->bc_bufs[level];
1517 if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1518 *blkp = XFS_BUF_TO_BLOCK(bp);
1519 return 0;
1520 }
1521
1522 error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1523 if (error)
1524 return error;
1525
1526 xfs_btree_setbuf(cur, level, bp);
1527 return 0;
1528}
1529
1530/*
1531 * Get current search key. For level 0 we don't actually have a key
1532 * structure so we make one up from the record. For all other levels
1533 * we just return the right key.
1534 */
1535STATIC union xfs_btree_key *
1536xfs_lookup_get_search_key(
1537 struct xfs_btree_cur *cur,
1538 int level,
1539 int keyno,
1540 struct xfs_btree_block *block,
1541 union xfs_btree_key *kp)
1542{
1543 if (level == 0) {
1544 cur->bc_ops->init_key_from_rec(kp,
1545 xfs_btree_rec_addr(cur, keyno, block));
1546 return kp;
1547 }
1548
1549 return xfs_btree_key_addr(cur, keyno, block);
1550}
1551
1552/*
1553 * Lookup the record. The cursor is made to point to it, based on dir.
1554 * Return 0 in *stat if we can't find any such record, 1 for success.
1555 */
1556int /* error */
1557xfs_btree_lookup(
1558 struct xfs_btree_cur *cur, /* btree cursor */
1559 xfs_lookup_t dir, /* <=, ==, or >= */
1560 int *stat) /* success/failure */
1561{
1562 struct xfs_btree_block *block; /* current btree block */
1563 __int64_t diff; /* difference for the current key */
1564 int error; /* error return value */
1565 int keyno; /* current key number */
1566 int level; /* level in the btree */
1567 union xfs_btree_ptr *pp; /* ptr to btree block */
1568 union xfs_btree_ptr ptr; /* ptr to btree block */
1569
1570 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1571 XFS_BTREE_TRACE_ARGI(cur, dir);
1572
1573 XFS_BTREE_STATS_INC(cur, lookup);
1574
1575 block = NULL;
1576 keyno = 0;
1577
1578 /* initialise start pointer from cursor */
1579 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1580 pp = &ptr;
1581
1582 /*
1583 * Iterate over each level in the btree, starting at the root.
1584 * For each level above the leaves, find the key we need, based
1585 * on the lookup record, then follow the corresponding block
1586 * pointer down to the next level.
1587 */
1588 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1589 /* Get the block we need to do the lookup on. */
1590 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1591 if (error)
1592 goto error0;
1593
1594 if (diff == 0) {
1595 /*
1596 * If we already had a key match at a higher level, we
1597 * know we need to use the first entry in this block.
1598 */
1599 keyno = 1;
1600 } else {
1601 /* Otherwise search this block. Do a binary search. */
1602
1603 int high; /* high entry number */
1604 int low; /* low entry number */
1605
1606 /* Set low and high entry numbers, 1-based. */
1607 low = 1;
1608 high = xfs_btree_get_numrecs(block);
1609 if (!high) {
1610 /* Block is empty, must be an empty leaf. */
1611 ASSERT(level == 0 && cur->bc_nlevels == 1);
1612
1613 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1614 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1615 *stat = 0;
1616 return 0;
1617 }
1618
1619 /* Binary search the block. */
1620 while (low <= high) {
1621 union xfs_btree_key key;
1622 union xfs_btree_key *kp;
1623
1624 XFS_BTREE_STATS_INC(cur, compare);
1625
1626 /* keyno is average of low and high. */
1627 keyno = (low + high) >> 1;
1628
1629 /* Get current search key */
1630 kp = xfs_lookup_get_search_key(cur, level,
1631 keyno, block, &key);
1632
1633 /*
1634 * Compute difference to get next direction:
1635 * - less than, move right
1636 * - greater than, move left
1637 * - equal, we're done
1638 */
1639 diff = cur->bc_ops->key_diff(cur, kp);
1640 if (diff < 0)
1641 low = keyno + 1;
1642 else if (diff > 0)
1643 high = keyno - 1;
1644 else
1645 break;
1646 }
1647 }
1648
1649 /*
1650 * If there are more levels, set up for the next level
1651 * by getting the block number and filling in the cursor.
1652 */
1653 if (level > 0) {
1654 /*
1655 * If we moved left, need the previous key number,
1656 * unless there isn't one.
1657 */
1658 if (diff > 0 && --keyno < 1)
1659 keyno = 1;
1660 pp = xfs_btree_ptr_addr(cur, keyno, block);
1661
1662#ifdef DEBUG
1663 error = xfs_btree_check_ptr(cur, pp, 0, level);
1664 if (error)
1665 goto error0;
1666#endif
1667 cur->bc_ptrs[level] = keyno;
1668 }
1669 }
1670
1671 /* Done with the search. See if we need to adjust the results. */
1672 if (dir != XFS_LOOKUP_LE && diff < 0) {
1673 keyno++;
1674 /*
1675 * If ge search and we went off the end of the block, but it's
1676 * not the last block, we're in the wrong block.
1677 */
1678 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1679 if (dir == XFS_LOOKUP_GE &&
1680 keyno > xfs_btree_get_numrecs(block) &&
1681 !xfs_btree_ptr_is_null(cur, &ptr)) {
1682 int i;
1683
1684 cur->bc_ptrs[0] = keyno;
1685 error = xfs_btree_increment(cur, 0, &i);
1686 if (error)
1687 goto error0;
1688 XFS_WANT_CORRUPTED_RETURN(i == 1);
1689 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1690 *stat = 1;
1691 return 0;
1692 }
1693 } else if (dir == XFS_LOOKUP_LE && diff > 0)
1694 keyno--;
1695 cur->bc_ptrs[0] = keyno;
1696
1697 /* Return if we succeeded or not. */
1698 if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1699 *stat = 0;
1700 else if (dir != XFS_LOOKUP_EQ || diff == 0)
1701 *stat = 1;
1702 else
1703 *stat = 0;
1704 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1705 return 0;
1706
1707error0:
1708 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1709 return error;
1710}
1711
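The core of the lookup above is a 1-based binary search steered by the sign of key_diff, followed by a final adjustment for LE/GE lookups. A standalone sketch of just that logic (plain integer keys stand in for the per-btree key_diff method, and the bounds guard plays the role of the *stat checks):

#include <stdio.h>

static int lookup(const int *keys, int nrecs, int want, long *diff)
{
	int low = 1, high = nrecs, keyno = 0;

	*diff = 1;
	while (low <= high) {
		keyno = (low + high) >> 1;
		/* negative: probed key below search key, move right */
		*diff = (long)keys[keyno - 1] - want;
		if (*diff < 0)
			low = keyno + 1;
		else if (*diff > 0)
			high = keyno - 1;
		else
			break;
	}
	return keyno;	/* caller adjusts for LE/GE, as in the kernel */
}

int main(void)
{
	const int keys[] = { 10, 20, 30, 40 };
	long diff;
	int keyno = lookup(keys, 4, 25, &diff);

	/* >= lookup: step past a smaller final key, as the caller does */
	if (diff < 0)
		keyno++;
	if (keyno >= 1 && keyno <= 4)
		printf("GE(25) -> entry %d (key %d)\n", keyno, keys[keyno - 1]);
	else
		printf("GE(25) -> off the end of the block\n");
	return 0;
}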
1712/*
1713 * Update keys at all levels from here to the root along the cursor's path.
1714 */
1715STATIC int
1716xfs_btree_updkey(
1717 struct xfs_btree_cur *cur,
1718 union xfs_btree_key *keyp,
1719 int level)
1720{
1721 struct xfs_btree_block *block;
1722 struct xfs_buf *bp;
1723 union xfs_btree_key *kp;
1724 int ptr;
1725
1726 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1727 XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1728
1729 ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1730
1731 /*
1732 * Go up the tree from this level toward the root.
1733 * At each level, update the key value to the value input.
1734 * Stop when we reach a level where the cursor isn't pointing
1735 * at the first entry in the block.
1736 */
1737 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1738#ifdef DEBUG
1739 int error;
1740#endif
1741 block = xfs_btree_get_block(cur, level, &bp);
1742#ifdef DEBUG
1743 error = xfs_btree_check_block(cur, block, level, bp);
1744 if (error) {
1745 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1746 return error;
1747 }
1748#endif
1749 ptr = cur->bc_ptrs[level];
1750 kp = xfs_btree_key_addr(cur, ptr, block);
1751 xfs_btree_copy_keys(cur, kp, keyp, 1);
1752 xfs_btree_log_keys(cur, bp, ptr, ptr);
1753 }
1754
1755 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1756 return 0;
1757}
1758
1759/*
1760 * Update the record referred to by cur to the value in the
1761 * given record. This either works (return 0) or gets an
1762 * EFSCORRUPTED error.
1763 */
1764int
1765xfs_btree_update(
1766 struct xfs_btree_cur *cur,
1767 union xfs_btree_rec *rec)
1768{
1769 struct xfs_btree_block *block;
1770 struct xfs_buf *bp;
1771 int error;
1772 int ptr;
1773 union xfs_btree_rec *rp;
1774
1775 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1776 XFS_BTREE_TRACE_ARGR(cur, rec);
1777
1778 /* Pick up the current block. */
1779 block = xfs_btree_get_block(cur, 0, &bp);
1780
1781#ifdef DEBUG
1782 error = xfs_btree_check_block(cur, block, 0, bp);
1783 if (error)
1784 goto error0;
1785#endif
1786 /* Get the address of the rec to be updated. */
1787 ptr = cur->bc_ptrs[0];
1788 rp = xfs_btree_rec_addr(cur, ptr, block);
1789
1790 /* Fill in the new contents and log them. */
1791 xfs_btree_copy_recs(cur, rp, rec, 1);
1792 xfs_btree_log_recs(cur, bp, ptr, ptr);
1793
1794 /*
1795 * If we are tracking the last record in the tree and
1796 * we are at the far right edge of the tree, update it.
1797 */
1798 if (xfs_btree_is_lastrec(cur, block, 0)) {
1799 cur->bc_ops->update_lastrec(cur, block, rec,
1800 ptr, LASTREC_UPDATE);
1801 }
1802
1803 /* Updating first rec in leaf. Pass new key value up to our parent. */
1804 if (ptr == 1) {
1805 union xfs_btree_key key;
1806
1807 cur->bc_ops->init_key_from_rec(&key, rec);
1808 error = xfs_btree_updkey(cur, &key, 1);
1809 if (error)
1810 goto error0;
1811 }
1812
1813 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1814 return 0;
1815
1816error0:
1817 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1818 return error;
1819}
1820
1821/*
1822 * Move 1 record left from cur/level if possible.
1823 * Update cur to reflect the new path.
1824 */
1825STATIC int /* error */
1826xfs_btree_lshift(
1827 struct xfs_btree_cur *cur,
1828 int level,
1829 int *stat) /* success/failure */
1830{
1831 union xfs_btree_key key; /* btree key */
1832 struct xfs_buf *lbp; /* left buffer pointer */
1833 struct xfs_btree_block *left; /* left btree block */
1834 int lrecs; /* left record count */
1835 struct xfs_buf *rbp; /* right buffer pointer */
1836 struct xfs_btree_block *right; /* right btree block */
1837 int rrecs; /* right record count */
1838 union xfs_btree_ptr lptr; /* left btree pointer */
1839 union xfs_btree_key *rkp = NULL; /* right btree key */
1840 union xfs_btree_ptr *rpp = NULL; /* right address pointer */
1841 union xfs_btree_rec *rrp = NULL; /* right record pointer */
1842 int error; /* error return value */
1843
1844 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1845 XFS_BTREE_TRACE_ARGI(cur, level);
1846
1847 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1848 level == cur->bc_nlevels - 1)
1849 goto out0;
1850
1851 /* Set up variables for this block as "right". */
1852 right = xfs_btree_get_block(cur, level, &rbp);
1853
1854#ifdef DEBUG
1855 error = xfs_btree_check_block(cur, right, level, rbp);
1856 if (error)
1857 goto error0;
1858#endif
1859
1860 /* If we've got no left sibling then we can't shift an entry left. */
1861 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1862 if (xfs_btree_ptr_is_null(cur, &lptr))
1863 goto out0;
1864
1865 /*
1866 * If the cursor entry is the one that would be moved, don't
1867 * do it... it's too complicated.
1868 */
1869 if (cur->bc_ptrs[level] <= 1)
1870 goto out0;
1871
1872 /* Set up the left neighbor as "left". */
1873 error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1874 if (error)
1875 goto error0;
1876
1877 /* If it's full, it can't take another entry. */
1878 lrecs = xfs_btree_get_numrecs(left);
1879 if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1880 goto out0;
1881
1882 rrecs = xfs_btree_get_numrecs(right);
1883
1884 /*
1885 * We add one entry to the left side and remove one for the right side.
1886 * Account for it here; the changes will be updated on disk and logged
1887 * later.
1888 */
1889 lrecs++;
1890 rrecs--;
1891
1892 XFS_BTREE_STATS_INC(cur, lshift);
1893 XFS_BTREE_STATS_ADD(cur, moves, 1);
1894
1895 /*
1896 * If non-leaf, copy a key and a ptr to the left block.
1897 * Log the changes to the left block.
1898 */
1899 if (level > 0) {
1900 /* It's a non-leaf. Move keys and pointers. */
1901 union xfs_btree_key *lkp; /* left btree key */
1902 union xfs_btree_ptr *lpp; /* left address pointer */
1903
1904 lkp = xfs_btree_key_addr(cur, lrecs, left);
1905 rkp = xfs_btree_key_addr(cur, 1, right);
1906
1907 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
1908 rpp = xfs_btree_ptr_addr(cur, 1, right);
1909#ifdef DEBUG
1910 error = xfs_btree_check_ptr(cur, rpp, 0, level);
1911 if (error)
1912 goto error0;
1913#endif
1914 xfs_btree_copy_keys(cur, lkp, rkp, 1);
1915 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
1916
1917 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
1918 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
1919
1920 ASSERT(cur->bc_ops->keys_inorder(cur,
1921 xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
1922 } else {
1923 /* It's a leaf. Move records. */
1924 union xfs_btree_rec *lrp; /* left record pointer */
1925
1926 lrp = xfs_btree_rec_addr(cur, lrecs, left);
1927 rrp = xfs_btree_rec_addr(cur, 1, right);
1928
1929 xfs_btree_copy_recs(cur, lrp, rrp, 1);
1930 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
1931
1932 ASSERT(cur->bc_ops->recs_inorder(cur,
1933 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
1934 }
1935
1936 xfs_btree_set_numrecs(left, lrecs);
1937 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
1938
1939 xfs_btree_set_numrecs(right, rrecs);
1940 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
1941
1942 /*
1943 * Slide the contents of right down one entry.
1944 */
1945 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
1946 if (level > 0) {
1947 /* It's a nonleaf. operate on keys and ptrs */
1948#ifdef DEBUG
1949 int i; /* loop index */
1950
1951 for (i = 0; i < rrecs; i++) {
1952 error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
1953 if (error)
1954 goto error0;
1955 }
1956#endif
1957 xfs_btree_shift_keys(cur,
1958 xfs_btree_key_addr(cur, 2, right),
1959 -1, rrecs);
1960 xfs_btree_shift_ptrs(cur,
1961 xfs_btree_ptr_addr(cur, 2, right),
1962 -1, rrecs);
1963
1964 xfs_btree_log_keys(cur, rbp, 1, rrecs);
1965 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
1966 } else {
1967 /* It's a leaf. operate on records */
1968 xfs_btree_shift_recs(cur,
1969 xfs_btree_rec_addr(cur, 2, right),
1970 -1, rrecs);
1971 xfs_btree_log_recs(cur, rbp, 1, rrecs);
1972
1973 /*
1974 * If it's the first record in the block, we'll need a key
1975 * structure to pass up to the next level (updkey).
1976 */
1977 cur->bc_ops->init_key_from_rec(&key,
1978 xfs_btree_rec_addr(cur, 1, right));
1979 rkp = &key;
1980 }
1981
1982 /* Update the parent key values of right. */
1983 error = xfs_btree_updkey(cur, rkp, level + 1);
1984 if (error)
1985 goto error0;
1986
1987 /* Slide the cursor value left one. */
1988 cur->bc_ptrs[level]--;
1989
1990 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1991 *stat = 1;
1992 return 0;
1993
1994out0:
1995 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1996 *stat = 0;
1997 return 0;
1998
1999error0:
2000 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2001 return error;
2002}
2003
2004/*
2005 * Move 1 record right from cur/level if possible.
2006 * Update cur to reflect the new path.
2007 */
2008STATIC int /* error */
2009xfs_btree_rshift(
2010 struct xfs_btree_cur *cur,
2011 int level,
2012 int *stat) /* success/failure */
2013{
2014 union xfs_btree_key key; /* btree key */
2015 struct xfs_buf *lbp; /* left buffer pointer */
2016 struct xfs_btree_block *left; /* left btree block */
2017 struct xfs_buf *rbp; /* right buffer pointer */
2018 struct xfs_btree_block *right; /* right btree block */
2019 struct xfs_btree_cur *tcur; /* temporary btree cursor */
2020 union xfs_btree_ptr rptr; /* right block pointer */
2021 union xfs_btree_key *rkp; /* right btree key */
2022 int rrecs; /* right record count */
2023 int lrecs; /* left record count */
2024 int error; /* error return value */
2025 int i; /* loop counter */
2026
2027 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2028 XFS_BTREE_TRACE_ARGI(cur, level);
2029
2030 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2031 (level == cur->bc_nlevels - 1))
2032 goto out0;
2033
2034 /* Set up variables for this block as "left". */
2035 left = xfs_btree_get_block(cur, level, &lbp);
2036
2037#ifdef DEBUG
2038 error = xfs_btree_check_block(cur, left, level, lbp);
2039 if (error)
2040 goto error0;
2041#endif
2042
2043 /* If we've got no right sibling then we can't shift an entry right. */
2044 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2045 if (xfs_btree_ptr_is_null(cur, &rptr))
2046 goto out0;
2047
2048 /*
2049 * If the cursor entry is the one that would be moved, don't
2050 * do it... it's too complicated.
2051 */
2052 lrecs = xfs_btree_get_numrecs(left);
2053 if (cur->bc_ptrs[level] >= lrecs)
2054 goto out0;
2055
2056 /* Set up the right neighbor as "right". */
2057 error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2058 if (error)
2059 goto error0;
2060
2061 /* If it's full, it can't take another entry. */
2062 rrecs = xfs_btree_get_numrecs(right);
2063 if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2064 goto out0;
2065
2066 XFS_BTREE_STATS_INC(cur, rshift);
2067 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2068
2069 /*
2070 * Make a hole at the start of the right neighbor block, then
2071 * copy the last left block entry to the hole.
2072 */
2073 if (level > 0) {
2074 /* It's a nonleaf. make a hole in the keys and ptrs */
2075 union xfs_btree_key *lkp;
2076 union xfs_btree_ptr *lpp;
2077 union xfs_btree_ptr *rpp;
2078
2079 lkp = xfs_btree_key_addr(cur, lrecs, left);
2080 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2081 rkp = xfs_btree_key_addr(cur, 1, right);
2082 rpp = xfs_btree_ptr_addr(cur, 1, right);
2083
2084#ifdef DEBUG
2085 for (i = rrecs - 1; i >= 0; i--) {
2086 error = xfs_btree_check_ptr(cur, rpp, i, level);
2087 if (error)
2088 goto error0;
2089 }
2090#endif
2091
2092 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2093 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2094
2095#ifdef DEBUG
2096 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2097 if (error)
2098 goto error0;
2099#endif
2100
2101 /* Now put the new data in, and log it. */
2102 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2103 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2104
2105 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2106 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2107
2108 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2109 xfs_btree_key_addr(cur, 2, right)));
2110 } else {
2111 /* It's a leaf. make a hole in the records */
2112 union xfs_btree_rec *lrp;
2113 union xfs_btree_rec *rrp;
2114
2115 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2116 rrp = xfs_btree_rec_addr(cur, 1, right);
2117
2118 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2119
2120 /* Now put the new data in, and log it. */
2121 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2122 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2123
2124 cur->bc_ops->init_key_from_rec(&key, rrp);
2125 rkp = &key;
2126
2127 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2128 xfs_btree_rec_addr(cur, 2, right)));
2129 }
2130
2131 /*
2132 * Decrement and log left's numrecs, bump and log right's numrecs.
2133 */
2134 xfs_btree_set_numrecs(left, --lrecs);
2135 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2136
2137 xfs_btree_set_numrecs(right, ++rrecs);
2138 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2139
2140 /*
2141 * Using a temporary cursor, update the parent key values of the
2142 * block on the right.
2143 */
2144 error = xfs_btree_dup_cursor(cur, &tcur);
2145 if (error)
2146 goto error0;
2147 i = xfs_btree_lastrec(tcur, level);
2148 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2149
2150 error = xfs_btree_increment(tcur, level, &i);
2151 if (error)
2152 goto error1;
2153
2154 error = xfs_btree_updkey(tcur, rkp, level + 1);
2155 if (error)
2156 goto error1;
2157
2158 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2159
2160 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2161 *stat = 1;
2162 return 0;
2163
2164out0:
2165 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2166 *stat = 0;
2167 return 0;
2168
2169error0:
2170 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2171 return error;
2172
2173error1:
2174 XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2175 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2176 return error;
2177}
2178
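/*
 * Editor's note: the xfs_btree_shift_{recs,keys,ptrs} calls above behave
 * like a backwards memmove() that opens a hole at slot 1 of the right
 * block. A minimal, self-contained sketch of the same idea on a plain
 * array (illustrative only, not kernel code; compiled out):
 */
#if 0
static void shift_right_by_one(int *entries, int nentries)
{
	int i;

	/* Walk backwards so no entry is overwritten before it is copied. */
	for (i = nentries - 1; i >= 0; i--)
		entries[i + 1] = entries[i];
	/* entries[0] is now free for the record moved in from the left. */
}
#endif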
2179/*
2180 * Split cur/level block in half.
2181 * Return new block number and the key to its first
2182 * record (to be inserted into parent).
2183 */
2184STATIC int /* error */
2185xfs_btree_split(
2186 struct xfs_btree_cur *cur,
2187 int level,
2188 union xfs_btree_ptr *ptrp,
2189 union xfs_btree_key *key,
2190 struct xfs_btree_cur **curp,
2191 int *stat) /* success/failure */
2192{
2193 union xfs_btree_ptr lptr; /* left sibling block ptr */
2194 struct xfs_buf *lbp; /* left buffer pointer */
2195 struct xfs_btree_block *left; /* left btree block */
2196 union xfs_btree_ptr rptr; /* right sibling block ptr */
2197 struct xfs_buf *rbp; /* right buffer pointer */
2198 struct xfs_btree_block *right; /* right btree block */
2199 union xfs_btree_ptr rrptr; /* right-right sibling ptr */
2200 struct xfs_buf *rrbp; /* right-right buffer pointer */
2201 struct xfs_btree_block *rrblock; /* right-right btree block */
2202 int lrecs;
2203 int rrecs;
2204 int src_index;
2205 int error; /* error return value */
2206#ifdef DEBUG
2207 int i;
2208#endif
2209
2210 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2211 XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2212
2213 XFS_BTREE_STATS_INC(cur, split);
2214
2215 /* Set up left block (current one). */
2216 left = xfs_btree_get_block(cur, level, &lbp);
2217
2218#ifdef DEBUG
2219 error = xfs_btree_check_block(cur, left, level, lbp);
2220 if (error)
2221 goto error0;
2222#endif
2223
2224 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2225
2226 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2227 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2228 if (error)
2229 goto error0;
2230 if (*stat == 0)
2231 goto out0;
2232 XFS_BTREE_STATS_INC(cur, alloc);
2233
2234 /* Set up the new block as "right". */
2235 error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2236 if (error)
2237 goto error0;
2238
2239 /* Fill in the btree header for the new right block. */
2240 xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2241
2242 /*
2243 * Split the entries between the old and the new block evenly.
2244	 * If there's an odd number of entries, put the extra one on the
2245	 * side away from the cursor, so the upcoming insert lands in the
2246	 * emptier block.
2246 */
2247 lrecs = xfs_btree_get_numrecs(left);
2248 rrecs = lrecs / 2;
2249 if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2250 rrecs++;
2251 src_index = (lrecs - rrecs + 1);
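	/*
	 * Worked example (editor's illustration): with lrecs = 9 and the
	 * cursor at slot 4, rrecs starts as 9 / 2 = 4; lrecs is odd and
	 * bc_ptrs[level] (4) <= rrecs + 1 (5), so rrecs becomes 5 and
	 * src_index = 9 - 5 + 1 = 5. Entries 5..9 move to the new right
	 * block, leaving 4 records on the left, where the insert will go.
	 */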
2252
2253 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2254
2255 /*
2256 * Copy btree block entries from the left block over to the
2257 * new block, the right. Update the right block and log the
2258 * changes.
2259 */
2260 if (level > 0) {
2261 /* It's a non-leaf. Move keys and pointers. */
2262 union xfs_btree_key *lkp; /* left btree key */
2263 union xfs_btree_ptr *lpp; /* left address pointer */
2264 union xfs_btree_key *rkp; /* right btree key */
2265 union xfs_btree_ptr *rpp; /* right address pointer */
2266
2267 lkp = xfs_btree_key_addr(cur, src_index, left);
2268 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2269 rkp = xfs_btree_key_addr(cur, 1, right);
2270 rpp = xfs_btree_ptr_addr(cur, 1, right);
2271
2272#ifdef DEBUG
2273 for (i = src_index; i < rrecs; i++) {
2274 error = xfs_btree_check_ptr(cur, lpp, i, level);
2275 if (error)
2276 goto error0;
2277 }
2278#endif
2279
2280 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2281 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2282
2283 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2284 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2285
2286 /* Grab the keys to the entries moved to the right block */
2287 xfs_btree_copy_keys(cur, key, rkp, 1);
2288 } else {
2289 /* It's a leaf. Move records. */
2290 union xfs_btree_rec *lrp; /* left record pointer */
2291 union xfs_btree_rec *rrp; /* right record pointer */
2292
2293 lrp = xfs_btree_rec_addr(cur, src_index, left);
2294 rrp = xfs_btree_rec_addr(cur, 1, right);
2295
2296 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2297 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2298
2299 cur->bc_ops->init_key_from_rec(key,
2300 xfs_btree_rec_addr(cur, 1, right));
2301 }
2302
2303
2304 /*
2305 * Find the left block number by looking in the buffer.
2306 * Adjust numrecs, sibling pointers.
2307 */
2308 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2309 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2310 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2311 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2312
2313 lrecs -= rrecs;
2314 xfs_btree_set_numrecs(left, lrecs);
2315 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2316
2317 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2318 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2319
2320 /*
2321 * If there's a block to the new block's right, make that block
2322 * point back to right instead of to left.
2323 */
2324 if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2325 error = xfs_btree_read_buf_block(cur, &rrptr, level,
2326 0, &rrblock, &rrbp);
2327 if (error)
2328 goto error0;
2329 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2330 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2331 }
2332 /*
2333 * If the cursor is really in the right block, move it there.
2334 * If it's just pointing past the last entry in left, then we'll
2335 * insert there, so don't change anything in that case.
2336 */
2337 if (cur->bc_ptrs[level] > lrecs + 1) {
2338 xfs_btree_setbuf(cur, level, rbp);
2339 cur->bc_ptrs[level] -= lrecs;
2340 }
2341 /*
2342	 * If there are more levels, we'll need another cursor that refers
2343	 * to the right block, no matter where this cursor was.
2344 */
2345 if (level + 1 < cur->bc_nlevels) {
2346 error = xfs_btree_dup_cursor(cur, curp);
2347 if (error)
2348 goto error0;
2349 (*curp)->bc_ptrs[level + 1]++;
2350 }
2351 *ptrp = rptr;
2352 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2353 *stat = 1;
2354 return 0;
2355out0:
2356 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2357 *stat = 0;
2358 return 0;
2359
2360error0:
2361 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2362 return error;
2363}
2364
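/*
 * Editor's note: the sibling fix-ups in xfs_btree_split() above are a
 * doubly-linked-list insertion: the new right block is spliced between
 * "left" and its old right-hand neighbour. A sketch of the same pattern
 * on in-memory nodes (illustrative only, not kernel code; compiled out):
 */
#if 0
struct node {
	struct node *leftsib;
	struct node *rightsib;
};

static void splice_after(struct node *left, struct node *new)
{
	new->rightsib = left->rightsib;		/* rrptr */
	new->leftsib = left;			/* lptr */
	left->rightsib = new;			/* rptr */
	if (new->rightsib)			/* rrblock exists */
		new->rightsib->leftsib = new;
}
#endif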
2365/*
2366 * Copy the old inode root contents into a real block and make the
2367 * broot point to it.
2368 */
2369int /* error */
2370xfs_btree_new_iroot(
2371 struct xfs_btree_cur *cur, /* btree cursor */
2372 int *logflags, /* logging flags for inode */
2373 int *stat) /* return status - 0 fail */
2374{
2375 struct xfs_buf *cbp; /* buffer for cblock */
2376 struct xfs_btree_block *block; /* btree block */
2377 struct xfs_btree_block *cblock; /* child btree block */
2378 union xfs_btree_key *ckp; /* child key pointer */
2379 union xfs_btree_ptr *cpp; /* child ptr pointer */
2380 union xfs_btree_key *kp; /* pointer to btree key */
2381 union xfs_btree_ptr *pp; /* pointer to block addr */
2382 union xfs_btree_ptr nptr; /* new block addr */
2383 int level; /* btree level */
2384 int error; /* error return code */
2385#ifdef DEBUG
2386 int i; /* loop counter */
2387#endif
2388
2389 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2390 XFS_BTREE_STATS_INC(cur, newroot);
2391
2392 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2393
2394 level = cur->bc_nlevels - 1;
2395
2396 block = xfs_btree_get_iroot(cur);
2397 pp = xfs_btree_ptr_addr(cur, 1, block);
2398
2399 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2400 error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2401 if (error)
2402 goto error0;
2403 if (*stat == 0) {
2404 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2405 return 0;
2406 }
2407 XFS_BTREE_STATS_INC(cur, alloc);
2408
2409 /* Copy the root into a real block. */
2410 error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2411 if (error)
2412 goto error0;
2413
2414 memcpy(cblock, block, xfs_btree_block_len(cur));
2415
2416 be16_add_cpu(&block->bb_level, 1);
2417 xfs_btree_set_numrecs(block, 1);
2418 cur->bc_nlevels++;
2419 cur->bc_ptrs[level + 1] = 1;
2420
2421 kp = xfs_btree_key_addr(cur, 1, block);
2422 ckp = xfs_btree_key_addr(cur, 1, cblock);
2423 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2424
2425 cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2426#ifdef DEBUG
2427 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2428 error = xfs_btree_check_ptr(cur, pp, i, level);
2429 if (error)
2430 goto error0;
2431 }
2432#endif
2433 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2434
2435#ifdef DEBUG
2436 error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2437 if (error)
2438 goto error0;
2439#endif
2440 xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2441
2442 xfs_iroot_realloc(cur->bc_private.b.ip,
2443 1 - xfs_btree_get_numrecs(cblock),
2444 cur->bc_private.b.whichfork);
2445
2446 xfs_btree_setbuf(cur, level, cbp);
2447
2448 /*
2449 * Do all this logging at the end so that
2450 * the root is at the right level.
2451 */
2452 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2453 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2454 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2455
2456 *logflags |=
2457 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
2458 *stat = 1;
2459 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2460 return 0;
2461error0:
2462 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2463 return error;
2464}
2465
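/*
 * Editor's sketch (illustrative) of what xfs_btree_new_iroot() does:
 *
 *	before:	inode fork broot: [level N | r1 r2 ... rn]
 *
 *	after:	inode fork broot: [level N+1 | k1/p1]
 *					       |
 *					       v
 *		new child cblock: [level N | r1 r2 ... rn]
 *
 * The old root contents move wholesale into the freshly allocated child
 * block; the inode root shrinks to a single key/pointer and the tree
 * grows one level.
 */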
2466/*
2467 * Allocate a new root block, fill it in.
2468 */
2469STATIC int /* error */
2470xfs_btree_new_root(
2471 struct xfs_btree_cur *cur, /* btree cursor */
2472 int *stat) /* success/failure */
2473{
2474 struct xfs_btree_block *block; /* one half of the old root block */
2475 struct xfs_buf *bp; /* buffer containing block */
2476 int error; /* error return value */
2477 struct xfs_buf *lbp; /* left buffer pointer */
2478 struct xfs_btree_block *left; /* left btree block */
2479 struct xfs_buf *nbp; /* new (root) buffer */
2480 struct xfs_btree_block *new; /* new (root) btree block */
2481 int nptr; /* new value for key index, 1 or 2 */
2482 struct xfs_buf *rbp; /* right buffer pointer */
2483 struct xfs_btree_block *right; /* right btree block */
2484 union xfs_btree_ptr rptr;
2485 union xfs_btree_ptr lptr;
2486
2487 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2488 XFS_BTREE_STATS_INC(cur, newroot);
2489
2490 /* initialise our start point from the cursor */
2491 cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2492
2493 /* Allocate the new block. If we can't do it, we're toast. Give up. */
2494 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2495 if (error)
2496 goto error0;
2497 if (*stat == 0)
2498 goto out0;
2499 XFS_BTREE_STATS_INC(cur, alloc);
2500
2501 /* Set up the new block. */
2502 error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2503 if (error)
2504 goto error0;
2505
2506	/* Set the root in the holding structure, increasing the level by 1. */
2507 cur->bc_ops->set_root(cur, &lptr, 1);
2508
2509 /*
2510 * At the previous root level there are now two blocks: the old root,
2511 * and the new block generated when it was split. We don't know which
2512 * one the cursor is pointing at, so we set up variables "left" and
2513 * "right" for each case.
2514 */
2515 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2516
2517#ifdef DEBUG
2518 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2519 if (error)
2520 goto error0;
2521#endif
2522
2523 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2524 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2525 /* Our block is left, pick up the right block. */
2526 lbp = bp;
2527 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2528 left = block;
2529 error = xfs_btree_read_buf_block(cur, &rptr,
2530 cur->bc_nlevels - 1, 0, &right, &rbp);
2531 if (error)
2532 goto error0;
2533 bp = rbp;
2534 nptr = 1;
2535 } else {
2536 /* Our block is right, pick up the left block. */
2537 rbp = bp;
2538 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2539 right = block;
2540 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2541 error = xfs_btree_read_buf_block(cur, &lptr,
2542 cur->bc_nlevels - 1, 0, &left, &lbp);
2543 if (error)
2544 goto error0;
2545 bp = lbp;
2546 nptr = 2;
2547 }
2548 /* Fill in the new block's btree header and log it. */
2549 xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
2550 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2551 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2552 !xfs_btree_ptr_is_null(cur, &rptr));
2553
2554 /* Fill in the key data in the new root. */
2555 if (xfs_btree_get_level(left) > 0) {
2556 xfs_btree_copy_keys(cur,
2557 xfs_btree_key_addr(cur, 1, new),
2558 xfs_btree_key_addr(cur, 1, left), 1);
2559 xfs_btree_copy_keys(cur,
2560 xfs_btree_key_addr(cur, 2, new),
2561 xfs_btree_key_addr(cur, 1, right), 1);
2562 } else {
2563 cur->bc_ops->init_key_from_rec(
2564 xfs_btree_key_addr(cur, 1, new),
2565 xfs_btree_rec_addr(cur, 1, left));
2566 cur->bc_ops->init_key_from_rec(
2567 xfs_btree_key_addr(cur, 2, new),
2568 xfs_btree_rec_addr(cur, 1, right));
2569 }
2570 xfs_btree_log_keys(cur, nbp, 1, 2);
2571
2572 /* Fill in the pointer data in the new root. */
2573 xfs_btree_copy_ptrs(cur,
2574 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2575 xfs_btree_copy_ptrs(cur,
2576 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2577 xfs_btree_log_ptrs(cur, nbp, 1, 2);
2578
2579 /* Fix up the cursor. */
2580 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2581 cur->bc_ptrs[cur->bc_nlevels] = nptr;
2582 cur->bc_nlevels++;
2583 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2584 *stat = 1;
2585 return 0;
2586error0:
2587 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2588 return error;
2589out0:
2590 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2591 *stat = 0;
2592 return 0;
2593}
2594
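/*
 * Editor's note (illustrative): after xfs_btree_new_root() the tree is
 * one level taller and the new root holds exactly two entries -- the
 * keys of the first records of "left" and "right", and the pointers
 * lptr and rptr. nptr records which of the two slots the cursor now
 * descends through (1 = left, 2 = right).
 */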
2595STATIC int
2596xfs_btree_make_block_unfull(
2597 struct xfs_btree_cur *cur, /* btree cursor */
2598 int level, /* btree level */
2599 int numrecs,/* # of recs in block */
2600 int *oindex,/* old tree index */
2601 int *index, /* new tree index */
2602 union xfs_btree_ptr *nptr, /* new btree ptr */
2603 struct xfs_btree_cur **ncur, /* new btree cursor */
2604 union xfs_btree_rec *nrec, /* new record */
2605 int *stat)
2606{
2607 union xfs_btree_key key; /* new btree key value */
2608 int error = 0;
2609
2610 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2611 level == cur->bc_nlevels - 1) {
2612 struct xfs_inode *ip = cur->bc_private.b.ip;
2613
2614 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2615 /* A root block that can be made bigger. */
2616
2617 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2618 } else {
2619 /* A root block that needs replacing */
2620 int logflags = 0;
2621
2622 error = xfs_btree_new_iroot(cur, &logflags, stat);
2623 if (error || *stat == 0)
2624 return error;
2625
2626 xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2627 }
2628
2629 return 0;
2630 }
2631
2632 /* First, try shifting an entry to the right neighbor. */
2633 error = xfs_btree_rshift(cur, level, stat);
2634 if (error || *stat)
2635 return error;
2636
2637 /* Next, try shifting an entry to the left neighbor. */
2638 error = xfs_btree_lshift(cur, level, stat);
2639 if (error)
2640 return error;
2641
2642 if (*stat) {
2643 *oindex = *index = cur->bc_ptrs[level];
2644 return 0;
2645 }
2646
2647 /*
2648 * Next, try splitting the current block in half.
2649 *
2650 * If this works we have to re-set our variables because we
2651 * could be in a different block now.
2652 */
2653 error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2654 if (error || *stat == 0)
2655 return error;
2656
2657
2658 *index = cur->bc_ptrs[level];
2659 cur->bc_ops->init_rec_from_key(&key, nrec);
2660 return 0;
2661}
2662
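/*
 * Editor's summary (illustrative): xfs_btree_make_block_unfull() tries
 * the cheapest remedy first. An inode root is grown in place if it can
 * be; otherwise one entry is shifted to the right sibling, then to the
 * left sibling, and only if both siblings are full is the block split
 * in half.
 */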
2663/*
2664 * Insert one record/level. Return information to the caller
2665 * allowing the next level up to proceed if necessary.
2666 */
2667STATIC int
2668xfs_btree_insrec(
2669 struct xfs_btree_cur *cur, /* btree cursor */
2670 int level, /* level to insert record at */
2671 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
2672 union xfs_btree_rec *recp, /* i/o: record data inserted */
2673 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
2674 int *stat) /* success/failure */
2675{
2676 struct xfs_btree_block *block; /* btree block */
2677 struct xfs_buf *bp; /* buffer for block */
2678 union xfs_btree_key key; /* btree key */
2679 union xfs_btree_ptr nptr; /* new block ptr */
2680 struct xfs_btree_cur *ncur; /* new btree cursor */
2681	union xfs_btree_rec	nrec;	/* new record (split result) */
2682 int optr; /* old key/record index */
2683 int ptr; /* key/record index */
2684 int numrecs;/* number of records */
2685 int error; /* error return value */
2686#ifdef DEBUG
2687 int i;
2688#endif
2689
2690 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2691 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2692
2693 ncur = NULL;
2694
2695 /*
2696 * If we have an external root pointer, and we've made it to the
2697 * root level, allocate a new root block and we're done.
2698 */
2699 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2700 (level >= cur->bc_nlevels)) {
2701 error = xfs_btree_new_root(cur, stat);
2702 xfs_btree_set_ptr_null(cur, ptrp);
2703
2704 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2705 return error;
2706 }
2707
2708 /* If we're off the left edge, return failure. */
2709 ptr = cur->bc_ptrs[level];
2710 if (ptr == 0) {
2711 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2712 *stat = 0;
2713 return 0;
2714 }
2715
2716 /* Make a key out of the record data to be inserted, and save it. */
2717 cur->bc_ops->init_key_from_rec(&key, recp);
2718
2719 optr = ptr;
2720
2721 XFS_BTREE_STATS_INC(cur, insrec);
2722
2723 /* Get pointers to the btree buffer and block. */
2724 block = xfs_btree_get_block(cur, level, &bp);
2725 numrecs = xfs_btree_get_numrecs(block);
2726
2727#ifdef DEBUG
2728 error = xfs_btree_check_block(cur, block, level, bp);
2729 if (error)
2730 goto error0;
2731
2732 /* Check that the new entry is being inserted in the right place. */
2733 if (ptr <= numrecs) {
2734 if (level == 0) {
2735 ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2736 xfs_btree_rec_addr(cur, ptr, block)));
2737 } else {
2738 ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2739 xfs_btree_key_addr(cur, ptr, block)));
2740 }
2741 }
2742#endif
2743
2744 /*
2745 * If the block is full, we can't insert the new entry until we
2746 * make the block un-full.
2747 */
2748 xfs_btree_set_ptr_null(cur, &nptr);
2749 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2750 error = xfs_btree_make_block_unfull(cur, level, numrecs,
2751 &optr, &ptr, &nptr, &ncur, &nrec, stat);
2752 if (error || *stat == 0)
2753 goto error0;
2754 }
2755
2756 /*
2757 * The current block may have changed if the block was
2758 * previously full and we have just made space in it.
2759 */
2760 block = xfs_btree_get_block(cur, level, &bp);
2761 numrecs = xfs_btree_get_numrecs(block);
2762
2763#ifdef DEBUG
2764 error = xfs_btree_check_block(cur, block, level, bp);
2765 if (error)
2766 return error;
2767#endif
2768
2769 /*
2770 * At this point we know there's room for our new entry in the block
2771 * we're pointing at.
2772 */
2773 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2774
2775 if (level > 0) {
2776		/* It's a non-leaf. Make a hole in the keys and ptrs. */
2777 union xfs_btree_key *kp;
2778 union xfs_btree_ptr *pp;
2779
2780 kp = xfs_btree_key_addr(cur, ptr, block);
2781 pp = xfs_btree_ptr_addr(cur, ptr, block);
2782
2783#ifdef DEBUG
2784 for (i = numrecs - ptr; i >= 0; i--) {
2785 error = xfs_btree_check_ptr(cur, pp, i, level);
2786 if (error)
2787 return error;
2788 }
2789#endif
2790
2791 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2792 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2793
2794#ifdef DEBUG
2795 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2796 if (error)
2797 goto error0;
2798#endif
2799
2800 /* Now put the new data in, bump numrecs and log it. */
2801 xfs_btree_copy_keys(cur, kp, &key, 1);
2802 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2803 numrecs++;
2804 xfs_btree_set_numrecs(block, numrecs);
2805 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2806 xfs_btree_log_keys(cur, bp, ptr, numrecs);
2807#ifdef DEBUG
2808 if (ptr < numrecs) {
2809 ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2810 xfs_btree_key_addr(cur, ptr + 1, block)));
2811 }
2812#endif
2813 } else {
2814		/* It's a leaf. Make a hole in the records. */
2815 union xfs_btree_rec *rp;
2816
2817 rp = xfs_btree_rec_addr(cur, ptr, block);
2818
2819 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2820
2821 /* Now put the new data in, bump numrecs and log it. */
2822 xfs_btree_copy_recs(cur, rp, recp, 1);
2823 xfs_btree_set_numrecs(block, ++numrecs);
2824 xfs_btree_log_recs(cur, bp, ptr, numrecs);
2825#ifdef DEBUG
2826 if (ptr < numrecs) {
2827 ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2828 xfs_btree_rec_addr(cur, ptr + 1, block)));
2829 }
2830#endif
2831 }
2832
2833 /* Log the new number of records in the btree header. */
2834 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2835
2836 /* If we inserted at the start of a block, update the parents' keys. */
2837 if (optr == 1) {
2838 error = xfs_btree_updkey(cur, &key, level + 1);
2839 if (error)
2840 goto error0;
2841 }
2842
2843 /*
2844 * If we are tracking the last record in the tree and
2845 * we are at the far right edge of the tree, update it.
2846 */
2847 if (xfs_btree_is_lastrec(cur, block, level)) {
2848 cur->bc_ops->update_lastrec(cur, block, recp,
2849 ptr, LASTREC_INSREC);
2850 }
2851
2852 /*
2853 * Return the new block number, if any.
2854 * If there is one, give back a record value and a cursor too.
2855 */
2856 *ptrp = nptr;
2857 if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2858 *recp = nrec;
2859 *curp = ncur;
2860 }
2861
2862 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2863 *stat = 1;
2864 return 0;
2865
2866error0:
2867 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2868 return error;
2869}
2870
2871/*
2872 * Insert the record at the point referenced by cur.
2873 *
2874 * A multi-level split of the tree on insert will invalidate the original
2875 * cursor. All callers of this function should assume that the cursor is
2876 * no longer valid and revalidate it.
2877 */
2878int
2879xfs_btree_insert(
2880 struct xfs_btree_cur *cur,
2881 int *stat)
2882{
2883 int error; /* error return value */
2884 int i; /* result value, 0 for failure */
2885 int level; /* current level number in btree */
2886 union xfs_btree_ptr nptr; /* new block number (split result) */
2887 struct xfs_btree_cur *ncur; /* new cursor (split result) */
2888 struct xfs_btree_cur *pcur; /* previous level's cursor */
2889 union xfs_btree_rec rec; /* record to insert */
2890
2891 level = 0;
2892 ncur = NULL;
2893 pcur = cur;
2894
2895 xfs_btree_set_ptr_null(cur, &nptr);
2896 cur->bc_ops->init_rec_from_cur(cur, &rec);
2897
2898 /*
2899 * Loop going up the tree, starting at the leaf level.
2900	 * Stop when we don't get a split block; that means the
2901	 * insert is finished at this level.
2902 */
2903 do {
2904 /*
2905 * Insert nrec/nptr into this level of the tree.
2906 * Note if we fail, nptr will be null.
2907 */
2908 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2909 if (error) {
2910 if (pcur != cur)
2911 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2912 goto error0;
2913 }
2914
2915 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2916 level++;
2917
2918 /*
2919 * See if the cursor we just used is trash.
2920 * Can't trash the caller's cursor, but otherwise we should
2921 * if ncur is a new cursor or we're about to be done.
2922 */
2923 if (pcur != cur &&
2924 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2925 /* Save the state from the cursor before we trash it */
2926 if (cur->bc_ops->update_cursor)
2927 cur->bc_ops->update_cursor(pcur, cur);
2928 cur->bc_nlevels = pcur->bc_nlevels;
2929 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2930 }
2931 /* If we got a new cursor, switch to it. */
2932 if (ncur) {
2933 pcur = ncur;
2934 ncur = NULL;
2935 }
2936 } while (!xfs_btree_ptr_is_null(cur, &nptr));
2937
2938 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2939 *stat = i;
2940 return 0;
2941error0:
2942 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2943 return error;
2944}
2945
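/*
 * Editor's note: a hypothetical caller pattern for xfs_btree_insert()
 * (illustrative only; assumes an allocation-btree cursor, caller
 * variables bno/len, and abbreviated error handling; compiled out):
 */
#if 0
	int	stat;
	int	error;

	/* Describe the record in the cursor; init_rec_from_cur reads it. */
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;

	error = xfs_btree_insert(cur, &stat);
	if (error)
		goto error0;
	/*
	 * The cursor may have been invalidated by a multi-level split;
	 * stat == 0 means the insert failed without an error code.
	 */
#endif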
2946/*
2947 * Try to merge a non-leaf block back into the inode root.
2948 *
2949 * Note: the killroot name comes from the fact that we're effectively
2950 * killing the old root block. But because we can't just delete the
2951 * inode we have to copy the single block it was pointing to into the
2952 * inode.
2953 */
2954int
2955xfs_btree_kill_iroot(
2956 struct xfs_btree_cur *cur)
2957{
2958 int whichfork = cur->bc_private.b.whichfork;
2959 struct xfs_inode *ip = cur->bc_private.b.ip;
2960 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
2961 struct xfs_btree_block *block;
2962 struct xfs_btree_block *cblock;
2963 union xfs_btree_key *kp;
2964 union xfs_btree_key *ckp;
2965 union xfs_btree_ptr *pp;
2966 union xfs_btree_ptr *cpp;
2967 struct xfs_buf *cbp;
2968 int level;
2969 int index;
2970 int numrecs;
2971#ifdef DEBUG
2972 union xfs_btree_ptr ptr;
2973 int i;
2974#endif
2975
2976 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2977
2978 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2979 ASSERT(cur->bc_nlevels > 1);
2980
2981 /*
2982	 * Don't deal with the case where the root block needs to be a leaf.
2983 * We're just going to turn the thing back into extents anyway.
2984 */
2985 level = cur->bc_nlevels - 1;
2986 if (level == 1)
2987 goto out0;
2988
2989 /*
2990 * Give up if the root has multiple children.
2991 */
2992 block = xfs_btree_get_iroot(cur);
2993 if (xfs_btree_get_numrecs(block) != 1)
2994 goto out0;
2995
2996 cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2997 numrecs = xfs_btree_get_numrecs(cblock);
2998
2999 /*
3000	 * Only do this if the next level down fits in the inode root.
3001	 * Its contents are then copied up into the inode, and it is the
3002	 * next level, not the root, that gets freed.
3003 */
3004 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3005 goto out0;
3006
3007 XFS_BTREE_STATS_INC(cur, killroot);
3008
3009#ifdef DEBUG
3010 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3011 ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3012 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3013 ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3014#endif
3015
3016 index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3017 if (index) {
3018 xfs_iroot_realloc(cur->bc_private.b.ip, index,
3019 cur->bc_private.b.whichfork);
3020 block = ifp->if_broot;
3021 }
3022
3023 be16_add_cpu(&block->bb_numrecs, index);
3024 ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3025
3026 kp = xfs_btree_key_addr(cur, 1, block);
3027 ckp = xfs_btree_key_addr(cur, 1, cblock);
3028 xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3029
3030 pp = xfs_btree_ptr_addr(cur, 1, block);
3031 cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3032#ifdef DEBUG
3033 for (i = 0; i < numrecs; i++) {
3034 int error;
3035
3036 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3037 if (error) {
3038 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3039 return error;
3040 }
3041 }
3042#endif
3043 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3044
3045 cur->bc_ops->free_block(cur, cbp);
3046 XFS_BTREE_STATS_INC(cur, free);
3047
3048 cur->bc_bufs[level - 1] = NULL;
3049 be16_add_cpu(&block->bb_level, -1);
3050 xfs_trans_log_inode(cur->bc_tp, ip,
3051 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
3052 cur->bc_nlevels--;
3053out0:
3054 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3055 return 0;
3056}
3057
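/*
 * Editor's note: xfs_btree_kill_iroot() is the inverse of
 * xfs_btree_new_iroot() above -- the root's single child block is
 * copied back into the inode fork root and then freed, shrinking the
 * tree by one level (illustrative summary).
 */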
3058STATIC int
3059xfs_btree_dec_cursor(
3060 struct xfs_btree_cur *cur,
3061 int level,
3062 int *stat)
3063{
3064 int error;
3065 int i;
3066
3067 if (level > 0) {
3068 error = xfs_btree_decrement(cur, level, &i);
3069 if (error)
3070 return error;
3071 }
3072
3073 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3074 *stat = 1;
3075 return 0;
3076}
3077
3078/*
3079 * Single level of the btree record deletion routine.
3080 * Delete record pointed to by cur/level.
3081 * Remove the record from its block then rebalance the tree.
3082 * Set *stat to 0 for failure, 1 for done, 2 to go on to the next level.
3083 */
3084STATIC int /* error */
3085xfs_btree_delrec(
3086 struct xfs_btree_cur *cur, /* btree cursor */
3087 int level, /* level removing record from */
3088 int *stat) /* fail/done/go-on */
3089{
3090 struct xfs_btree_block *block; /* btree block */
3091 union xfs_btree_ptr cptr; /* current block ptr */
3092 struct xfs_buf *bp; /* buffer for block */
3093 int error; /* error return value */
3094 int i; /* loop counter */
3095 union xfs_btree_key key; /* storage for keyp */
3096 union xfs_btree_key *keyp = &key; /* passed to the next level */
3097 union xfs_btree_ptr lptr; /* left sibling block ptr */
3098 struct xfs_buf *lbp; /* left buffer pointer */
3099 struct xfs_btree_block *left; /* left btree block */
3100 int lrecs = 0; /* left record count */
3101 int ptr; /* key/record index */
3102 union xfs_btree_ptr rptr; /* right sibling block ptr */
3103 struct xfs_buf *rbp; /* right buffer pointer */
3104 struct xfs_btree_block *right; /* right btree block */
3105 struct xfs_btree_block *rrblock; /* right-right btree block */
3106 struct xfs_buf *rrbp; /* right-right buffer pointer */
3107 int rrecs = 0; /* right record count */
3108 struct xfs_btree_cur *tcur; /* temporary btree cursor */
3109 int numrecs; /* temporary numrec count */
3110
3111 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3112 XFS_BTREE_TRACE_ARGI(cur, level);
3113
3114 tcur = NULL;
3115
3116 /* Get the index of the entry being deleted, check for nothing there. */
3117 ptr = cur->bc_ptrs[level];
3118 if (ptr == 0) {
3119 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3120 *stat = 0;
3121 return 0;
3122 }
3123
3124 /* Get the buffer & block containing the record or key/ptr. */
3125 block = xfs_btree_get_block(cur, level, &bp);
3126 numrecs = xfs_btree_get_numrecs(block);
3127
3128#ifdef DEBUG
3129 error = xfs_btree_check_block(cur, block, level, bp);
3130 if (error)
3131 goto error0;
3132#endif
3133
3134 /* Fail if we're off the end of the block. */
3135 if (ptr > numrecs) {
3136 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3137 *stat = 0;
3138 return 0;
3139 }
3140
3141 XFS_BTREE_STATS_INC(cur, delrec);
3142 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3143
3144 /* Excise the entries being deleted. */
3145 if (level > 0) {
3146		/* It's a non-leaf. Operate on the keys and ptrs. */
3147 union xfs_btree_key *lkp;
3148 union xfs_btree_ptr *lpp;
3149
3150 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3151 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3152
3153#ifdef DEBUG
3154 for (i = 0; i < numrecs - ptr; i++) {
3155 error = xfs_btree_check_ptr(cur, lpp, i, level);
3156 if (error)
3157 goto error0;
3158 }
3159#endif
3160
3161 if (ptr < numrecs) {
3162 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3163 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3164 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3165 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3166 }
3167
3168 /*
3169 * If it's the first record in the block, we'll need to pass a
3170 * key up to the next level (updkey).
3171 */
3172 if (ptr == 1)
3173 keyp = xfs_btree_key_addr(cur, 1, block);
3174 } else {
3175		/* It's a leaf. Operate on the records. */
3176 if (ptr < numrecs) {
3177 xfs_btree_shift_recs(cur,
3178 xfs_btree_rec_addr(cur, ptr + 1, block),
3179 -1, numrecs - ptr);
3180 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3181 }
3182
3183 /*
3184 * If it's the first record in the block, we'll need a key
3185 * structure to pass up to the next level (updkey).
3186 */
3187 if (ptr == 1) {
3188 cur->bc_ops->init_key_from_rec(&key,
3189 xfs_btree_rec_addr(cur, 1, block));
3190 keyp = &key;
3191 }
3192 }
3193
3194 /*
3195 * Decrement and log the number of entries in the block.
3196 */
3197 xfs_btree_set_numrecs(block, --numrecs);
3198 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3199
3200 /*
3201 * If we are tracking the last record in the tree and
3202 * we are at the far right edge of the tree, update it.
3203 */
3204 if (xfs_btree_is_lastrec(cur, block, level)) {
3205 cur->bc_ops->update_lastrec(cur, block, NULL,
3206 ptr, LASTREC_DELREC);
3207 }
3208
3209 /*
3210 * We're at the root level. First, shrink the root block in-memory.
3211 * Try to get rid of the next level down. If we can't then there's
3212 * nothing left to do.
3213 */
3214 if (level == cur->bc_nlevels - 1) {
3215 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3216 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3217 cur->bc_private.b.whichfork);
3218
3219 error = xfs_btree_kill_iroot(cur);
3220 if (error)
3221 goto error0;
3222
3223 error = xfs_btree_dec_cursor(cur, level, stat);
3224 if (error)
3225 goto error0;
3226 *stat = 1;
3227 return 0;
3228 }
3229
3230 /*
3231 * If this is the root level, and there's only one entry left,
3232 * and it's NOT the leaf level, then we can get rid of this
3233 * level.
3234 */
3235 if (numrecs == 1 && level > 0) {
3236 union xfs_btree_ptr *pp;
3237 /*
3238 * pp is still set to the first pointer in the block.
3239 * Make it the new root of the btree.
3240 */
3241 pp = xfs_btree_ptr_addr(cur, 1, block);
3242 error = cur->bc_ops->kill_root(cur, bp, level, pp);
3243 if (error)
3244 goto error0;
3245 } else if (level > 0) {
3246 error = xfs_btree_dec_cursor(cur, level, stat);
3247 if (error)
3248 goto error0;
3249 }
3250 *stat = 1;
3251 return 0;
3252 }
3253
3254 /*
3255 * If we deleted the leftmost entry in the block, update the
3256 * key values above us in the tree.
3257 */
3258 if (ptr == 1) {
3259 error = xfs_btree_updkey(cur, keyp, level + 1);
3260 if (error)
3261 goto error0;
3262 }
3263
3264 /*
3265 * If the number of records remaining in the block is at least
3266 * the minimum, we're done.
3267 */
3268 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3269 error = xfs_btree_dec_cursor(cur, level, stat);
3270 if (error)
3271 goto error0;
3272 return 0;
3273 }
3274
3275 /*
3276 * Otherwise, we have to move some records around to keep the
3277 * tree balanced. Look at the left and right sibling blocks to
3278 * see if we can re-balance by moving only one record.
3279 */
3280 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3281 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3282
3283 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3284 /*
3285		 * The root has a single child: copy its contents into the
3286		 * root and delete the child. We can't go up to the next
3287		 * level; there's nothing to delete there.
3288 */
3289 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3290 xfs_btree_ptr_is_null(cur, &lptr) &&
3291 level == cur->bc_nlevels - 2) {
3292 error = xfs_btree_kill_iroot(cur);
3293 if (!error)
3294 error = xfs_btree_dec_cursor(cur, level, stat);
3295 if (error)
3296 goto error0;
3297 return 0;
3298 }
3299 }
3300
3301 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3302 !xfs_btree_ptr_is_null(cur, &lptr));
3303
3304 /*
3305 * Duplicate the cursor so our btree manipulations here won't
3306 * disrupt the next level up.
3307 */
3308 error = xfs_btree_dup_cursor(cur, &tcur);
3309 if (error)
3310 goto error0;
3311
3312 /*
3313 * If there's a right sibling, see if it's ok to shift an entry
3314 * out of it.
3315 */
3316 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3317 /*
3318 * Move the temp cursor to the last entry in the next block.
3319 * Actually any entry but the first would suffice.
3320 */
3321 i = xfs_btree_lastrec(tcur, level);
3322 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3323
3324 error = xfs_btree_increment(tcur, level, &i);
3325 if (error)
3326 goto error0;
3327 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3328
3329 i = xfs_btree_lastrec(tcur, level);
3330 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3331
3332 /* Grab a pointer to the block. */
3333 right = xfs_btree_get_block(tcur, level, &rbp);
3334#ifdef DEBUG
3335 error = xfs_btree_check_block(tcur, right, level, rbp);
3336 if (error)
3337 goto error0;
3338#endif
3339 /* Grab the current block number, for future use. */
3340 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3341
3342 /*
3343 * If right block is full enough so that removing one entry
3344 * won't make it too empty, and left-shifting an entry out
3345 * of right to us works, we're done.
3346 */
3347 if (xfs_btree_get_numrecs(right) - 1 >=
3348 cur->bc_ops->get_minrecs(tcur, level)) {
3349 error = xfs_btree_lshift(tcur, level, &i);
3350 if (error)
3351 goto error0;
3352 if (i) {
3353 ASSERT(xfs_btree_get_numrecs(block) >=
3354 cur->bc_ops->get_minrecs(tcur, level));
3355
3356 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3357 tcur = NULL;
3358
3359 error = xfs_btree_dec_cursor(cur, level, stat);
3360 if (error)
3361 goto error0;
3362 return 0;
3363 }
3364 }
3365
3366 /*
3367 * Otherwise, grab the number of records in right for
3368 * future reference, and fix up the temp cursor to point
3369 * to our block again (last record).
3370 */
3371 rrecs = xfs_btree_get_numrecs(right);
3372 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3373 i = xfs_btree_firstrec(tcur, level);
3374 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3375
3376 error = xfs_btree_decrement(tcur, level, &i);
3377 if (error)
3378 goto error0;
3379 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3380 }
3381 }
3382
3383 /*
3384 * If there's a left sibling, see if it's ok to shift an entry
3385 * out of it.
3386 */
3387 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3388 /*
3389 * Move the temp cursor to the first entry in the
3390 * previous block.
3391 */
3392 i = xfs_btree_firstrec(tcur, level);
3393 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3394
3395 error = xfs_btree_decrement(tcur, level, &i);
3396 if (error)
3397 goto error0;
3398 i = xfs_btree_firstrec(tcur, level);
3399 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3400
3401 /* Grab a pointer to the block. */
3402 left = xfs_btree_get_block(tcur, level, &lbp);
3403#ifdef DEBUG
3404 error = xfs_btree_check_block(cur, left, level, lbp);
3405 if (error)
3406 goto error0;
3407#endif
3408 /* Grab the current block number, for future use. */
3409 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3410
3411 /*
3412 * If left block is full enough so that removing one entry
3413 * won't make it too empty, and right-shifting an entry out
3414 * of left to us works, we're done.
3415 */
3416 if (xfs_btree_get_numrecs(left) - 1 >=
3417 cur->bc_ops->get_minrecs(tcur, level)) {
3418 error = xfs_btree_rshift(tcur, level, &i);
3419 if (error)
3420 goto error0;
3421 if (i) {
3422 ASSERT(xfs_btree_get_numrecs(block) >=
3423 cur->bc_ops->get_minrecs(tcur, level));
3424 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3425 tcur = NULL;
3426 if (level == 0)
3427 cur->bc_ptrs[0]++;
3428 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3429 *stat = 1;
3430 return 0;
3431 }
3432 }
3433
3434 /*
3435	 * Otherwise, grab the number of records in left for
3436 * future reference.
3437 */
3438 lrecs = xfs_btree_get_numrecs(left);
3439 }
3440
3441 /* Delete the temp cursor, we're done with it. */
3442 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3443 tcur = NULL;
3444
3445 /* If here, we need to do a join to keep the tree balanced. */
3446 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3447
3448 if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3449 lrecs + xfs_btree_get_numrecs(block) <=
3450 cur->bc_ops->get_maxrecs(cur, level)) {
3451 /*
3452 * Set "right" to be the starting block,
3453 * "left" to be the left neighbor.
3454 */
3455 rptr = cptr;
3456 right = block;
3457 rbp = bp;
3458 error = xfs_btree_read_buf_block(cur, &lptr, level,
3459 0, &left, &lbp);
3460 if (error)
3461 goto error0;
3462
3463 /*
3464 * If that won't work, see if we can join with the right neighbor block.
3465 */
3466 } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3467 rrecs + xfs_btree_get_numrecs(block) <=
3468 cur->bc_ops->get_maxrecs(cur, level)) {
3469 /*
3470 * Set "left" to be the starting block,
3471 * "right" to be the right neighbor.
3472 */
3473 lptr = cptr;
3474 left = block;
3475 lbp = bp;
3476 error = xfs_btree_read_buf_block(cur, &rptr, level,
3477 0, &right, &rbp);
3478 if (error)
3479 goto error0;
3480
3481 /*
3482 * Otherwise, we can't fix the imbalance.
3483 * Just return. This is probably a logic error, but it's not fatal.
3484 */
3485 } else {
3486 error = xfs_btree_dec_cursor(cur, level, stat);
3487 if (error)
3488 goto error0;
3489 return 0;
3490 }
3491
3492 rrecs = xfs_btree_get_numrecs(right);
3493 lrecs = xfs_btree_get_numrecs(left);
3494
3495 /*
3496 * We're now going to join "left" and "right" by moving all the stuff
3497 * in "right" to "left" and deleting "right".
3498 */
3499 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3500 if (level > 0) {
3501 /* It's a non-leaf. Move keys and pointers. */
3502 union xfs_btree_key *lkp; /* left btree key */
3503 union xfs_btree_ptr *lpp; /* left address pointer */
3504 union xfs_btree_key *rkp; /* right btree key */
3505 union xfs_btree_ptr *rpp; /* right address pointer */
3506
3507 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3508 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3509 rkp = xfs_btree_key_addr(cur, 1, right);
3510 rpp = xfs_btree_ptr_addr(cur, 1, right);
3511#ifdef DEBUG
3512 for (i = 1; i < rrecs; i++) {
3513 error = xfs_btree_check_ptr(cur, rpp, i, level);
3514 if (error)
3515 goto error0;
3516 }
3517#endif
3518 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3519 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3520
3521 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3522 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3523 } else {
3524 /* It's a leaf. Move records. */
3525 union xfs_btree_rec *lrp; /* left record pointer */
3526 union xfs_btree_rec *rrp; /* right record pointer */
3527
3528 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3529 rrp = xfs_btree_rec_addr(cur, 1, right);
3530
3531 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3532 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3533 }
3534
3535 XFS_BTREE_STATS_INC(cur, join);
3536
3537 /*
3538	 * Fix up the number of records and right block pointer in the
3539 * surviving block, and log it.
3540 */
3541 xfs_btree_set_numrecs(left, lrecs + rrecs);
3542	xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
3543 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3544 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3545
3546 /* If there is a right sibling, point it to the remaining block. */
3547 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3548 if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3549 error = xfs_btree_read_buf_block(cur, &cptr, level,
3550 0, &rrblock, &rrbp);
3551 if (error)
3552 goto error0;
3553 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3554 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3555 }
3556
3557 /* Free the deleted block. */
3558 error = cur->bc_ops->free_block(cur, rbp);
3559 if (error)
3560 goto error0;
3561 XFS_BTREE_STATS_INC(cur, free);
3562
3563 /*
3564 * If we joined with the left neighbor, set the buffer in the
3565 * cursor to the left block, and fix up the index.
3566 */
3567 if (bp != lbp) {
3568 cur->bc_bufs[level] = lbp;
3569 cur->bc_ptrs[level] += lrecs;
3570 cur->bc_ra[level] = 0;
3571 }
3572 /*
3573 * If we joined with the right neighbor and there's a level above
3574 * us, increment the cursor at that level.
3575 */
3576 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3577 (level + 1 < cur->bc_nlevels)) {
3578 error = xfs_btree_increment(cur, level + 1, &i);
3579 if (error)
3580 goto error0;
3581 }
3582
3583 /*
3584 * Readjust the ptr at this level if it's not a leaf, since it's
3585 * still pointing at the deletion point, which makes the cursor
3586 * inconsistent. If this makes the ptr 0, the caller fixes it up.
3587 * We can't use decrement because it would change the next level up.
3588 */
3589 if (level > 0)
3590 cur->bc_ptrs[level]--;
3591
3592 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3593 /* Return value means the next level up has something to do. */
3594 *stat = 2;
3595 return 0;
3596
3597error0:
3598 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3599 if (tcur)
3600 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3601 return error;
3602}
3603
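/*
 * Editor's summary (illustrative) of the rebalancing above: once the
 * record is gone, a block still holding at least get_minrecs() entries
 * needs no further work. A too-empty block first tries to borrow one
 * entry from a sibling that can spare it (lshift/rshift); failing
 * that, it is joined with a sibling, the emptied block is freed, and
 * *stat = 2 tells the caller to delete the stale key/ptr one level up.
 */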
3604/*
3605 * Delete the record pointed to by cur.
3606 * On return, the cursor refers to the place where the record was
3607 * (i.e., where it could be re-inserted).
3608 */
3609int /* error */
3610xfs_btree_delete(
3611 struct xfs_btree_cur *cur,
3612 int *stat) /* success/failure */
3613{
3614 int error; /* error return value */
3615 int level;
3616 int i;
3617
3618 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3619
3620 /*
3621 * Go up the tree, starting at leaf level.
3622 *
3623 * If 2 is returned then a join was done; go to the next level.
3624 * Otherwise we are done.
3625 */
3626 for (level = 0, i = 2; i == 2; level++) {
3627 error = xfs_btree_delrec(cur, level, &i);
3628 if (error)
3629 goto error0;
3630 }
3631
3632 if (i == 0) {
3633 for (level = 1; level < cur->bc_nlevels; level++) {
3634 if (cur->bc_ptrs[level] == 0) {
3635 error = xfs_btree_decrement(cur, level, &i);
3636 if (error)
3637 goto error0;
3638 break;
3639 }
3640 }
3641 }
3642
3643 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3644 *stat = i;
3645 return 0;
3646error0:
3647 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3648 return error;
3649}
3650
3651/*
3652 * Get the data from the pointed-to record.
3653 */
3654int /* error */
3655xfs_btree_get_rec(
3656 struct xfs_btree_cur *cur, /* btree cursor */
3657 union xfs_btree_rec **recp, /* output: btree record */
3658 int *stat) /* output: success/failure */
3659{
3660 struct xfs_btree_block *block; /* btree block */
3661 struct xfs_buf *bp; /* buffer pointer */
3662 int ptr; /* record number */
3663#ifdef DEBUG
3664 int error; /* error return value */
3665#endif
3666
3667 ptr = cur->bc_ptrs[0];
3668 block = xfs_btree_get_block(cur, 0, &bp);
3669
3670#ifdef DEBUG
3671 error = xfs_btree_check_block(cur, block, 0, bp);
3672 if (error)
3673 return error;
3674#endif
3675
3676 /*
3677 * Off the right end or left end, return failure.
3678 */
3679 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3680 *stat = 0;
3681 return 0;
3682 }
3683
3684 /*
3685 * Point to the record and extract its data.
3686 */
3687 *recp = xfs_btree_rec_addr(cur, ptr, block);
3688 *stat = 1;
3689 return 0;
3690}
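/*
 * Editor's note: a hypothetical read pattern (illustrative only;
 * xfs_btree_lookup() is the search routine defined earlier in this
 * file; compiled out):
 */
#if 0
	union xfs_btree_rec	*rec;
	int			stat;
	int			error;

	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, &stat);
	if (error || !stat)
		goto done;			/* not found */
	error = xfs_btree_get_rec(cur, &rec, &stat);
	if (error || !stat)
		goto done;
	/* Interpret *rec through the btree-specific union member. */
#endif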
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 1f528a2a3754..789fffdf8b2f 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -39,39 +39,19 @@ extern kmem_zone_t *xfs_btree_cur_zone;
39#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) 39#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
40 40
41/* 41/*
42 * Short form header: space allocation btrees. 42 * Generic btree header.
43 */ 43 *
44typedef struct xfs_btree_sblock { 44 * This is a combination of the actual format used on disk for short and long
45 __be32 bb_magic; /* magic number for block type */ 45 * format btrees. The first three fields are shared by both formats, but
46 __be16 bb_level; /* 0 is a leaf */ 46 * the pointers are different and should be used with care.
47 __be16 bb_numrecs; /* current # of data records */ 47 *
48 __be32 bb_leftsib; /* left sibling block or NULLAGBLOCK */ 48 * To get the size of the actual short or long form headers please use
49 __be32 bb_rightsib; /* right sibling block or NULLAGBLOCK */ 49 * the size macros below. Never use sizeof(xfs_btree_block).
50} xfs_btree_sblock_t;
51
52/*
53 * Long form header: bmap btrees.
54 */
55typedef struct xfs_btree_lblock {
56 __be32 bb_magic; /* magic number for block type */
57 __be16 bb_level; /* 0 is a leaf */
58 __be16 bb_numrecs; /* current # of data records */
59 __be64 bb_leftsib; /* left sibling block or NULLDFSBNO */
60 __be64 bb_rightsib; /* right sibling block or NULLDFSBNO */
61} xfs_btree_lblock_t;
62
63/*
64 * Combined header and structure, used by common code.
65 */ 50 */
66typedef struct xfs_btree_hdr 51struct xfs_btree_block {
67{
68 __be32 bb_magic; /* magic number for block type */ 52 __be32 bb_magic; /* magic number for block type */
69 __be16 bb_level; /* 0 is a leaf */ 53 __be16 bb_level; /* 0 is a leaf */
70 __be16 bb_numrecs; /* current # of data records */ 54 __be16 bb_numrecs; /* current # of data records */
71} xfs_btree_hdr_t;
72
73typedef struct xfs_btree_block {
74 xfs_btree_hdr_t bb_h; /* header */
75 union { 55 union {
76 struct { 56 struct {
77 __be32 bb_leftsib; 57 __be32 bb_leftsib;
@@ -82,7 +62,36 @@ typedef struct xfs_btree_block {
82 __be64 bb_rightsib; 62 __be64 bb_rightsib;
83 } l; /* long form pointers */ 63 } l; /* long form pointers */
84 } bb_u; /* rest */ 64 } bb_u; /* rest */
85} xfs_btree_block_t; 65};
66
67#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */
68#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */
69
70
71/*
72 * Generic key, ptr and record wrapper structures.
73 *
74 * These are disk format structures, and are converted where necessary
75 * by the btree specific code that needs to interpret them.
76 */
77union xfs_btree_ptr {
78 __be32 s; /* short form ptr */
79 __be64 l; /* long form ptr */
80};
81
82union xfs_btree_key {
83 xfs_bmbt_key_t bmbt;
84 xfs_bmdr_key_t bmbr; /* bmbt root block */
85 xfs_alloc_key_t alloc;
86 xfs_inobt_key_t inobt;
87};
88
89union xfs_btree_rec {
90 xfs_bmbt_rec_t bmbt;
91 xfs_bmdr_rec_t bmbr; /* bmbt root block */
92 xfs_alloc_rec_t alloc;
93 xfs_inobt_rec_t inobt;
94};
86 95
87/* 96/*
88 * For logging record fields. 97 * For logging record fields.
@@ -96,46 +105,131 @@ typedef struct xfs_btree_block {
96#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1) 105#define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1)
97 106
98/* 107/*
99 * Boolean to select which form of xfs_btree_block_t.bb_u to use.
100 */
101#define XFS_BTREE_LONG_PTRS(btnum) ((btnum) == XFS_BTNUM_BMAP)
102
103/*
104 * Magic numbers for btree blocks. 108 * Magic numbers for btree blocks.
105 */ 109 */
106extern const __uint32_t xfs_magics[]; 110extern const __uint32_t xfs_magics[];
107 111
108/* 112/*
109 * Maximum and minimum records in a btree block. 113 * Generic stats interface
110 * Given block size, type prefix, and leaf flag (0 or 1). 114 */
111 * The divisor below is equivalent to lf ? (e1) : (e2) but that produces 115#define __XFS_BTREE_STATS_INC(type, stat) \
112 * compiler warnings. 116 XFS_STATS_INC(xs_ ## type ## _2_ ## stat)
113 */ 117#define XFS_BTREE_STATS_INC(cur, stat) \
114#define XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) \ 118do { \
115 ((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \ 119 switch (cur->bc_btnum) { \
116 (((lf) * (uint)sizeof(t ## _rec_t)) + \ 120 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \
117 ((1 - (lf)) * \ 121 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
118 ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t)))))) 122 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
119#define XFS_BTREE_BLOCK_MINRECS(bsz,t,lf) \ 123 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
120 (XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2) 124 case XFS_BTNUM_MAX: ASSERT(0); /* keep gcc quiet */ ; break; \
121 125 } \
122/* 126} while (0)
123 * Record, key, and pointer address calculation macros. 127
124 * Given block size, type prefix, block pointer, and index of requested entry 128#define __XFS_BTREE_STATS_ADD(type, stat, val) \
125 * (first entry numbered 1). 129 XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val)
126 */ 130#define XFS_BTREE_STATS_ADD(cur, stat, val) \
127#define XFS_BTREE_REC_ADDR(t,bb,i) \ 131do { \
128 ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 132 switch (cur->bc_btnum) { \
129 ((i) - 1) * sizeof(t ## _rec_t))) 133 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \
130#define XFS_BTREE_KEY_ADDR(t,bb,i) \ 134 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
131 ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 135 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
132 ((i) - 1) * sizeof(t ## _key_t))) 136 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
133#define XFS_BTREE_PTR_ADDR(t,bb,i,mxr) \ 137 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
134 ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \ 138 case XFS_BTNUM_MAX: ASSERT(0); /* keep gcc quiet */ ; break; \
135 (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t))) 139} while (0)
136 140
137#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ 141#define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */
138 142
143struct xfs_btree_ops {
144 /* size of the key and record structures */
145 size_t key_len;
146 size_t rec_len;
147
148 /* cursor operations */
149 struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *);
150 void (*update_cursor)(struct xfs_btree_cur *src,
151 struct xfs_btree_cur *dst);
152
153 /* update btree root pointer */
154 void (*set_root)(struct xfs_btree_cur *cur,
155 union xfs_btree_ptr *nptr, int level_change);
156 int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp,
157 int level, union xfs_btree_ptr *newroot);
158
159 /* block allocation / freeing */
160 int (*alloc_block)(struct xfs_btree_cur *cur,
161 union xfs_btree_ptr *start_bno,
162 union xfs_btree_ptr *new_bno,
163 int length, int *stat);
164 int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);
165
166 /* update last record information */
167 void (*update_lastrec)(struct xfs_btree_cur *cur,
168 struct xfs_btree_block *block,
169 union xfs_btree_rec *rec,
170 int ptr, int reason);
171
172 /* records in block/level */
173 int (*get_minrecs)(struct xfs_btree_cur *cur, int level);
174 int (*get_maxrecs)(struct xfs_btree_cur *cur, int level);
175
176 /* records on disk; matters for the root-in-inode case. */
177 int (*get_dmaxrecs)(struct xfs_btree_cur *cur, int level);
178
179 /* init values of btree structures */
180 void (*init_key_from_rec)(union xfs_btree_key *key,
181 union xfs_btree_rec *rec);
182 void (*init_rec_from_key)(union xfs_btree_key *key,
183 union xfs_btree_rec *rec);
184 void (*init_rec_from_cur)(struct xfs_btree_cur *cur,
185 union xfs_btree_rec *rec);
186 void (*init_ptr_from_cur)(struct xfs_btree_cur *cur,
187 union xfs_btree_ptr *ptr);
188
189 /* difference between key value and cursor value */
190 __int64_t (*key_diff)(struct xfs_btree_cur *cur,
191 union xfs_btree_key *key);
192
193#ifdef DEBUG
194 /* check that k1 is lower than k2 */
195 int (*keys_inorder)(struct xfs_btree_cur *cur,
196 union xfs_btree_key *k1,
197 union xfs_btree_key *k2);
198
199 /* check that r1 is lower than r2 */
200 int (*recs_inorder)(struct xfs_btree_cur *cur,
201 union xfs_btree_rec *r1,
202 union xfs_btree_rec *r2);
203#endif
204
205 /* btree tracing */
206#ifdef XFS_BTREE_TRACE
207 void (*trace_enter)(struct xfs_btree_cur *, const char *,
208 char *, int, int, __psunsigned_t,
209 __psunsigned_t, __psunsigned_t,
210 __psunsigned_t, __psunsigned_t,
211 __psunsigned_t, __psunsigned_t,
212 __psunsigned_t, __psunsigned_t,
213 __psunsigned_t, __psunsigned_t);
214 void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
215 __uint64_t *, __uint64_t *);
216 void (*trace_key)(struct xfs_btree_cur *,
217 union xfs_btree_key *, __uint64_t *,
218 __uint64_t *);
219 void (*trace_record)(struct xfs_btree_cur *,
220 union xfs_btree_rec *, __uint64_t *,
221 __uint64_t *, __uint64_t *);
222#endif
223};
224
225/*
226 * Reasons for the update_lastrec method to be called.
227 */
228#define LASTREC_UPDATE 0
229#define LASTREC_INSREC 1
230#define LASTREC_DELREC 2
231
232
139/* 233/*
140 * Btree cursor structure. 234 * Btree cursor structure.
141 * This collects all information needed by the btree code in one place. 235 * This collects all information needed by the btree code in one place.
@@ -144,6 +238,8 @@ typedef struct xfs_btree_cur
144{ 238{
145 struct xfs_trans *bc_tp; /* transaction we're in, if any */ 239 struct xfs_trans *bc_tp; /* transaction we're in, if any */
146 struct xfs_mount *bc_mp; /* file system mount struct */ 240 struct xfs_mount *bc_mp; /* file system mount struct */
241 const struct xfs_btree_ops *bc_ops;
242 uint bc_flags; /* btree features - below */
147 union { 243 union {
148 xfs_alloc_rec_incore_t a; 244 xfs_alloc_rec_incore_t a;
149 xfs_bmbt_irec_t b; 245 xfs_bmbt_irec_t b;
@@ -175,94 +271,40 @@ typedef struct xfs_btree_cur
175 } bc_private; /* per-btree type data */ 271 } bc_private; /* per-btree type data */
176} xfs_btree_cur_t; 272} xfs_btree_cur_t;
177 273
274/* cursor flags */
275#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
276#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
277#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
278
279
178#define XFS_BTREE_NOERROR 0 280#define XFS_BTREE_NOERROR 0
179#define XFS_BTREE_ERROR 1 281#define XFS_BTREE_ERROR 1
180 282
181/* 283/*
182 * Convert from buffer to btree block header. 284 * Convert from buffer to btree block header.
183 */ 285 */
184#define XFS_BUF_TO_BLOCK(bp) ((xfs_btree_block_t *)XFS_BUF_PTR(bp)) 286#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp))
185#define XFS_BUF_TO_LBLOCK(bp) ((xfs_btree_lblock_t *)XFS_BUF_PTR(bp))
186#define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)XFS_BUF_PTR(bp))
187 287
188 288
189#ifdef __KERNEL__
190
191#ifdef DEBUG
192/* 289/*
193 * Debug routine: check that block header is ok. 290 * Check that block header is ok.
194 */ 291 */
195void 292int
196xfs_btree_check_block( 293xfs_btree_check_block(
197 xfs_btree_cur_t *cur, /* btree cursor */ 294 struct xfs_btree_cur *cur, /* btree cursor */
198 xfs_btree_block_t *block, /* generic btree block pointer */ 295 struct xfs_btree_block *block, /* generic btree block pointer */
199 int level, /* level of the btree block */
200 struct xfs_buf *bp); /* buffer containing block, if any */
201
202/*
203 * Debug routine: check that keys are in the right order.
204 */
205void
206xfs_btree_check_key(
207 xfs_btnum_t btnum, /* btree identifier */
208 void *ak1, /* pointer to left (lower) key */
209 void *ak2); /* pointer to right (higher) key */
210
211/*
212 * Debug routine: check that records are in the right order.
213 */
214void
215xfs_btree_check_rec(
216 xfs_btnum_t btnum, /* btree identifier */
217 void *ar1, /* pointer to left (lower) record */
218 void *ar2); /* pointer to right (higher) record */
219#else
220#define xfs_btree_check_block(a,b,c,d)
221#define xfs_btree_check_key(a,b,c)
222#define xfs_btree_check_rec(a,b,c)
223#endif /* DEBUG */
224
225/*
226 * Checking routine: check that long form block header is ok.
227 */
228int /* error (0 or EFSCORRUPTED) */
229xfs_btree_check_lblock(
230 xfs_btree_cur_t *cur, /* btree cursor */
231 xfs_btree_lblock_t *block, /* btree long form block pointer */
232 int level, /* level of the btree block */ 296 int level, /* level of the btree block */
233 struct xfs_buf *bp); /* buffer containing block, if any */ 297 struct xfs_buf *bp); /* buffer containing block, if any */
234 298
235/* 299/*
236 * Checking routine: check that (long) pointer is ok. 300 * Check that (long) pointer is ok.
237 */ 301 */
238int /* error (0 or EFSCORRUPTED) */ 302int /* error (0 or EFSCORRUPTED) */
239xfs_btree_check_lptr( 303xfs_btree_check_lptr(
240 xfs_btree_cur_t *cur, /* btree cursor */ 304 struct xfs_btree_cur *cur, /* btree cursor */
241 xfs_dfsbno_t ptr, /* btree block disk address */ 305 xfs_dfsbno_t ptr, /* btree block disk address */
242 int level); /* btree block level */ 306 int level); /* btree block level */
243 307
244#define xfs_btree_check_lptr_disk(cur, ptr, level) \
245 xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level)
246
247/*
248 * Checking routine: check that short form block header is ok.
249 */
250int /* error (0 or EFSCORRUPTED) */
251xfs_btree_check_sblock(
252 xfs_btree_cur_t *cur, /* btree cursor */
253 xfs_btree_sblock_t *block, /* btree short form block pointer */
254 int level, /* level of the btree block */
255 struct xfs_buf *bp); /* buffer containing block */
256
257/*
258 * Checking routine: check that (short) pointer is ok.
259 */
260int /* error (0 or EFSCORRUPTED) */
261xfs_btree_check_sptr(
262 xfs_btree_cur_t *cur, /* btree cursor */
263 xfs_agblock_t ptr, /* btree block disk address */
264 int level); /* btree block level */
265
266/* 308/*
267 * Delete the btree cursor. 309 * Delete the btree cursor.
268 */ 310 */
@@ -281,15 +323,6 @@ xfs_btree_dup_cursor(
281 xfs_btree_cur_t **ncur);/* output cursor */ 323 xfs_btree_cur_t **ncur);/* output cursor */
282 324
283/* 325/*
284 * Change the cursor to point to the first record in the current block
285 * at the given level. Other levels are unaffected.
286 */
287int /* success=1, failure=0 */
288xfs_btree_firstrec(
289 xfs_btree_cur_t *cur, /* btree cursor */
290 int level); /* level to change */
291
292/*
293 * Get a buffer for the block, return it with no data read. 326 * Get a buffer for the block, return it with no data read.
294 * Long-form addressing. 327 * Long-form addressing.
295 */ 328 */
@@ -313,20 +346,6 @@ xfs_btree_get_bufs(
313 uint lock); /* lock flags for get_buf */ 346 uint lock); /* lock flags for get_buf */
314 347
315/* 348/*
316 * Allocate a new btree cursor.
317 * The cursor is either for allocation (A) or bmap (B).
318 */
319xfs_btree_cur_t * /* new btree cursor */
320xfs_btree_init_cursor(
321 struct xfs_mount *mp, /* file system mount point */
322 struct xfs_trans *tp, /* transaction pointer */
323 struct xfs_buf *agbp, /* (A only) buffer for agf structure */
324 xfs_agnumber_t agno, /* (A only) allocation group number */
325 xfs_btnum_t btnum, /* btree identifier */
326 struct xfs_inode *ip, /* (B only) inode owning the btree */
327 int whichfork); /* (B only) data/attr fork */
328
329/*
330 * Check for the cursor referring to the last block at the given level. 349 * Check for the cursor referring to the last block at the given level.
331 */ 350 */
332int /* 1=is last block, 0=not last block */ 351int /* 1=is last block, 0=not last block */
@@ -335,15 +354,6 @@ xfs_btree_islastblock(
335 int level); /* level to check */ 354 int level); /* level to check */
336 355
337/* 356/*
338 * Change the cursor to point to the last record in the current block
339 * at the given level. Other levels are unaffected.
340 */
341int /* success=1, failure=0 */
342xfs_btree_lastrec(
343 xfs_btree_cur_t *cur, /* btree cursor */
344 int level); /* level to change */
345
346/*
347 * Compute first and last byte offsets for the fields given. 357 * Compute first and last byte offsets for the fields given.
348 * Interprets the offsets table, which contains struct field offsets. 358 * Interprets the offsets table, which contains struct field offsets.
349 */ 359 */
@@ -404,39 +414,53 @@ xfs_btree_reada_bufs(
404 xfs_extlen_t count); /* count of filesystem blocks */ 414 xfs_extlen_t count); /* count of filesystem blocks */
405 415
406/* 416/*
407 * Read-ahead btree blocks, at the given level. 417 * Set the buffer for level "lev" in the cursor to bp, releasing
408 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 418 * any previous buffer.
409 */ 419 */
410int /* readahead block count */ 420void
411xfs_btree_readahead_core( 421xfs_btree_setbuf(
412 xfs_btree_cur_t *cur, /* btree cursor */ 422 xfs_btree_cur_t *cur, /* btree cursor */
413 int lev, /* level in btree */ 423 int lev, /* level in btree */
414 int lr); /* left/right bits */ 424 struct xfs_buf *bp); /* new buffer to set */
415 425
416static inline int /* readahead block count */
417xfs_btree_readahead(
418 xfs_btree_cur_t *cur, /* btree cursor */
419 int lev, /* level in btree */
420 int lr) /* left/right bits */
421{
422 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
423 return 0;
424 426
425 return xfs_btree_readahead_core(cur, lev, lr); 427/*
426} 428 * Common btree core entry points.
429 */
430int xfs_btree_increment(struct xfs_btree_cur *, int, int *);
431int xfs_btree_decrement(struct xfs_btree_cur *, int, int *);
432int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *);
433int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *);
434int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
435int xfs_btree_kill_iroot(struct xfs_btree_cur *);
436int xfs_btree_insert(struct xfs_btree_cur *, int *);
437int xfs_btree_delete(struct xfs_btree_cur *, int *);
438int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
427 439
440/*
441 * Internal btree helpers also used by xfs_bmap.c.
442 */
443void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
444void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int);
428 445
429/* 446/*
430 * Set the buffer for level "lev" in the cursor to bp, releasing 447 * Helpers.
431 * any previous buffer.
432 */ 448 */
433void 449static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block)
434xfs_btree_setbuf( 450{
435 xfs_btree_cur_t *cur, /* btree cursor */ 451 return be16_to_cpu(block->bb_numrecs);
436 int lev, /* level in btree */ 452}
437 struct xfs_buf *bp); /* new buffer to set */ 453
454static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block,
455 __uint16_t numrecs)
456{
457 block->bb_numrecs = cpu_to_be16(numrecs);
458}
438 459
439#endif /* __KERNEL__ */ 460static inline int xfs_btree_get_level(struct xfs_btree_block *block)
461{
462 return be16_to_cpu(block->bb_level);
463}
440 464
441 465
442/* 466/*
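
[The xfs_btree.h changes above replace per-btree duplicated code with one generic implementation driven through the xfs_btree_ops vtable: each btree type supplies callbacks such as get_maxrecs and key_diff, and the common entry points (xfs_btree_lookup, xfs_btree_insert, ...) dispatch through cur->bc_ops. A minimal userspace sketch of that dispatch pattern follows; all names are hypothetical stand-ins, not kernel code.]

#include <stdio.h>

/* Toy cursor: the real xfs_btree_cur carries far more state. */
struct cur;

struct btree_ops {
	int (*get_maxrecs)(struct cur *cur, int level);
	/* <0, 0, >0 as the probe key is below, at, or above the cursor */
	long long (*key_diff)(struct cur *cur, int key);
};

struct cur {
	const struct btree_ops *ops;	/* like cur->bc_ops */
	int rec;			/* record the cursor points at */
};

/* Generic code only ever calls through the ops table. */
static int toy_lookup(struct cur *cur, int key)
{
	return cur->ops->key_diff(cur, key) == 0;
}

static int fixed_get_maxrecs(struct cur *cur, int level)
{
	(void)cur; (void)level;
	return 16;
}

static long long int_key_diff(struct cur *cur, int key)
{
	return (long long)key - cur->rec;
}

static const struct btree_ops int_ops = {
	.get_maxrecs	= fixed_get_maxrecs,
	.key_diff	= int_key_diff,
};

int main(void)
{
	struct cur c = { .ops = &int_ops, .rec = 42 };

	printf("maxrecs=%d found=%d\n",
	       c.ops->get_maxrecs(&c, 0), toy_lookup(&c, 42));
	return 0;
}
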
diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c
new file mode 100644
index 000000000000..44ff942a0fda
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.c
@@ -0,0 +1,249 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_types.h"
20#include "xfs_inum.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_alloc_btree.h"
23#include "xfs_ialloc_btree.h"
24#include "xfs_inode.h"
25#include "xfs_btree.h"
26#include "xfs_btree_trace.h"
27
28STATIC void
29xfs_btree_trace_ptr(
30 struct xfs_btree_cur *cur,
31 union xfs_btree_ptr ptr,
32 __psunsigned_t *high,
33 __psunsigned_t *low)
34{
35 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
36 __u64 val = be64_to_cpu(ptr.l);
37 *high = val >> 32;
38 *low = (int)val;
39 } else {
40 *high = 0;
41 *low = be32_to_cpu(ptr.s);
42 }
43}
44
45/*
46 * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
47 */
48void
49xfs_btree_trace_argbi(
50 const char *func,
51 struct xfs_btree_cur *cur,
52 struct xfs_buf *b,
53 int i,
54 int line)
55{
56 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI,
57 line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
58 0, 0, 0, 0);
59}
60
61/*
62 * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
63 */
64void
65xfs_btree_trace_argbii(
66 const char *func,
67 struct xfs_btree_cur *cur,
68 struct xfs_buf *b,
69 int i0,
70 int i1,
71 int line)
72{
73 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII,
74 line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
75 0, 0, 0, 0);
76}
77
78/*
79 * Add a trace buffer entry for arguments, for 3 block-length args
80 * and an integer arg.
81 */
82void
83xfs_btree_trace_argfffi(
84 const char *func,
85 struct xfs_btree_cur *cur,
86 xfs_dfiloff_t o,
87 xfs_dfsbno_t b,
88 xfs_dfilblks_t i,
89 int j,
90 int line)
91{
92 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI,
93 line,
94 o >> 32, (int)o,
95 b >> 32, (int)b,
96 i >> 32, (int)i,
97 (int)j, 0, 0, 0, 0);
98}
99
100/*
101 * Add a trace buffer entry for arguments, for one integer arg.
102 */
103void
104xfs_btree_trace_argi(
105 const char *func,
106 struct xfs_btree_cur *cur,
107 int i,
108 int line)
109{
110 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI,
111 line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
112}
113
114/*
115 * Add a trace buffer entry for arguments, for int, fsblock, key.
116 */
117void
118xfs_btree_trace_argipk(
119 const char *func,
120 struct xfs_btree_cur *cur,
121 int i,
122 union xfs_btree_ptr ptr,
123 union xfs_btree_key *key,
124 int line)
125{
126 __psunsigned_t high, low;
127 __uint64_t l0, l1;
128
129 xfs_btree_trace_ptr(cur, ptr, &high, &low);
130 cur->bc_ops->trace_key(cur, key, &l0, &l1);
131 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK,
132 line, i, high, low,
133 l0 >> 32, (int)l0,
134 l1 >> 32, (int)l1,
135 0, 0, 0, 0);
136}
137
138/*
139 * Add a trace buffer entry for arguments, for int, fsblock, rec.
140 */
141void
142xfs_btree_trace_argipr(
143 const char *func,
144 struct xfs_btree_cur *cur,
145 int i,
146 union xfs_btree_ptr ptr,
147 union xfs_btree_rec *rec,
148 int line)
149{
150 __psunsigned_t high, low;
151 __uint64_t l0, l1, l2;
152
153 xfs_btree_trace_ptr(cur, ptr, &high, &low);
154 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
155 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR,
156 line, i,
157 high, low,
158 l0 >> 32, (int)l0,
159 l1 >> 32, (int)l1,
160 l2 >> 32, (int)l2,
161 0, 0);
162}
163
164/*
165 * Add a trace buffer entry for arguments, for int, key.
166 */
167void
168xfs_btree_trace_argik(
169 const char *func,
170 struct xfs_btree_cur *cur,
171 int i,
172 union xfs_btree_key *key,
173 int line)
174{
175 __uint64_t l0, l1;
176
177 cur->bc_ops->trace_key(cur, key, &l0, &l1);
178 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK,
179 line, i,
180 l0 >> 32, (int)l0,
181 l1 >> 32, (int)l1,
182 0, 0, 0, 0, 0, 0);
183}
184
185/*
186 * Add a trace buffer entry for arguments, for record.
187 */
188void
189xfs_btree_trace_argr(
190 const char *func,
191 struct xfs_btree_cur *cur,
192 union xfs_btree_rec *rec,
193 int line)
194{
195 __uint64_t l0, l1, l2;
196
197 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
198 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR,
199 line,
200 l0 >> 32, (int)l0,
201 l1 >> 32, (int)l1,
202 l2 >> 32, (int)l2,
203 0, 0, 0, 0, 0);
204}
205
206/*
207 * Add a trace buffer entry for the cursor/operation.
208 */
209void
210xfs_btree_trace_cursor(
211 const char *func,
212 struct xfs_btree_cur *cur,
213 int type,
214 int line)
215{
216 __uint32_t s0;
217 __uint64_t l0, l1;
218 char *s;
219
220 switch (type) {
221 case XBT_ARGS:
222 s = "args";
223 break;
224 case XBT_ENTRY:
225 s = "entry";
226 break;
227 case XBT_ERROR:
228 s = "error";
229 break;
230 case XBT_EXIT:
231 s = "exit";
232 break;
233 default:
234 s = "unknown";
235 break;
236 }
237
238 cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
239 cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
240 s0,
241 l0 >> 32, (int)l0,
242 l1 >> 32, (int)l1,
243 (__psunsigned_t)cur->bc_bufs[0],
244 (__psunsigned_t)cur->bc_bufs[1],
245 (__psunsigned_t)cur->bc_bufs[2],
246 (__psunsigned_t)cur->bc_bufs[3],
247 (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
248 (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
249}
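
[xfs_btree_trace_ptr above has to squeeze a block pointer that may be 64 bits wide (XFS_BTREE_LONG_PTRS) into two word-sized trace slots by splitting it into high and low halves. A standalone sketch of the split-and-reassemble arithmetic, with hypothetical names:]

#include <stdio.h>
#include <stdint.h>

/* Split a 64-bit value into two 32-bit trace words, as the
 * tracing code does for long-form block pointers. */
static void split64(uint64_t val, uint32_t *high, uint32_t *low)
{
	*high = (uint32_t)(val >> 32);
	*low = (uint32_t)val;
}

static uint64_t join64(uint32_t high, uint32_t low)
{
	return ((uint64_t)high << 32) | low;
}

int main(void)
{
	uint32_t hi, lo;
	uint64_t ptr = 0x123456789abcdef0ULL;

	split64(ptr, &hi, &lo);
	printf("high=%#x low=%#x roundtrip ok=%d\n",
	       hi, lo, join64(hi, lo) == ptr);
	return 0;
}
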
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
new file mode 100644
index 000000000000..b3f5eb3c3c6c
--- /dev/null
+++ b/fs/xfs/xfs_btree_trace.h
@@ -0,0 +1,116 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_BTREE_TRACE_H__
19#define __XFS_BTREE_TRACE_H__
20
21struct xfs_btree_cur;
22struct xfs_buf;
23
24
25/*
26 * Trace hooks.
27 * i,j = integer (32 bit)
28 * b = btree block buffer (xfs_buf_t)
29 * p = btree ptr
30 * r = btree record
31 * k = btree key
32 */
33
34#ifdef XFS_BTREE_TRACE
35
36/*
37 * Trace buffer entry types.
38 */
39#define XFS_BTREE_KTRACE_ARGBI 1
40#define XFS_BTREE_KTRACE_ARGBII 2
41#define XFS_BTREE_KTRACE_ARGFFFI 3
42#define XFS_BTREE_KTRACE_ARGI 4
43#define XFS_BTREE_KTRACE_ARGIPK 5
44#define XFS_BTREE_KTRACE_ARGIPR 6
45#define XFS_BTREE_KTRACE_ARGIK 7
46#define XFS_BTREE_KTRACE_ARGR 8
47#define XFS_BTREE_KTRACE_CUR 9
48
49/*
50 * Sub-types for cursor traces.
51 */
52#define XBT_ARGS 0
53#define XBT_ENTRY 1
54#define XBT_ERROR 2
55#define XBT_EXIT 3
56
57void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
58 struct xfs_buf *, int, int);
59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
60 struct xfs_buf *, int, int, int);
61void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *,
62 xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int);
63void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
64void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
65 union xfs_btree_ptr, union xfs_btree_key *, int);
66void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
67 union xfs_btree_ptr, union xfs_btree_rec *, int);
68void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
69 union xfs_btree_key *, int);
70void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
71 union xfs_btree_rec *, int);
72void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
73
74
75#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */
76extern ktrace_t *xfs_allocbt_trace_buf;
77
78#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */
79extern ktrace_t *xfs_inobt_trace_buf;
80
81#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */
82#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */
83extern ktrace_t *xfs_bmbt_trace_buf;
84
85
86#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
87 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
88#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
89 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
90#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \
91 xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__)
92#define XFS_BTREE_TRACE_ARGI(c, i) \
93 xfs_btree_trace_argi(__func__, c, i, __LINE__)
94#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
95 xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
96#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \
97 xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
98#define XFS_BTREE_TRACE_ARGIK(c, i, k) \
99 xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
100#define XFS_BTREE_TRACE_ARGR(c, r) \
101 xfs_btree_trace_argr(__func__, c, r, __LINE__)
102#define XFS_BTREE_TRACE_CURSOR(c, t) \
103 xfs_btree_trace_cursor(__func__, c, t, __LINE__)
104#else
105#define XFS_BTREE_TRACE_ARGBI(c, b, i)
106#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
107#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j)
108#define XFS_BTREE_TRACE_ARGI(c, i)
109#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
110#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
111#define XFS_BTREE_TRACE_ARGIK(c, i, k)
112#define XFS_BTREE_TRACE_ARGR(c, r)
113#define XFS_BTREE_TRACE_CURSOR(c, t)
114#endif /* XFS_BTREE_TRACE */
115
116#endif /* __XFS_BTREE_TRACE_H__ */
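
[The XFS_BTREE_TRACE_* macros above capture the call site automatically by expanding __func__ and __LINE__, and compile away when XFS_BTREE_TRACE is unset, so call sites cost nothing in normal builds. A minimal sketch of the same pattern, with hypothetical names (the real disabled macros expand to empty statements; ((void)0) is used here for the same effect):]

#include <stdio.h>

#ifdef TOY_TRACE
static void toy_trace(const char *func, int arg, int line)
{
	fprintf(stderr, "%s:%d arg=%d\n", func, line, arg);
}
#define TOY_TRACE_ARGI(i)	toy_trace(__func__, (i), __LINE__)
#else
/* Expands to a no-op: tracing costs nothing when disabled. */
#define TOY_TRACE_ARGI(i)	((void)0)
#endif

static int work(int i)
{
	TOY_TRACE_ARGI(i);	/* the call site never changes */
	return i * 2;
}

int main(void)
{
	printf("%d\n", work(21));	/* build with -DTOY_TRACE to see traces */
	return 0;
}
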
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 002fc2617c8e..92af4098c7e8 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -375,7 +375,7 @@ xfs_buf_item_unpin(
375 xfs_buf_log_item_t *bip, 375 xfs_buf_log_item_t *bip,
376 int stale) 376 int stale)
377{ 377{
378 xfs_mount_t *mp; 378 struct xfs_ail *ailp;
379 xfs_buf_t *bp; 379 xfs_buf_t *bp;
380 int freed; 380 int freed;
381 381
@@ -387,7 +387,7 @@ xfs_buf_item_unpin(
387 xfs_buftrace("XFS_UNPIN", bp); 387 xfs_buftrace("XFS_UNPIN", bp);
388 388
389 freed = atomic_dec_and_test(&bip->bli_refcount); 389 freed = atomic_dec_and_test(&bip->bli_refcount);
390 mp = bip->bli_item.li_mountp; 390 ailp = bip->bli_item.li_ailp;
391 xfs_bunpin(bp); 391 xfs_bunpin(bp);
392 if (freed && stale) { 392 if (freed && stale) {
393 ASSERT(bip->bli_flags & XFS_BLI_STALE); 393 ASSERT(bip->bli_flags & XFS_BLI_STALE);
@@ -399,17 +399,17 @@ xfs_buf_item_unpin(
399 xfs_buftrace("XFS_UNPIN STALE", bp); 399 xfs_buftrace("XFS_UNPIN STALE", bp);
400 /* 400 /*
401 * If we get called here because of an IO error, we may 401 * If we get called here because of an IO error, we may
402 * or may not have the item on the AIL. xfs_trans_delete_ail() 402 * or may not have the item on the AIL. xfs_trans_ail_delete()
403 * will take care of that situation. 403 * will take care of that situation.
404 * xfs_trans_delete_ail() drops the AIL lock. 404 * xfs_trans_ail_delete() drops the AIL lock.
405 */ 405 */
406 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 406 if (bip->bli_flags & XFS_BLI_STALE_INODE) {
407 xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); 407 xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
408 XFS_BUF_SET_FSPRIVATE(bp, NULL); 408 XFS_BUF_SET_FSPRIVATE(bp, NULL);
409 XFS_BUF_CLR_IODONE_FUNC(bp); 409 XFS_BUF_CLR_IODONE_FUNC(bp);
410 } else { 410 } else {
411 spin_lock(&mp->m_ail_lock); 411 spin_lock(&ailp->xa_lock);
412 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); 412 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
413 xfs_buf_item_relse(bp); 413 xfs_buf_item_relse(bp);
414 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); 414 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
415 } 415 }
@@ -707,8 +707,8 @@ xfs_buf_item_init(
707 * the first. If we do already have one, there is 707 * the first. If we do already have one, there is
708 * nothing to do here so return. 708 * nothing to do here so return.
709 */ 709 */
710 if (XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *) != mp) 710 if (bp->b_mount != mp)
711 XFS_BUF_SET_FSPRIVATE3(bp, mp); 711 bp->b_mount = mp;
712 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); 712 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
713 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 713 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
714 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 714 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
@@ -731,6 +731,7 @@ xfs_buf_item_init(
731 bip->bli_item.li_type = XFS_LI_BUF; 731 bip->bli_item.li_type = XFS_LI_BUF;
732 bip->bli_item.li_ops = &xfs_buf_item_ops; 732 bip->bli_item.li_ops = &xfs_buf_item_ops;
733 bip->bli_item.li_mountp = mp; 733 bip->bli_item.li_mountp = mp;
734 bip->bli_item.li_ailp = mp->m_ail;
734 bip->bli_buf = bp; 735 bip->bli_buf = bp;
735 xfs_buf_hold(bp); 736 xfs_buf_hold(bp);
736 bip->bli_format.blf_type = XFS_LI_BUF; 737 bip->bli_format.blf_type = XFS_LI_BUF;
@@ -997,21 +998,7 @@ xfs_buf_iodone_callbacks(
997 xfs_buf_do_callbacks(bp, lip); 998 xfs_buf_do_callbacks(bp, lip);
998 XFS_BUF_SET_FSPRIVATE(bp, NULL); 999 XFS_BUF_SET_FSPRIVATE(bp, NULL);
999 XFS_BUF_CLR_IODONE_FUNC(bp); 1000 XFS_BUF_CLR_IODONE_FUNC(bp);
1000 1001 xfs_biodone(bp);
1001 /*
1002 * XFS_SHUT flag gets set when we go thru the
1003 * entire buffer cache and deliberately start
1004 * throwing away delayed write buffers.
1005 * Since there's no biowait done on those,
1006 * we should just brelse them.
1007 */
1008 if (XFS_BUF_ISSHUT(bp)) {
1009 XFS_BUF_UNSHUT(bp);
1010 xfs_buf_relse(bp);
1011 } else {
1012 xfs_biodone(bp);
1013 }
1014
1015 return; 1002 return;
1016 } 1003 }
1017 1004
@@ -1122,27 +1109,23 @@ xfs_buf_iodone(
1122 xfs_buf_t *bp, 1109 xfs_buf_t *bp,
1123 xfs_buf_log_item_t *bip) 1110 xfs_buf_log_item_t *bip)
1124{ 1111{
1125 struct xfs_mount *mp; 1112 struct xfs_ail *ailp = bip->bli_item.li_ailp;
1126 1113
1127 ASSERT(bip->bli_buf == bp); 1114 ASSERT(bip->bli_buf == bp);
1128 1115
1129 xfs_buf_rele(bp); 1116 xfs_buf_rele(bp);
1130 mp = bip->bli_item.li_mountp;
1131 1117
1132 /* 1118 /*
1133 * If we are forcibly shutting down, this may well be 1119 * If we are forcibly shutting down, this may well be
1134 * off the AIL already. That's because we simulate the 1120 * off the AIL already. That's because we simulate the
1135 * log-committed callbacks to unpin these buffers. Or we may never 1121 * log-committed callbacks to unpin these buffers. Or we may never
1136 * have put this item on the AIL because the transaction was 1122 * have put this item on the AIL because the transaction was
1137 * aborted forcibly. xfs_trans_delete_ail() takes care of these. 1123 * aborted forcibly. xfs_trans_ail_delete() takes care of these.
1138 * 1124 *
1139 * Either way, AIL is useless if we're forcing a shutdown. 1125 * Either way, AIL is useless if we're forcing a shutdown.
1140 */ 1126 */
1141 spin_lock(&mp->m_ail_lock); 1127 spin_lock(&ailp->xa_lock);
1142 /* 1128 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
1143 * xfs_trans_delete_ail() drops the AIL lock.
1144 */
1145 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
1146 xfs_buf_item_free(bip); 1129 xfs_buf_item_free(bip);
1147} 1130}
1148 1131
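
[The xfs_buf_item.c hunks above are part of moving the AIL out of the mount structure into a separate struct xfs_ail; each log item now caches an li_ailp back-pointer, so completion paths lock the AIL directly instead of reaching through li_mountp. A userspace sketch of that back-pointer pattern, with hypothetical names and a pthread mutex standing in for the spinlock:]

#include <stdio.h>
#include <pthread.h>

/* The list owns its lock, like struct xfs_ail owns xa_lock. */
struct ail {
	pthread_mutex_t lock;
	int nitems;
};

struct log_item {
	struct ail *ailp;	/* cached back-pointer, like li_ailp */
};

/* Completion code locks through the item's back-pointer and never
 * needs to know about the containing mount. */
static void item_done(struct log_item *lip)
{
	struct ail *ailp = lip->ailp;

	pthread_mutex_lock(&ailp->lock);
	ailp->nitems--;
	pthread_mutex_unlock(&ailp->lock);
}

int main(void)
{
	struct ail a = { PTHREAD_MUTEX_INITIALIZER, 1 };
	struct log_item li = { .ailp = &a };

	item_done(&li);
	printf("items left: %d\n", a.nitems);
	return 0;
}
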
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
deleted file mode 100644
index d2ce5dd70d87..000000000000
--- a/fs/xfs/xfs_clnt.h
+++ /dev/null
@@ -1,105 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_CLNT_H__
19#define __XFS_CLNT_H__
20
21/*
22 * XFS arguments structure, constructed from the arguments we
23 * are passed via the mount system call.
24 *
25 * NOTE: The mount system call is handled differently between
26 * Linux and IRIX. In IRIX we worked with a binary data
27 * structure coming in across the syscall interface from user
28 * space (the mount userspace knows about each filesystem type
29 * and the set of valid options for it, and converts the users
30 * argument string into a binary structure _before_ making the
31 * system call), and the ABI issues that this implies.
32 *
33 * In Linux, we are passed a comma separated set of options;
34 * ie. a NULL terminated string of characters. Userspace mount
35 * code does not have any knowledge of mount options expected by
36 * each filesystem type and so each filesystem parses its mount
37 * options in kernel space.
38 *
39 * For the Linux port, we kept this structure pretty much intact
40 * and use it internally (because the existing code groks it).
41 */
42struct xfs_mount_args {
43 int flags; /* flags -> see XFSMNT_... macros below */
44 int flags2; /* flags -> see XFSMNT2_... macros below */
45 int logbufs; /* Number of log buffers, -1 to default */
46 int logbufsize; /* Size of log buffers, -1 to default */
47 char fsname[MAXNAMELEN+1]; /* data device name */
48 char rtname[MAXNAMELEN+1]; /* realtime device filename */
49 char logname[MAXNAMELEN+1]; /* journal device filename */
50 char mtpt[MAXNAMELEN+1]; /* filesystem mount point */
51 int sunit; /* stripe unit (BBs) */
52 int swidth; /* stripe width (BBs), multiple of sunit */
53 uchar_t iosizelog; /* log2 of the preferred I/O size */
54 int ihashsize; /* inode hash table size (buckets) */
55};
56
57/*
58 * XFS mount option flags -- args->flags1
59 */
60#define XFSMNT_ATTR2 0x00000001 /* allow ATTR2 EA format */
61#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount
62 * compatible */
63#define XFSMNT_INO64 0x00000004 /* move inode numbers up
64 * past 2^32 */
65#define XFSMNT_UQUOTA 0x00000008 /* user quota accounting */
66#define XFSMNT_PQUOTA 0x00000010 /* IRIX prj quota accounting */
67#define XFSMNT_UQUOTAENF 0x00000020 /* user quota limit
68 * enforcement */
69#define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit
70 * enforcement */
71#define XFSMNT_QUIET 0x00000080 /* don't report mount errors */
72#define XFSMNT_NOALIGN 0x00000200 /* don't allocate at
73 * stripe boundaries*/
74#define XFSMNT_RETERR 0x00000400 /* return error to user */
75#define XFSMNT_NORECOVERY 0x00000800 /* no recovery, implies
76 * read-only mount */
77#define XFSMNT_SHARED 0x00001000 /* shared XFS mount */
78#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */
79#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */
80 /* (osyncisdsync is default) */
81#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */
82#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32
83 * bits of address space */
84#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */
85#define XFSMNT_GQUOTAENF 0x00800000 /* group quota limit
86 * enforcement */
87#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
88#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
89#define XFSMNT_BARRIER 0x04000000 /* use write barriers */
90#define XFSMNT_IKEEP 0x08000000 /* inode cluster delete */
91#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width
92 * allocation */
93#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename
94 * symlink,mkdir,rmdir,mknod */
95#define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */
96
97/*
98 * XFS mount option flags -- args->flags2
99 */
100#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred
101 * I/O size in stat(2) */
102#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams
103 * allocator */
104
105#endif /* __XFS_CLNT_H__ */
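
[With xfs_clnt.h deleted, the intermediate xfs_mount_args structure is gone and mount options are parsed straight into mount flags. As the removed comment explains, Linux hands the filesystem a comma-separated option string rather than an IRIX-style binary structure. A sketch of that style of token-to-flag parsing, with hypothetical flag names, not the actual XFS parser:]

#define _DEFAULT_SOURCE		/* for strsep() on glibc */
#include <stdio.h>
#include <string.h>

#define OPT_WSYNC	(1 << 0)	/* hypothetical stand-ins */
#define OPT_NOALIGN	(1 << 1)

/* Walk a comma-separated option string and accumulate flag bits:
 * the general shape of a Linux mount option parser. */
static int parse_options(char *opts, unsigned int *flags)
{
	char *p;

	*flags = 0;
	while ((p = strsep(&opts, ",")) != NULL) {
		if (!*p)
			continue;
		if (!strcmp(p, "wsync"))
			*flags |= OPT_WSYNC;
		else if (!strcmp(p, "noalign"))
			*flags |= OPT_NOALIGN;
		else
			return -1;	/* unknown option */
	}
	return 0;
}

int main(void)
{
	char opts[] = "wsync,noalign";
	unsigned int flags;

	if (parse_options(opts, &flags) == 0)
		printf("flags=%#x\n", flags);
	return 0;
}
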
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 8be0b00ede9a..70b710c1792d 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -72,27 +72,7 @@ typedef struct xfs_da_intnode {
72typedef struct xfs_da_node_hdr xfs_da_node_hdr_t; 72typedef struct xfs_da_node_hdr xfs_da_node_hdr_t;
73typedef struct xfs_da_node_entry xfs_da_node_entry_t; 73typedef struct xfs_da_node_entry xfs_da_node_entry_t;
74 74
75#define XFS_DA_MAXHASH ((xfs_dahash_t)-1) /* largest valid hash value */
76
77#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize 75#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize
78#define XFS_LBLOG(mp) (mp)->m_sb.sb_blocklog
79
80#define XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \
81 (((bno) << (mp)->m_dircook_elog) | (entry))
82#define XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \
83 (((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash))
84#define XFS_DA_COOKIE_HASH(mp,cookie) ((xfs_dahash_t)cookie)
85#define XFS_DA_COOKIE_BNO(mp,cookie) \
86 ((((xfs_off_t)(cookie) >> 31) == -1LL ? \
87 (xfs_dablk_t)0 : \
88 (xfs_dablk_t)((xfs_off_t)(cookie) >> \
89 ((mp)->m_dircook_elog + 32))))
90#define XFS_DA_COOKIE_ENTRY(mp,cookie) \
91 ((((xfs_off_t)(cookie) >> 31) == -1LL ? \
92 (xfs_dablk_t)0 : \
93 (xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \
94 ((1 << (mp)->m_dircook_elog) - 1))))
95
96 76
97/*======================================================================== 77/*========================================================================
98 * Btree searching and modification structure definitions. 78 * Btree searching and modification structure definitions.
@@ -226,9 +206,8 @@ struct xfs_nameops {
226}; 206};
227 207
228 208
229#ifdef __KERNEL__
230/*======================================================================== 209/*========================================================================
231 * Function prototypes for the kernel. 210 * Function prototypes.
232 *========================================================================*/ 211 *========================================================================*/
233 212
234/* 213/*
@@ -289,6 +268,5 @@ xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
289 268
290extern struct kmem_zone *xfs_da_state_zone; 269extern struct kmem_zone *xfs_da_state_zone;
291extern struct kmem_zone *xfs_dabuf_zone; 270extern struct kmem_zone *xfs_dabuf_zone;
292#endif /* __KERNEL__ */
293 271
294#endif /* __XFS_DA_BTREE_H__ */ 272#endif /* __XFS_DA_BTREE_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 75b0cd4da0ea..b4c1ee713492 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -49,9 +49,8 @@
49 */ 49 */
50int 50int
51xfs_swapext( 51xfs_swapext(
52 xfs_swapext_t __user *sxu) 52 xfs_swapext_t *sxp)
53{ 53{
54 xfs_swapext_t *sxp;
55 xfs_inode_t *ip, *tip; 54 xfs_inode_t *ip, *tip;
56 struct file *file, *target_file; 55 struct file *file, *target_file;
57 int error = 0; 56 int error = 0;
@@ -62,11 +61,6 @@ xfs_swapext(
62 goto out; 61 goto out;
63 } 62 }
64 63
65 if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) {
66 error = XFS_ERROR(EFAULT);
67 goto out_free_sxp;
68 }
69
70 /* Pull information for the target fd */ 64 /* Pull information for the target fd */
71 file = fget((int)sxp->sx_fdtarget); 65 file = fget((int)sxp->sx_fdtarget);
72 if (!file) { 66 if (!file) {
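
[The xfs_dfrag.c change narrows xfs_swapext to a kernel-space pointer; the copy_from_user step moves out to the ioctl layer, so the core routine no longer mixes user-memory handling with its real work. A userspace sketch of that layering, with memcpy standing in for copy_from_user and all names hypothetical:]

#include <stdio.h>
#include <string.h>

struct swap_args { int fd_target, fd_source; };

/* Core logic takes an already-validated kernel copy. */
static int do_swap(const struct swap_args *args)
{
	printf("swap %d <-> %d\n", args->fd_source, args->fd_target);
	return 0;
}

/* Boundary layer: copy the caller's buffer, then call the core.
 * In the kernel this is the ioctl handler using copy_from_user(). */
static int swap_ioctl(const void *user_buf)
{
	struct swap_args args;

	memcpy(&args, user_buf, sizeof(args));	/* stand-in for copy_from_user */
	return do_swap(&args);
}

int main(void)
{
	struct swap_args a = { .fd_target = 4, .fd_source = 3 };

	return swap_ioctl(&a);
}
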
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h
index da178205be68..4f55a6306558 100644
--- a/fs/xfs/xfs_dfrag.h
+++ b/fs/xfs/xfs_dfrag.h
@@ -46,7 +46,7 @@ typedef struct xfs_swapext
46/* 46/*
47 * Syscall interface for xfs_swapext 47 * Syscall interface for xfs_swapext
48 */ 48 */
49int xfs_swapext(struct xfs_swapext __user *sx); 49int xfs_swapext(struct xfs_swapext *sx);
50 50
51int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, 51int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
52 struct xfs_swapext *sxp); 52 struct xfs_swapext *sxp);
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index c9065eaf2a4d..162e8726df5e 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -18,32 +18,29 @@
18#ifndef __XFS_DINODE_H__ 18#ifndef __XFS_DINODE_H__
19#define __XFS_DINODE_H__ 19#define __XFS_DINODE_H__
20 20
21struct xfs_buf; 21#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
22struct xfs_mount; 22#define XFS_DINODE_GOOD_VERSION(v) (((v) == 1 || (v) == 2))
23 23
24#define XFS_DINODE_VERSION_1 1
25#define XFS_DINODE_VERSION_2 2
26#define XFS_DINODE_GOOD_VERSION(v) \
27 (((v) == XFS_DINODE_VERSION_1 || (v) == XFS_DINODE_VERSION_2))
28#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
29
30/*
31 * Disk inode structure.
32 * This is just the header; the inode is expanded to fill a variable size
33 * with the last field expanding. It is split into the core and "other"
34 * because we only need the core part in the in-core inode.
35 */
36typedef struct xfs_timestamp { 24typedef struct xfs_timestamp {
37 __be32 t_sec; /* timestamp seconds */ 25 __be32 t_sec; /* timestamp seconds */
38 __be32 t_nsec; /* timestamp nanoseconds */ 26 __be32 t_nsec; /* timestamp nanoseconds */
39} xfs_timestamp_t; 27} xfs_timestamp_t;
40 28
41/* 29/*
42 * Note: Coordinate changes to this structure with the XFS_DI_* #defines 30 * On-disk inode structure.
43 * below, the offsets table in xfs_ialloc_log_di() and struct xfs_icdinode 31 *
44 * in xfs_inode.h. 32 * This is just the header or "dinode core", the inode is expanded to fill a
 33 * variable size, with the leftover area split into a data and an attribute fork.
34 * The format of the data and attribute fork depends on the format of the
35 * inode as indicated by di_format and di_aformat. To access the data and
 36 * attribute forks use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR macros
37 * below.
38 *
39 * There is a very similar struct icdinode in xfs_inode which matches the
40 * layout of the first 96 bytes of this structure, but is kept in native
41 * format instead of big endian.
45 */ 42 */
46typedef struct xfs_dinode_core { 43typedef struct xfs_dinode {
47 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ 44 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
48 __be16 di_mode; /* mode and type of file */ 45 __be16 di_mode; /* mode and type of file */
49 __u8 di_version; /* inode version */ 46 __u8 di_version; /* inode version */
@@ -69,34 +66,12 @@ typedef struct xfs_dinode_core {
69 __be16 di_dmstate; /* DMIG state info */ 66 __be16 di_dmstate; /* DMIG state info */
70 __be16 di_flags; /* random flags, XFS_DIFLAG_... */ 67 __be16 di_flags; /* random flags, XFS_DIFLAG_... */
71 __be32 di_gen; /* generation number */ 68 __be32 di_gen; /* generation number */
72} xfs_dinode_core_t;
73 69
74#define DI_MAX_FLUSH 0xffff 70 /* di_next_unlinked is the only non-core field in the old dinode */
71 __be32 di_next_unlinked;/* agi unlinked list ptr */
72} __attribute__((packed)) xfs_dinode_t;
75 73
76typedef struct xfs_dinode 74#define DI_MAX_FLUSH 0xffff
77{
78 xfs_dinode_core_t di_core;
79 /*
80 * In adding anything between the core and the union, be
81 * sure to update the macros like XFS_LITINO below and
82 * XFS_BMAP_RBLOCK_DSIZE in xfs_bmap_btree.h.
83 */
84 __be32 di_next_unlinked;/* agi unlinked list ptr */
85 union {
86 xfs_bmdr_block_t di_bmbt; /* btree root block */
87 xfs_bmbt_rec_32_t di_bmx[1]; /* extent list */
88 xfs_dir2_sf_t di_dir2sf; /* shortform directory v2 */
89 char di_c[1]; /* local contents */
90 __be32 di_dev; /* device for S_IFCHR/S_IFBLK */
91 uuid_t di_muuid; /* mount point value */
92 char di_symlink[1]; /* local symbolic link */
93 } di_u;
94 union {
95 xfs_bmdr_block_t di_abmbt; /* btree root block */
96 xfs_bmbt_rec_32_t di_abmx[1]; /* extent list */
97 xfs_attr_shortform_t di_attrsf; /* shortform attribute list */
98 } di_a;
99} xfs_dinode_t;
100 75
101/* 76/*
102 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. 77 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
@@ -107,50 +82,14 @@ typedef struct xfs_dinode
107#define XFS_MAXLINK_1 65535U 82#define XFS_MAXLINK_1 65535U
108 83
109/* 84/*
110 * Bit names for logging disk inodes only
111 */
112#define XFS_DI_MAGIC 0x0000001
113#define XFS_DI_MODE 0x0000002
114#define XFS_DI_VERSION 0x0000004
115#define XFS_DI_FORMAT 0x0000008
116#define XFS_DI_ONLINK 0x0000010
117#define XFS_DI_UID 0x0000020
118#define XFS_DI_GID 0x0000040
119#define XFS_DI_NLINK 0x0000080
120#define XFS_DI_PROJID 0x0000100
121#define XFS_DI_PAD 0x0000200
122#define XFS_DI_ATIME 0x0000400
123#define XFS_DI_MTIME 0x0000800
124#define XFS_DI_CTIME 0x0001000
125#define XFS_DI_SIZE 0x0002000
126#define XFS_DI_NBLOCKS 0x0004000
127#define XFS_DI_EXTSIZE 0x0008000
128#define XFS_DI_NEXTENTS 0x0010000
129#define XFS_DI_NAEXTENTS 0x0020000
130#define XFS_DI_FORKOFF 0x0040000
131#define XFS_DI_AFORMAT 0x0080000
132#define XFS_DI_DMEVMASK 0x0100000
133#define XFS_DI_DMSTATE 0x0200000
134#define XFS_DI_FLAGS 0x0400000
135#define XFS_DI_GEN 0x0800000
136#define XFS_DI_NEXT_UNLINKED 0x1000000
137#define XFS_DI_U 0x2000000
138#define XFS_DI_A 0x4000000
139#define XFS_DI_NUM_BITS 27
140#define XFS_DI_ALL_BITS ((1 << XFS_DI_NUM_BITS) - 1)
141#define XFS_DI_CORE_BITS (XFS_DI_ALL_BITS & ~(XFS_DI_U|XFS_DI_A))
142
143/*
144 * Values for di_format 85 * Values for di_format
145 */ 86 */
146typedef enum xfs_dinode_fmt 87typedef enum xfs_dinode_fmt {
147{ 88 XFS_DINODE_FMT_DEV, /* xfs_dev_t */
148 XFS_DINODE_FMT_DEV, /* CHR, BLK: di_dev */ 89 XFS_DINODE_FMT_LOCAL, /* bulk data */
149 XFS_DINODE_FMT_LOCAL, /* DIR, REG: di_c */ 90 XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */
150 /* LNK: di_symlink */ 91 XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */
151 XFS_DINODE_FMT_EXTENTS, /* DIR, REG, LNK: di_bmx */ 92 XFS_DINODE_FMT_UUID /* uuid_t */
152 XFS_DINODE_FMT_BTREE, /* DIR, REG, LNK: di_bmbt */
153 XFS_DINODE_FMT_UUID /* MNT: di_uuid */
154} xfs_dinode_fmt_t; 93} xfs_dinode_fmt_t;
155 94
156/* 95/*
@@ -166,13 +105,13 @@ typedef enum xfs_dinode_fmt
166 */ 105 */
167#define XFS_LITINO(mp) ((mp)->m_litino) 106#define XFS_LITINO(mp) ((mp)->m_litino)
168#define XFS_BROOT_SIZE_ADJ \ 107#define XFS_BROOT_SIZE_ADJ \
169 (sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t)) 108 (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t))
170 109
171/* 110/*
172 * Inode data & attribute fork sizes, per inode. 111 * Inode data & attribute fork sizes, per inode.
173 */ 112 */
174#define XFS_DFORK_Q(dip) ((dip)->di_core.di_forkoff != 0) 113#define XFS_DFORK_Q(dip) ((dip)->di_forkoff != 0)
175#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_core.di_forkoff << 3)) 114#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3))
176 115
177#define XFS_DFORK_DSIZE(dip,mp) \ 116#define XFS_DFORK_DSIZE(dip,mp) \
178 (XFS_DFORK_Q(dip) ? \ 117 (XFS_DFORK_Q(dip) ? \
@@ -187,23 +126,42 @@ typedef enum xfs_dinode_fmt
187 XFS_DFORK_DSIZE(dip, mp) : \ 126 XFS_DFORK_DSIZE(dip, mp) : \
188 XFS_DFORK_ASIZE(dip, mp)) 127 XFS_DFORK_ASIZE(dip, mp))
189 128
190#define XFS_DFORK_DPTR(dip) ((dip)->di_u.di_c) 129/*
130 * Return pointers to the data or attribute forks.
131 */
132#define XFS_DFORK_DPTR(dip) \
133 ((char *)(dip) + sizeof(struct xfs_dinode))
191#define XFS_DFORK_APTR(dip) \ 134#define XFS_DFORK_APTR(dip) \
192 ((dip)->di_u.di_c + XFS_DFORK_BOFF(dip)) 135 (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip))
193#define XFS_DFORK_PTR(dip,w) \ 136#define XFS_DFORK_PTR(dip,w) \
194 ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip)) 137 ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip))
138
195#define XFS_DFORK_FORMAT(dip,w) \ 139#define XFS_DFORK_FORMAT(dip,w) \
196 ((w) == XFS_DATA_FORK ? \ 140 ((w) == XFS_DATA_FORK ? \
197 (dip)->di_core.di_format : \ 141 (dip)->di_format : \
198 (dip)->di_core.di_aformat) 142 (dip)->di_aformat)
199#define XFS_DFORK_NEXTENTS(dip,w) \ 143#define XFS_DFORK_NEXTENTS(dip,w) \
200 ((w) == XFS_DATA_FORK ? \ 144 ((w) == XFS_DATA_FORK ? \
201 be32_to_cpu((dip)->di_core.di_nextents) : \ 145 be32_to_cpu((dip)->di_nextents) : \
202 be16_to_cpu((dip)->di_core.di_anextents)) 146 be16_to_cpu((dip)->di_anextents))
203 147
204#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) 148#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp))
205 149
206/* 150/*
151 * For block and character special files the 32bit dev_t is stored at the
152 * beginning of the data fork.
153 */
154static inline xfs_dev_t xfs_dinode_get_rdev(struct xfs_dinode *dip)
155{
156 return be32_to_cpu(*(__be32 *)XFS_DFORK_DPTR(dip));
157}
158
159static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
160{
161 *(__be32 *)XFS_DFORK_DPTR(dip) = cpu_to_be32(rdev);
162}
163
164/*
207 * Values for di_flags 165 * Values for di_flags
208 * There should be a one-to-one correspondence between these flags and the 166 * There should be a one-to-one correspondence between these flags and the
209 * XFS_XFLAG_s. 167 * XFS_XFLAG_s.
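
[The reworked xfs_dinode.h drops the unions: the data fork now starts right after the fixed header (XFS_DFORK_DPTR), the attribute fork di_forkoff << 3 bytes past that, and the rdev of special files is read as a big-endian 32-bit value at the start of the data fork. A standalone sketch of that header-plus-forks layout, using a hypothetical toy structure rather than the real on-disk format:]

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htonl/ntohl as big-endian helpers */

/* Toy fixed header; the fork data follows it in the same buffer. */
struct toy_dinode {
	uint16_t magic;
	uint8_t  forkoff;	/* attr fork offset in 8-byte units */
};

#define DFORK_DPTR(dip)	((char *)(dip) + sizeof(struct toy_dinode))
#define DFORK_APTR(dip)	(DFORK_DPTR(dip) + ((dip)->forkoff << 3))

/* Device number lives big-endian at the start of the data fork;
 * memcpy sidesteps alignment concerns that the kernel handles
 * differently. */
static uint32_t toy_get_rdev(struct toy_dinode *dip)
{
	uint32_t be;

	memcpy(&be, DFORK_DPTR(dip), sizeof(be));
	return ntohl(be);
}

static void toy_put_rdev(struct toy_dinode *dip, uint32_t rdev)
{
	uint32_t be = htonl(rdev);

	memcpy(DFORK_DPTR(dip), &be, sizeof(be));
}

int main(void)
{
	char buf[64] = { 0 };
	struct toy_dinode *dip = (struct toy_dinode *)buf;

	dip->forkoff = 2;	/* attr fork 16 bytes into the fork area */
	toy_put_rdev(dip, 0x0801);
	printf("rdev=%#x attr fork at +%ld\n", toy_get_rdev(dip),
	       (long)(DFORK_APTR(dip) - buf));
	return 0;
}
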
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index deecc9d238f8..6ac44b550d39 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -34,13 +34,6 @@ struct xfs_mount;
34struct xfs_trans; 34struct xfs_trans;
35 35
36/* 36/*
37 * Maximum size of a shortform directory.
38 */
39#define XFS_DIR2_SF_MAX_SIZE \
40 (XFS_DINODE_MAX_SIZE - (uint)sizeof(xfs_dinode_core_t) - \
41 (uint)sizeof(xfs_agino_t))
42
43/*
44 * Inode number stored as 8 8-bit values. 37 * Inode number stored as 8 8-bit values.
45 */ 38 */
46typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; 39typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
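
[The xfs_dir2_ino8_t typedef kept above stores an inode number as eight individual bytes, so shortform directory entries stay endianness- and alignment-independent on disk. A small sketch of reading and writing such a byte-array integer, with hypothetical helper names:]

#include <stdio.h>
#include <stdint.h>

typedef struct { uint8_t i[8]; } ino8_t;	/* like xfs_dir2_ino8_t */

/* Assemble the 64-bit value byte by byte: no alignment or
 * byte-order assumptions about the host. */
static uint64_t ino8_get(const ino8_t *p)
{
	uint64_t v = 0;
	int n;

	for (n = 0; n < 8; n++)
		v = (v << 8) | p->i[n];
	return v;
}

static void ino8_put(ino8_t *p, uint64_t v)
{
	int n;

	for (n = 7; n >= 0; n--) {
		p->i[n] = (uint8_t)v;
		v >>= 8;
	}
}

int main(void)
{
	ino8_t p;

	ino8_put(&p, 0x1234567890abcdefULL);
	printf("roundtrip ok=%d\n", ino8_get(&p) == 0x1234567890abcdefULL);
	return 0;
}
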
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index a1e55fb9d5dd..e71e2581c0c3 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -25,7 +25,6 @@
25#include "xfs_inum.h" 25#include "xfs_inum.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_mount.h" 27#include "xfs_mount.h"
28#include "xfs_clnt.h"
29 28
30 29
31static struct xfs_dmops xfs_dmcore_stub = { 30static struct xfs_dmops xfs_dmcore_stub = {
@@ -38,9 +37,9 @@ static struct xfs_dmops xfs_dmcore_stub = {
38}; 37};
39 38
40int 39int
41xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) 40xfs_dmops_get(struct xfs_mount *mp)
42{ 41{
43 if (args->flags & XFSMNT_DMAPI) { 42 if (mp->m_flags & XFS_MOUNT_DMAPI) {
44 cmn_err(CE_WARN, 43 cmn_err(CE_WARN,
45 "XFS: dmapi support not available in this kernel."); 44 "XFS: dmapi support not available in this kernel.");
46 return EINVAL; 45 return EINVAL;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index f227ecd1a294..92d5cd5bf4f2 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -153,21 +153,6 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
153} 153}
154#endif /* DEBUG */ 154#endif /* DEBUG */
155 155
156static void
157xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
158{
159 if (mp != NULL) {
160 char *newfmt;
161 int len = 16 + mp->m_fsname_len + strlen(fmt);
162
163 newfmt = kmem_alloc(len, KM_SLEEP);
164 sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
165 icmn_err(level, newfmt, ap);
166 kmem_free(newfmt);
167 } else {
168 icmn_err(level, fmt, ap);
169 }
170}
171 156
172void 157void
173xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...) 158xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
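
[The deleted xfs_fs_vcmn_err body above shows the classic pattern for prefixing a message while forwarding a va_list: build a new format string that embeds the filesystem name, then hand the untouched va_list to the lower-level printer. A standalone sketch of the same pattern, with hypothetical names:]

#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

/* Prefix "Filesystem \"name\": " onto fmt, then forward the
 * caller's va_list, as the deleted xfs_fs_vcmn_err() did. */
static void fs_vcmn_err(const char *name, const char *fmt, va_list ap)
{
	size_t len = 16 + strlen(name) + strlen(fmt);
	char *newfmt = malloc(len);

	if (!newfmt)
		return;
	snprintf(newfmt, len, "Filesystem \"%s\": %s", name, fmt);
	vfprintf(stderr, newfmt, ap);
	fputc('\n', stderr);
	free(newfmt);
}

static void fs_cmn_err(const char *name, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	fs_vcmn_err(name, fmt, ap);
	va_end(ap);
}

int main(void)
{
	fs_cmn_err("xfs0", "bad block %d", 42);
	return 0;
}
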
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 11543f10b0c6..0c93051c4651 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -159,11 +159,15 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
159#define XFS_PTAG_FSBLOCK_ZERO 0x00000080 159#define XFS_PTAG_FSBLOCK_ZERO 0x00000080
160 160
161struct xfs_mount; 161struct xfs_mount;
162/* PRINTFLIKE4 */ 162
163extern void xfs_fs_vcmn_err(int level, struct xfs_mount *mp,
164 char *fmt, va_list ap)
165 __attribute__ ((format (printf, 3, 0)));
163extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp, 166extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
164 char *fmt, ...); 167 char *fmt, ...)
165/* PRINTFLIKE3 */ 168 __attribute__ ((format (printf, 4, 5)));
166extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...); 169extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...)
170 __attribute__ ((format (printf, 3, 4)));
167 171
168extern void xfs_hex_dump(void *p, int length); 172extern void xfs_hex_dump(void *p, int length);
169 173
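
[The xfs_error.h hunk replaces the PRINTFLIKE comments with real __attribute__((format(printf, ...))) annotations, so GCC type-checks the varargs against the format string; the two indices name the format-string argument and the first variadic argument (0 for va_list functions, as in the xfs_fs_vcmn_err prototype above). A minimal sketch with a hypothetical function:]

#include <stdio.h>
#include <stdarg.h>

/* fmt is argument 2, the variadic args start at 3; GCC will now
 * warn if a caller's arguments don't match the format string. */
static void log_err(int level, const char *fmt, ...)
	__attribute__ ((format (printf, 2, 3)));

static void log_err(int level, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	fprintf(stderr, "<%d> ", level);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}

int main(void)
{
	log_err(3, "bad magic %#x\n", 0x58465342);
	/* log_err(3, "bad magic %s\n", 42); would now draw a warning */
	return 0;
}
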
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 8aa28f751b2a..05a4bdd4be39 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -108,19 +108,16 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip)
108STATIC void 108STATIC void
109xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) 109xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
110{ 110{
111 xfs_mount_t *mp; 111 struct xfs_ail *ailp = efip->efi_item.li_ailp;
112 112
113 mp = efip->efi_item.li_mountp; 113 spin_lock(&ailp->xa_lock);
114 spin_lock(&mp->m_ail_lock);
115 if (efip->efi_flags & XFS_EFI_CANCELED) { 114 if (efip->efi_flags & XFS_EFI_CANCELED) {
116 /* 115 /* xfs_trans_ail_delete() drops the AIL lock. */
117 * xfs_trans_delete_ail() drops the AIL lock. 116 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
118 */
119 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
120 xfs_efi_item_free(efip); 117 xfs_efi_item_free(efip);
121 } else { 118 } else {
122 efip->efi_flags |= XFS_EFI_COMMITTED; 119 efip->efi_flags |= XFS_EFI_COMMITTED;
123 spin_unlock(&mp->m_ail_lock); 120 spin_unlock(&ailp->xa_lock);
124 } 121 }
125} 122}
126 123
@@ -134,26 +131,23 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
134STATIC void 131STATIC void
135xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) 132xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
136{ 133{
137 xfs_mount_t *mp; 134 struct xfs_ail *ailp = efip->efi_item.li_ailp;
138 xfs_log_item_desc_t *lidp; 135 xfs_log_item_desc_t *lidp;
139 136
140 mp = efip->efi_item.li_mountp; 137 spin_lock(&ailp->xa_lock);
141 spin_lock(&mp->m_ail_lock);
142 if (efip->efi_flags & XFS_EFI_CANCELED) { 138 if (efip->efi_flags & XFS_EFI_CANCELED) {
143 /* 139 /*
144 * free the xaction descriptor pointing to this item 140 * free the xaction descriptor pointing to this item
145 */ 141 */
146 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip); 142 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
147 xfs_trans_free_item(tp, lidp); 143 xfs_trans_free_item(tp, lidp);
148 /* 144
149 * pull the item off the AIL. 145 /* xfs_trans_ail_delete() drops the AIL lock. */
150 * xfs_trans_delete_ail() drops the AIL lock. 146 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
151 */
152 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
153 xfs_efi_item_free(efip); 147 xfs_efi_item_free(efip);
154 } else { 148 } else {
155 efip->efi_flags |= XFS_EFI_COMMITTED; 149 efip->efi_flags |= XFS_EFI_COMMITTED;
156 spin_unlock(&mp->m_ail_lock); 150 spin_unlock(&ailp->xa_lock);
157 } 151 }
158} 152}
159 153
@@ -268,6 +262,7 @@ xfs_efi_init(xfs_mount_t *mp,
268 efip->efi_item.li_type = XFS_LI_EFI; 262 efip->efi_item.li_type = XFS_LI_EFI;
269 efip->efi_item.li_ops = &xfs_efi_item_ops; 263 efip->efi_item.li_ops = &xfs_efi_item_ops;
270 efip->efi_item.li_mountp = mp; 264 efip->efi_item.li_mountp = mp;
265 efip->efi_item.li_ailp = mp->m_ail;
271 efip->efi_format.efi_nextents = nextents; 266 efip->efi_format.efi_nextents = nextents;
272 efip->efi_format.efi_id = (__psint_t)(void*)efip; 267 efip->efi_format.efi_id = (__psint_t)(void*)efip;
273 268
@@ -345,25 +340,22 @@ void
345xfs_efi_release(xfs_efi_log_item_t *efip, 340xfs_efi_release(xfs_efi_log_item_t *efip,
346 uint nextents) 341 uint nextents)
347{ 342{
348 xfs_mount_t *mp; 343 struct xfs_ail *ailp = efip->efi_item.li_ailp;
349 int extents_left; 344 int extents_left;
350 345
351 mp = efip->efi_item.li_mountp;
352 ASSERT(efip->efi_next_extent > 0); 346 ASSERT(efip->efi_next_extent > 0);
353 ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); 347 ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);
354 348
355 spin_lock(&mp->m_ail_lock); 349 spin_lock(&ailp->xa_lock);
356 ASSERT(efip->efi_next_extent >= nextents); 350 ASSERT(efip->efi_next_extent >= nextents);
357 efip->efi_next_extent -= nextents; 351 efip->efi_next_extent -= nextents;
358 extents_left = efip->efi_next_extent; 352 extents_left = efip->efi_next_extent;
359 if (extents_left == 0) { 353 if (extents_left == 0) {
360 /* 354 /* xfs_trans_ail_delete() drops the AIL lock. */
361 * xfs_trans_delete_ail() drops the AIL lock. 355 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip);
362 */
363 xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip);
364 xfs_efi_item_free(efip); 356 xfs_efi_item_free(efip);
365 } else { 357 } else {
366 spin_unlock(&mp->m_ail_lock); 358 spin_unlock(&ailp->xa_lock);
367 } 359 }
368} 360}
369 361
@@ -565,6 +557,7 @@ xfs_efd_init(xfs_mount_t *mp,
565 efdp->efd_item.li_type = XFS_LI_EFD; 557 efdp->efd_item.li_type = XFS_LI_EFD;
566 efdp->efd_item.li_ops = &xfs_efd_item_ops; 558 efdp->efd_item.li_ops = &xfs_efd_item_ops;
567 efdp->efd_item.li_mountp = mp; 559 efdp->efd_item.li_mountp = mp;
560 efdp->efd_item.li_ailp = mp->m_ail;
568 efdp->efd_efip = efip; 561 efdp->efd_efip = efip;
569 efdp->efd_format.efd_nextents = nextents; 562 efdp->efd_format.efd_nextents = nextents;
570 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 563 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 01c0cc88d3f3..589c41c38446 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -113,22 +113,14 @@ struct getbmapx {
113#define BMV_IF_ATTRFORK 0x1 /* return attr fork rather than data */ 113#define BMV_IF_ATTRFORK 0x1 /* return attr fork rather than data */
114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ 114#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */
115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ 115#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */
116#define BMV_IF_VALID (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC) 116#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */
117#ifdef __KERNEL__ 117#define BMV_IF_VALID \
118#define BMV_IF_EXTENDED 0x40000000 /* getpmapx if set */ 118 (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC)
119#endif
120 119
121/* bmv_oflags values - returned for each non-header segment */ 120/* bmv_oflags values - returned for each non-header segment */
122#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ 121#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
123 122#define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */
124/* Convert getbmap <-> getbmapx - move fields from p1 to p2. */ 123#define BMV_OF_LAST 0x4 /* segment is the last in the file */
125#define GETBMAP_CONVERT(p1,p2) { \
126 p2.bmv_offset = p1.bmv_offset; \
127 p2.bmv_block = p1.bmv_block; \
128 p2.bmv_length = p1.bmv_length; \
129 p2.bmv_count = p1.bmv_count; \
130 p2.bmv_entries = p1.bmv_entries; }
131
132 124
133/* 125/*
134 * Structure for XFS_IOC_FSSETDM. 126 * Structure for XFS_IOC_FSSETDM.
@@ -426,10 +418,6 @@ typedef struct xfs_handle {
426#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS 418#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
427#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS 419#define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS
428#define XFS_IOC_GETVERSION FS_IOC_GETVERSION 420#define XFS_IOC_GETVERSION FS_IOC_GETVERSION
429/* 32-bit compat counterparts */
430#define XFS_IOC32_GETXFLAGS FS_IOC32_GETFLAGS
431#define XFS_IOC32_SETXFLAGS FS_IOC32_SETFLAGS
432#define XFS_IOC32_GETVERSION FS_IOC32_GETVERSION
433 421
434/* 422/*
435 * ioctl commands that replace IRIX fcntl()'s 423 * ioctl commands that replace IRIX fcntl()'s
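
[The xfs_fs.h hunk above widens BMV_IF_VALID so the new BMV_IF_DELALLOC bit passes validation; interfaces like this typically reject any request carrying bits outside the advertised mask. A small sketch of that mask check, with hypothetical flag values:]

#include <stdio.h>
#include <errno.h>

#define IF_ATTRFORK	0x1	/* hypothetical stand-ins */
#define IF_PREALLOC	0x4
#define IF_DELALLOC	0x8
#define IF_VALID	(IF_ATTRFORK | IF_PREALLOC | IF_DELALLOC)

/* Reject requests with unknown bits: adding a flag to the VALID
 * mask is what makes it accepted, as with BMV_IF_DELALLOC above. */
static int check_flags(unsigned int flags)
{
	if (flags & ~IF_VALID)
		return -EINVAL;
	return 0;
}

int main(void)
{
	printf("delalloc: %d, unknown: %d\n",
	       check_flags(IF_DELALLOC), check_flags(0x100));
	return 0;
}
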
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 84583cf73db3..852b6d32e8d0 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -126,7 +126,7 @@ xfs_growfs_data_private(
126 xfs_extlen_t agsize; 126 xfs_extlen_t agsize;
127 xfs_extlen_t tmpsize; 127 xfs_extlen_t tmpsize;
128 xfs_alloc_rec_t *arec; 128 xfs_alloc_rec_t *arec;
129 xfs_btree_sblock_t *block; 129 struct xfs_btree_block *block;
130 xfs_buf_t *bp; 130 xfs_buf_t *bp;
131 int bucket; 131 int bucket;
132 int dpct; 132 int dpct;
@@ -251,14 +251,14 @@ xfs_growfs_data_private(
251 bp = xfs_buf_get(mp->m_ddev_targp, 251 bp = xfs_buf_get(mp->m_ddev_targp,
252 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), 252 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
253 BTOBB(mp->m_sb.sb_blocksize), 0); 253 BTOBB(mp->m_sb.sb_blocksize), 0);
254 block = XFS_BUF_TO_SBLOCK(bp); 254 block = XFS_BUF_TO_BLOCK(bp);
255 memset(block, 0, mp->m_sb.sb_blocksize); 255 memset(block, 0, mp->m_sb.sb_blocksize);
256 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); 256 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
257 block->bb_level = 0; 257 block->bb_level = 0;
258 block->bb_numrecs = cpu_to_be16(1); 258 block->bb_numrecs = cpu_to_be16(1);
259 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 259 block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
260 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 260 block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
261 arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); 261 arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
262 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 262 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
263 arec->ar_blockcount = cpu_to_be32( 263 arec->ar_blockcount = cpu_to_be32(
264 agsize - be32_to_cpu(arec->ar_startblock)); 264 agsize - be32_to_cpu(arec->ar_startblock));
@@ -272,14 +272,14 @@ xfs_growfs_data_private(
272 bp = xfs_buf_get(mp->m_ddev_targp, 272 bp = xfs_buf_get(mp->m_ddev_targp,
273 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), 273 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
274 BTOBB(mp->m_sb.sb_blocksize), 0); 274 BTOBB(mp->m_sb.sb_blocksize), 0);
275 block = XFS_BUF_TO_SBLOCK(bp); 275 block = XFS_BUF_TO_BLOCK(bp);
276 memset(block, 0, mp->m_sb.sb_blocksize); 276 memset(block, 0, mp->m_sb.sb_blocksize);
277 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); 277 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
278 block->bb_level = 0; 278 block->bb_level = 0;
279 block->bb_numrecs = cpu_to_be16(1); 279 block->bb_numrecs = cpu_to_be16(1);
280 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 280 block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
281 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 281 block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
282 arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); 282 arec = XFS_ALLOC_REC_ADDR(mp, block, 1);
283 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); 283 arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
284 arec->ar_blockcount = cpu_to_be32( 284 arec->ar_blockcount = cpu_to_be32(
285 agsize - be32_to_cpu(arec->ar_startblock)); 285 agsize - be32_to_cpu(arec->ar_startblock));
@@ -294,13 +294,13 @@ xfs_growfs_data_private(
294 bp = xfs_buf_get(mp->m_ddev_targp, 294 bp = xfs_buf_get(mp->m_ddev_targp,
295 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), 295 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
296 BTOBB(mp->m_sb.sb_blocksize), 0); 296 BTOBB(mp->m_sb.sb_blocksize), 0);
297 block = XFS_BUF_TO_SBLOCK(bp); 297 block = XFS_BUF_TO_BLOCK(bp);
298 memset(block, 0, mp->m_sb.sb_blocksize); 298 memset(block, 0, mp->m_sb.sb_blocksize);
299 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); 299 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
300 block->bb_level = 0; 300 block->bb_level = 0;
301 block->bb_numrecs = 0; 301 block->bb_numrecs = 0;
302 block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); 302 block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
303 block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); 303 block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
304 error = xfs_bwrite(mp, bp); 304 error = xfs_bwrite(mp, bp);
305 if (error) { 305 if (error) {
306 goto error0; 306 goto error0;
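Reviewer note: the conversions in this hunk replace the short-form-only xfs_btree_sblock_t with the unified struct xfs_btree_block, whose sibling pointers move into a union. A rough user-space model of that header, for orientation only (field layout here is illustrative, not the authoritative on-disk definition):

#include <stdint.h>

struct btree_block_model {
	uint32_t bb_magic;		/* XFS_ABTB_MAGIC etc. */
	uint16_t bb_level;		/* 0 for leaf blocks */
	uint16_t bb_numrecs;		/* records in this block */
	union {
		struct {		/* short form: AG-local btrees */
			uint32_t bb_leftsib;
			uint32_t bb_rightsib;
		} s;
		struct {		/* long form: 64-bit siblings */
			uint64_t bb_leftsib;
			uint64_t bb_rightsib;
		} l;
	} bb_u;				/* hence block->bb_u.s.bb_leftsib above */
};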
@@ -435,6 +435,9 @@ xfs_growfs_data(
435 xfs_growfs_data_t *in) 435 xfs_growfs_data_t *in)
436{ 436{
437 int error; 437 int error;
438
439 if (!capable(CAP_SYS_ADMIN))
440 return XFS_ERROR(EPERM);
438 if (!mutex_trylock(&mp->m_growlock)) 441 if (!mutex_trylock(&mp->m_growlock))
439 return XFS_ERROR(EWOULDBLOCK); 442 return XFS_ERROR(EWOULDBLOCK);
440 error = xfs_growfs_data_private(mp, in); 443 error = xfs_growfs_data_private(mp, in);
@@ -448,6 +451,9 @@ xfs_growfs_log(
448 xfs_growfs_log_t *in) 451 xfs_growfs_log_t *in)
449{ 452{
450 int error; 453 int error;
454
455 if (!capable(CAP_SYS_ADMIN))
456 return XFS_ERROR(EPERM);
451 if (!mutex_trylock(&mp->m_growlock)) 457 if (!mutex_trylock(&mp->m_growlock))
452 return XFS_ERROR(EWOULDBLOCK); 458 return XFS_ERROR(EWOULDBLOCK);
453 error = xfs_growfs_log_private(mp, in); 459 error = xfs_growfs_log_private(mp, in);
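Reviewer note: both grow paths now gate on CAP_SYS_ADMIN before taking the lock. A minimal user-space sketch of the same check-then-trylock shape, assuming pthreads as a stand-in for the kernel mutex API; caller_is_admin() is a hypothetical stub for capable():

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t growlock = PTHREAD_MUTEX_INITIALIZER;

static bool caller_is_admin(void)
{
	return true;	/* stub; the kernel asks capable(CAP_SYS_ADMIN) */
}

static int grow(void)
{
	if (!caller_is_admin())
		return EPERM;		/* reject before touching the lock */
	if (pthread_mutex_trylock(&growlock) != 0)
		return EWOULDBLOCK;	/* a grow is already running; don't block */
	/* ... the real grow work would happen here ... */
	pthread_mutex_unlock(&growlock);
	return 0;
}

int main(void)
{
	printf("grow() -> %d\n", grow());
	return 0;
}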
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index aad8c5da38af..e6ebbaeb4dc6 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -41,68 +41,6 @@
41#include "xfs_error.h" 41#include "xfs_error.h"
42#include "xfs_bmap.h" 42#include "xfs_bmap.h"
43 43
44/*
45 * Log specified fields for the inode given by bp and off.
46 */
47STATIC void
48xfs_ialloc_log_di(
49 xfs_trans_t *tp, /* transaction pointer */
50 xfs_buf_t *bp, /* inode buffer */
51 int off, /* index of inode in buffer */
52 int fields) /* bitmask of fields to log */
53{
54 int first; /* first byte number */
55 int ioffset; /* off in bytes */
56 int last; /* last byte number */
57 xfs_mount_t *mp; /* mount point structure */
58 static const short offsets[] = { /* field offsets */
59 /* keep in sync with bits */
60 offsetof(xfs_dinode_core_t, di_magic),
61 offsetof(xfs_dinode_core_t, di_mode),
62 offsetof(xfs_dinode_core_t, di_version),
63 offsetof(xfs_dinode_core_t, di_format),
64 offsetof(xfs_dinode_core_t, di_onlink),
65 offsetof(xfs_dinode_core_t, di_uid),
66 offsetof(xfs_dinode_core_t, di_gid),
67 offsetof(xfs_dinode_core_t, di_nlink),
68 offsetof(xfs_dinode_core_t, di_projid),
69 offsetof(xfs_dinode_core_t, di_pad),
70 offsetof(xfs_dinode_core_t, di_atime),
71 offsetof(xfs_dinode_core_t, di_mtime),
72 offsetof(xfs_dinode_core_t, di_ctime),
73 offsetof(xfs_dinode_core_t, di_size),
74 offsetof(xfs_dinode_core_t, di_nblocks),
75 offsetof(xfs_dinode_core_t, di_extsize),
76 offsetof(xfs_dinode_core_t, di_nextents),
77 offsetof(xfs_dinode_core_t, di_anextents),
78 offsetof(xfs_dinode_core_t, di_forkoff),
79 offsetof(xfs_dinode_core_t, di_aformat),
80 offsetof(xfs_dinode_core_t, di_dmevmask),
81 offsetof(xfs_dinode_core_t, di_dmstate),
82 offsetof(xfs_dinode_core_t, di_flags),
83 offsetof(xfs_dinode_core_t, di_gen),
84 offsetof(xfs_dinode_t, di_next_unlinked),
85 offsetof(xfs_dinode_t, di_u),
86 offsetof(xfs_dinode_t, di_a),
87 sizeof(xfs_dinode_t)
88 };
89
90
91 ASSERT(offsetof(xfs_dinode_t, di_core) == 0);
92 ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0);
93 mp = tp->t_mountp;
94 /*
95 * Get the inode-relative first and last bytes for these fields
96 */
97 xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last);
98 /*
99 * Convert to buffer offsets and log it.
100 */
101 ioffset = off << mp->m_sb.sb_inodelog;
102 first += ioffset;
103 last += ioffset;
104 xfs_trans_log_buf(tp, bp, first, last);
105}
106 44
107/* 45/*
108 * Allocation group level functions. 46 * Allocation group level functions.
@@ -119,6 +57,102 @@ xfs_ialloc_cluster_alignment(
119} 57}
120 58
121/* 59/*
60 * Lookup the record equal to ino in the btree given by cur.
61 */
62STATIC int /* error */
63xfs_inobt_lookup_eq(
64 struct xfs_btree_cur *cur, /* btree cursor */
65 xfs_agino_t ino, /* starting inode of chunk */
66 __int32_t fcnt, /* free inode count */
67 xfs_inofree_t free, /* free inode mask */
68 int *stat) /* success/failure */
69{
70 cur->bc_rec.i.ir_startino = ino;
71 cur->bc_rec.i.ir_freecount = fcnt;
72 cur->bc_rec.i.ir_free = free;
73 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
74}
75
76/*
77 * Lookup the first record greater than or equal to ino
78 * in the btree given by cur.
79 */
80int /* error */
81xfs_inobt_lookup_ge(
82 struct xfs_btree_cur *cur, /* btree cursor */
83 xfs_agino_t ino, /* starting inode of chunk */
84 __int32_t fcnt, /* free inode count */
85 xfs_inofree_t free, /* free inode mask */
86 int *stat) /* success/failure */
87{
88 cur->bc_rec.i.ir_startino = ino;
89 cur->bc_rec.i.ir_freecount = fcnt;
90 cur->bc_rec.i.ir_free = free;
91 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
92}
93
94/*
95 * Lookup the first record less than or equal to ino
96 * in the btree given by cur.
97 */
98int /* error */
99xfs_inobt_lookup_le(
100 struct xfs_btree_cur *cur, /* btree cursor */
101 xfs_agino_t ino, /* starting inode of chunk */
102 __int32_t fcnt, /* free inode count */
103 xfs_inofree_t free, /* free inode mask */
104 int *stat) /* success/failure */
105{
106 cur->bc_rec.i.ir_startino = ino;
107 cur->bc_rec.i.ir_freecount = fcnt;
108 cur->bc_rec.i.ir_free = free;
109 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
110}
111
112/*
113 * Update the record referred to by cur to the value given
114 * by [ino, fcnt, free].
 115 * This either works (returns 0) or fails with an EFSCORRUPTED error.
116 */
117STATIC int /* error */
118xfs_inobt_update(
119 struct xfs_btree_cur *cur, /* btree cursor */
120 xfs_agino_t ino, /* starting inode of chunk */
121 __int32_t fcnt, /* free inode count */
122 xfs_inofree_t free) /* free inode mask */
123{
124 union xfs_btree_rec rec;
125
126 rec.inobt.ir_startino = cpu_to_be32(ino);
127 rec.inobt.ir_freecount = cpu_to_be32(fcnt);
128 rec.inobt.ir_free = cpu_to_be64(free);
129 return xfs_btree_update(cur, &rec);
130}
131
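Reviewer note: the update helper packs CPU-order values into the big-endian on-disk record before handing it to the generic code. A small user-space model of that conversion, with htonl() standing in for cpu_to_be32() and a deliberately simplified struct layout:

#include <arpa/inet.h>	/* htonl() */
#include <stdint.h>
#include <stdio.h>

struct inobt_rec_model {
	uint32_t ir_startino;	/* big-endian on disk */
	uint32_t ir_freecount;	/* big-endian on disk */
};

int main(void)
{
	struct inobt_rec_model rec;

	rec.ir_startino = htonl(128);	/* cpu_to_be32() equivalent */
	rec.ir_freecount = htonl(64);
	/* most-significant byte comes first regardless of host order */
	printf("ir_startino bytes: %02x %02x %02x %02x\n",
	       ((unsigned char *)&rec.ir_startino)[0],
	       ((unsigned char *)&rec.ir_startino)[1],
	       ((unsigned char *)&rec.ir_startino)[2],
	       ((unsigned char *)&rec.ir_startino)[3]);
	return 0;
}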
132/*
133 * Get the data from the pointed-to record.
134 */
135int /* error */
136xfs_inobt_get_rec(
137 struct xfs_btree_cur *cur, /* btree cursor */
138 xfs_agino_t *ino, /* output: starting inode of chunk */
139 __int32_t *fcnt, /* output: number of free inodes */
140 xfs_inofree_t *free, /* output: free inode mask */
141 int *stat) /* output: success/failure */
142{
143 union xfs_btree_rec *rec;
144 int error;
145
146 error = xfs_btree_get_rec(cur, &rec, stat);
147 if (!error && *stat == 1) {
148 *ino = be32_to_cpu(rec->inobt.ir_startino);
149 *fcnt = be32_to_cpu(rec->inobt.ir_freecount);
150 *free = be64_to_cpu(rec->inobt.ir_free);
151 }
152 return error;
153}
154
155/*
122 * Allocate new inodes in the allocation group specified by agbp. 156 * Allocate new inodes in the allocation group specified by agbp.
123 * Return 0 for success, else error code. 157 * Return 0 for success, else error code.
124 */ 158 */
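Reviewer note: taken together, the helpers above support the standard position-then-walk pattern that the rest of this patch converts to. A minimal sketch under that assumption; count_free_inodes() is a hypothetical wrapper mirroring the DEBUG freecount loops further down, with error handling kept terse:

STATIC int				/* error */
count_free_inodes(
	struct xfs_btree_cur	*cur,	/* inobt cursor */
	int			*freecount)
{
	xfs_agino_t		ino;
	__int32_t		fcnt;
	xfs_inofree_t		free;
	int			i;
	int			error;

	*freecount = 0;
	/* position at the first record in the tree */
	error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i);
	while (!error && i == 1) {
		error = xfs_inobt_get_rec(cur, &ino, &fcnt, &free, &i);
		if (error || i == 0)
			break;
		*freecount += fcnt;
		/* step to the next record, if any */
		error = xfs_btree_increment(cur, 0, &i);
	}
	return error;
}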
@@ -287,9 +321,9 @@ xfs_ialloc_ag_alloc(
287 * able to use the file system. 321 * able to use the file system.
288 */ 322 */
289 if (xfs_sb_version_hasnlink(&args.mp->m_sb)) 323 if (xfs_sb_version_hasnlink(&args.mp->m_sb))
290 version = XFS_DINODE_VERSION_2; 324 version = 2;
291 else 325 else
292 version = XFS_DINODE_VERSION_1; 326 version = 1;
293 327
294 /* 328 /*
295 * Seed the new inode cluster with a random generation number. This 329 * Seed the new inode cluster with a random generation number. This
@@ -310,18 +344,25 @@ xfs_ialloc_ag_alloc(
310 XFS_BUF_LOCK); 344 XFS_BUF_LOCK);
311 ASSERT(fbuf); 345 ASSERT(fbuf);
312 ASSERT(!XFS_BUF_GETERROR(fbuf)); 346 ASSERT(!XFS_BUF_GETERROR(fbuf));
347
313 /* 348 /*
314 * Set initial values for the inodes in this buffer. 349 * Initialize all inodes in this buffer and then log them.
350 *
351 * XXX: It would be much better if we had just one transaction to
 352 * log a whole cluster of inodes instead of all the individual
353 * transactions causing a lot of log traffic.
315 */ 354 */
316 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); 355 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
317 for (i = 0; i < ninodes; i++) { 356 for (i = 0; i < ninodes; i++) {
357 int ioffset = i << args.mp->m_sb.sb_inodelog;
358 uint isize = sizeof(struct xfs_dinode);
359
318 free = XFS_MAKE_IPTR(args.mp, fbuf, i); 360 free = XFS_MAKE_IPTR(args.mp, fbuf, i);
319 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 361 free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
320 free->di_core.di_version = version; 362 free->di_version = version;
321 free->di_core.di_gen = cpu_to_be32(gen); 363 free->di_gen = cpu_to_be32(gen);
322 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 364 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
323 xfs_ialloc_log_di(tp, fbuf, i, 365 xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
324 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
325 } 366 }
326 xfs_trans_inode_alloc_buf(tp, fbuf); 367 xfs_trans_inode_alloc_buf(tp, fbuf);
327 } 368 }
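Reviewer note: the replacement logging call computes each inode's byte range directly from its index. A stand-alone worked example of that arithmetic, assuming 256-byte inodes (sb_inodelog == 8) and logging the whole inode for illustration (the patch logs sizeof(struct xfs_dinode)):

#include <stdio.h>

int main(void)
{
	unsigned sb_inodelog = 8;		/* log2 of the inode size */
	unsigned isize = 1u << sb_inodelog;	/* whole inode, for illustration */
	unsigned i;

	for (i = 0; i < 4; i++) {
		unsigned ioffset = i << sb_inodelog;
		/* [first, last] byte range handed to xfs_trans_log_buf() */
		printf("inode %u: log bytes %u..%u\n",
		       i, ioffset, ioffset + isize - 1);
	}
	return 0;
}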
@@ -335,8 +376,7 @@ xfs_ialloc_ag_alloc(
335 /* 376 /*
336 * Insert records describing the new inode chunk into the btree. 377 * Insert records describing the new inode chunk into the btree.
337 */ 378 */
338 cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno, 379 cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
339 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
340 for (thisino = newino; 380 for (thisino = newino;
341 thisino < newino + newlen; 381 thisino < newino + newlen;
342 thisino += XFS_INODES_PER_CHUNK) { 382 thisino += XFS_INODES_PER_CHUNK) {
@@ -346,7 +386,7 @@ xfs_ialloc_ag_alloc(
346 return error; 386 return error;
347 } 387 }
348 ASSERT(i == 0); 388 ASSERT(i == 0);
349 if ((error = xfs_inobt_insert(cur, &i))) { 389 if ((error = xfs_btree_insert(cur, &i))) {
350 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 390 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
351 return error; 391 return error;
352 } 392 }
@@ -676,8 +716,7 @@ nextag:
676 */ 716 */
677 agno = tagno; 717 agno = tagno;
678 *IO_agbp = NULL; 718 *IO_agbp = NULL;
679 cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno), 719 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
680 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
681 /* 720 /*
682 * If pagino is 0 (this is the root inode allocation) use newino. 721 * If pagino is 0 (this is the root inode allocation) use newino.
683 * This must work because we've just allocated some. 722 * This must work because we've just allocated some.
@@ -697,7 +736,7 @@ nextag:
697 goto error0; 736 goto error0;
698 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 737 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
699 freecount += rec.ir_freecount; 738 freecount += rec.ir_freecount;
700 if ((error = xfs_inobt_increment(cur, 0, &i))) 739 if ((error = xfs_btree_increment(cur, 0, &i)))
701 goto error0; 740 goto error0;
702 } while (i == 1); 741 } while (i == 1);
703 742
@@ -741,7 +780,7 @@ nextag:
741 /* 780 /*
742 * Search left with tcur, back up 1 record. 781 * Search left with tcur, back up 1 record.
743 */ 782 */
744 if ((error = xfs_inobt_decrement(tcur, 0, &i))) 783 if ((error = xfs_btree_decrement(tcur, 0, &i)))
745 goto error1; 784 goto error1;
746 doneleft = !i; 785 doneleft = !i;
747 if (!doneleft) { 786 if (!doneleft) {
@@ -755,7 +794,7 @@ nextag:
755 /* 794 /*
756 * Search right with cur, go forward 1 record. 795 * Search right with cur, go forward 1 record.
757 */ 796 */
758 if ((error = xfs_inobt_increment(cur, 0, &i))) 797 if ((error = xfs_btree_increment(cur, 0, &i)))
759 goto error1; 798 goto error1;
760 doneright = !i; 799 doneright = !i;
761 if (!doneright) { 800 if (!doneright) {
@@ -817,7 +856,7 @@ nextag:
817 * further left. 856 * further left.
818 */ 857 */
819 if (useleft) { 858 if (useleft) {
820 if ((error = xfs_inobt_decrement(tcur, 0, 859 if ((error = xfs_btree_decrement(tcur, 0,
821 &i))) 860 &i)))
822 goto error1; 861 goto error1;
823 doneleft = !i; 862 doneleft = !i;
@@ -837,7 +876,7 @@ nextag:
837 * further right. 876 * further right.
838 */ 877 */
839 else { 878 else {
840 if ((error = xfs_inobt_increment(cur, 0, 879 if ((error = xfs_btree_increment(cur, 0,
841 &i))) 880 &i)))
842 goto error1; 881 goto error1;
843 doneright = !i; 882 doneright = !i;
@@ -892,7 +931,7 @@ nextag:
892 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 931 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
893 if (rec.ir_freecount > 0) 932 if (rec.ir_freecount > 0)
894 break; 933 break;
895 if ((error = xfs_inobt_increment(cur, 0, &i))) 934 if ((error = xfs_btree_increment(cur, 0, &i)))
896 goto error0; 935 goto error0;
897 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 936 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
898 } 937 }
@@ -926,7 +965,7 @@ nextag:
926 goto error0; 965 goto error0;
927 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 966 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
928 freecount += rec.ir_freecount; 967 freecount += rec.ir_freecount;
929 if ((error = xfs_inobt_increment(cur, 0, &i))) 968 if ((error = xfs_btree_increment(cur, 0, &i)))
930 goto error0; 969 goto error0;
931 } while (i == 1); 970 } while (i == 1);
932 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || 971 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
@@ -1022,8 +1061,7 @@ xfs_difree(
1022 /* 1061 /*
1023 * Initialize the cursor. 1062 * Initialize the cursor.
1024 */ 1063 */
1025 cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, 1064 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1026 (xfs_inode_t *)0, 0);
1027#ifdef DEBUG 1065#ifdef DEBUG
1028 if (cur->bc_nlevels == 1) { 1066 if (cur->bc_nlevels == 1) {
1029 int freecount = 0; 1067 int freecount = 0;
@@ -1036,7 +1074,7 @@ xfs_difree(
1036 goto error0; 1074 goto error0;
1037 if (i) { 1075 if (i) {
1038 freecount += rec.ir_freecount; 1076 freecount += rec.ir_freecount;
1039 if ((error = xfs_inobt_increment(cur, 0, &i))) 1077 if ((error = xfs_btree_increment(cur, 0, &i)))
1040 goto error0; 1078 goto error0;
1041 } 1079 }
1042 } while (i == 1); 1080 } while (i == 1);
@@ -1098,8 +1136,8 @@ xfs_difree(
1098 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); 1136 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1099 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1137 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1100 1138
1101 if ((error = xfs_inobt_delete(cur, &i))) { 1139 if ((error = xfs_btree_delete(cur, &i))) {
1102 cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n", 1140 cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n",
1103 error, mp->m_fsname); 1141 error, mp->m_fsname);
1104 goto error0; 1142 goto error0;
1105 } 1143 }
@@ -1141,7 +1179,7 @@ xfs_difree(
1141 goto error0; 1179 goto error0;
1142 if (i) { 1180 if (i) {
1143 freecount += rec.ir_freecount; 1181 freecount += rec.ir_freecount;
1144 if ((error = xfs_inobt_increment(cur, 0, &i))) 1182 if ((error = xfs_btree_increment(cur, 0, &i)))
1145 goto error0; 1183 goto error0;
1146 } 1184 }
1147 } while (i == 1); 1185 } while (i == 1);
@@ -1158,36 +1196,28 @@ error0:
1158} 1196}
1159 1197
1160/* 1198/*
1161 * Return the location of the inode in bno/off, for mapping it into a buffer. 1199 * Return the location of the inode in imap, for mapping it into a buffer.
1162 */ 1200 */
1163/*ARGSUSED*/
1164int 1201int
1165xfs_dilocate( 1202xfs_imap(
1166 xfs_mount_t *mp, /* file system mount structure */ 1203 xfs_mount_t *mp, /* file system mount structure */
1167 xfs_trans_t *tp, /* transaction pointer */ 1204 xfs_trans_t *tp, /* transaction pointer */
1168 xfs_ino_t ino, /* inode to locate */ 1205 xfs_ino_t ino, /* inode to locate */
1169 xfs_fsblock_t *bno, /* output: block containing inode */ 1206 struct xfs_imap *imap, /* location map structure */
1170 int *len, /* output: num blocks in inode cluster */ 1207 uint flags) /* flags for inode btree lookup */
1171 int *off, /* output: index in block of inode */
1172 uint flags) /* flags concerning inode lookup */
1173{ 1208{
1174 xfs_agblock_t agbno; /* block number of inode in the alloc group */ 1209 xfs_agblock_t agbno; /* block number of inode in the alloc group */
1175 xfs_buf_t *agbp; /* agi buffer */
1176 xfs_agino_t agino; /* inode number within alloc group */ 1210 xfs_agino_t agino; /* inode number within alloc group */
1177 xfs_agnumber_t agno; /* allocation group number */ 1211 xfs_agnumber_t agno; /* allocation group number */
1178 int blks_per_cluster; /* num blocks per inode cluster */ 1212 int blks_per_cluster; /* num blocks per inode cluster */
1179 xfs_agblock_t chunk_agbno; /* first block in inode chunk */ 1213 xfs_agblock_t chunk_agbno; /* first block in inode chunk */
1180 xfs_agino_t chunk_agino; /* first agino in inode chunk */
1181 __int32_t chunk_cnt; /* count of free inodes in chunk */
1182 xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
1183 xfs_agblock_t cluster_agbno; /* first block in inode cluster */ 1214 xfs_agblock_t cluster_agbno; /* first block in inode cluster */
1184 xfs_btree_cur_t *cur; /* inode btree cursor */
1185 int error; /* error code */ 1215 int error; /* error code */
1186 int i; /* temp state */
1187 int offset; /* index of inode in its buffer */ 1216 int offset; /* index of inode in its buffer */
1188 int offset_agbno; /* blks from chunk start to inode */ 1217 int offset_agbno; /* blks from chunk start to inode */
1189 1218
1190 ASSERT(ino != NULLFSINO); 1219 ASSERT(ino != NULLFSINO);
1220
1191 /* 1221 /*
1192 * Split up the inode number into its parts. 1222 * Split up the inode number into its parts.
1193 */ 1223 */
@@ -1198,24 +1228,24 @@ xfs_dilocate(
1198 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1228 ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1199#ifdef DEBUG 1229#ifdef DEBUG
1200 /* no diagnostics for bulkstat, ino comes from userspace */ 1230 /* no diagnostics for bulkstat, ino comes from userspace */
1201 if (flags & XFS_IMAP_BULKSTAT) 1231 if (flags & XFS_IGET_BULKSTAT)
1202 return XFS_ERROR(EINVAL); 1232 return XFS_ERROR(EINVAL);
1203 if (agno >= mp->m_sb.sb_agcount) { 1233 if (agno >= mp->m_sb.sb_agcount) {
1204 xfs_fs_cmn_err(CE_ALERT, mp, 1234 xfs_fs_cmn_err(CE_ALERT, mp,
1205 "xfs_dilocate: agno (%d) >= " 1235 "xfs_imap: agno (%d) >= "
1206 "mp->m_sb.sb_agcount (%d)", 1236 "mp->m_sb.sb_agcount (%d)",
1207 agno, mp->m_sb.sb_agcount); 1237 agno, mp->m_sb.sb_agcount);
1208 } 1238 }
1209 if (agbno >= mp->m_sb.sb_agblocks) { 1239 if (agbno >= mp->m_sb.sb_agblocks) {
1210 xfs_fs_cmn_err(CE_ALERT, mp, 1240 xfs_fs_cmn_err(CE_ALERT, mp,
1211 "xfs_dilocate: agbno (0x%llx) >= " 1241 "xfs_imap: agbno (0x%llx) >= "
1212 "mp->m_sb.sb_agblocks (0x%lx)", 1242 "mp->m_sb.sb_agblocks (0x%lx)",
1213 (unsigned long long) agbno, 1243 (unsigned long long) agbno,
1214 (unsigned long) mp->m_sb.sb_agblocks); 1244 (unsigned long) mp->m_sb.sb_agblocks);
1215 } 1245 }
1216 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1246 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1217 xfs_fs_cmn_err(CE_ALERT, mp, 1247 xfs_fs_cmn_err(CE_ALERT, mp,
1218 "xfs_dilocate: ino (0x%llx) != " 1248 "xfs_imap: ino (0x%llx) != "
1219 "XFS_AGINO_TO_INO(mp, agno, agino) " 1249 "XFS_AGINO_TO_INO(mp, agno, agino) "
1220 "(0x%llx)", 1250 "(0x%llx)",
1221 ino, XFS_AGINO_TO_INO(mp, agno, agino)); 1251 ino, XFS_AGINO_TO_INO(mp, agno, agino));
@@ -1224,65 +1254,89 @@ xfs_dilocate(
1224#endif /* DEBUG */ 1254#endif /* DEBUG */
1225 return XFS_ERROR(EINVAL); 1255 return XFS_ERROR(EINVAL);
1226 } 1256 }
1227 if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) || 1257
1228 !(flags & XFS_IMAP_LOOKUP)) { 1258 /*
1259 * If the inode cluster size is the same as the blocksize or
 1260 * smaller we get to the buffer by simple arithmetic.
1261 */
1262 if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) {
1229 offset = XFS_INO_TO_OFFSET(mp, ino); 1263 offset = XFS_INO_TO_OFFSET(mp, ino);
1230 ASSERT(offset < mp->m_sb.sb_inopblock); 1264 ASSERT(offset < mp->m_sb.sb_inopblock);
1231 *bno = XFS_AGB_TO_FSB(mp, agno, agbno); 1265
1232 *off = offset; 1266 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
1233 *len = 1; 1267 imap->im_len = XFS_FSB_TO_BB(mp, 1);
1268 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1234 return 0; 1269 return 0;
1235 } 1270 }
1271
1236 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; 1272 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1237 if (*bno != NULLFSBLOCK) { 1273
1274 /*
1275 * If we get a block number passed from bulkstat we can use it to
1276 * find the buffer easily.
1277 */
1278 if (imap->im_blkno) {
1238 offset = XFS_INO_TO_OFFSET(mp, ino); 1279 offset = XFS_INO_TO_OFFSET(mp, ino);
1239 ASSERT(offset < mp->m_sb.sb_inopblock); 1280 ASSERT(offset < mp->m_sb.sb_inopblock);
1240 cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno); 1281
1241 *off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 1282 cluster_agbno = XFS_DADDR_TO_AGBNO(mp, imap->im_blkno);
1242 offset; 1283 offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
1243 *len = blks_per_cluster; 1284
1285 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1286 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1244 return 0; 1287 return 0;
1245 } 1288 }
1289
1290 /*
1291 * If the inode chunks are aligned then use simple maths to
 1292 * find the location. Otherwise we have to do a btree
 1293 * lookup to find it.
1294 */
1246 if (mp->m_inoalign_mask) { 1295 if (mp->m_inoalign_mask) {
1247 offset_agbno = agbno & mp->m_inoalign_mask; 1296 offset_agbno = agbno & mp->m_inoalign_mask;
1248 chunk_agbno = agbno - offset_agbno; 1297 chunk_agbno = agbno - offset_agbno;
1249 } else { 1298 } else {
1299 xfs_btree_cur_t *cur; /* inode btree cursor */
1300 xfs_agino_t chunk_agino; /* first agino in inode chunk */
1301 __int32_t chunk_cnt; /* count of free inodes in chunk */
1302 xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
1303 xfs_buf_t *agbp; /* agi buffer */
1304 int i; /* temp state */
1305
1250 down_read(&mp->m_peraglock); 1306 down_read(&mp->m_peraglock);
1251 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1307 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1252 up_read(&mp->m_peraglock); 1308 up_read(&mp->m_peraglock);
1253 if (error) { 1309 if (error) {
1254#ifdef DEBUG 1310 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1255 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
1256 "xfs_ialloc_read_agi() returned " 1311 "xfs_ialloc_read_agi() returned "
1257 "error %d, agno %d", 1312 "error %d, agno %d",
1258 error, agno); 1313 error, agno);
1259#endif /* DEBUG */
1260 return error; 1314 return error;
1261 } 1315 }
1262 cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, 1316
1263 (xfs_inode_t *)0, 0); 1317 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1264 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { 1318 error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i);
1265#ifdef DEBUG 1319 if (error) {
1266 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " 1320 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1267 "xfs_inobt_lookup_le() failed"); 1321 "xfs_inobt_lookup_le() failed");
1268#endif /* DEBUG */
1269 goto error0; 1322 goto error0;
1270 } 1323 }
1271 if ((error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, 1324
1272 &chunk_free, &i))) { 1325 error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
1273#ifdef DEBUG 1326 &chunk_free, &i);
1274 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " 1327 if (error) {
1328 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1275 "xfs_inobt_get_rec() failed"); 1329 "xfs_inobt_get_rec() failed");
1276#endif /* DEBUG */
1277 goto error0; 1330 goto error0;
1278 } 1331 }
1279 if (i == 0) { 1332 if (i == 0) {
1280#ifdef DEBUG 1333#ifdef DEBUG
1281 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " 1334 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1282 "xfs_inobt_get_rec() failed"); 1335 "xfs_inobt_get_rec() failed");
1283#endif /* DEBUG */ 1336#endif /* DEBUG */
1284 error = XFS_ERROR(EINVAL); 1337 error = XFS_ERROR(EINVAL);
1285 } 1338 }
1339 error0:
1286 xfs_trans_brelse(tp, agbp); 1340 xfs_trans_brelse(tp, agbp);
1287 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1341 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1288 if (error) 1342 if (error)
@@ -1290,19 +1344,35 @@ xfs_dilocate(
1290 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); 1344 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
1291 offset_agbno = agbno - chunk_agbno; 1345 offset_agbno = agbno - chunk_agbno;
1292 } 1346 }
1347
1293 ASSERT(agbno >= chunk_agbno); 1348 ASSERT(agbno >= chunk_agbno);
1294 cluster_agbno = chunk_agbno + 1349 cluster_agbno = chunk_agbno +
1295 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1350 ((offset_agbno / blks_per_cluster) * blks_per_cluster);
1296 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 1351 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
1297 XFS_INO_TO_OFFSET(mp, ino); 1352 XFS_INO_TO_OFFSET(mp, ino);
1298 *bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno); 1353
1299 *off = offset; 1354 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
1300 *len = blks_per_cluster; 1355 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1356 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1357
1358 /*
1359 * If the inode number maps to a block outside the bounds
 1360 * of the file system then return an error rather than calling
 1361 * read_buf and panicking when we get an error from the
 1362 * driver.
1363 */
1364 if ((imap->im_blkno + imap->im_len) >
1365 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
1366 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1367 "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
1368 " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
1369 (unsigned long long) imap->im_blkno,
1370 (unsigned long long) imap->im_len,
1371 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1372 return XFS_ERROR(EINVAL);
1373 }
1374
1301 return 0; 1375 return 0;
1302error0:
1303 xfs_trans_brelse(tp, agbp);
1304 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1305 return error;
1306} 1376}
1307 1377
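Reviewer note: the unaligned-chunk path above rounds agbno down to its cluster start and derives the inode's index within the cluster buffer; the final bounds check then rejects mappings past the end of the data device. A stand-alone worked example of the rounding arithmetic, with purely illustrative geometry:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* illustrative geometry: 4k blocks, 256-byte inodes */
	unsigned	sb_inopblock = 16;	/* inodes per block */
	unsigned	blks_per_cluster = 2;	/* 8k inode clusters */
	uint64_t	chunk_agbno = 100;	/* first block of inode chunk */
	uint64_t	agbno = 103;		/* block holding our inode */
	unsigned	ino_off = 5;		/* XFS_INO_TO_OFFSET() result */

	uint64_t offset_agbno = agbno - chunk_agbno;
	/* round down to the start of the cluster containing agbno */
	uint64_t cluster_agbno = chunk_agbno +
		(offset_agbno / blks_per_cluster) * blks_per_cluster;
	/* index of the inode within the whole cluster buffer */
	unsigned offset = (agbno - cluster_agbno) * sb_inopblock + ino_off;

	printf("cluster at AG block %llu, inode index %u, byte offset %u\n",
	       (unsigned long long)cluster_agbno, offset, offset << 8);
	return 0;
}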
1308/* 1378/*
@@ -1370,70 +1440,95 @@ xfs_ialloc_log_agi(
1370 xfs_trans_log_buf(tp, bp, first, last); 1440 xfs_trans_log_buf(tp, bp, first, last);
1371} 1441}
1372 1442
1443#ifdef DEBUG
1444STATIC void
1445xfs_check_agi_unlinked(
1446 struct xfs_agi *agi)
1447{
1448 int i;
1449
1450 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
1451 ASSERT(agi->agi_unlinked[i]);
1452}
1453#else
1454#define xfs_check_agi_unlinked(agi)
1455#endif
1456
1373/* 1457/*
1374 * Read in the allocation group header (inode allocation section) 1458 * Read in the allocation group header (inode allocation section)
1375 */ 1459 */
1376int 1460int
1377xfs_ialloc_read_agi( 1461xfs_read_agi(
1378 xfs_mount_t *mp, /* file system mount structure */ 1462 struct xfs_mount *mp, /* file system mount structure */
1379 xfs_trans_t *tp, /* transaction pointer */ 1463 struct xfs_trans *tp, /* transaction pointer */
1380 xfs_agnumber_t agno, /* allocation group number */ 1464 xfs_agnumber_t agno, /* allocation group number */
1381 xfs_buf_t **bpp) /* allocation group hdr buf */ 1465 struct xfs_buf **bpp) /* allocation group hdr buf */
1382{ 1466{
1383 xfs_agi_t *agi; /* allocation group header */ 1467 struct xfs_agi *agi; /* allocation group header */
1384 int agi_ok; /* agi is consistent */ 1468 int agi_ok; /* agi is consistent */
1385 xfs_buf_t *bp; /* allocation group hdr buf */ 1469 int error;
1386 xfs_perag_t *pag; /* per allocation group data */
1387 int error;
1388 1470
1389 ASSERT(agno != NULLAGNUMBER); 1471 ASSERT(agno != NULLAGNUMBER);
1390 error = xfs_trans_read_buf( 1472
1391 mp, tp, mp->m_ddev_targp, 1473 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
1392 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 1474 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
1393 XFS_FSS_TO_BB(mp, 1), 0, &bp); 1475 XFS_FSS_TO_BB(mp, 1), 0, bpp);
1394 if (error) 1476 if (error)
1395 return error; 1477 return error;
1396 ASSERT(bp && !XFS_BUF_GETERROR(bp)); 1478
1479 ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
1480 agi = XFS_BUF_TO_AGI(*bpp);
1397 1481
1398 /* 1482 /*
1399 * Validate the magic number of the agi block. 1483 * Validate the magic number of the agi block.
1400 */ 1484 */
1401 agi = XFS_BUF_TO_AGI(bp); 1485 agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
1402 agi_ok = 1486 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
1403 be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && 1487 be32_to_cpu(agi->agi_seqno) == agno;
1404 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
1405 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1488 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1406 XFS_RANDOM_IALLOC_READ_AGI))) { 1489 XFS_RANDOM_IALLOC_READ_AGI))) {
1407 XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW, 1490 XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
1408 mp, agi); 1491 mp, agi);
1409 xfs_trans_brelse(tp, bp); 1492 xfs_trans_brelse(tp, *bpp);
1410 return XFS_ERROR(EFSCORRUPTED); 1493 return XFS_ERROR(EFSCORRUPTED);
1411 } 1494 }
1495
1496 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF);
1497
1498 xfs_check_agi_unlinked(agi);
1499 return 0;
1500}
1501
1502int
1503xfs_ialloc_read_agi(
1504 struct xfs_mount *mp, /* file system mount structure */
1505 struct xfs_trans *tp, /* transaction pointer */
1506 xfs_agnumber_t agno, /* allocation group number */
1507 struct xfs_buf **bpp) /* allocation group hdr buf */
1508{
1509 struct xfs_agi *agi; /* allocation group header */
1510 struct xfs_perag *pag; /* per allocation group data */
1511 int error;
1512
1513 error = xfs_read_agi(mp, tp, agno, bpp);
1514 if (error)
1515 return error;
1516
1517 agi = XFS_BUF_TO_AGI(*bpp);
1412 pag = &mp->m_perag[agno]; 1518 pag = &mp->m_perag[agno];
1519
1413 if (!pag->pagi_init) { 1520 if (!pag->pagi_init) {
1414 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); 1521 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1415 pag->pagi_count = be32_to_cpu(agi->agi_count); 1522 pag->pagi_count = be32_to_cpu(agi->agi_count);
1416 pag->pagi_init = 1; 1523 pag->pagi_init = 1;
1417 } else {
1418 /*
1419 * It's possible for these to be out of sync if
1420 * we are in the middle of a forced shutdown.
1421 */
1422 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1423 XFS_FORCED_SHUTDOWN(mp));
1424 } 1524 }
1425 1525
1426#ifdef DEBUG 1526 /*
1427 { 1527 * It's possible for these to be out of sync if
1428 int i; 1528 * we are in the middle of a forced shutdown.
1429 1529 */
1430 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) 1530 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1431 ASSERT(agi->agi_unlinked[i]); 1531 XFS_FORCED_SHUTDOWN(mp));
1432 }
1433#endif
1434
1435 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
1436 *bpp = bp;
1437 return 0; 1532 return 0;
1438} 1533}
1439 1534
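Reviewer note: relative to the old code, xfs_read_agi() additionally verifies that the header's sequence number matches the AG it was read from. A small user-space model of the strengthened check, with endianness elided and the version test reduced to a stand-in (the "XAGI" magic value is from xfs_ag.h):

#include <stdbool.h>
#include <stdint.h>

#define AGI_MAGIC	0x58414749	/* "XAGI" */

struct agi_model {
	uint32_t	agi_magicnum;
	uint32_t	agi_versionnum;
	uint32_t	agi_seqno;	/* which AG this header claims to be */
};

static bool agi_ok(const struct agi_model *agi, uint32_t agno)
{
	return agi->agi_magicnum == AGI_MAGIC &&
	       agi->agi_versionnum == 1 &&	/* XFS_AGI_GOOD_VERSION() stand-in */
	       agi->agi_seqno == agno;		/* the check added by this patch */
}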
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 4e30ec1d13bc..50f558a4e0a8 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -20,6 +20,7 @@
20 20
21struct xfs_buf; 21struct xfs_buf;
22struct xfs_dinode; 22struct xfs_dinode;
23struct xfs_imap;
23struct xfs_mount; 24struct xfs_mount;
24struct xfs_trans; 25struct xfs_trans;
25 26
@@ -56,7 +57,6 @@ static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
56} 57}
57 58
58 59
59#ifdef __KERNEL__
60/* 60/*
61 * Allocate an inode on disk. 61 * Allocate an inode on disk.
62 * Mode is used to tell whether the new inode will need space, and whether 62 * Mode is used to tell whether the new inode will need space, and whether
@@ -105,17 +105,14 @@ xfs_difree(
105 xfs_ino_t *first_ino); /* first inode in deleted cluster */ 105 xfs_ino_t *first_ino); /* first inode in deleted cluster */
106 106
107/* 107/*
108 * Return the location of the inode in bno/len/off, 108 * Return the location of the inode in imap, for mapping it into a buffer.
109 * for mapping it into a buffer.
110 */ 109 */
111int 110int
112xfs_dilocate( 111xfs_imap(
113 struct xfs_mount *mp, /* file system mount structure */ 112 struct xfs_mount *mp, /* file system mount structure */
114 struct xfs_trans *tp, /* transaction pointer */ 113 struct xfs_trans *tp, /* transaction pointer */
115 xfs_ino_t ino, /* inode to locate */ 114 xfs_ino_t ino, /* inode to locate */
116 xfs_fsblock_t *bno, /* output: block containing inode */ 115 struct xfs_imap *imap, /* location map structure */
117 int *len, /* output: num blocks in cluster*/
118 int *off, /* output: index in block of inode */
119 uint flags); /* flags for inode btree lookup */ 116 uint flags); /* flags for inode btree lookup */
120 117
121/* 118/*
@@ -154,6 +151,24 @@ xfs_ialloc_pagi_init(
154 struct xfs_trans *tp, /* transaction pointer */ 151 struct xfs_trans *tp, /* transaction pointer */
155 xfs_agnumber_t agno); /* allocation group number */ 152 xfs_agnumber_t agno); /* allocation group number */
156 153
157#endif /* __KERNEL__ */ 154/*
155 * Lookup the first record greater than or equal to ino
156 * in the btree given by cur.
157 */
158int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
159 __int32_t fcnt, xfs_inofree_t free, int *stat);
160
161/*
162 * Lookup the first record less than or equal to ino
163 * in the btree given by cur.
164 */
165int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
166 __int32_t fcnt, xfs_inofree_t free, int *stat);
167
168/*
169 * Get the data from the pointed-to record.
170 */
171extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
172 __int32_t *fcnt, xfs_inofree_t *free, int *stat);
158 173
159#endif /* __XFS_IALLOC_H__ */ 174#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 83502f3edef0..99f2408e8d8e 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -35,2044 +35,349 @@
35#include "xfs_dinode.h" 35#include "xfs_dinode.h"
36#include "xfs_inode.h" 36#include "xfs_inode.h"
37#include "xfs_btree.h" 37#include "xfs_btree.h"
38#include "xfs_btree_trace.h"
38#include "xfs_ialloc.h" 39#include "xfs_ialloc.h"
39#include "xfs_alloc.h" 40#include "xfs_alloc.h"
40#include "xfs_error.h" 41#include "xfs_error.h"
41 42
42STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
43STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
44STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
45STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
46STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
47STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
48STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
49STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
50 xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
51STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
52 43
53/* 44STATIC int
54 * Single level of the xfs_inobt_delete record deletion routine. 45xfs_inobt_get_minrecs(
55 * Delete record pointed to by cur/level. 46 struct xfs_btree_cur *cur,
56 * Remove the record from its block then rebalance the tree. 47 int level)
57 * Return 0 for error, 1 for done, 2 to go on to the next level.
58 */
59STATIC int /* error */
60xfs_inobt_delrec(
61 xfs_btree_cur_t *cur, /* btree cursor */
62 int level, /* level removing record from */
63 int *stat) /* fail/done/go-on */
64{ 48{
65 xfs_buf_t *agbp; /* buffer for a.g. inode header */ 49 return cur->bc_mp->m_inobt_mnr[level != 0];
66 xfs_mount_t *mp; /* mount structure */ 50}
67 xfs_agi_t *agi; /* allocation group inode header */
68 xfs_inobt_block_t *block; /* btree block record/key lives in */
69 xfs_agblock_t bno; /* btree block number */
70 xfs_buf_t *bp; /* buffer for block */
71 int error; /* error return value */
72 int i; /* loop index */
73 xfs_inobt_key_t key; /* kp points here if block is level 0 */
74 xfs_inobt_key_t *kp = NULL; /* pointer to btree keys */
75 xfs_agblock_t lbno; /* left block's block number */
76 xfs_buf_t *lbp; /* left block's buffer pointer */
77 xfs_inobt_block_t *left; /* left btree block */
78 xfs_inobt_key_t *lkp; /* left block key pointer */
79 xfs_inobt_ptr_t *lpp; /* left block address pointer */
80 int lrecs = 0; /* number of records in left block */
81 xfs_inobt_rec_t *lrp; /* left block record pointer */
82 xfs_inobt_ptr_t *pp = NULL; /* pointer to btree addresses */
83 int ptr; /* index in btree block for this rec */
84 xfs_agblock_t rbno; /* right block's block number */
85 xfs_buf_t *rbp; /* right block's buffer pointer */
86 xfs_inobt_block_t *right; /* right btree block */
87 xfs_inobt_key_t *rkp; /* right block key pointer */
88 xfs_inobt_rec_t *rp; /* pointer to btree records */
89 xfs_inobt_ptr_t *rpp; /* right block address pointer */
90 int rrecs = 0; /* number of records in right block */
91 int numrecs;
92 xfs_inobt_rec_t *rrp; /* right block record pointer */
93 xfs_btree_cur_t *tcur; /* temporary btree cursor */
94
95 mp = cur->bc_mp;
96
97 /*
98 * Get the index of the entry being deleted, check for nothing there.
99 */
100 ptr = cur->bc_ptrs[level];
101 if (ptr == 0) {
102 *stat = 0;
103 return 0;
104 }
105
106 /*
107 * Get the buffer & block containing the record or key/ptr.
108 */
109 bp = cur->bc_bufs[level];
110 block = XFS_BUF_TO_INOBT_BLOCK(bp);
111#ifdef DEBUG
112 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
113 return error;
114#endif
115 /*
116 * Fail if we're off the end of the block.
117 */
118 51
119 numrecs = be16_to_cpu(block->bb_numrecs); 52STATIC struct xfs_btree_cur *
120 if (ptr > numrecs) { 53xfs_inobt_dup_cursor(
121 *stat = 0; 54 struct xfs_btree_cur *cur)
122 return 0; 55{
123 } 56 return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
124 /* 57 cur->bc_private.a.agbp, cur->bc_private.a.agno);
125 * It's a nonleaf. Excise the key and ptr being deleted, by 58}
126 * sliding the entries past them down one.
127 * Log the changed areas of the block.
128 */
129 if (level > 0) {
130 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
131 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
132#ifdef DEBUG
133 for (i = ptr; i < numrecs; i++) {
134 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
135 return error;
136 }
137#endif
138 if (ptr < numrecs) {
139 memmove(&kp[ptr - 1], &kp[ptr],
140 (numrecs - ptr) * sizeof(*kp));
141 memmove(&pp[ptr - 1], &pp[ptr],
142 (numrecs - ptr) * sizeof(*kp));
143 xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
144 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
145 }
146 }
147 /*
148 * It's a leaf. Excise the record being deleted, by sliding the
149 * entries past it down one. Log the changed areas of the block.
150 */
151 else {
152 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
153 if (ptr < numrecs) {
154 memmove(&rp[ptr - 1], &rp[ptr],
155 (numrecs - ptr) * sizeof(*rp));
156 xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
157 }
158 /*
159 * If it's the first record in the block, we'll need a key
160 * structure to pass up to the next level (updkey).
161 */
162 if (ptr == 1) {
163 key.ir_startino = rp->ir_startino;
164 kp = &key;
165 }
166 }
167 /*
168 * Decrement and log the number of entries in the block.
169 */
170 numrecs--;
171 block->bb_numrecs = cpu_to_be16(numrecs);
172 xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
173 /*
174 * Is this the root level? If so, we're almost done.
175 */
176 if (level == cur->bc_nlevels - 1) {
177 /*
178 * If this is the root level,
179 * and there's only one entry left,
180 * and it's NOT the leaf level,
181 * then we can get rid of this level.
182 */
183 if (numrecs == 1 && level > 0) {
184 agbp = cur->bc_private.a.agbp;
185 agi = XFS_BUF_TO_AGI(agbp);
186 /*
187 * pp is still set to the first pointer in the block.
188 * Make it the new root of the btree.
189 */
190 bno = be32_to_cpu(agi->agi_root);
191 agi->agi_root = *pp;
192 be32_add_cpu(&agi->agi_level, -1);
193 /*
194 * Free the block.
195 */
196 if ((error = xfs_free_extent(cur->bc_tp,
197 XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
198 return error;
199 xfs_trans_binval(cur->bc_tp, bp);
200 xfs_ialloc_log_agi(cur->bc_tp, agbp,
201 XFS_AGI_ROOT | XFS_AGI_LEVEL);
202 /*
203 * Update the cursor so there's one fewer level.
204 */
205 cur->bc_bufs[level] = NULL;
206 cur->bc_nlevels--;
207 } else if (level > 0 &&
208 (error = xfs_inobt_decrement(cur, level, &i)))
209 return error;
210 *stat = 1;
211 return 0;
212 }
213 /*
214 * If we deleted the leftmost entry in the block, update the
215 * key values above us in the tree.
216 */
217 if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
218 return error;
219 /*
220 * If the number of records remaining in the block is at least
221 * the minimum, we're done.
222 */
223 if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
224 if (level > 0 &&
225 (error = xfs_inobt_decrement(cur, level, &i)))
226 return error;
227 *stat = 1;
228 return 0;
229 }
230 /*
231 * Otherwise, we have to move some records around to keep the
232 * tree balanced. Look at the left and right sibling blocks to
233 * see if we can re-balance by moving only one record.
234 */
235 rbno = be32_to_cpu(block->bb_rightsib);
236 lbno = be32_to_cpu(block->bb_leftsib);
237 bno = NULLAGBLOCK;
238 ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
239 /*
240 * Duplicate the cursor so our btree manipulations here won't
241 * disrupt the next level up.
242 */
243 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
244 return error;
245 /*
246 * If there's a right sibling, see if it's ok to shift an entry
247 * out of it.
248 */
249 if (rbno != NULLAGBLOCK) {
250 /*
251 * Move the temp cursor to the last entry in the next block.
252 * Actually any entry but the first would suffice.
253 */
254 i = xfs_btree_lastrec(tcur, level);
255 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
256 if ((error = xfs_inobt_increment(tcur, level, &i)))
257 goto error0;
258 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
259 i = xfs_btree_lastrec(tcur, level);
260 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
261 /*
262 * Grab a pointer to the block.
263 */
264 rbp = tcur->bc_bufs[level];
265 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
266#ifdef DEBUG
267 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
268 goto error0;
269#endif
270 /*
271 * Grab the current block number, for future use.
272 */
273 bno = be32_to_cpu(right->bb_leftsib);
274 /*
275 * If right block is full enough so that removing one entry
276 * won't make it too empty, and left-shifting an entry out
277 * of right to us works, we're done.
278 */
279 if (be16_to_cpu(right->bb_numrecs) - 1 >=
280 XFS_INOBT_BLOCK_MINRECS(level, cur)) {
281 if ((error = xfs_inobt_lshift(tcur, level, &i)))
282 goto error0;
283 if (i) {
284 ASSERT(be16_to_cpu(block->bb_numrecs) >=
285 XFS_INOBT_BLOCK_MINRECS(level, cur));
286 xfs_btree_del_cursor(tcur,
287 XFS_BTREE_NOERROR);
288 if (level > 0 &&
289 (error = xfs_inobt_decrement(cur, level,
290 &i)))
291 return error;
292 *stat = 1;
293 return 0;
294 }
295 }
296 /*
297 * Otherwise, grab the number of records in right for
298 * future reference, and fix up the temp cursor to point
299 * to our block again (last record).
300 */
301 rrecs = be16_to_cpu(right->bb_numrecs);
302 if (lbno != NULLAGBLOCK) {
303 xfs_btree_firstrec(tcur, level);
304 if ((error = xfs_inobt_decrement(tcur, level, &i)))
305 goto error0;
306 }
307 }
308 /*
309 * If there's a left sibling, see if it's ok to shift an entry
310 * out of it.
311 */
312 if (lbno != NULLAGBLOCK) {
313 /*
314 * Move the temp cursor to the first entry in the
315 * previous block.
316 */
317 xfs_btree_firstrec(tcur, level);
318 if ((error = xfs_inobt_decrement(tcur, level, &i)))
319 goto error0;
320 xfs_btree_firstrec(tcur, level);
321 /*
322 * Grab a pointer to the block.
323 */
324 lbp = tcur->bc_bufs[level];
325 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
326#ifdef DEBUG
327 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
328 goto error0;
329#endif
330 /*
331 * Grab the current block number, for future use.
332 */
333 bno = be32_to_cpu(left->bb_rightsib);
334 /*
335 * If left block is full enough so that removing one entry
336 * won't make it too empty, and right-shifting an entry out
337 * of left to us works, we're done.
338 */
339 if (be16_to_cpu(left->bb_numrecs) - 1 >=
340 XFS_INOBT_BLOCK_MINRECS(level, cur)) {
341 if ((error = xfs_inobt_rshift(tcur, level, &i)))
342 goto error0;
343 if (i) {
344 ASSERT(be16_to_cpu(block->bb_numrecs) >=
345 XFS_INOBT_BLOCK_MINRECS(level, cur));
346 xfs_btree_del_cursor(tcur,
347 XFS_BTREE_NOERROR);
348 if (level == 0)
349 cur->bc_ptrs[0]++;
350 *stat = 1;
351 return 0;
352 }
353 }
354 /*
355 * Otherwise, grab the number of records in right for
356 * future reference.
357 */
358 lrecs = be16_to_cpu(left->bb_numrecs);
359 }
360 /*
361 * Delete the temp cursor, we're done with it.
362 */
363 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
364 /*
365 * If here, we need to do a join to keep the tree balanced.
366 */
367 ASSERT(bno != NULLAGBLOCK);
368 /*
369 * See if we can join with the left neighbor block.
370 */
371 if (lbno != NULLAGBLOCK &&
372 lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
373 /*
374 * Set "right" to be the starting block,
375 * "left" to be the left neighbor.
376 */
377 rbno = bno;
378 right = block;
379 rrecs = be16_to_cpu(right->bb_numrecs);
380 rbp = bp;
381 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
382 cur->bc_private.a.agno, lbno, 0, &lbp,
383 XFS_INO_BTREE_REF)))
384 return error;
385 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
386 lrecs = be16_to_cpu(left->bb_numrecs);
387 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
388 return error;
389 }
390 /*
391 * If that won't work, see if we can join with the right neighbor block.
392 */
393 else if (rbno != NULLAGBLOCK &&
394 rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
395 /*
396 * Set "left" to be the starting block,
397 * "right" to be the right neighbor.
398 */
399 lbno = bno;
400 left = block;
401 lrecs = be16_to_cpu(left->bb_numrecs);
402 lbp = bp;
403 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
404 cur->bc_private.a.agno, rbno, 0, &rbp,
405 XFS_INO_BTREE_REF)))
406 return error;
407 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
408 rrecs = be16_to_cpu(right->bb_numrecs);
409 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
410 return error;
411 }
412 /*
413 * Otherwise, we can't fix the imbalance.
414 * Just return. This is probably a logic error, but it's not fatal.
415 */
416 else {
417 if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
418 return error;
419 *stat = 1;
420 return 0;
421 }
422 /*
423 * We're now going to join "left" and "right" by moving all the stuff
424 * in "right" to "left" and deleting "right".
425 */
426 if (level > 0) {
427 /*
428 * It's a non-leaf. Move keys and pointers.
429 */
430 lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
431 lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
432 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
433 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
434#ifdef DEBUG
435 for (i = 0; i < rrecs; i++) {
436 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
437 return error;
438 }
439#endif
440 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
441 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
442 xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
443 xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
444 } else {
445 /*
446 * It's a leaf. Move records.
447 */
448 lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
449 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
450 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
451 xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
452 }
453 /*
454 * If we joined with the left neighbor, set the buffer in the
455 * cursor to the left block, and fix up the index.
456 */
457 if (bp != lbp) {
458 xfs_btree_setbuf(cur, level, lbp);
459 cur->bc_ptrs[level] += lrecs;
460 }
461 /*
462 * If we joined with the right neighbor and there's a level above
463 * us, increment the cursor at that level.
464 */
465 else if (level + 1 < cur->bc_nlevels &&
466 (error = xfs_alloc_increment(cur, level + 1, &i)))
467 return error;
468 /*
469 * Fix up the number of records in the surviving block.
470 */
471 lrecs += rrecs;
472 left->bb_numrecs = cpu_to_be16(lrecs);
473 /*
474 * Fix up the right block pointer in the surviving block, and log it.
475 */
476 left->bb_rightsib = right->bb_rightsib;
477 xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
478 /*
479 * If there is a right sibling now, make it point to the
480 * remaining block.
481 */
482 if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
483 xfs_inobt_block_t *rrblock;
484 xfs_buf_t *rrbp;
485 59
486 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, 60STATIC void
487 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, 61xfs_inobt_set_root(
488 &rrbp, XFS_INO_BTREE_REF))) 62 struct xfs_btree_cur *cur,
489 return error; 63 union xfs_btree_ptr *nptr,
490 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); 64 int inc) /* level change */
491 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) 65{
492 return error; 66 struct xfs_buf *agbp = cur->bc_private.a.agbp;
493 rrblock->bb_leftsib = cpu_to_be32(lbno); 67 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
494 xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
495 }
496 /*
497 * Free the deleting block.
498 */
499 if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
500 cur->bc_private.a.agno, rbno), 1)))
501 return error;
502 xfs_trans_binval(cur->bc_tp, rbp);
503 /*
504 * Readjust the ptr at this level if it's not a leaf, since it's
505 * still pointing at the deletion point, which makes the cursor
506 * inconsistent. If this makes the ptr 0, the caller fixes it up.
507 * We can't use decrement because it would change the next level up.
508 */
509 if (level > 0)
510 cur->bc_ptrs[level]--;
511 /*
512 * Return value means the next level up has something to do.
513 */
514 *stat = 2;
515 return 0;
516 68
517error0: 69 agi->agi_root = nptr->s;
518 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 70 be32_add_cpu(&agi->agi_level, inc);
519 return error; 71 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
520} 72}
521 73
522/* 74STATIC int
523 * Insert one record/level. Return information to the caller 75xfs_inobt_alloc_block(
524 * allowing the next level up to proceed if necessary. 76 struct xfs_btree_cur *cur,
525 */ 77 union xfs_btree_ptr *start,
526STATIC int /* error */ 78 union xfs_btree_ptr *new,
527xfs_inobt_insrec( 79 int length,
528 xfs_btree_cur_t *cur, /* btree cursor */ 80 int *stat)
529 int level, /* level to insert record at */
530 xfs_agblock_t *bnop, /* i/o: block number inserted */
531 xfs_inobt_rec_t *recp, /* i/o: record data inserted */
532 xfs_btree_cur_t **curp, /* output: new cursor replacing cur */
533 int *stat) /* success/failure */
534{ 81{
535 xfs_inobt_block_t *block; /* btree block record/key lives in */ 82 xfs_alloc_arg_t args; /* block allocation args */
536 xfs_buf_t *bp; /* buffer for block */ 83 int error; /* error return value */
537 int error; /* error return value */ 84 xfs_agblock_t sbno = be32_to_cpu(start->s);
538 int i; /* loop index */
539 xfs_inobt_key_t key; /* key value being inserted */
540 xfs_inobt_key_t *kp=NULL; /* pointer to btree keys */
541 xfs_agblock_t nbno; /* block number of allocated block */
542 xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
543 xfs_inobt_key_t nkey; /* new key value, from split */
544 xfs_inobt_rec_t nrec; /* new record value, for caller */
545 int numrecs;
546 int optr; /* old ptr value */
547 xfs_inobt_ptr_t *pp; /* pointer to btree addresses */
548 int ptr; /* index in btree block for this rec */
549 xfs_inobt_rec_t *rp=NULL; /* pointer to btree records */
550 85
551 /* 86 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
552 * GCC doesn't understand the (arguably complex) control flow in
553 * this function and complains about uninitialized structure fields
554 * without this.
555 */
556 memset(&nrec, 0, sizeof(nrec));
557 87
558 /* 88 memset(&args, 0, sizeof(args));
559 * If we made it to the root level, allocate a new root block 89 args.tp = cur->bc_tp;
560 * and we're done. 90 args.mp = cur->bc_mp;
561 */ 91 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
562 if (level >= cur->bc_nlevels) { 92 args.minlen = 1;
563 error = xfs_inobt_newroot(cur, &i); 93 args.maxlen = 1;
564 *bnop = NULLAGBLOCK; 94 args.prod = 1;
565 *stat = i; 95 args.type = XFS_ALLOCTYPE_NEAR_BNO;
96
97 error = xfs_alloc_vextent(&args);
98 if (error) {
99 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
566 return error; 100 return error;
567 } 101 }
568 /* 102 if (args.fsbno == NULLFSBLOCK) {
569 * Make a key out of the record data to be inserted, and save it. 103 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
570 */
571 key.ir_startino = recp->ir_startino;
572 optr = ptr = cur->bc_ptrs[level];
573 /*
574 * If we're off the left edge, return failure.
575 */
576 if (ptr == 0) {
577 *stat = 0; 104 *stat = 0;
578 return 0; 105 return 0;
579 } 106 }
580 /* 107 ASSERT(args.len == 1);
581 * Get pointers to the btree buffer and block. 108 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
582 */ 109
583 bp = cur->bc_bufs[level]; 110 new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
584 block = XFS_BUF_TO_INOBT_BLOCK(bp);
585 numrecs = be16_to_cpu(block->bb_numrecs);
586#ifdef DEBUG
587 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
588 return error;
589 /*
590 * Check that the new entry is being inserted in the right place.
591 */
592 if (ptr <= numrecs) {
593 if (level == 0) {
594 rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
595 xfs_btree_check_rec(cur->bc_btnum, recp, rp);
596 } else {
597 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
598 xfs_btree_check_key(cur->bc_btnum, &key, kp);
599 }
600 }
601#endif
602 nbno = NULLAGBLOCK;
603 ncur = NULL;
604 /*
605 * If the block is full, we can't insert the new entry until we
606 * make the block un-full.
607 */
608 if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
609 /*
610 * First, try shifting an entry to the right neighbor.
611 */
612 if ((error = xfs_inobt_rshift(cur, level, &i)))
613 return error;
614 if (i) {
615 /* nothing */
616 }
617 /*
618 * Next, try shifting an entry to the left neighbor.
619 */
620 else {
621 if ((error = xfs_inobt_lshift(cur, level, &i)))
622 return error;
623 if (i) {
624 optr = ptr = cur->bc_ptrs[level];
625 } else {
626 /*
627 * Next, try splitting the current block
628 * in half. If this works we have to
629 * re-set our variables because
630 * we could be in a different block now.
631 */
632 if ((error = xfs_inobt_split(cur, level, &nbno,
633 &nkey, &ncur, &i)))
634 return error;
635 if (i) {
636 bp = cur->bc_bufs[level];
637 block = XFS_BUF_TO_INOBT_BLOCK(bp);
638#ifdef DEBUG
639 if ((error = xfs_btree_check_sblock(cur,
640 block, level, bp)))
641 return error;
642#endif
643 ptr = cur->bc_ptrs[level];
644 nrec.ir_startino = nkey.ir_startino;
645 } else {
646 /*
647 * Otherwise the insert fails.
648 */
649 *stat = 0;
650 return 0;
651 }
652 }
653 }
654 }
655 /*
656 * At this point we know there's room for our new entry in the block
657 * we're pointing at.
658 */
659 numrecs = be16_to_cpu(block->bb_numrecs);
660 if (level > 0) {
661 /*
662 * It's a non-leaf entry. Make a hole for the new data
663 * in the key and ptr regions of the block.
664 */
665 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
666 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
667#ifdef DEBUG
668 for (i = numrecs; i >= ptr; i--) {
669 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
670 return error;
671 }
672#endif
673 memmove(&kp[ptr], &kp[ptr - 1],
674 (numrecs - ptr + 1) * sizeof(*kp));
675 memmove(&pp[ptr], &pp[ptr - 1],
676 (numrecs - ptr + 1) * sizeof(*pp));
677 /*
678 * Now stuff the new data in, bump numrecs and log the new data.
679 */
680#ifdef DEBUG
681 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
682 return error;
683#endif
684 kp[ptr - 1] = key;
685 pp[ptr - 1] = cpu_to_be32(*bnop);
686 numrecs++;
687 block->bb_numrecs = cpu_to_be16(numrecs);
688 xfs_inobt_log_keys(cur, bp, ptr, numrecs);
689 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
690 } else {
691 /*
692 * It's a leaf entry. Make a hole for the new record.
693 */
694 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
695 memmove(&rp[ptr], &rp[ptr - 1],
696 (numrecs - ptr + 1) * sizeof(*rp));
697 /*
698 * Now stuff the new record in, bump numrecs
699 * and log the new data.
700 */
701 rp[ptr - 1] = *recp;
702 numrecs++;
703 block->bb_numrecs = cpu_to_be16(numrecs);
704 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
705 }
706 /*
707 * Log the new number of records in the btree header.
708 */
709 xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
710#ifdef DEBUG
711 /*
712 * Check that the key/record is in the right place, now.
713 */
714 if (ptr < numrecs) {
715 if (level == 0)
716 xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
717 rp + ptr);
718 else
719 xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
720 kp + ptr);
721 }
722#endif
723 /*
724 * If we inserted at the start of a block, update the parents' keys.
725 */
726 if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
727 return error;
728 /*
729 * Return the new block number, if any.
730 * If there is one, give back a record value and a cursor too.
731 */
732 *bnop = nbno;
733 if (nbno != NULLAGBLOCK) {
734 *recp = nrec;
735 *curp = ncur;
736 }
737	*stat = 1;
738	return 0;
739}
740
741/*
742 * Log header fields from a btree block.
743 */
744STATIC void
745xfs_inobt_log_block(
746	xfs_trans_t		*tp,	/* transaction pointer */
747	xfs_buf_t		*bp,	/* buffer containing btree block */
748	int			fields)	/* mask of fields: XFS_BB_... */
749{
750	int			first;	/* first byte offset logged */
751	int			last;	/* last byte offset logged */
752	static const short	offsets[] = {	/* table of offsets */
753		offsetof(xfs_inobt_block_t, bb_magic),
754		offsetof(xfs_inobt_block_t, bb_level),
755		offsetof(xfs_inobt_block_t, bb_numrecs),
756		offsetof(xfs_inobt_block_t, bb_leftsib),
757		offsetof(xfs_inobt_block_t, bb_rightsib),
758		sizeof(xfs_inobt_block_t)
759	};
760
761	xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
762	xfs_trans_log_buf(tp, bp, first, last);
763}
114
115STATIC int
116xfs_inobt_free_block(
117	struct xfs_btree_cur	*cur,
118	struct xfs_buf		*bp)
119{
120	xfs_fsblock_t		fsbno;
121	int			error;
122
123	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
124	error = xfs_free_extent(cur->bc_tp, fsbno, 1);
125	if (error)
126		return error;
127
128	xfs_trans_binval(cur->bc_tp, bp);
129	return error;
130}
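The new xfs_inobt_free_block above converts the buffer's disk address back to a filesystem block number before handing the block to xfs_free_extent, then invalidates the buffer so the freed block is never written back by the transaction. A minimal sketch of that address arithmetic, using simplified stand-in names for the mount geometry rather than the real XFS_DADDR_TO_FSB macro:

/*
 * Illustrative only: a daddr counts 512-byte sectors; shifting by the
 * mount's block-to-sector log gives the linear fs block, which splits
 * into an AG number and an AG-relative block.  XFS packs the pair into
 * one fsblock number as (agno << agblklog) | agbno.
 */
static unsigned long long
daddr_to_fsb_sketch(unsigned long long daddr, int blkbb_log,
		    int agblklog, unsigned long long agblocks)
{
	unsigned long long bno = daddr >> blkbb_log;	/* linear fs block */
	unsigned long long agno = bno / agblocks;	/* which AG */
	unsigned long long agbno = bno % agblocks;	/* block within AG */

	return (agno << agblklog) | agbno;		/* packed fsbno */
}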
764
765/*
766 * Log keys from a btree block (nonleaf).
767 */
768STATIC void
769xfs_inobt_log_keys(
770	xfs_btree_cur_t		*cur,	/* btree cursor */
771	xfs_buf_t		*bp,	/* buffer containing btree block */
772	int			kfirst,	/* index of first key to log */
773	int			klast)	/* index of last key to log */
774{
775	xfs_inobt_block_t	*block;	/* btree block to log from */
776	int			first;	/* first byte offset logged */
777	xfs_inobt_key_t		*kp;	/* key pointer in btree block */
778	int			last;	/* last byte offset logged */
779
780	block = XFS_BUF_TO_INOBT_BLOCK(bp);
781	kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
782	first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
783	last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
784	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
785}
131
132STATIC int
133xfs_inobt_get_maxrecs(
134	struct xfs_btree_cur	*cur,
135	int			level)
136{
137	return cur->bc_mp->m_inobt_mxr[level != 0];
138}
786
787/*
788 * Log block pointer fields from a btree block (nonleaf).
789 */
790STATIC void
791xfs_inobt_log_ptrs(
792	xfs_btree_cur_t		*cur,	/* btree cursor */
793	xfs_buf_t		*bp,	/* buffer containing btree block */
794	int			pfirst,	/* index of first pointer to log */
795	int			plast)	/* index of last pointer to log */
796{
797	xfs_inobt_block_t	*block;	/* btree block to log from */
798	int			first;	/* first byte offset logged */
799	int			last;	/* last byte offset logged */
800	xfs_inobt_ptr_t		*pp;	/* block-pointer pointer in btree blk */
801
802	block = XFS_BUF_TO_INOBT_BLOCK(bp);
803	pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
804	first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
805	last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
806	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
807}
139
140STATIC void
141xfs_inobt_init_key_from_rec(
142	union xfs_btree_key	*key,
143	union xfs_btree_rec	*rec)
144{
145	key->inobt.ir_startino = rec->inobt.ir_startino;
146}
808
809/*
810 * Log records from a btree block (leaf).
811 */
812STATIC void
813xfs_inobt_log_recs(
814	xfs_btree_cur_t		*cur,	/* btree cursor */
815	xfs_buf_t		*bp,	/* buffer containing btree block */
816	int			rfirst,	/* index of first record to log */
817	int			rlast)	/* index of last record to log */
818{
819	xfs_inobt_block_t	*block;	/* btree block to log from */
820	int			first;	/* first byte offset logged */
821	int			last;	/* last byte offset logged */
822	xfs_inobt_rec_t		*rp;	/* record pointer for btree block */
823
824	block = XFS_BUF_TO_INOBT_BLOCK(bp);
825	rp = XFS_INOBT_REC_ADDR(block, 1, cur);
826	first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
827	last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
828	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
829}
147
148STATIC void
149xfs_inobt_init_rec_from_key(
150	union xfs_btree_key	*key,
151	union xfs_btree_rec	*rec)
152{
153	rec->inobt.ir_startino = key->inobt.ir_startino;
154}
155
156STATIC void
157xfs_inobt_init_rec_from_cur(
158	struct xfs_btree_cur	*cur,
159	union xfs_btree_rec	*rec)
160{
161	rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
162	rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
163	rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
164}
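The init_* hooks above translate between the cursor's CPU-endian working record (cur->bc_rec.i) and the big-endian on-disk layout carried in union xfs_btree_rec. The union is what lets one generic btree core move records whose contents it never interprets; a hedged sketch of the idea, with member layouts abbreviated from the real headers:

/*
 * Simplified model of the record union: each btree type overlays its
 * own on-disk (endian-annotated) layout, so generic code can copy
 * records opaquely while type-specific hooks like
 * xfs_inobt_init_rec_from_cur do the endian conversion at the edges.
 */
union sketch_btree_rec {
	struct {
		__be32	ir_startino;	/* starting inode of chunk */
		__be32	ir_freecount;	/* count of free inodes */
		__be64	ir_free;	/* free inode mask */
	} inobt;
	struct {
		__be32	ar_startblock;	/* starting block number */
		__be32	ar_blockcount;	/* count of free blocks */
	} alloc;
};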
165
166/*
167 * initial value of ptr for lookup
168 */
169STATIC void
170xfs_inobt_init_ptr_from_cur(
171	struct xfs_btree_cur	*cur,
172	union xfs_btree_ptr	*ptr)
173{
174	struct xfs_agi		*agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
175
176	ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
177
178	ptr->s = agi->agi_root;
179}
830
831/*
832 * Lookup the record.  The cursor is made to point to it, based on dir.
833 * Return 0 if can't find any such record, 1 for success.
834 */
835STATIC int				/* error */
836xfs_inobt_lookup(
837	xfs_btree_cur_t		*cur,	/* btree cursor */
838	xfs_lookup_t		dir,	/* <=, ==, or >= */
839	int			*stat)	/* success/failure */
840{
841	xfs_agblock_t		agbno;	/* a.g. relative btree block number */
842 xfs_agnumber_t agno; /* allocation group number */
843 xfs_inobt_block_t *block=NULL; /* current btree block */
844 __int64_t diff; /* difference for the current key */
845 int error; /* error return value */
846 int keyno=0; /* current key number */
847 int level; /* level in the btree */
848 xfs_mount_t *mp; /* file system mount point */
849
850 /*
851 * Get the allocation group header, and the root block number.
852 */
853 mp = cur->bc_mp;
854 {
855 xfs_agi_t *agi; /* a.g. inode header */
856
857 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
858 agno = be32_to_cpu(agi->agi_seqno);
859 agbno = be32_to_cpu(agi->agi_root);
860 }
861 /*
862 * Iterate over each level in the btree, starting at the root.
863 * For each level above the leaves, find the key we need, based
864 * on the lookup record, then follow the corresponding block
865 * pointer down to the next level.
866 */
867 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
868 xfs_buf_t *bp; /* buffer pointer for btree block */
869 xfs_daddr_t d; /* disk address of btree block */
870
871 /*
872 * Get the disk address we're looking for.
873 */
874 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
875 /*
876 * If the old buffer at this level is for a different block,
877 * throw it away, otherwise just use it.
878 */
879 bp = cur->bc_bufs[level];
880 if (bp && XFS_BUF_ADDR(bp) != d)
881 bp = NULL;
882 if (!bp) {
883 /*
884 * Need to get a new buffer. Read it, then
885 * set it in the cursor, releasing the old one.
886 */
887 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
888 agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
889 return error;
890 xfs_btree_setbuf(cur, level, bp);
891 /*
892 * Point to the btree block, now that we have the buffer
893 */
894 block = XFS_BUF_TO_INOBT_BLOCK(bp);
895 if ((error = xfs_btree_check_sblock(cur, block, level,
896 bp)))
897 return error;
898 } else
899 block = XFS_BUF_TO_INOBT_BLOCK(bp);
900 /*
901 * If we already had a key match at a higher level, we know
902 * we need to use the first entry in this block.
903 */
904 if (diff == 0)
905 keyno = 1;
906 /*
907 * Otherwise we need to search this block. Do a binary search.
908 */
909 else {
910 int high; /* high entry number */
911 xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */
912 xfs_inobt_rec_t *krbase=NULL;/* base of records in block */
913 int low; /* low entry number */
914
915			/*
916 * Get a pointer to keys or records.
917 */
918 if (level > 0)
919 kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
920 else
921 krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
922 /*
923 * Set low and high entry numbers, 1-based.
924 */
925 low = 1;
926 if (!(high = be16_to_cpu(block->bb_numrecs))) {
927 /*
928 * If the block is empty, the tree must
929 * be an empty leaf.
930 */
931 ASSERT(level == 0 && cur->bc_nlevels == 1);
932 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
933 *stat = 0;
934 return 0;
935 }
936 /*
937 * Binary search the block.
938 */
939 while (low <= high) {
940 xfs_agino_t startino; /* key value */
941
942 /*
943 * keyno is average of low and high.
944 */
945 keyno = (low + high) >> 1;
946 /*
947 * Get startino.
948 */
949 if (level > 0) {
950 xfs_inobt_key_t *kkp;
951
952 kkp = kkbase + keyno - 1;
953 startino = be32_to_cpu(kkp->ir_startino);
954 } else {
955 xfs_inobt_rec_t *krp;
956
957 krp = krbase + keyno - 1;
958 startino = be32_to_cpu(krp->ir_startino);
959 }
960 /*
961 * Compute difference to get next direction.
962 */
963 diff = (__int64_t)
964 startino - cur->bc_rec.i.ir_startino;
965 /*
966 * Less than, move right.
967 */
968 if (diff < 0)
969 low = keyno + 1;
970 /*
971 * Greater than, move left.
972 */
973 else if (diff > 0)
974 high = keyno - 1;
975 /*
976 * Equal, we're done.
977 */
978 else
979 break;
980 }
981 }
982 /*
983 * If there are more levels, set up for the next level
984 * by getting the block number and filling in the cursor.
985 */
986 if (level > 0) {
987 /*
988 * If we moved left, need the previous key number,
989 * unless there isn't one.
990 */
991 if (diff > 0 && --keyno < 1)
992 keyno = 1;
993 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur));
994#ifdef DEBUG
995 if ((error = xfs_btree_check_sptr(cur, agbno, level)))
996 return error;
997#endif
998 cur->bc_ptrs[level] = keyno;
999 }
1000 }
1001 /*
1002 * Done with the search.
1003 * See if we need to adjust the results.
1004 */
1005 if (dir != XFS_LOOKUP_LE && diff < 0) {
1006 keyno++;
1007 /*
1008 * If ge search and we went off the end of the block, but it's
1009 * not the last block, we're in the wrong block.
1010 */
1011 if (dir == XFS_LOOKUP_GE &&
1012 keyno > be16_to_cpu(block->bb_numrecs) &&
1013 be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1014 int i;
1015
1016			cur->bc_ptrs[0] = keyno;
1017 if ((error = xfs_inobt_increment(cur, 0, &i)))
1018 return error;
1019 ASSERT(i == 1);
1020 *stat = 1;
1021 return 0;
1022 }
1023 }
1024 else if (dir == XFS_LOOKUP_LE && diff > 0)
1025 keyno--;
1026 cur->bc_ptrs[0] = keyno;
1027 /*
1028 * Return if we succeeded or not.
1029 */
1030 if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1031 *stat = 0;
1032 else
1033 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1034 return 0;
1035}
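The binary search that just ended is exactly the loop the generic btree code takes over: all it needs from each btree type is the sign of a key comparison, supplied by a key_diff hook like the xfs_inobt_key_diff that follows. A hedged model of that search, simplified from the logic above rather than copied from xfs_btree.c:

/*
 * keyno is 1-based, as in the block layouts above.  key_diff returns
 * block key minus search key, so only its sign steers the search.
 */
static int
sketch_lookup(long long (*key_diff)(void *cur, int keyno), void *cur,
	      int numrecs, int *keyno_out)
{
	int		low = 1, high = numrecs, keyno = 0;
	long long	diff = 1;

	while (low <= high) {
		keyno = (low + high) >> 1;
		diff = key_diff(cur, keyno);
		if (diff < 0)
			low = keyno + 1;	/* block key smaller: go right */
		else if (diff > 0)
			high = keyno - 1;	/* block key larger: go left */
		else
			break;			/* exact match */
	}
	*keyno_out = keyno;
	return diff == 0;
}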
180
181STATIC __int64_t
182xfs_inobt_key_diff(
183	struct xfs_btree_cur	*cur,
184	union xfs_btree_key	*key)
185{
186	return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
187			  cur->bc_rec.i.ir_startino;
188}
1036
1037/*
1038 * Move 1 record left from cur/level if possible.
1039 * Update cur to reflect the new path.
1040 */
1041STATIC int				/* error */
1042xfs_inobt_lshift(
1043	xfs_btree_cur_t		*cur,	/* btree cursor */
1044	int			level,	/* level to shift record on */
1045	int			*stat)	/* success/failure */
1046{
1047	int			error;	/* error return value */
1048#ifdef DEBUG
1049 int i; /* loop index */
1050#endif
1051 xfs_inobt_key_t key; /* key value for leaf level upward */
1052 xfs_buf_t *lbp; /* buffer for left neighbor block */
1053 xfs_inobt_block_t *left; /* left neighbor btree block */
1054 xfs_inobt_key_t *lkp=NULL; /* key pointer for left block */
1055 xfs_inobt_ptr_t *lpp; /* address pointer for left block */
1056 xfs_inobt_rec_t *lrp=NULL; /* record pointer for left block */
1057 int nrec; /* new number of left block entries */
1058 xfs_buf_t *rbp; /* buffer for right (current) block */
1059 xfs_inobt_block_t *right; /* right (current) btree block */
1060 xfs_inobt_key_t *rkp=NULL; /* key pointer for right block */
1061 xfs_inobt_ptr_t *rpp=NULL; /* address pointer for right block */
1062 xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */
1063
1064 /*
1065 * Set up variables for this block as "right".
1066 */
1067 rbp = cur->bc_bufs[level];
1068 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1069#ifdef DEBUG
1070 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1071 return error;
1072#endif
1073 /*
1074 * If we've got no left sibling then we can't shift an entry left.
1075 */
1076 if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1077 *stat = 0;
1078 return 0;
1079 }
1080 /*
1081 * If the cursor entry is the one that would be moved, don't
1082 * do it... it's too complicated.
1083 */
1084 if (cur->bc_ptrs[level] <= 1) {
1085 *stat = 0;
1086 return 0;
1087 }
1088 /*
1089 * Set up the left neighbor as "left".
1090 */
1091 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1092 cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1093 0, &lbp, XFS_INO_BTREE_REF)))
1094 return error;
1095 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1096 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1097 return error;
1098 /*
1099 * If it's full, it can't take another entry.
1100 */
1101 if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1102 *stat = 0;
1103 return 0;
1104 }
1105 nrec = be16_to_cpu(left->bb_numrecs) + 1;
1106 /*
1107 * If non-leaf, copy a key and a ptr to the left block.
1108 */
1109 if (level > 0) {
1110 lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
1111 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1112 *lkp = *rkp;
1113 xfs_inobt_log_keys(cur, lbp, nrec, nrec);
1114 lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
1115 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1116#ifdef DEBUG
1117 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1118 return error;
1119#endif
1120 *lpp = *rpp;
1121 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1122 }
1123 /*
1124 * If leaf, copy a record to the left block.
1125 */
1126 else {
1127 lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
1128 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1129 *lrp = *rrp;
1130 xfs_inobt_log_recs(cur, lbp, nrec, nrec);
1131 }
1132 /*
1133 * Bump and log left's numrecs, decrement and log right's numrecs.
1134 */
1135 be16_add_cpu(&left->bb_numrecs, 1);
1136 xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1137#ifdef DEBUG
1138 if (level > 0)
1139 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1140 else
1141 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1142#endif
1143 be16_add_cpu(&right->bb_numrecs, -1);
1144 xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1145 /*
1146 * Slide the contents of right down one entry.
1147 */
1148 if (level > 0) {
1149#ifdef DEBUG
1150 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1151 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1152 level)))
1153 return error;
1154 }
1155#endif
1156 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1157 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1158 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1159 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1160 } else {
1161 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1162 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1163 key.ir_startino = rrp->ir_startino;
1164 rkp = &key;
1165 }
1166 /*
1167 * Update the parent key values of right.
1168 */
1169 if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
1170 return error;
1171 /*
1172 * Slide the cursor value left one.
1173 */
1174 cur->bc_ptrs[level]--;
1175 *stat = 1;
1176 return 0;
1177}
1178
189
190STATIC int
191xfs_inobt_kill_root(
192	struct xfs_btree_cur	*cur,
193	struct xfs_buf		*bp,
194	int			level,
195	union xfs_btree_ptr	*newroot)
196{
197	int			error;
198
199	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
200	XFS_BTREE_STATS_INC(cur, killroot);
201
202	/*
203	 * Update the root pointer, decreasing the level by 1 and then
204	 * free the old root.
205	 */
206	xfs_inobt_set_root(cur, newroot, -1);
207	error = xfs_inobt_free_block(cur, bp);
208	if (error) {
209		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
210		return error;
211	}
212
213	XFS_BTREE_STATS_INC(cur, free);
214
215	cur->bc_bufs[level] = NULL;
216	cur->bc_nlevels--;
217
218	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
219	return 0;
220}
1179/*
1180 * Allocate a new root block, fill it in.
1181 */
1182STATIC int				/* error */
1183xfs_inobt_newroot(
1184	xfs_btree_cur_t		*cur,	/* btree cursor */
1185	int			*stat)	/* success/failure */
1186{
1187	xfs_agi_t		*agi;	/* a.g. inode header */
1188 xfs_alloc_arg_t args; /* allocation argument structure */
1189 xfs_inobt_block_t *block; /* one half of the old root block */
1190 xfs_buf_t *bp; /* buffer containing block */
1191 int error; /* error return value */
1192 xfs_inobt_key_t *kp; /* btree key pointer */
1193 xfs_agblock_t lbno; /* left block number */
1194 xfs_buf_t *lbp; /* left buffer pointer */
1195 xfs_inobt_block_t *left; /* left btree block */
1196 xfs_buf_t *nbp; /* new (root) buffer */
1197 xfs_inobt_block_t *new; /* new (root) btree block */
1198 int nptr; /* new value for key index, 1 or 2 */
1199 xfs_inobt_ptr_t *pp; /* btree address pointer */
1200 xfs_agblock_t rbno; /* right block number */
1201 xfs_buf_t *rbp; /* right buffer pointer */
1202 xfs_inobt_block_t *right; /* right btree block */
1203 xfs_inobt_rec_t *rp; /* btree record pointer */
1204
1205	ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
1206
1207	/*
1208	 * Get a block & a buffer.
1209	 */
1210	agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
1211	args.tp = cur->bc_tp;
1212	args.mp = cur->bc_mp;
1213	args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno,
1214		be32_to_cpu(agi->agi_root));
1215	args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1216		args.isfl = args.userdata = args.minalignslop = 0;
1217	args.minlen = args.maxlen = args.prod = 1;
1218	args.type = XFS_ALLOCTYPE_NEAR_BNO;
1219	if ((error = xfs_alloc_vextent(&args)))
1220		return error;
1221 /*
1222 * None available, we fail.
1223 */
1224 if (args.fsbno == NULLFSBLOCK) {
1225 *stat = 0;
1226 return 0;
1227 }
1228 ASSERT(args.len == 1);
1229 nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1230 new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1231 /*
1232 * Set the root data in the a.g. inode structure.
1233 */
1234 agi->agi_root = cpu_to_be32(args.agbno);
1235 be32_add_cpu(&agi->agi_level, 1);
1236 xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,
1237 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1238 /*
1239 * At the previous root level there are now two blocks: the old
1240 * root, and the new block generated when it was split.
1241 * We don't know which one the cursor is pointing at, so we
1242 * set up variables "left" and "right" for each case.
1243 */
1244 bp = cur->bc_bufs[cur->bc_nlevels - 1];
1245 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1246#ifdef DEBUG
1247 if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1248 return error;
1249#endif
1250 if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1251 /*
1252 * Our block is left, pick up the right block.
1253 */
1254 lbp = bp;
1255 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1256 left = block;
1257 rbno = be32_to_cpu(left->bb_rightsib);
1258 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1259 rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1260 return error;
1261 bp = rbp;
1262 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1263 if ((error = xfs_btree_check_sblock(cur, right,
1264 cur->bc_nlevels - 1, rbp)))
1265 return error;
1266 nptr = 1;
1267 } else {
1268 /*
1269 * Our block is right, pick up the left block.
1270 */
1271 rbp = bp;
1272 rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1273 right = block;
1274 lbno = be32_to_cpu(right->bb_leftsib);
1275 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1276 lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1277 return error;
1278 bp = lbp;
1279 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1280 if ((error = xfs_btree_check_sblock(cur, left,
1281 cur->bc_nlevels - 1, lbp)))
1282 return error;
1283 nptr = 2;
1284 }
1285 /*
1286 * Fill in the new block's btree header and log it.
1287 */
1288 new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1289 new->bb_level = cpu_to_be16(cur->bc_nlevels);
1290 new->bb_numrecs = cpu_to_be16(2);
1291 new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1292 new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1293 xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1294 ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1295 /*
1296 * Fill in the key data in the new root.
1297 */
1298 kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1299 if (be16_to_cpu(left->bb_level) > 0) {
1300 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur);
1301 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur);
1302 } else {
1303 rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1304 kp[0].ir_startino = rp->ir_startino;
1305 rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1306 kp[1].ir_startino = rp->ir_startino;
1307	}
1308 xfs_inobt_log_keys(cur, nbp, 1, 2);
1309 /*
1310 * Fill in the pointer data in the new root.
1311 */
1312 pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1313 pp[0] = cpu_to_be32(lbno);
1314 pp[1] = cpu_to_be32(rbno);
1315 xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1316 /*
1317 * Fix up the cursor.
1318 */
1319 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1320 cur->bc_ptrs[cur->bc_nlevels] = nptr;
1321 cur->bc_nlevels++;
1322 *stat = 1;
1323 return 0;
1324}
1325
1326/*
1327 * Move 1 record right from cur/level if possible.
1328 * Update cur to reflect the new path.
1329 */
1330STATIC int /* error */
1331xfs_inobt_rshift(
1332 xfs_btree_cur_t *cur, /* btree cursor */
1333 int level, /* level to shift record on */
1334 int *stat) /* success/failure */
1335{
1336 int error; /* error return value */
1337 int i; /* loop index */
1338 xfs_inobt_key_t key; /* key value for leaf level upward */
1339 xfs_buf_t *lbp; /* buffer for left (current) block */
1340 xfs_inobt_block_t *left; /* left (current) btree block */
1341 xfs_inobt_key_t *lkp; /* key pointer for left block */
1342 xfs_inobt_ptr_t *lpp; /* address pointer for left block */
1343 xfs_inobt_rec_t *lrp; /* record pointer for left block */
1344 xfs_buf_t *rbp; /* buffer for right neighbor block */
1345 xfs_inobt_block_t *right; /* right neighbor btree block */
1346 xfs_inobt_key_t *rkp; /* key pointer for right block */
1347 xfs_inobt_ptr_t *rpp; /* address pointer for right block */
1348 xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */
1349 xfs_btree_cur_t *tcur; /* temporary cursor */
1350
1351	/*
1352	 * Set up variables for this block as "left".
1353	 */
1354	lbp = cur->bc_bufs[level];
1355 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1356#ifdef DEBUG
1357 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1358 return error;
1359#endif
1360 /*
1361 * If we've got no right sibling then we can't shift an entry right.
1362 */
1363 if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1364 *stat = 0;
1365 return 0;
1366 }
1367 /*
1368 * If the cursor entry is the one that would be moved, don't
1369 * do it... it's too complicated.
1370 */
1371 if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1372 *stat = 0;
1373 return 0;
1374 }
1375 /*
1376 * Set up the right neighbor as "right".
1377 */
1378 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1379 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1380 0, &rbp, XFS_INO_BTREE_REF)))
1381 return error;
1382 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1383 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1384 return error;
1385 /*
1386 * If it's full, it can't take another entry.
1387 */
1388 if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1389 *stat = 0;
1390 return 0;
1391 }
1392 /*
1393 * Make a hole at the start of the right neighbor block, then
1394 * copy the last left block entry to the hole.
1395 */
1396 if (level > 0) {
1397 lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1398 lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1399 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1400 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1401#ifdef DEBUG
1402 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1403 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1404 return error;
1405 }
1406#endif
1407 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1408 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1409#ifdef DEBUG
1410 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1411 return error;
1412#endif
1413 *rkp = *lkp;
1414 *rpp = *lpp;
1415 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1416 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1417 } else {
1418 lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1419 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1420 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1421 *rrp = *lrp;
1422 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1423 key.ir_startino = rrp->ir_startino;
1424 rkp = &key;
1425 }
1426 /*
1427 * Decrement and log left's numrecs, bump and log right's numrecs.
1428 */
1429 be16_add_cpu(&left->bb_numrecs, -1);
1430 xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1431 be16_add_cpu(&right->bb_numrecs, 1);
1432#ifdef DEBUG
1433 if (level > 0)
1434 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1435 else
1436 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1437#endif
1438 xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1439 /*
1440 * Using a temporary cursor, update the parent key values of the
1441 * block on the right.
1442 */
1443 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1444 return error;
1445 xfs_btree_lastrec(tcur, level);
1446 if ((error = xfs_inobt_increment(tcur, level, &i)) ||
1447 (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
1448 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1449 return error;
1450 }
1451 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1452 *stat = 1;
1453	return 0;
1454}
1455
1456/*
1457 * Split cur/level block in half.
1458 * Return new block number and its first record (to be inserted into parent).
1459 */
1460STATIC int /* error */
1461xfs_inobt_split(
1462 xfs_btree_cur_t *cur, /* btree cursor */
1463 int level, /* level to split */
1464 xfs_agblock_t *bnop, /* output: block number allocated */
1465 xfs_inobt_key_t *keyp, /* output: first key of new block */
1466 xfs_btree_cur_t **curp, /* output: new cursor */
1467 int *stat) /* success/failure */
1468{
1469 xfs_alloc_arg_t args; /* allocation argument structure */
1470 int error; /* error return value */
1471 int i; /* loop index/record number */
1472 xfs_agblock_t lbno; /* left (current) block number */
1473 xfs_buf_t *lbp; /* buffer for left block */
1474 xfs_inobt_block_t *left; /* left (current) btree block */
1475 xfs_inobt_key_t *lkp; /* left btree key pointer */
1476 xfs_inobt_ptr_t *lpp; /* left btree address pointer */
1477 xfs_inobt_rec_t *lrp; /* left btree record pointer */
1478 xfs_buf_t *rbp; /* buffer for right block */
1479 xfs_inobt_block_t *right; /* right (new) btree block */
1480 xfs_inobt_key_t *rkp; /* right btree key pointer */
1481 xfs_inobt_ptr_t *rpp; /* right btree address pointer */
1482 xfs_inobt_rec_t *rrp; /* right btree record pointer */
1483
1484 /*
1485 * Set up left block (current one).
1486 */
1487 lbp = cur->bc_bufs[level];
1488 args.tp = cur->bc_tp;
1489 args.mp = cur->bc_mp;
1490 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1491 /*
1492 * Allocate the new block.
1493 * If we can't do it, we're toast. Give up.
1494 */
1495 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);
1496 args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1497 args.isfl = args.userdata = args.minalignslop = 0;
1498 args.minlen = args.maxlen = args.prod = 1;
1499 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1500 if ((error = xfs_alloc_vextent(&args)))
1501 return error;
1502 if (args.fsbno == NULLFSBLOCK) {
1503 *stat = 0;
1504 return 0;
1505 }
1506 ASSERT(args.len == 1);
1507 rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1508 /*
1509 * Set up the new block as "right".
1510 */
1511 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1512 /*
1513 * "Left" is the current (according to the cursor) block.
1514 */
1515 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1516#ifdef DEBUG
1517	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1518		return error;
1519#endif
1520	/*
1521	 * Fill in the btree header for the new block.
1522	 */
1523	right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1524	right->bb_level = left->bb_level;
1525 right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1526 /*
1527 * Make sure that if there's an odd number of entries now, that
1528 * each new block will have the same number of entries.
1529 */
1530 if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1531 cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1532 be16_add_cpu(&right->bb_numrecs, 1);
1533 i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1534 /*
1535 * For non-leaf blocks, copy keys and addresses over to the new block.
1536 */
1537 if (level > 0) {
1538 lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1539 lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1540 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1541 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1542#ifdef DEBUG
1543 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1544 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1545 return error;
1546 }
1547#endif
1548 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1549 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1550 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1551 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1552 *keyp = *rkp;
1553 }
1554 /*
1555 * For leaf blocks, copy records over to the new block.
1556 */
1557 else {
1558 lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1559 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1560 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1561 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1562 keyp->ir_startino = rrp->ir_startino;
1563 }
1564 /*
1565 * Find the left block number by looking in the buffer.
1566 * Adjust numrecs, sibling pointers.
1567 */
1568 be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1569 right->bb_rightsib = left->bb_rightsib;
1570 left->bb_rightsib = cpu_to_be32(args.agbno);
1571 right->bb_leftsib = cpu_to_be32(lbno);
1572 xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1573 xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1574 /*
1575 * If there's a block to the new block's right, make that block
1576 * point back to right instead of to left.
1577 */
1578 if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1579 xfs_inobt_block_t *rrblock; /* rr btree block */
1580 xfs_buf_t *rrbp; /* buffer for rrblock */
1581
1582 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1583 be32_to_cpu(right->bb_rightsib), 0, &rrbp,
1584 XFS_INO_BTREE_REF)))
1585 return error;
1586 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1587 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1588 return error;
1589 rrblock->bb_leftsib = cpu_to_be32(args.agbno);
1590 xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1591 }
1592 /*
1593 * If the cursor is really in the right block, move it there.
1594 * If it's just pointing past the last entry in left, then we'll
1595 * insert there, so don't change anything in that case.
1596 */
1597 if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1598 xfs_btree_setbuf(cur, level, rbp);
1599 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1600 }
1601 /*
1602 * If there are more levels, we'll need another cursor which refers
1603 * the right block, no matter where this cursor was.
1604 */
1605 if (level + 1 < cur->bc_nlevels) {
1606 if ((error = xfs_btree_dup_cursor(cur, curp)))
1607 return error;
1608 (*curp)->bc_ptrs[level + 1]++;
1609 }
1610 *bnop = args.agbno;
1611 *stat = 1;
1612 return 0;
1613}
1614
221
222#ifdef DEBUG
223STATIC int
224xfs_inobt_keys_inorder(
225	struct xfs_btree_cur	*cur,
226	union xfs_btree_key	*k1,
227	union xfs_btree_key	*k2)
228{
229	return be32_to_cpu(k1->inobt.ir_startino) <
230		be32_to_cpu(k2->inobt.ir_startino);
231}
232
233STATIC int
234xfs_inobt_recs_inorder(
235	struct xfs_btree_cur	*cur,
236	union xfs_btree_rec	*r1,
237	union xfs_btree_rec	*r2)
238{
239	return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <=
240		be32_to_cpu(r2->inobt.ir_startino);
241}
242#endif	/* DEBUG */
1615/*
1616 * Update keys at all levels from here to the root along the cursor's path.
1617 */
1618STATIC int				/* error */
1619xfs_inobt_updkey(
1620	xfs_btree_cur_t		*cur,	/* btree cursor */
1621	xfs_inobt_key_t		*keyp,	/* new key value to update to */
1622	int			level)	/* starting level for update */
1623{
1624	int			ptr;	/* index of key in block */
1625
1626 /*
1627 * Go up the tree from this level toward the root.
1628 * At each level, update the key value to the value input.
1629 * Stop when we reach a level where the cursor isn't pointing
1630 * at the first entry in the block.
1631 */
1632 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1633 xfs_buf_t *bp; /* buffer for block */
1634 xfs_inobt_block_t *block; /* btree block */
1635#ifdef DEBUG
1636 int error; /* error return value */
1637#endif
1638 xfs_inobt_key_t *kp; /* ptr to btree block keys */
1639
1640 bp = cur->bc_bufs[level];
1641 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1642#ifdef DEBUG
1643 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1644 return error;
1645#endif
1646 ptr = cur->bc_ptrs[level];
1647 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
1648 *kp = *keyp;
1649 xfs_inobt_log_keys(cur, bp, ptr, ptr);
1650 }
1651 return 0;
1652}
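The xfs_inobt_recs_inorder check above encodes the invariant that inobt leaf records describe fixed-size inode chunks: assuming the usual XFS_INODES_PER_CHUNK of 64, two neighboring records are in order only if the first chunk ends at or before the second begins. A tiny standalone check of that arithmetic:

#include <assert.h>

/* 64 stands in for XFS_INODES_PER_CHUNK (8 bytes x 8 bits of ir_free) */
static int chunks_inorder(unsigned int s1, unsigned int s2)
{
	return s1 + 64 <= s2;
}

int main(void)
{
	assert(chunks_inorder(64, 128));	/* touching chunks: ordered */
	assert(!chunks_inorder(64, 96));	/* overlapping: corruption */
	return 0;
}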
243
244#ifdef XFS_BTREE_TRACE
245ktrace_t	*xfs_inobt_trace_buf;
246
247STATIC void
248xfs_inobt_trace_enter(
249	struct xfs_btree_cur	*cur,
250	const char		*func,
251	char			*s,
252	int			type,
253	int			line,
254	__psunsigned_t		a0,
255	__psunsigned_t		a1,
256	__psunsigned_t		a2,
257	__psunsigned_t		a3,
258	__psunsigned_t		a4,
259	__psunsigned_t		a5,
260	__psunsigned_t		a6,
261	__psunsigned_t		a7,
262	__psunsigned_t		a8,
263	__psunsigned_t		a9,
264	__psunsigned_t		a10)
265{
266	ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
267		(void *)func, (void *)s, NULL, (void *)cur,
268		(void *)a0, (void *)a1, (void *)a2, (void *)a3,
269		(void *)a4, (void *)a5, (void *)a6, (void *)a7,
270		(void *)a8, (void *)a9, (void *)a10);
271}
1653
1654/*
1655 * Externally visible routines.
1656 */
1657
1658/*
1659 * Decrement cursor by one record at the level.
1660 * For nonzero levels the leaf-ward information is untouched.
1661 */
1662int					/* error */
1663xfs_inobt_decrement(
1664	xfs_btree_cur_t		*cur,	/* btree cursor */
1665	int			level,	/* level in btree, 0 is leaf */
1666	int			*stat)	/* success/failure */
1667{
1668	xfs_inobt_block_t	*block;	/* btree block */
1669	int			error;
1670	int			lev;	/* btree level */
1671
1672	ASSERT(level < cur->bc_nlevels);
1673 /*
1674 * Read-ahead to the left at this level.
1675 */
1676 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1677 /*
1678 * Decrement the ptr at this level. If we're still in the block
1679 * then we're done.
1680 */
1681 if (--cur->bc_ptrs[level] > 0) {
1682 *stat = 1;
1683 return 0;
1684 }
1685 /*
1686 * Get a pointer to the btree block.
1687 */
1688 block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
1689#ifdef DEBUG
1690 if ((error = xfs_btree_check_sblock(cur, block, level,
1691 cur->bc_bufs[level])))
1692 return error;
1693#endif
1694 /*
1695 * If we just went off the left edge of the tree, return failure.
1696 */
1697 if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1698 *stat = 0;
1699 return 0;
1700 }
1701 /*
1702 * March up the tree decrementing pointers.
1703 * Stop when we don't go off the left edge of a block.
1704 */
1705 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1706 if (--cur->bc_ptrs[lev] > 0)
1707 break;
1708 /*
1709 * Read-ahead the left block, we're going to read it
1710 * in the next loop.
1711 */
1712 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1713 }
1714 /*
1715 * If we went off the root then we are seriously confused.
1716 */
1717 ASSERT(lev < cur->bc_nlevels);
1718 /*
1719 * Now walk back down the tree, fixing up the cursor's buffer
1720 * pointers and key numbers.
1721 */
1722 for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1723 xfs_agblock_t agbno; /* block number of btree block */
1724 xfs_buf_t *bp; /* buffer containing btree block */
1725
1726 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1727 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1728 cur->bc_private.a.agno, agbno, 0, &bp,
1729 XFS_INO_BTREE_REF)))
1730 return error;
1731 lev--;
1732 xfs_btree_setbuf(cur, lev, bp);
1733 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1734 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1735 return error;
1736 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1737 }
1738 *stat = 1;
1739 return 0;
1740}
1741
272
273STATIC void
274xfs_inobt_trace_cursor(
275	struct xfs_btree_cur	*cur,
276	__uint32_t		*s0,
277	__uint64_t		*l0,
278	__uint64_t		*l1)
279{
280	*s0 = cur->bc_private.a.agno;
281	*l0 = cur->bc_rec.i.ir_startino;
282	*l1 = cur->bc_rec.i.ir_free;
283}
1742/*
1743 * Delete the record pointed to by cur.
1744 * The cursor refers to the place where the record was (could be inserted)
1745 * when the operation returns.
1746 */
1747int					/* error */
1748xfs_inobt_delete(
1749	xfs_btree_cur_t		*cur,	/* btree cursor */
1750	int			*stat)	/* success/failure */
1751{
1752	int			error;
1753	int			i;	/* result code */
1754	int			level;	/* btree level */
1755
1756 /*
1757 * Go up the tree, starting at leaf level.
1758 * If 2 is returned then a join was done; go to the next level.
1759 * Otherwise we are done.
1760 */
1761 for (level = 0, i = 2; i == 2; level++) {
1762 if ((error = xfs_inobt_delrec(cur, level, &i)))
1763 return error;
1764 }
1765 if (i == 0) {
1766 for (level = 1; level < cur->bc_nlevels; level++) {
1767 if (cur->bc_ptrs[level] == 0) {
1768 if ((error = xfs_inobt_decrement(cur, level, &i)))
1769 return error;
1770 break;
1771 }
1772 }
1773 }
1774 *stat = i;
1775 return 0;
1776}
1777
1778
284
285STATIC void
286xfs_inobt_trace_key(
287	struct xfs_btree_cur	*cur,
288	union xfs_btree_key	*key,
289	__uint64_t		*l0,
290	__uint64_t		*l1)
291{
292	*l0 = be32_to_cpu(key->inobt.ir_startino);
293	*l1 = 0;
294}
1779/*
1780 * Get the data from the pointed-to record.
1781 */
1782int					/* error */
1783xfs_inobt_get_rec(
1784	xfs_btree_cur_t		*cur,	/* btree cursor */
1785	xfs_agino_t		*ino,	/* output: starting inode of chunk */
1786	__int32_t		*fcnt,	/* output: number of free inodes */
1787	xfs_inofree_t		*free,	/* output: free inode mask */
1788	int			*stat)	/* output: success/failure */
1789{
1790	xfs_inobt_block_t	*block;	/* btree block */
1791	xfs_buf_t		*bp;	/* buffer containing btree block */
1792#ifdef DEBUG
1793 int error; /* error return value */
1794#endif
1795 int ptr; /* record number */
1796 xfs_inobt_rec_t *rec; /* record data */
1797
1798 bp = cur->bc_bufs[0];
1799 ptr = cur->bc_ptrs[0];
1800 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1801#ifdef DEBUG
1802 if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1803 return error;
1804#endif
1805 /*
1806 * Off the right end or left end, return failure.
1807 */
1808 if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1809 *stat = 0;
1810 return 0;
1811 }
1812 /*
1813 * Point to the record and extract its data.
1814 */
1815 rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1816 *ino = be32_to_cpu(rec->ir_startino);
1817 *fcnt = be32_to_cpu(rec->ir_freecount);
1818 *free = be64_to_cpu(rec->ir_free);
1819 *stat = 1;
1820 return 0;
1821}
1822
295
296STATIC void
297xfs_inobt_trace_record(
298	struct xfs_btree_cur	*cur,
299	union xfs_btree_rec	*rec,
300	__uint64_t		*l0,
301	__uint64_t		*l1,
302	__uint64_t		*l2)
303{
304	*l0 = be32_to_cpu(rec->inobt.ir_startino);
305	*l1 = be32_to_cpu(rec->inobt.ir_freecount);
306	*l2 = be64_to_cpu(rec->inobt.ir_free);
307}
308#endif /* XFS_BTREE_TRACE */
309
310static const struct xfs_btree_ops xfs_inobt_ops = {
311	.rec_len		= sizeof(xfs_inobt_rec_t),
312	.key_len		= sizeof(xfs_inobt_key_t),
313
314	.dup_cursor		= xfs_inobt_dup_cursor,
315	.set_root		= xfs_inobt_set_root,
316	.kill_root		= xfs_inobt_kill_root,
317	.alloc_block		= xfs_inobt_alloc_block,
318	.free_block		= xfs_inobt_free_block,
319	.get_minrecs		= xfs_inobt_get_minrecs,
320	.get_maxrecs		= xfs_inobt_get_maxrecs,
321	.init_key_from_rec	= xfs_inobt_init_key_from_rec,
322	.init_rec_from_key	= xfs_inobt_init_rec_from_key,
323	.init_rec_from_cur	= xfs_inobt_init_rec_from_cur,
324	.init_ptr_from_cur	= xfs_inobt_init_ptr_from_cur,
325	.key_diff		= xfs_inobt_key_diff,
326
327#ifdef DEBUG
328	.keys_inorder		= xfs_inobt_keys_inorder,
329	.recs_inorder		= xfs_inobt_recs_inorder,
330#endif
331
332#ifdef XFS_BTREE_TRACE
333	.trace_enter		= xfs_inobt_trace_enter,
334	.trace_cursor		= xfs_inobt_trace_cursor,
335	.trace_key		= xfs_inobt_trace_key,
336	.trace_record		= xfs_inobt_trace_record,
337#endif
338};
1823/*
1824 * Increment cursor by one record at the level.
1825 * For nonzero levels the leaf-ward information is untouched.
1826 */
1827int					/* error */
1828xfs_inobt_increment(
1829	xfs_btree_cur_t		*cur,	/* btree cursor */
1830	int			level,	/* level in btree, 0 is leaf */
1831	int			*stat)	/* success/failure */
1832{
1833	xfs_inobt_block_t	*block;	/* btree block */
1834	xfs_buf_t		*bp;	/* buffer containing btree block */
1835	int			error;	/* error return value */
1836	int			lev;	/* btree level */
1837
1838 ASSERT(level < cur->bc_nlevels);
1839 /*
1840 * Read-ahead to the right at this level.
1841 */
1842 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1843 /*
1844 * Get a pointer to the btree block.
1845 */
1846 bp = cur->bc_bufs[level];
1847 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1848#ifdef DEBUG
1849 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1850 return error;
1851#endif
1852 /*
1853 * Increment the ptr at this level. If we're still in the block
1854 * then we're done.
1855 */
1856 if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1857 *stat = 1;
1858 return 0;
1859 }
1860 /*
1861 * If we just went off the right edge of the tree, return failure.
1862 */
1863 if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1864 *stat = 0;
1865 return 0;
1866 }
1867 /*
1868 * March up the tree incrementing pointers.
1869 * Stop when we don't go off the right edge of a block.
1870 */
1871 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1872 bp = cur->bc_bufs[lev];
1873 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1874#ifdef DEBUG
1875		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1876			return error;
1877#endif
1878 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
1879 break;
1880 /*
1881 * Read-ahead the right block, we're going to read it
1882 * in the next loop.
1883 */
1884 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1885 }
1886 /*
1887 * If we went off the root then we are seriously confused.
1888 */
1889 ASSERT(lev < cur->bc_nlevels);
1890 /*
1891 * Now walk back down the tree, fixing up the cursor's buffer
1892 * pointers and key numbers.
1893 */
1894 for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
1895 lev > level; ) {
1896 xfs_agblock_t agbno; /* block number of btree block */
1897
1898		agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1899		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1900			cur->bc_private.a.agno, agbno, 0, &bp,
1901			XFS_INO_BTREE_REF)))
1902			return error;
1903		lev--;
1904		xfs_btree_setbuf(cur, lev, bp);
1905 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1906 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1907 return error;
1908 cur->bc_ptrs[lev] = 1;
1909 }
1910 *stat = 1;
1911 return 0;
1912}
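The xfs_inobt_ops table above is the heart of this conversion: open-coded routines like the increment that just ended become generic xfs_btree.c code that dispatches through a per-btree-type vector. A stripped-down sketch of the dispatch pattern, with illustrative types rather than the real xfs_btree_cur:

struct sketch_ops {
	int	(*get_maxrecs)(void *cur, int level);
};

struct sketch_cur {
	const struct sketch_ops	*ops;	/* like cur->bc_ops */
};

/* one generic routine now serves every btree type */
static int
sketch_block_full(struct sketch_cur *cur, int level, int numrecs)
{
	return numrecs == cur->ops->get_maxrecs(cur, level);
}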
1913
339
340/*
341 * Allocate a new inode btree cursor.
342 */
343struct xfs_btree_cur *			/* new inode btree cursor */
344xfs_inobt_init_cursor(
345	struct xfs_mount	*mp,	/* file system mount point */
346	struct xfs_trans	*tp,	/* transaction pointer */
347	struct xfs_buf		*agbp,	/* buffer for agi structure */
348	xfs_agnumber_t		agno)	/* allocation group number */
349{
350	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);
351	struct xfs_btree_cur	*cur;
352
353	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
354
355	cur->bc_tp = tp;
356	cur->bc_mp = mp;
357	cur->bc_nlevels = be32_to_cpu(agi->agi_level);
358	cur->bc_btnum = XFS_BTNUM_INO;
359	cur->bc_blocklog = mp->m_sb.sb_blocklog;
360
361	cur->bc_ops = &xfs_inobt_ops;
362
363	cur->bc_private.a.agbp = agbp;
364	cur->bc_private.a.agno = agno;
365
366	return cur;
367}
1914/*
1915 * Insert the current record at the point referenced by cur.
1916 * The cursor may be inconsistent on return if splits have been done.
1917 */
1918int					/* error */
1919xfs_inobt_insert(
1920	xfs_btree_cur_t		*cur,	/* btree cursor */
1921	int			*stat)	/* success/failure */
1922{
1923	int			error;	/* error return value */
1924	int			i;	/* result value, 0 for failure */
1925	int			level;	/* current level number in btree */
1926	xfs_agblock_t		nbno;	/* new block number (split result) */
1927	xfs_btree_cur_t		*ncur;	/* new cursor (split result) */
1928	xfs_inobt_rec_t		nrec;	/* record being inserted this level */
1929	xfs_btree_cur_t		*pcur;	/* previous level's cursor */
1930
1931	level = 0;
1932 nbno = NULLAGBLOCK;
1933 nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
1934 nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
1935 nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
1936 ncur = NULL;
1937 pcur = cur;
1938 /*
1939 * Loop going up the tree, starting at the leaf level.
1940 * Stop when we don't get a split block, that must mean that
1941 * the insert is finished with this level.
1942 */
1943 do {
1944 /*
1945 * Insert nrec/nbno into this level of the tree.
1946 * Note if we fail, nbno will be null.
1947 */
1948 if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1949 &i))) {
1950 if (pcur != cur)
1951 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1952 return error;
1953 }
1954 /*
1955 * See if the cursor we just used is trash.
1956 * Can't trash the caller's cursor, but otherwise we should
1957 * if ncur is a new cursor or we're about to be done.
1958 */
1959 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1960 cur->bc_nlevels = pcur->bc_nlevels;
1961 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1962 }
1963 /*
1964 * If we got a new cursor, switch to it.
1965 */
1966 if (ncur) {
1967 pcur = ncur;
1968 ncur = NULL;
1969 }
1970 } while (nbno != NULLAGBLOCK);
1971 *stat = i;
1972 return 0;
1973}
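A hedged usage sketch for the new xfs_inobt_init_cursor above, modeled on callers like xfs_ialloc.c; transaction and AGI-buffer setup are assumed to exist, error handling is elided, and the lookup/get_rec helpers are the ones still declared in this file at this point in the series:

/* find the first inode chunk record in an AG (illustrative only) */
static int
sketch_first_chunk(struct xfs_mount *mp, struct xfs_trans *tp,
		   struct xfs_buf *agbp, xfs_agnumber_t agno)
{
	struct xfs_btree_cur	*cur;
	xfs_agino_t		startino;
	__int32_t		freecount;
	xfs_inofree_t		freemask;
	int			stat = 0;

	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
	if (!xfs_inobt_lookup_ge(cur, 0, 0, 0, &stat) && stat)
		xfs_inobt_get_rec(cur, &startino, &freecount,
				  &freemask, &stat);
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return stat;
}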
1974
1975/*
1976 * Lookup the record equal to ino in the btree given by cur.
1977 */
1978int					/* error */
1979xfs_inobt_lookup_eq(
1980 xfs_btree_cur_t *cur, /* btree cursor */
1981 xfs_agino_t ino, /* starting inode of chunk */
1982 __int32_t fcnt, /* free inode count */
1983 xfs_inofree_t free, /* free inode mask */
1984 int *stat) /* success/failure */
1985{
1986 cur->bc_rec.i.ir_startino = ino;
1987 cur->bc_rec.i.ir_freecount = fcnt;
1988 cur->bc_rec.i.ir_free = free;
1989 return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
1990}
1991
1992/*
1993 * Lookup the first record greater than or equal to ino
1994 * in the btree given by cur.
1995 */
1996int /* error */
1997xfs_inobt_lookup_ge(
1998 xfs_btree_cur_t *cur, /* btree cursor */
1999 xfs_agino_t ino, /* starting inode of chunk */
2000 __int32_t fcnt, /* free inode count */
2001 xfs_inofree_t free, /* free inode mask */
2002 int *stat) /* success/failure */
2003{
2004 cur->bc_rec.i.ir_startino = ino;
2005 cur->bc_rec.i.ir_freecount = fcnt;
2006 cur->bc_rec.i.ir_free = free;
2007 return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
2008}
2009
2010/*
2011 * Lookup the first record less than or equal to ino
2012 * in the btree given by cur.
2013 */
2014int /* error */
2015xfs_inobt_lookup_le(
2016 xfs_btree_cur_t *cur, /* btree cursor */
2017 xfs_agino_t ino, /* starting inode of chunk */
2018 __int32_t fcnt, /* free inode count */
2019 xfs_inofree_t free, /* free inode mask */
2020 int *stat) /* success/failure */
2021{
2022 cur->bc_rec.i.ir_startino = ino;
2023 cur->bc_rec.i.ir_freecount = fcnt;
2024 cur->bc_rec.i.ir_free = free;
2025 return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
2026}
2027
368
369/*
370 * Calculate number of records in an inobt btree block.
371 */
372int
373xfs_inobt_maxrecs(
374	struct xfs_mount	*mp,
375	int			blocklen,
376	int			leaf)
377{
378	blocklen -= XFS_INOBT_BLOCK_LEN(mp);
379
380	if (leaf)
381		return blocklen / sizeof(xfs_inobt_rec_t);
382	return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
383}
2028/*
2029 * Update the record referred to by cur, to the value given
2030 * by [ino, fcnt, free].
2031 * This either works (return 0) or gets an EFSCORRUPTED error.
2032 */
2033int					/* error */
2034xfs_inobt_update(
2035	xfs_btree_cur_t		*cur,	/* btree cursor */
2036	xfs_agino_t		ino,	/* starting inode of chunk */
2037	__int32_t		fcnt,	/* free inode count */
2038	xfs_inofree_t		free)	/* free inode mask */
2039{
2040	xfs_inobt_block_t	*block;	/* btree block to update */
2041	xfs_buf_t		*bp;	/* buffer containing btree block */
2042	int			error;	/* error return value */
2043	int			ptr;	/* current record number (updating) */
2044	xfs_inobt_rec_t		*rp;	/* pointer to updated record */
2045
2046	/*
2047	 * Pick up the current block.
2048	 */
2049 bp = cur->bc_bufs[0];
2050 block = XFS_BUF_TO_INOBT_BLOCK(bp);
2051#ifdef DEBUG
2052 if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
2053 return error;
2054#endif
2055 /*
2056 * Get the address of the rec to be updated.
2057 */
2058 ptr = cur->bc_ptrs[0];
2059 rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
2060 /*
2061 * Fill in the new contents and log them.
2062 */
2063 rp->ir_startino = cpu_to_be32(ino);
2064 rp->ir_freecount = cpu_to_be32(fcnt);
2065 rp->ir_free = cpu_to_be64(free);
2066 xfs_inobt_log_recs(cur, bp, ptr, ptr);
2067 /*
2068 * Updating first record in leaf. Pass new key value up to our parent.
2069 */
2070 if (ptr == 1) {
2071 xfs_inobt_key_t key; /* key containing [ino] */
2072
2073 key.ir_startino = cpu_to_be32(ino);
2074 if ((error = xfs_inobt_updkey(cur, &key, 1)))
2075 return error;
2076 }
2077 return 0;
2078}
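Worked numbers for xfs_inobt_maxrecs above, assuming a 4096-byte block and the 16-byte short-form header (XFS_BTREE_SBLOCK_LEN: magic, level, numrecs and two sibling pointers); the inobt record is 16 bytes, keys and pointers 4 bytes each:

	int blocklen = 4096 - 16;		/* 4080 usable bytes */
	int leaf_maxrecs = blocklen / 16;	/* 255 records per leaf */
	int node_maxrecs = blocklen / (4 + 4);	/* 510 key/ptr pairs per node */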
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 8efc4a5b8b92..37e5dd01a577 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -24,7 +24,6 @@
 
 struct xfs_buf;
 struct xfs_btree_cur;
-struct xfs_btree_sblock;
 struct xfs_mount;
 
 /*
@@ -70,11 +69,6 @@ typedef struct xfs_inobt_key {
 /* btree pointer type */
 typedef __be32 xfs_inobt_ptr_t;
 
-/* btree block header type */
-typedef struct xfs_btree_sblock xfs_inobt_block_t;
-
-#define XFS_BUF_TO_INOBT_BLOCK(bp)	((xfs_inobt_block_t *)XFS_BUF_PTR(bp))
-
 /*
  * Bit manipulations for ir_free.
  */
@@ -85,14 +79,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
 #define	XFS_INOBT_CLR_FREE(rp,i)	((rp)->ir_free &= ~XFS_INOBT_MASK(i))
 
 /*
- * Real block structures have a size equal to the disk block size.
- */
-#define	XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0])
-#define	XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0])
-#define	XFS_INOBT_IS_LAST_REC(cur)	\
-	((cur)->bc_ptrs[0] == be16_to_cpu(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs))
-
-/*
  * Maximum number of inode btree levels.
  */
 #define	XFS_IN_MAXLEVELS(mp)	((mp)->m_in_maxlevels)
@@ -104,75 +90,38 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
 #define	XFS_PREALLOC_BLOCKS(mp)	((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
 
 /*
- * Record, key, and pointer address macros for btree blocks.
- */
-#define XFS_INOBT_REC_ADDR(bb,i,cur) \
-	(XFS_BTREE_REC_ADDR(xfs_inobt, bb, i))
-
-#define XFS_INOBT_KEY_ADDR(bb,i,cur) \
-	(XFS_BTREE_KEY_ADDR(xfs_inobt, bb, i))
-
-#define XFS_INOBT_PTR_ADDR(bb,i,cur) \
-	(XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \
-				i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
-
-/*
- * Decrement cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_inobt_decrement(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Delete the record pointed to by cur.
- * The cursor refers to the place where the record was (could be inserted)
- * when the operation returns.
- */
-extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Get the data from the pointed-to record.
- */
-extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino,
-			     __int32_t *fcnt, xfs_inofree_t *free, int *stat);
-
-/*
- * Increment cursor by one record at the level.
- * For nonzero levels the leaf-ward information is untouched.
- */
-extern int xfs_inobt_increment(struct xfs_btree_cur *cur, int level, int *stat);
-
-/*
- * Insert the current record at the point referenced by cur.
- * The cursor may be inconsistent on return if splits have been done.
- */
-extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat);
-
-/*
- * Lookup the record equal to ino in the btree given by cur.
- */
-extern int xfs_inobt_lookup_eq(struct xfs_btree_cur *cur, xfs_agino_t ino,
-			       __int32_t fcnt, xfs_inofree_t free, int *stat);
-
-/*
- * Lookup the first record greater than or equal to ino
- * in the btree given by cur.
- */
-extern int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino,
-			       __int32_t fcnt, xfs_inofree_t free, int *stat);
-
-/*
- * Lookup the first record less than or equal to ino
- * in the btree given by cur.
- */
-extern int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino,
-			       __int32_t fcnt, xfs_inofree_t free, int *stat);
-
-/*
- * Update the record referred to by cur, to the value given
- * by [ino, fcnt, free].
- * This either works (return 0) or gets an EFSCORRUPTED error.
- */
-extern int xfs_inobt_update(struct xfs_btree_cur *cur, xfs_agino_t ino,
-			    __int32_t fcnt, xfs_inofree_t free);
+ * Btree block header size depends on a superblock flag.
+ *
+ * (not quite yet, but soon)
+ */
+#define XFS_INOBT_BLOCK_LEN(mp)	XFS_BTREE_SBLOCK_LEN
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_INOBT_REC_ADDR(mp, block, index) \
+	((xfs_inobt_rec_t *) \
+		((char *)(block) + \
+		 XFS_INOBT_BLOCK_LEN(mp) + \
+		 (((index) - 1) * sizeof(xfs_inobt_rec_t))))
+
+#define XFS_INOBT_KEY_ADDR(mp, block, index) \
+	((xfs_inobt_key_t *) \
+		((char *)(block) + \
+		 XFS_INOBT_BLOCK_LEN(mp) + \
+		 ((index) - 1) * sizeof(xfs_inobt_key_t)))
+
+#define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \
+	((xfs_inobt_ptr_t *) \
+		((char *)(block) + \
+		 XFS_INOBT_BLOCK_LEN(mp) + \
+		 (maxrecs) * sizeof(xfs_inobt_key_t) + \
+		 ((index) - 1) * sizeof(xfs_inobt_ptr_t)))
+
+extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
+		struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t);
+extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
 
 #endif	/* __XFS_IALLOC_BTREE_H__ */
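The replacement macros above open-code the offset arithmetic that the old XFS_BTREE_*_ADDR wrappers hid: records, keys and pointers all sit at fixed strides past the block header. For example, with the 16-byte header and 16-byte records, leaf record 3 starts 48 bytes into the block; a cast-free sketch of XFS_INOBT_REC_ADDR:

/* illustrative helper, not the kernel macro */
static inline char *
inobt_rec_addr_sketch(char *block, int index)
{
	return block + 16			/* XFS_INOBT_BLOCK_LEN */
		     + (index - 1) * 16;	/* sizeof(xfs_inobt_rec_t) */
}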
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e229e9e001c2..e2fb6210d4c5 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,281 +38,283 @@
 #include "xfs_ialloc.h"
 #include "xfs_quota.h"
 #include "xfs_utils.h"
+#include "xfs_trans_priv.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_btree_trace.h"
+#include "xfs_dir2_trace.h"
+
 
-/*
- * Look up an inode by number in the given file system.
- * The inode is looked up in the cache held in each AG.
- * If the inode is found in the cache, attach it to the provided
- * vnode.
- *
- * If it is not in core, read it in from the file system's device,
- * add it to the cache and attach the provided vnode.
- *
- * The inode is locked according to the value of the lock_flags parameter.
- * This flag parameter indicates how and if the inode's IO lock and inode lock
- * should be taken.
- *
- * mp -- the mount point structure for the current file system.  It points
- *       to the inode hash table.
- * tp -- a pointer to the current transaction if there is one.  This is
- *       simply passed through to the xfs_iread() call.
- * ino -- the number of the inode desired.  This is the unique identifier
- *        within the file system for the inode being requested.
- * lock_flags -- flags indicating how to lock the inode.  See the comment
- *               for xfs_ilock() for a list of valid values.
- * bno -- the block number starting the buffer containing the inode,
- *        if known (as by bulkstat), else 0.
- */
-STATIC int
-xfs_iget_core(
-	struct inode	*inode,
-	xfs_mount_t	*mp,
-	xfs_trans_t	*tp,
-	xfs_ino_t	ino,
-	uint		flags,
-	uint		lock_flags,
-	xfs_inode_t	**ipp,
-	xfs_daddr_t	bno)
-{
-	struct inode	*old_inode;
-	xfs_inode_t	*ip;
-	xfs_inode_t	*iq;
-	int		error;
-	unsigned long	first_index, mask;
-	xfs_perag_t	*pag;
-	xfs_agino_t	agino;
-
-	/* the radix tree exists only in inode capable AGs */
-	if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
-		return EINVAL;
-
-	/* get the perag structure and ensure that it's inode capable */
-	pag = xfs_get_perag(mp, ino);
-	if (!pag->pagi_inodeok)
-		return EINVAL;
-	ASSERT(pag->pag_ici_init);
-	agino = XFS_INO_TO_AGINO(mp, ino);
-
-again:
-	read_lock(&pag->pag_ici_lock);
-	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+/*
+ * Allocate and initialise an xfs_inode.
+ */
+STATIC struct xfs_inode *
+xfs_inode_alloc(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino)
+{
+	struct xfs_inode	*ip;
+
+	/*
+	 * if this didn't occur in transactions, we could use
+	 * KM_MAYFAIL and return NULL here on ENOMEM. Set the
+	 * code up to do this anyway.
+	 */
+	ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
+	if (!ip)
+		return NULL;
+
+	ASSERT(atomic_read(&ip->i_iocount) == 0);
+	ASSERT(atomic_read(&ip->i_pincount) == 0);
+	ASSERT(!spin_is_locked(&ip->i_flags_lock));
+	ASSERT(completion_done(&ip->i_flush));
+
+	/*
+	 * initialise the VFS inode here to get failures
+	 * out of the way early.
+	 */
+	if (!inode_init_always(mp->m_super, VFS_I(ip))) {
+		kmem_zone_free(xfs_inode_zone, ip);
+		return NULL;
+	}
+
+	/* initialise the xfs inode */
+	ip->i_ino = ino;
+	ip->i_mount = mp;
+	memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
+	ip->i_afp = NULL;
+	memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
+	ip->i_flags = 0;
+	ip->i_update_core = 0;
+	ip->i_update_size = 0;
+	ip->i_delayed_blks = 0;
+	memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
+	ip->i_size = 0;
+	ip->i_new_size = 0;
+
+	/*
+	 * Initialize inode's trace buffers.
+	 */
+#ifdef XFS_INODE_TRACE
+	ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BMAP_TRACE
+	ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_BTREE_TRACE
+	ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_RW_TRACE
+	ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_ILOCK_TRACE
+	ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
+#endif
+#ifdef XFS_DIR2_TRACE
+	ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
+#endif
+
+	return ip;
+}
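The comment at the top of xfs_inode_alloc() anticipates a fallible allocation once the transactional callers can cope. A user-space sketch of that future shape, with an illustrative stub type standing in for struct xfs_inode and calloc() standing in for a KM_MAYFAIL zone allocation:

	#include <stdlib.h>

	struct xfs_inode_stub { unsigned long i_ino; };	/* illustrative only */

	/*
	 * Analogue of the failure-tolerant path the comment anticipates:
	 * the allocation may return NULL instead of sleeping forever,
	 * and the caller maps NULL to ENOMEM.
	 */
	static struct xfs_inode_stub *inode_alloc_mayfail(unsigned long ino)
	{
		struct xfs_inode_stub *ip = calloc(1, sizeof(*ip));

		if (!ip)
			return NULL;
		ip->i_ino = ino;
		return ip;
	}

	int main(void)
	{
		struct xfs_inode_stub *ip = inode_alloc_mayfail(128);

		if (!ip)
			return 1;
		free(ip);
		return 0;
	}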
-
-	if (ip != NULL) {
-		/*
-		 * If INEW is set this inode is being set up
-		 * we need to pause and try again.
-		 */
-		if (xfs_iflags_test(ip, XFS_INEW)) {
-			read_unlock(&pag->pag_ici_lock);
-			delay(1);
-			XFS_STATS_INC(xs_ig_frecycle);
-
-			goto again;
-		}
-
-		old_inode = ip->i_vnode;
-		if (old_inode == NULL) {
-			/*
-			 * If IRECLAIM is set this inode is
-			 * on its way out of the system,
-			 * we need to pause and try again.
-			 */
-			if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-				read_unlock(&pag->pag_ici_lock);
-				delay(1);
-				XFS_STATS_INC(xs_ig_frecycle);
-
-				goto again;
-			}
-			ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-
-			/*
-			 * If lookup is racing with unlink, then we
-			 * should return an error immediately so we
-			 * don't remove it from the reclaim list and
-			 * potentially leak the inode.
-			 */
-			if ((ip->i_d.di_mode == 0) &&
-			    !(flags & XFS_IGET_CREATE)) {
-				read_unlock(&pag->pag_ici_lock);
-				xfs_put_perag(mp, pag);
-				return ENOENT;
-			}
-
-			xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
-
-			XFS_STATS_INC(xs_ig_found);
-			xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
-			read_unlock(&pag->pag_ici_lock);
-
-			XFS_MOUNT_ILOCK(mp);
-			list_del_init(&ip->i_reclaim);
-			XFS_MOUNT_IUNLOCK(mp);
-
-			goto finish_inode;
-
-		} else if (inode != old_inode) {
-			/* The inode is being torn down, pause and
-			 * try again.
-			 */
-			if (old_inode->i_state & (I_FREEING | I_CLEAR)) {
-				read_unlock(&pag->pag_ici_lock);
-				delay(1);
-				XFS_STATS_INC(xs_ig_frecycle);
-
-				goto again;
-			}
-/* Chances are the other vnode (the one in the inode) is being torn
- * down right now, and we landed on top of it. Question is, what do
- * we do? Unhook the old inode and hook up the new one?
- */
-			cmn_err(CE_PANIC,
-				"xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
-				old_inode, inode);
-		}
-
-		/*
-		 * Inode cache hit
-		 */
-		read_unlock(&pag->pag_ici_lock);
-		XFS_STATS_INC(xs_ig_found);
-
-finish_inode:
-		if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
-			xfs_put_perag(mp, pag);
-			return ENOENT;
-		}
-
-		if (lock_flags != 0)
-			xfs_ilock(ip, lock_flags);
-
-		xfs_iflags_clear(ip, XFS_ISTALE);
-		xfs_itrace_exit_tag(ip, "xfs_iget.found");
-		goto return_ip;
-	}
-
-	/*
-	 * Inode cache miss
-	 */
-	read_unlock(&pag->pag_ici_lock);
-	XFS_STATS_INC(xs_ig_missed);
-
-	/*
-	 * Read the disk inode attributes into a new inode structure and get
-	 * a new vnode for it. This should also initialize i_ino and i_mount.
-	 */
-	error = xfs_iread(mp, tp, ino, &ip, bno,
-			(flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
-	if (error) {
-		xfs_put_perag(mp, pag);
-		return error;
-	}
-
-	xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
+
+/*
+ * Check the validity of the inode we just found in the cache
+ */
+static int
+xfs_iget_cache_hit(
+	struct xfs_perag	*pag,
+	struct xfs_inode	*ip,
+	int			flags,
+	int			lock_flags) __releases(pag->pag_ici_lock)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	int			error = EAGAIN;
+
+	/*
+	 * If INEW is set this inode is being set up
+	 * If IRECLAIM is set this inode is being torn down
+	 * Pause and try again.
+	 */
+	if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
+		XFS_STATS_INC(xs_ig_frecycle);
+		goto out_error;
+	}
+
+	/* If IRECLAIMABLE is set, we've torn down the vfs inode part */
+	if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
+
+		/*
+		 * If lookup is racing with unlink, then we should return an
+		 * error immediately so we don't remove it from the reclaim
+		 * list and potentially leak the inode.
+		 */
+		if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+			error = ENOENT;
+			goto out_error;
+		}
+
+		xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
+
+		/*
+		 * We need to re-initialise the VFS inode as it has been
+		 * 'freed' by the VFS. Do this here so we can deal with
+		 * errors cleanly, then tag it so it can be set up correctly
+		 * later.
+		 */
+		if (!inode_init_always(mp->m_super, VFS_I(ip))) {
+			error = ENOMEM;
+			goto out_error;
+		}
+
+		/*
+		 * We must set the XFS_INEW flag before clearing the
+		 * XFS_IRECLAIMABLE flag so that if a racing lookup does
+		 * not find the XFS_IRECLAIMABLE above but has the igrab()
+		 * below succeed we can safely check XFS_INEW to detect
+		 * that this inode is still being initialised.
+		 */
+		xfs_iflags_set(ip, XFS_INEW);
+		xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
+
+		/* clear the radix tree reclaim flag as well. */
+		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
+	} else if (!igrab(VFS_I(ip))) {
+		/* If the VFS inode is being torn down, pause and try again. */
+		XFS_STATS_INC(xs_ig_frecycle);
+		goto out_error;
+	} else if (xfs_iflags_test(ip, XFS_INEW)) {
+		/*
+		 * We are racing with another cache hit that is
+		 * currently recycling this inode out of the XFS_IRECLAIMABLE
+		 * state. Wait for the initialisation to complete before
+		 * continuing.
+		 */
+		wait_on_inode(VFS_I(ip));
+	}
+
+	if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+		error = ENOENT;
+		iput(VFS_I(ip));
+		goto out_error;
+	}
+
+	/* We've got a live one. */
+	read_unlock(&pag->pag_ici_lock);
+
+	if (lock_flags != 0)
+		xfs_ilock(ip, lock_flags);
+
+	xfs_iflags_clear(ip, XFS_ISTALE);
+	xfs_itrace_exit_tag(ip, "xfs_iget.found");
+	XFS_STATS_INC(xs_ig_found);
+	return 0;
+
+out_error:
+	read_unlock(&pag->pag_ici_lock);
+	return error;
+}
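The INEW-before-clearing-IRECLAIMABLE ordering above is the key lock-free detail: a racing lookup that misses IRECLAIMABLE must still be able to see INEW. A user-space analogue using C11 atomics follows; the flag values are illustrative, and in XFS the flags word is actually manipulated under i_flags_lock by xfs_iflags_set/clear rather than with bare atomics, so this only demonstrates the publication ordering.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define XFS_INEW		(1u << 0)	/* illustrative values */
	#define XFS_IRECLAIMABLE	(1u << 1)

	/* stand-in for the per-inode i_flags word */
	static _Atomic unsigned int i_flags = XFS_IRECLAIMABLE;

	/*
	 * Recycling path: publish INEW before retiring IRECLAIMABLE so
	 * there is no window in which a racing lookup sees neither flag.
	 */
	static void recycle(void)
	{
		atomic_fetch_or(&i_flags, XFS_INEW);
		atomic_fetch_and(&i_flags, ~XFS_IRECLAIMABLE);
	}

	/* racing lookup: if it misses IRECLAIMABLE it can still see INEW */
	static bool still_initialising(void)
	{
		return atomic_load(&i_flags) & XFS_INEW;
	}

	int main(void)
	{
		recycle();
		printf("INEW visible: %d\n", still_initialising());
		return 0;
	}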
-
-
-	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
-		     "xfsino", ip->i_ino);
-	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-	init_waitqueue_head(&ip->i_ipin_wait);
-	atomic_set(&ip->i_pincount, 0);
-
-	/*
-	 * Because we want to use a counting completion, complete
-	 * the flush completion once to allow a single access to
-	 * the flush completion without blocking.
-	 */
-	init_completion(&ip->i_flush);
-	complete(&ip->i_flush);
-
-	if (lock_flags)
-		xfs_ilock(ip, lock_flags);
-
-	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
-		xfs_idestroy(ip);
-		xfs_put_perag(mp, pag);
-		return ENOENT;
-	}
-
-	/*
-	 * Preload the radix tree so we can insert safely under the
-	 * write spinlock.
-	 */
-	if (radix_tree_preload(GFP_KERNEL)) {
-		xfs_idestroy(ip);
-		delay(1);
-		goto again;
-	}
-	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
-	first_index = agino & mask;
-	write_lock(&pag->pag_ici_lock);
-	/*
-	 * insert the new inode
-	 */
-	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
-	if (unlikely(error)) {
-		BUG_ON(error != -EEXIST);
-		write_unlock(&pag->pag_ici_lock);
-		radix_tree_preload_end();
-		xfs_idestroy(ip);
-		XFS_STATS_INC(xs_ig_dup);
-		goto again;
-	}
-
-	/*
-	 * These values _must_ be set before releasing the radix tree lock!
-	 */
-	ip->i_udquot = ip->i_gdquot = NULL;
-	xfs_iflags_set(ip, XFS_INEW);
-
-	write_unlock(&pag->pag_ici_lock);
-	radix_tree_preload_end();
-
-	/*
-	 * Link ip to its mount and thread it on the mount's inode list.
-	 */
-	XFS_MOUNT_ILOCK(mp);
-	if ((iq = mp->m_inodes)) {
-		ASSERT(iq->i_mprev->i_mnext == iq);
-		ip->i_mprev = iq->i_mprev;
-		iq->i_mprev->i_mnext = ip;
-		iq->i_mprev = ip;
-		ip->i_mnext = iq;
-	} else {
-		ip->i_mnext = ip;
-		ip->i_mprev = ip;
-	}
-	mp->m_inodes = ip;
-
-	XFS_MOUNT_IUNLOCK(mp);
-	xfs_put_perag(mp, pag);
-
- return_ip:
-	ASSERT(ip->i_df.if_ext_max ==
-	       XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
-
-	xfs_iflags_set(ip, XFS_IMODIFIED);
-	*ipp = ip;
-
-	/*
-	 * Set up the Linux with the Linux inode.
-	 */
-	ip->i_vnode = inode;
-	inode->i_private = ip;
-
-	/*
-	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
-	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
-	 */
-	if (ip->i_d.di_mode != 0)
-		xfs_setup_inode(ip);
-	return 0;
-}
+
+
+static int
+xfs_iget_cache_miss(
+	struct xfs_mount	*mp,
+	struct xfs_perag	*pag,
+	xfs_trans_t		*tp,
+	xfs_ino_t		ino,
+	struct xfs_inode	**ipp,
+	xfs_daddr_t		bno,
+	int			flags,
+	int			lock_flags) __releases(pag->pag_ici_lock)
+{
+	struct xfs_inode	*ip;
+	int			error;
+	unsigned long		first_index, mask;
+	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ino);
+
+	ip = xfs_inode_alloc(mp, ino);
+	if (!ip)
+		return ENOMEM;
+
+	error = xfs_iread(mp, tp, ip, bno, flags);
+	if (error)
+		goto out_destroy;
+
+	xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
+
+	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+		error = ENOENT;
+		goto out_destroy;
+	}
+
+	if (lock_flags)
+		xfs_ilock(ip, lock_flags);
+
+	/*
+	 * Preload the radix tree so we can insert safely under the
+	 * write spinlock. Note that we cannot sleep inside the preload
+	 * region.
+	 */
+	if (radix_tree_preload(GFP_KERNEL)) {
+		error = EAGAIN;
+		goto out_unlock;
+	}
+
+	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+	first_index = agino & mask;
+	write_lock(&pag->pag_ici_lock);
+
+	/* insert the new inode */
+	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
+	if (unlikely(error)) {
+		WARN_ON(error != -EEXIST);
+		XFS_STATS_INC(xs_ig_dup);
+		error = EAGAIN;
+		goto out_preload_end;
+	}
+
+	/* These values _must_ be set before releasing the radix tree lock! */
+	ip->i_udquot = ip->i_gdquot = NULL;
+	xfs_iflags_set(ip, XFS_INEW);
+
+	write_unlock(&pag->pag_ici_lock);
+	radix_tree_preload_end();
+	*ipp = ip;
+	return 0;
+
+out_preload_end:
+	write_unlock(&pag->pag_ici_lock);
+	radix_tree_preload_end();
+out_unlock:
+	if (lock_flags)
+		xfs_iunlock(ip, lock_flags);
+out_destroy:
+	xfs_destroy_inode(ip);
+	return error;
+}
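The preload dance above is the standard way to insert into a radix tree under a spinlock: radix_tree_preload() pre-allocates tree nodes so the insert cannot sleep while the lock is held, and radix_tree_preload_end() re-enables preemption on every exit path. A condensed sketch of the pattern, kernel context assumed; "tree", "lock", "index" and "item" are illustrative names, not identifiers from this patch:

	/*
	 * Sketch only.  Nothing is locked when preload fails, so backing
	 * out is trivial; once the lock is taken, the insert itself is
	 * guaranteed not to sleep because the nodes were preallocated.
	 */
	if (radix_tree_preload(GFP_KERNEL))
		return ENOMEM;

	spin_lock(&lock);
	error = radix_tree_insert(&tree, index, item);	/* may not sleep */
	spin_unlock(&lock);
	radix_tree_preload_end();	/* required on success and failure */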
 
-/*
- * The 'normal' internal xfs_iget, if needed it will
- * 'allocate', or 'get', the vnode.
- */
+/*
+ * Look up an inode by number in the given file system.
+ * The inode is looked up in the cache held in each AG.
+ * If the inode is found in the cache, initialise the vfs inode
+ * if necessary.
+ *
+ * If it is not in core, read it in from the file system's device,
+ * add it to the cache and initialise the vfs inode.
+ *
+ * The inode is locked according to the value of the lock_flags parameter.
+ * This flag parameter indicates how and if the inode's IO lock and inode lock
+ * should be taken.
+ *
+ * mp -- the mount point structure for the current file system.  It points
+ *       to the inode hash table.
+ * tp -- a pointer to the current transaction if there is one.  This is
+ *       simply passed through to the xfs_iread() call.
+ * ino -- the number of the inode desired.  This is the unique identifier
+ *        within the file system for the inode being requested.
+ * lock_flags -- flags indicating how to lock the inode.  See the comment
+ *               for xfs_ilock() for a list of valid values.
+ * bno -- the block number starting the buffer containing the inode,
+ *        if known (as by bulkstat), else 0.
+ */
 int
 xfs_iget(
@@ -324,61 +326,64 @@ xfs_iget(
 	xfs_inode_t	**ipp,
 	xfs_daddr_t	bno)
 {
-	struct inode	*inode;
 	xfs_inode_t	*ip;
 	int		error;
+	xfs_perag_t	*pag;
+	xfs_agino_t	agino;
 
-	XFS_STATS_INC(xs_ig_attempts);
-
-retry:
-	inode = iget_locked(mp->m_super, ino);
-	if (!inode)
-		/* If we got no inode we are out of memory */
-		return ENOMEM;
-
-	if (inode->i_state & I_NEW) {
-		XFS_STATS_INC(vn_active);
-		XFS_STATS_INC(vn_alloc);
-
-		error = xfs_iget_core(inode, mp, tp, ino, flags,
-				lock_flags, ipp, bno);
-		if (error) {
-			make_bad_inode(inode);
-			if (inode->i_state & I_NEW)
-				unlock_new_inode(inode);
-			iput(inode);
-		}
-		return error;
-	}
-
-	/*
-	 * If the inode is not fully constructed due to
-	 * filehandle mismatches wait for the inode to go
-	 * away and try again.
-	 *
-	 * iget_locked will call __wait_on_freeing_inode
-	 * to wait for the inode to go away.
-	 */
-	if (is_bad_inode(inode)) {
-		iput(inode);
-		delay(1);
-		goto retry;
-	}
-
-	ip = XFS_I(inode);
-	if (!ip) {
-		iput(inode);
-		delay(1);
-		goto retry;
-	}
-
-	if (lock_flags != 0)
-		xfs_ilock(ip, lock_flags);
-	XFS_STATS_INC(xs_ig_found);
-	*ipp = ip;
-	return 0;
-}
+	/* the radix tree exists only in inode capable AGs */
+	if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
+		return EINVAL;
+
+	/* get the perag structure and ensure that it's inode capable */
+	pag = xfs_get_perag(mp, ino);
+	if (!pag->pagi_inodeok)
+		return EINVAL;
+	ASSERT(pag->pag_ici_init);
+	agino = XFS_INO_TO_AGINO(mp, ino);
+
+again:
+	error = 0;
+	read_lock(&pag->pag_ici_lock);
+	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+
+	if (ip) {
+		error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
+		if (error)
+			goto out_error_or_again;
+	} else {
+		read_unlock(&pag->pag_ici_lock);
+		XFS_STATS_INC(xs_ig_missed);
+
+		error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
+							flags, lock_flags);
+		if (error)
+			goto out_error_or_again;
+	}
+	xfs_put_perag(mp, pag);
+
+	*ipp = ip;
+
+	ASSERT(ip->i_df.if_ext_max ==
+	       XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
+	/*
+	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
+	 * now.	 If it's a new inode being created, xfs_ialloc will handle it.
+	 */
+	if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
+		xfs_setup_inode(ip);
+	return 0;
+
+out_error_or_again:
+	if (error == EAGAIN) {
+		delay(1);
+		goto again;
+	}
+	xfs_put_perag(mp, pag);
+	return error;
+}
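Both helpers signal transient races with EAGAIN, and the new xfs_iget() funnels them through one retry point: back off briefly, then re-run the lookup. A self-contained user-space analogue of that loop, with try_lookup() standing in for the cache hit/miss helpers and usleep() standing in for delay(1):

	#include <errno.h>
	#include <stdio.h>
	#include <unistd.h>

	/* stand-in for a lookup that transiently fails while an object recycles */
	static int try_lookup(int attempt)
	{
		return attempt < 3 ? -EAGAIN : 0;
	}

	int main(void)
	{
		int attempt = 0, error;

		do {
			error = try_lookup(attempt++);
			if (error == -EAGAIN)
				usleep(1000);	/* analogue of delay(1) */
		} while (error == -EAGAIN);

		printf("succeeded after %d attempts\n", attempt);
		return error;
	}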
 
+
 /*
  * Look for the inode corresponding to the given ino in the hash table.
  * If it is there and its i_transp pointer matches tp, return it.
@@ -444,99 +449,109 @@ xfs_iput_new(
 	IRELE(ip);
 }
 
-
 /*
- * This routine embodies the part of the reclaim code that pulls
- * the inode from the inode hash table and the mount structure's
- * inode list.
- * This should only be called from xfs_reclaim().
+ * This is called to free all the memory associated with an inode.
+ * It must free the inode itself and any buffers allocated for
+ * if_extents/if_data and if_broot.  It must also free the lock
+ * associated with the inode.
+ *
+ * Note: because we don't initialise everything on reallocation out
+ * of the zone, we must ensure we nullify everything correctly before
+ * freeing the structure.
  */
 void
-xfs_ireclaim(xfs_inode_t *ip)
+xfs_ireclaim(
+	struct xfs_inode	*ip)
 {
-	/*
-	 * Remove from old hash list and mount list.
-	 */
-	XFS_STATS_INC(xs_ig_reclaims);
-
-	xfs_iextract(ip);
-
-	/*
-	 * Here we do a spurious inode lock in order to coordinate with
-	 * xfs_sync().	This is because xfs_sync() references the inodes
-	 * in the mount list without taking references on the corresponding
-	 * vnodes.  We make that OK here by ensuring that we wait until
-	 * the inode is unlocked in xfs_sync() before we go ahead and
-	 * free it.  We get both the regular lock and the io lock because
-	 * the xfs_sync() code may need to drop the regular one but will
-	 * still hold the io lock.
-	 */
-	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
-	/*
-	 * Release dquots (and their references) if any. An inode may escape
-	 * xfs_inactive and get here via vn_alloc->vn_reclaim path.
-	 */
-	XFS_QM_DQDETACH(ip->i_mount, ip);
-
-	/*
-	 * Pull our behavior descriptor from the vnode chain.
-	 */
-	if (ip->i_vnode) {
-		ip->i_vnode->i_private = NULL;
-		ip->i_vnode = NULL;
-	}
-
-	/*
-	 * Free all memory associated with the inode.
-	 */
-	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	xfs_idestroy(ip);
-}
-
-/*
- * This routine removes an about-to-be-destroyed inode from
- * all of the lists in which it is located with the exception
- * of the behavior chain.
- */
-void
-xfs_iextract(
-	xfs_inode_t	*ip)
-{
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_perag_t	*pag = xfs_get_perag(mp, ip->i_ino);
-	xfs_inode_t	*iq;
-
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_perag	*pag;
+
+	XFS_STATS_INC(xs_ig_reclaims);
+
+	/*
+	 * Remove the inode from the per-AG radix tree.  It doesn't matter
+	 * if it was never added to it because radix_tree_delete can deal
+	 * with that case just fine.
+	 */
+	pag = xfs_get_perag(mp, ip->i_ino);
 	write_lock(&pag->pag_ici_lock);
 	radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
 	write_unlock(&pag->pag_ici_lock);
 	xfs_put_perag(mp, pag);
514 481
-	/*
-	 * Remove from mount's inode list.
-	 */
+	/*
+	 * Here we do an (almost) spurious inode lock in order to coordinate
+	 * with inode cache radix tree lookups.  This is because the lookup
+	 * can reference the inodes in the cache without taking references.
+	 *
+	 * We make that OK here by ensuring that we wait until the inode is
+	 * unlocked after the lookup before we go ahead and free it.  We get
+	 * both the ilock and the iolock because the code may need to drop
+	 * the ilock but will still hold the iolock.
+	 */
-	XFS_MOUNT_ILOCK(mp);
+	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
-	iq = ip->i_mnext;
-	iq->i_mprev = ip->i_mprev;
-	ip->i_mprev->i_mnext = iq;
-
-	/*
-	 * Fix up the head pointer if it points to the inode being deleted.
-	 */
-	if (mp->m_inodes == ip) {
-		if (ip == iq) {
-			mp->m_inodes = NULL;
-		} else {
-			mp->m_inodes = iq;
-		}
-	}
-
-	/* Deal with the deleted inodes list */
-	list_del_init(&ip->i_reclaim);
-
-	mp->m_ireclaims++;
-	XFS_MOUNT_IUNLOCK(mp);
+	/*
+	 * Release dquots (and their references) if any.
+	 */
+	XFS_QM_DQDETACH(ip->i_mount, ip);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+	switch (ip->i_d.di_mode & S_IFMT) {
+	case S_IFREG:
+	case S_IFDIR:
+	case S_IFLNK:
+		xfs_idestroy_fork(ip, XFS_DATA_FORK);
+		break;
+	}
+
+	if (ip->i_afp)
+		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+
+#ifdef XFS_INODE_TRACE
+	ktrace_free(ip->i_trace);
+#endif
+#ifdef XFS_BMAP_TRACE
+	ktrace_free(ip->i_xtrace);
+#endif
+#ifdef XFS_BTREE_TRACE
+	ktrace_free(ip->i_btrace);
+#endif
+#ifdef XFS_RW_TRACE
+	ktrace_free(ip->i_rwtrace);
+#endif
+#ifdef XFS_ILOCK_TRACE
+	ktrace_free(ip->i_lock_trace);
+#endif
+#ifdef XFS_DIR2_TRACE
+	ktrace_free(ip->i_dir_trace);
+#endif
+	if (ip->i_itemp) {
+		/*
+		 * Only if we are shutting down the fs will we see an
+		 * inode still in the AIL. If it is there, we should remove
+		 * it to prevent a use-after-free from occurring.
+		 */
+		xfs_log_item_t	*lip = &ip->i_itemp->ili_item;
+		struct xfs_ail	*ailp = lip->li_ailp;
+
+		ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
+				       XFS_FORCED_SHUTDOWN(ip->i_mount));
+		if (lip->li_flags & XFS_LI_IN_AIL) {
+			spin_lock(&ailp->xa_lock);
+			if (lip->li_flags & XFS_LI_IN_AIL)
+				xfs_trans_ail_delete(ailp, lip);
+			else
+				spin_unlock(&ailp->xa_lock);
+		}
+		xfs_inode_item_destroy(ip);
+		ip->i_itemp = NULL;
+	}
+	/* asserts to verify all state is correct here */
+	ASSERT(atomic_read(&ip->i_iocount) == 0);
+	ASSERT(atomic_read(&ip->i_pincount) == 0);
+	ASSERT(!spin_is_locked(&ip->i_flags_lock));
+	ASSERT(completion_done(&ip->i_flush));
+	kmem_zone_free(xfs_inode_zone, ip);
 }
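The AIL removal above is a check/lock/re-check pattern: peek at the XFS_LI_IN_AIL flag without the lock, and only if it appears set take xa_lock and test it again before deleting (in the kernel code xfs_trans_ail_delete() drops the lock itself, which is why only the else branch unlocks). A simplified user-space analogue of the same double-check, with a pthread mutex standing in for the AIL spinlock:

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_mutex_t ail_lock = PTHREAD_MUTEX_INITIALIZER;
	static bool in_ail = true;	/* stand-in for XFS_LI_IN_AIL */

	/* unlocked peek, then re-check under the lock before acting */
	static void remove_from_ail(void)
	{
		if (!in_ail)
			return;			/* fast path, no lock taken */
		pthread_mutex_lock(&ail_lock);
		if (in_ail)
			in_ail = false;		/* the actual deletion */
		pthread_mutex_unlock(&ail_lock);
	}

	int main(void)
	{
		remove_from_ail();
		printf("in_ail = %d\n", in_ail);
		return 0;
	}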
541 556
542/* 557/*
@@ -737,7 +752,7 @@ xfs_iunlock(
 	 * it is in the AIL and anyone is waiting on it.  Don't do
 	 * this if the caller has asked us not to.
 	 */
-		xfs_trans_unlocked_item(ip->i_mount,
+		xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
 					(xfs_log_item_t*)(ip->i_itemp));
 	}
 	xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
@@ -790,3 +805,51 @@ xfs_isilocked(
 }
 #endif
 
808#ifdef XFS_INODE_TRACE
809
810#define KTRACE_ENTER(ip, vk, s, line, ra) \
811 ktrace_enter((ip)->i_trace, \
812/* 0 */ (void *)(__psint_t)(vk), \
813/* 1 */ (void *)(s), \
814/* 2 */ (void *)(__psint_t) line, \
815/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \
816/* 4 */ (void *)(ra), \
817/* 5 */ NULL, \
818/* 6 */ (void *)(__psint_t)current_cpu(), \
819/* 7 */ (void *)(__psint_t)current_pid(), \
820/* 8 */ (void *)__return_address, \
821/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
822
823/*
824 * Vnode tracing code.
825 */
826void
827_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
828{
829 KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
830}
831
832void
833_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
834{
835 KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
836}
837
838void
839xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
840{
841 KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
842}
843
844void
845_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
846{
847 KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
848}
849
850void
851xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
852{
853 KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
854}
855#endif /* XFS_INODE_TRACE */
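KTRACE_ENTER above records a fixed set of slots (tag, string, line, refcount, return addresses, cpu, pid) into the inode's ktrace buffer. A toy user-space analogue of that slot-based ring recording, purely illustrative and much smaller than the real ktrace machinery:

	#include <stdint.h>
	#include <stdio.h>

	/* toy analogue of the 16-slot entries filled by KTRACE_ENTER */
	struct trace_entry {
		const void	*slot[16];
	};

	static struct trace_entry ring[64];
	static unsigned int head;

	static void trace_enter(const void *tag, const void *fn, long line)
	{
		struct trace_entry *te = &ring[head++ % 64];

		te->slot[0] = tag;
		te->slot[1] = fn;
		te->slot[2] = (const void *)(uintptr_t)line; /* like the __psint_t casts */
	}

	int main(void)
	{
		trace_enter("ENTRY", "xfs_iget", 1);
		printf("recorded %u entries\n", head);
		return 0;
	}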
diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h
deleted file mode 100644
index d36450003983..000000000000
--- a/fs/xfs/xfs_imap.h
+++ /dev/null
@@ -1,40 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_IMAP_H__
19#define __XFS_IMAP_H__
20
21/*
22 * This is the structure passed to xfs_imap() to map
23 * an inode number to its on disk location.
24 */
25typedef struct xfs_imap {
26 xfs_daddr_t im_blkno; /* starting BB of inode chunk */
27 uint im_len; /* length in BBs of inode chunk */
28 xfs_agblock_t im_agblkno; /* logical block of inode chunk in ag */
29 ushort im_ioffset; /* inode offset in block in "inodes" */
30 ushort im_boffset; /* inode offset in block in bytes */
31} xfs_imap_t;
32
33#ifdef __KERNEL__
34struct xfs_mount;
35struct xfs_trans;
36int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
37 xfs_imap_t *, uint);
38#endif
39
40#endif /* __XFS_IMAP_H__ */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a391b955df01..5a5e035e5d38 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -23,7 +23,6 @@
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
-#include "xfs_imap.h"
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_sb.h"
@@ -41,6 +40,7 @@
 #include "xfs_buf_item.h"
 #include "xfs_inode_item.h"
 #include "xfs_btree.h"
+#include "xfs_btree_trace.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_bmap.h"
@@ -133,10 +133,10 @@ STATIC int
 xfs_imap_to_bp(
 	xfs_mount_t	*mp,
 	xfs_trans_t	*tp,
-	xfs_imap_t	*imap,
+	struct xfs_imap	*imap,
 	xfs_buf_t	**bpp,
 	uint		buf_flags,
-	uint		imap_flags)
+	uint		iget_flags)
 {
 	int		error;
 	int		i;
@@ -173,12 +173,12 @@ xfs_imap_to_bp(
 
 		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
 					(i << mp->m_sb.sb_inodelog));
-		di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
-			    XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
+		di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
+			    XFS_DINODE_GOOD_VERSION(dip->di_version);
 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
 						XFS_ERRTAG_ITOBP_INOTOBP,
 						XFS_RANDOM_ITOBP_INOTOBP))) {
-			if (imap_flags & XFS_IMAP_BULKSTAT) {
+			if (iget_flags & XFS_IGET_BULKSTAT) {
 				xfs_trans_brelse(tp, bp);
 				return XFS_ERROR(EINVAL);
 			}
@@ -190,7 +190,7 @@ xfs_imap_to_bp(
190 "daddr %lld #%d (magic=%x)", 190 "daddr %lld #%d (magic=%x)",
191 XFS_BUFTARG_NAME(mp->m_ddev_targp), 191 XFS_BUFTARG_NAME(mp->m_ddev_targp),
192 (unsigned long long)imap->im_blkno, i, 192 (unsigned long long)imap->im_blkno, i,
193 be16_to_cpu(dip->di_core.di_magic)); 193 be16_to_cpu(dip->di_magic));
194#endif 194#endif
195 xfs_trans_brelse(tp, bp); 195 xfs_trans_brelse(tp, bp);
196 return XFS_ERROR(EFSCORRUPTED); 196 return XFS_ERROR(EFSCORRUPTED);
@@ -221,25 +221,26 @@ xfs_imap_to_bp(
221 * Use xfs_imap() to determine the size and location of the 221 * Use xfs_imap() to determine the size and location of the
222 * buffer to read from disk. 222 * buffer to read from disk.
223 */ 223 */
224STATIC int 224int
225xfs_inotobp( 225xfs_inotobp(
226 xfs_mount_t *mp, 226 xfs_mount_t *mp,
227 xfs_trans_t *tp, 227 xfs_trans_t *tp,
228 xfs_ino_t ino, 228 xfs_ino_t ino,
229 xfs_dinode_t **dipp, 229 xfs_dinode_t **dipp,
230 xfs_buf_t **bpp, 230 xfs_buf_t **bpp,
231 int *offset) 231 int *offset,
232 uint imap_flags)
232{ 233{
233 xfs_imap_t imap; 234 struct xfs_imap imap;
234 xfs_buf_t *bp; 235 xfs_buf_t *bp;
235 int error; 236 int error;
236 237
237 imap.im_blkno = 0; 238 imap.im_blkno = 0;
238 error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); 239 error = xfs_imap(mp, tp, ino, &imap, imap_flags);
239 if (error) 240 if (error)
240 return error; 241 return error;
241 242
242 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); 243 error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags);
243 if (error) 244 if (error)
244 return error; 245 return error;
245 246
@@ -260,15 +261,11 @@ xfs_inotobp(
  * If a non-zero error is returned, then the contents of bpp and
  * dipp are undefined.
  *
- * If the inode is new and has not yet been initialized, use xfs_imap()
- * to determine the size and location of the buffer to read from disk.
- * If the inode has already been mapped to its buffer and read in once,
- * then use the mapping information stored in the inode rather than
- * calling xfs_imap().  This allows us to avoid the overhead of looking
- * at the inode btree for small block file systems (see xfs_dilocate()).
- * We can tell whether the inode has been mapped in before by comparing
- * its disk block address to 0.  Only uninitialized inodes will have
- * 0 for the disk block address.
+ * The inode is expected to have already been mapped to its buffer and
+ * read in once, thus we can use the mapping information stored in the
+ * inode rather than calling xfs_imap().  This allows us to avoid the
+ * overhead of looking at the inode btree for small block file systems
+ * (see xfs_imap()).
  */
273int 270int
274xfs_itobp( 271xfs_itobp(
@@ -277,40 +274,14 @@ xfs_itobp(
277 xfs_inode_t *ip, 274 xfs_inode_t *ip,
278 xfs_dinode_t **dipp, 275 xfs_dinode_t **dipp,
279 xfs_buf_t **bpp, 276 xfs_buf_t **bpp,
280 xfs_daddr_t bno,
281 uint imap_flags,
282 uint buf_flags) 277 uint buf_flags)
283{ 278{
284 xfs_imap_t imap;
285 xfs_buf_t *bp; 279 xfs_buf_t *bp;
286 int error; 280 int error;
287 281
288 if (ip->i_blkno == (xfs_daddr_t)0) { 282 ASSERT(ip->i_imap.im_blkno != 0);
289 imap.im_blkno = bno;
290 error = xfs_imap(mp, tp, ip->i_ino, &imap,
291 XFS_IMAP_LOOKUP | imap_flags);
292 if (error)
293 return error;
294 283
295 /* 284 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0);
296 * Fill in the fields in the inode that will be used to
297 * map the inode to its buffer from now on.
298 */
299 ip->i_blkno = imap.im_blkno;
300 ip->i_len = imap.im_len;
301 ip->i_boffset = imap.im_boffset;
302 } else {
303 /*
304 * We've already mapped the inode once, so just use the
305 * mapping that we saved the first time.
306 */
307 imap.im_blkno = ip->i_blkno;
308 imap.im_len = ip->i_len;
309 imap.im_boffset = ip->i_boffset;
310 }
311 ASSERT(bno == 0 || bno == imap.im_blkno);
312
313 error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags);
314 if (error) 285 if (error)
315 return error; 286 return error;
316 287
@@ -321,7 +292,7 @@ xfs_itobp(
321 return EAGAIN; 292 return EAGAIN;
322 } 293 }
323 294
324 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 295 *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
325 *bpp = bp; 296 *bpp = bp;
326 return 0; 297 return 0;
327} 298}
@@ -348,26 +319,26 @@ xfs_iformat(
348 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 319 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
349 error = 0; 320 error = 0;
350 321
351 if (unlikely(be32_to_cpu(dip->di_core.di_nextents) + 322 if (unlikely(be32_to_cpu(dip->di_nextents) +
352 be16_to_cpu(dip->di_core.di_anextents) > 323 be16_to_cpu(dip->di_anextents) >
353 be64_to_cpu(dip->di_core.di_nblocks))) { 324 be64_to_cpu(dip->di_nblocks))) {
354 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 325 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
355 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 326 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
356 (unsigned long long)ip->i_ino, 327 (unsigned long long)ip->i_ino,
357 (int)(be32_to_cpu(dip->di_core.di_nextents) + 328 (int)(be32_to_cpu(dip->di_nextents) +
358 be16_to_cpu(dip->di_core.di_anextents)), 329 be16_to_cpu(dip->di_anextents)),
359 (unsigned long long) 330 (unsigned long long)
360 be64_to_cpu(dip->di_core.di_nblocks)); 331 be64_to_cpu(dip->di_nblocks));
361 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 332 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
362 ip->i_mount, dip); 333 ip->i_mount, dip);
363 return XFS_ERROR(EFSCORRUPTED); 334 return XFS_ERROR(EFSCORRUPTED);
364 } 335 }
365 336
366 if (unlikely(dip->di_core.di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 337 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
367 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 338 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
368 "corrupt dinode %Lu, forkoff = 0x%x.", 339 "corrupt dinode %Lu, forkoff = 0x%x.",
369 (unsigned long long)ip->i_ino, 340 (unsigned long long)ip->i_ino,
370 dip->di_core.di_forkoff); 341 dip->di_forkoff);
371 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 342 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
372 ip->i_mount, dip); 343 ip->i_mount, dip);
373 return XFS_ERROR(EFSCORRUPTED); 344 return XFS_ERROR(EFSCORRUPTED);
@@ -378,25 +349,25 @@ xfs_iformat(
378 case S_IFCHR: 349 case S_IFCHR:
379 case S_IFBLK: 350 case S_IFBLK:
380 case S_IFSOCK: 351 case S_IFSOCK:
381 if (unlikely(dip->di_core.di_format != XFS_DINODE_FMT_DEV)) { 352 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
382 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 353 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
383 ip->i_mount, dip); 354 ip->i_mount, dip);
384 return XFS_ERROR(EFSCORRUPTED); 355 return XFS_ERROR(EFSCORRUPTED);
385 } 356 }
386 ip->i_d.di_size = 0; 357 ip->i_d.di_size = 0;
387 ip->i_size = 0; 358 ip->i_size = 0;
388 ip->i_df.if_u2.if_rdev = be32_to_cpu(dip->di_u.di_dev); 359 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
389 break; 360 break;
390 361
391 case S_IFREG: 362 case S_IFREG:
392 case S_IFLNK: 363 case S_IFLNK:
393 case S_IFDIR: 364 case S_IFDIR:
394 switch (dip->di_core.di_format) { 365 switch (dip->di_format) {
395 case XFS_DINODE_FMT_LOCAL: 366 case XFS_DINODE_FMT_LOCAL:
396 /* 367 /*
397 * no local regular files yet 368 * no local regular files yet
398 */ 369 */
399 if (unlikely((be16_to_cpu(dip->di_core.di_mode) & S_IFMT) == S_IFREG)) { 370 if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
400 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 371 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
401 "corrupt inode %Lu " 372 "corrupt inode %Lu "
402 "(local format for regular file).", 373 "(local format for regular file).",
@@ -407,7 +378,7 @@ xfs_iformat(
407 return XFS_ERROR(EFSCORRUPTED); 378 return XFS_ERROR(EFSCORRUPTED);
408 } 379 }
409 380
410 di_size = be64_to_cpu(dip->di_core.di_size); 381 di_size = be64_to_cpu(dip->di_size);
411 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 382 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
412 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 383 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
413 "corrupt inode %Lu " 384 "corrupt inode %Lu "
@@ -449,7 +420,7 @@ xfs_iformat(
449 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); 420 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
450 ip->i_afp->if_ext_max = 421 ip->i_afp->if_ext_max =
451 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 422 XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
452 switch (dip->di_core.di_aformat) { 423 switch (dip->di_aformat) {
453 case XFS_DINODE_FMT_LOCAL: 424 case XFS_DINODE_FMT_LOCAL:
454 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); 425 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
455 size = be16_to_cpu(atp->hdr.totsize); 426 size = be16_to_cpu(atp->hdr.totsize);
@@ -621,7 +592,7 @@ xfs_iformat_btree(
621 ifp = XFS_IFORK_PTR(ip, whichfork); 592 ifp = XFS_IFORK_PTR(ip, whichfork);
622 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); 593 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
623 size = XFS_BMAP_BROOT_SPACE(dfp); 594 size = XFS_BMAP_BROOT_SPACE(dfp);
624 nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); 595 nrecs = be16_to_cpu(dfp->bb_numrecs);
625 596
626 /* 597 /*
627 * blow out if -- fork has less extents than can fit in 598 * blow out if -- fork has less extents than can fit in
@@ -649,8 +620,9 @@ xfs_iformat_btree(
649 * Copy and convert from the on-disk structure 620 * Copy and convert from the on-disk structure
650 * to the in-memory structure. 621 * to the in-memory structure.
651 */ 622 */
652 xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), 623 xfs_bmdr_to_bmbt(ip->i_mount, dfp,
653 ifp->if_broot, size); 624 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
625 ifp->if_broot, size);
654 ifp->if_flags &= ~XFS_IFEXTENTS; 626 ifp->if_flags &= ~XFS_IFEXTENTS;
655 ifp->if_flags |= XFS_IFBROOT; 627 ifp->if_flags |= XFS_IFBROOT;
656 628
@@ -660,7 +632,7 @@ xfs_iformat_btree(
660void 632void
661xfs_dinode_from_disk( 633xfs_dinode_from_disk(
662 xfs_icdinode_t *to, 634 xfs_icdinode_t *to,
663 xfs_dinode_core_t *from) 635 xfs_dinode_t *from)
664{ 636{
665 to->di_magic = be16_to_cpu(from->di_magic); 637 to->di_magic = be16_to_cpu(from->di_magic);
666 to->di_mode = be16_to_cpu(from->di_mode); 638 to->di_mode = be16_to_cpu(from->di_mode);
@@ -694,7 +666,7 @@ xfs_dinode_from_disk(
694 666
695void 667void
696xfs_dinode_to_disk( 668xfs_dinode_to_disk(
697 xfs_dinode_core_t *to, 669 xfs_dinode_t *to,
698 xfs_icdinode_t *from) 670 xfs_icdinode_t *from)
699{ 671{
700 to->di_magic = cpu_to_be16(from->di_magic); 672 to->di_magic = cpu_to_be16(from->di_magic);
@@ -781,93 +753,57 @@ uint
781xfs_dic2xflags( 753xfs_dic2xflags(
782 xfs_dinode_t *dip) 754 xfs_dinode_t *dip)
783{ 755{
784 xfs_dinode_core_t *dic = &dip->di_core; 756 return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
785
786 return _xfs_dic2xflags(be16_to_cpu(dic->di_flags)) |
787 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); 757 (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
788} 758}
789 759
790/* 760/*
791 * Given a mount structure and an inode number, return a pointer 761 * Read the disk inode attributes into the in-core inode structure.
792 * to a newly allocated in-core inode corresponding to the given
793 * inode number.
794 *
795 * Initialize the inode's attributes and extent pointers if it
796 * already has them (it will not if the inode has no links).
797 */ 762 */
798int 763int
799xfs_iread( 764xfs_iread(
800 xfs_mount_t *mp, 765 xfs_mount_t *mp,
801 xfs_trans_t *tp, 766 xfs_trans_t *tp,
802 xfs_ino_t ino, 767 xfs_inode_t *ip,
803 xfs_inode_t **ipp,
804 xfs_daddr_t bno, 768 xfs_daddr_t bno,
805 uint imap_flags) 769 uint iget_flags)
806{ 770{
807 xfs_buf_t *bp; 771 xfs_buf_t *bp;
808 xfs_dinode_t *dip; 772 xfs_dinode_t *dip;
809 xfs_inode_t *ip;
810 int error; 773 int error;
811 774
812 ASSERT(xfs_inode_zone != NULL);
813
814 ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP);
815 ip->i_ino = ino;
816 ip->i_mount = mp;
817 atomic_set(&ip->i_iocount, 0);
818 spin_lock_init(&ip->i_flags_lock);
819
820 /* 775 /*
821 * Get pointer's to the on-disk inode and the buffer containing it. 776 * Fill in the location information in the in-core inode.
822 * If the inode number refers to a block outside the file system
823 * then xfs_itobp() will return NULL. In this case we should
824 * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
825 * know that this is a new incore inode.
826 */ 777 */
827 error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); 778 ip->i_imap.im_blkno = bno;
828 if (error) { 779 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
829 kmem_zone_free(xfs_inode_zone, ip); 780 if (error)
830 return error; 781 return error;
831 } 782 ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);
832 783
833 /* 784 /*
834 * Initialize inode's trace buffers. 785 * Get pointers to the on-disk inode and the buffer containing it.
835 * Do this before xfs_iformat in case it adds entries.
836 */ 786 */
837#ifdef XFS_INODE_TRACE 787 error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp,
838 ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); 788 XFS_BUF_LOCK, iget_flags);
839#endif 789 if (error)
840#ifdef XFS_BMAP_TRACE 790 return error;
841 ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); 791 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
842#endif
843#ifdef XFS_BMBT_TRACE
844 ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
845#endif
846#ifdef XFS_RW_TRACE
847 ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
848#endif
849#ifdef XFS_ILOCK_TRACE
850 ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
851#endif
852#ifdef XFS_DIR2_TRACE
853 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
854#endif
855 792
856 /* 793 /*
857 * If we got something that isn't an inode it means someone 794 * If we got something that isn't an inode it means someone
858 * (nfs or dmi) has a stale handle. 795 * (nfs or dmi) has a stale handle.
859 */ 796 */
860 if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { 797 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
861 kmem_zone_free(xfs_inode_zone, ip);
862 xfs_trans_brelse(tp, bp);
863#ifdef DEBUG 798#ifdef DEBUG
864 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 799 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
865 "dip->di_core.di_magic (0x%x) != " 800 "dip->di_magic (0x%x) != "
866 "XFS_DINODE_MAGIC (0x%x)", 801 "XFS_DINODE_MAGIC (0x%x)",
867 be16_to_cpu(dip->di_core.di_magic), 802 be16_to_cpu(dip->di_magic),
868 XFS_DINODE_MAGIC); 803 XFS_DINODE_MAGIC);
869#endif /* DEBUG */ 804#endif /* DEBUG */
870 return XFS_ERROR(EINVAL); 805 error = XFS_ERROR(EINVAL);
806 goto out_brelse;
871 } 807 }
872 808
873 /* 809 /*
@@ -877,24 +813,22 @@ xfs_iread(
877 * specific information. 813 * specific information.
878 * Otherwise, just get the truly permanent information. 814 * Otherwise, just get the truly permanent information.
879 */ 815 */
880 if (dip->di_core.di_mode) { 816 if (dip->di_mode) {
881 xfs_dinode_from_disk(&ip->i_d, &dip->di_core); 817 xfs_dinode_from_disk(&ip->i_d, dip);
882 error = xfs_iformat(ip, dip); 818 error = xfs_iformat(ip, dip);
883 if (error) { 819 if (error) {
884 kmem_zone_free(xfs_inode_zone, ip);
885 xfs_trans_brelse(tp, bp);
886#ifdef DEBUG 820#ifdef DEBUG
887 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 821 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: "
888 "xfs_iformat() returned error %d", 822 "xfs_iformat() returned error %d",
889 error); 823 error);
890#endif /* DEBUG */ 824#endif /* DEBUG */
891 return error; 825 goto out_brelse;
892 } 826 }
893 } else { 827 } else {
894 ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic); 828 ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
895 ip->i_d.di_version = dip->di_core.di_version; 829 ip->i_d.di_version = dip->di_version;
896 ip->i_d.di_gen = be32_to_cpu(dip->di_core.di_gen); 830 ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
897 ip->i_d.di_flushiter = be16_to_cpu(dip->di_core.di_flushiter); 831 ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
898 /* 832 /*
899 * Make sure to pull in the mode here as well in 833 * Make sure to pull in the mode here as well in
900 * case the inode is released without being used. 834 * case the inode is released without being used.
@@ -911,8 +845,6 @@ xfs_iread(
911 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); 845 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
912 } 846 }
913 847
914 INIT_LIST_HEAD(&ip->i_reclaim);
915
916 /* 848 /*
917 * The inode format changed when we moved the link count and 849 * The inode format changed when we moved the link count and
918 * made it 32 bits long. If this is an old format inode, 850 * made it 32 bits long. If this is an old format inode,
@@ -924,7 +856,7 @@ xfs_iread(
924 * the new format. We don't change the version number so that we 856 * the new format. We don't change the version number so that we
925 * can distinguish this from a real new format inode. 857 * can distinguish this from a real new format inode.
926 */ 858 */
927 if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { 859 if (ip->i_d.di_version == 1) {
928 ip->i_d.di_nlink = ip->i_d.di_onlink; 860 ip->i_d.di_nlink = ip->i_d.di_onlink;
929 ip->i_d.di_onlink = 0; 861 ip->i_d.di_onlink = 0;
930 ip->i_d.di_projid = 0; 862 ip->i_d.di_projid = 0;
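The comment above describes an in-memory-only conversion: version 1 inodes keep their 16-bit link count in di_onlink, and on read it is copied into the 32-bit di_nlink field without changing the on-disk version number. A self-contained sketch of that conversion, using an illustrative subset of the fields (the real xfs_icdinode has many more):

	#include <stdio.h>

	/* illustrative subset of xfs_icdinode */
	struct icdinode_stub {
		int		di_version;
		unsigned int	di_nlink;	/* v2: 32-bit link count */
		unsigned short	di_onlink;	/* v1: old 16-bit link count */
		unsigned short	di_projid;
	};

	/* sketch of the in-memory v1 -> v2 widening done in xfs_iread() */
	static void widen_link_count(struct icdinode_stub *d)
	{
		if (d->di_version == 1) {
			d->di_nlink = d->di_onlink;	/* copy into wide field */
			d->di_onlink = 0;
			d->di_projid = 0;	/* v1 inodes carry no projid */
		}
	}

	int main(void)
	{
		struct icdinode_stub d = { .di_version = 1, .di_onlink = 3 };

		widen_link_count(&d);
		printf("nlink = %u\n", d.di_nlink);
		return 0;
	}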
@@ -938,7 +870,7 @@ xfs_iread(
938 * around for a while. This helps to keep recently accessed 870 * around for a while. This helps to keep recently accessed
939 * meta-data in-core longer. 871 * meta-data in-core longer.
940 */ 872 */
941 XFS_BUF_SET_REF(bp, XFS_INO_REF); 873 XFS_BUF_SET_REF(bp, XFS_INO_REF);
942 874
943 /* 875 /*
944 * Use xfs_trans_brelse() to release the buffer containing the 876 * Use xfs_trans_brelse() to release the buffer containing the
@@ -953,9 +885,9 @@ xfs_iread(
953 * to worry about the inode being changed just because we released 885 * to worry about the inode being changed just because we released
954 * the buffer. 886 * the buffer.
955 */ 887 */
888 out_brelse:
956 xfs_trans_brelse(tp, bp); 889 xfs_trans_brelse(tp, bp);
957 *ipp = ip; 890 return error;
958 return 0;
959} 891}
960 892
961/* 893/*
@@ -1049,6 +981,7 @@ xfs_ialloc(
1049 uint flags; 981 uint flags;
1050 int error; 982 int error;
1051 timespec_t tv; 983 timespec_t tv;
984 int filestreams = 0;
1052 985
1053 /* 986 /*
1054 * Call the space management code to pick 987 * Call the space management code to pick
@@ -1056,9 +989,8 @@ xfs_ialloc(
1056 */ 989 */
1057 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 990 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
1058 ialloc_context, call_again, &ino); 991 ialloc_context, call_again, &ino);
1059 if (error != 0) { 992 if (error)
1060 return error; 993 return error;
1061 }
1062 if (*call_again || ino == NULLFSINO) { 994 if (*call_again || ino == NULLFSINO) {
1063 *ipp = NULL; 995 *ipp = NULL;
1064 return 0; 996 return 0;
@@ -1072,9 +1004,8 @@ xfs_ialloc(
1072 */ 1004 */
1073 error = xfs_trans_iget(tp->t_mountp, tp, ino, 1005 error = xfs_trans_iget(tp->t_mountp, tp, ino,
1074 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); 1006 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
1075 if (error != 0) { 1007 if (error)
1076 return error; 1008 return error;
1077 }
1078 ASSERT(ip != NULL); 1009 ASSERT(ip != NULL);
1079 1010
1080 ip->i_d.di_mode = (__uint16_t)mode; 1011 ip->i_d.di_mode = (__uint16_t)mode;
@@ -1093,8 +1024,8 @@ xfs_ialloc(
1093 * here rather than here and in the flush/logging code. 1024 * here rather than here and in the flush/logging code.
1094 */ 1025 */
1095 if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && 1026 if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
1096 ip->i_d.di_version == XFS_DINODE_VERSION_1) { 1027 ip->i_d.di_version == 1) {
1097 ip->i_d.di_version = XFS_DINODE_VERSION_2; 1028 ip->i_d.di_version = 2;
1098 /* 1029 /*
1099 * We've already zeroed the old link count, the projid field, 1030 * We've already zeroed the old link count, the projid field,
1100 * and the pad field. 1031 * and the pad field.
@@ -1104,7 +1035,7 @@ xfs_ialloc(
1104 /* 1035 /*
1105 * Project ids won't be stored on disk if we are using a version 1 inode. 1036 * Project ids won't be stored on disk if we are using a version 1 inode.
1106 */ 1037 */
1107 if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) 1038 if ((prid != 0) && (ip->i_d.di_version == 1))
1108 xfs_bump_ino_vers2(tp, ip); 1039 xfs_bump_ino_vers2(tp, ip);
1109 1040
1110 if (pip && XFS_INHERIT_GID(pip)) { 1041 if (pip && XFS_INHERIT_GID(pip)) {
@@ -1155,13 +1086,12 @@ xfs_ialloc(
1155 flags |= XFS_ILOG_DEV; 1086 flags |= XFS_ILOG_DEV;
1156 break; 1087 break;
1157 case S_IFREG: 1088 case S_IFREG:
1158 if (pip && xfs_inode_is_filestream(pip)) { 1089 /*
1159 error = xfs_filestream_associate(pip, ip); 1090 * we can't set up filestreams until after the VFS inode
1160 if (error < 0) 1091 * is set up properly.
1161 return -error; 1092 */
1162 if (!error) 1093 if (pip && xfs_inode_is_filestream(pip))
1163 xfs_iflags_set(ip, XFS_IFILESTREAM); 1094 filestreams = 1;
1164 }
1165 /* fall through */ 1095 /* fall through */
1166 case S_IFDIR: 1096 case S_IFDIR:
1167 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { 1097 if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
@@ -1227,6 +1157,15 @@ xfs_ialloc(
1227 /* now that we have an i_mode we can setup inode ops and unlock */ 1157 /* now that we have an i_mode we can setup inode ops and unlock */
1228 xfs_setup_inode(ip); 1158 xfs_setup_inode(ip);
1229 1159
1160 /* now we have set up the vfs inode we can associate the filestream */
1161 if (filestreams) {
1162 error = xfs_filestream_associate(pip, ip);
1163 if (error < 0)
1164 return -error;
1165 if (!error)
1166 xfs_iflags_set(ip, XFS_IFILESTREAM);
1167 }
1168
1230 *ipp = ip; 1169 *ipp = ip;
1231 return 0; 1170 return 0;
1232} 1171}
@@ -1383,8 +1322,8 @@ xfs_itrunc_trace(
1383 * direct I/O with the truncate operation. Also, because we hold 1322 * direct I/O with the truncate operation. Also, because we hold
1384 * the IOLOCK in exclusive mode, we prevent new direct I/Os from being 1323 * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
1385 * started until the truncate completes and drops the lock. Essentially, 1324 * started until the truncate completes and drops the lock. Essentially,
1386 * the vn_iowait() call forms an I/O barrier that provides strict ordering 1325 * the xfs_ioend_wait() call forms an I/O barrier that provides strict
1387 * between direct I/Os and the truncate operation. 1326 * ordering between direct I/Os and the truncate operation.
1388 * 1327 *
1389 * The flags parameter can have either the value XFS_ITRUNC_DEFINITE 1328 * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
1390 * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used 1329 * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used
@@ -1415,7 +1354,7 @@ xfs_itruncate_start(
1415 1354
1416 /* wait for the completion of any pending DIOs */ 1355 /* wait for the completion of any pending DIOs */
1417 if (new_size == 0 || new_size < ip->i_size) 1356 if (new_size == 0 || new_size < ip->i_size)
1418 vn_iowait(ip); 1357 xfs_ioend_wait(ip);
1419 1358
1420 /* 1359 /*
1421 * Call toss_pages or flushinval_pages to get rid of pages 1360 * Call toss_pages or flushinval_pages to get rid of pages
@@ -1726,8 +1665,14 @@ xfs_itruncate_finish(
1726 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1665 xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1727 xfs_trans_ihold(ntp, ip); 1666 xfs_trans_ihold(ntp, ip);
1728 1667
1729 if (!error) 1668 if (error)
1730 error = xfs_trans_reserve(ntp, 0, 1669 return error;
1670 /*
1671 * transaction commit worked ok so we can drop the extra ticket
1672 * reference that we gained in xfs_trans_dup()
1673 */
1674 xfs_log_ticket_put(ntp->t_ticket);
1675 error = xfs_trans_reserve(ntp, 0,
1731 XFS_ITRUNCATE_LOG_RES(mp), 0, 1676 XFS_ITRUNCATE_LOG_RES(mp), 0,
1732 XFS_TRANS_PERM_LOG_RES, 1677 XFS_TRANS_PERM_LOG_RES,
1733 XFS_ITRUNCATE_LOG_COUNT); 1678 XFS_ITRUNCATE_LOG_COUNT);
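For readability, the new error handling in this hunk condenses to the sequence below. This is a hedged reconstruction assembled only from the + lines above, not a verbatim quote of xfs_itruncate_finish(); ntp and mp are the names used in the function. The point is that xfs_trans_dup() leaves the new transaction holding an extra log ticket reference, which must be dropped with xfs_log_ticket_put() once the commit of the previous transaction has succeeded, before reserving log space again:

	if (error)
		return error;
	/*
	 * transaction commit worked ok so we can drop the extra ticket
	 * reference that we gained in xfs_trans_dup()
	 */
	xfs_log_ticket_put(ntp->t_ticket);
	error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
				  XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);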
@@ -1781,13 +1726,10 @@ xfs_iunlink(
1781 xfs_dinode_t *dip; 1726 xfs_dinode_t *dip;
1782 xfs_buf_t *agibp; 1727 xfs_buf_t *agibp;
1783 xfs_buf_t *ibp; 1728 xfs_buf_t *ibp;
1784 xfs_agnumber_t agno;
1785 xfs_daddr_t agdaddr;
1786 xfs_agino_t agino; 1729 xfs_agino_t agino;
1787 short bucket_index; 1730 short bucket_index;
1788 int offset; 1731 int offset;
1789 int error; 1732 int error;
1790 int agi_ok;
1791 1733
1792 ASSERT(ip->i_d.di_nlink == 0); 1734 ASSERT(ip->i_d.di_nlink == 0);
1793 ASSERT(ip->i_d.di_mode != 0); 1735 ASSERT(ip->i_d.di_mode != 0);
@@ -1795,31 +1737,15 @@ xfs_iunlink(
1795 1737
1796 mp = tp->t_mountp; 1738 mp = tp->t_mountp;
1797 1739
1798 agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
1799 agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
1800
1801 /* 1740 /*
1802 * Get the agi buffer first. It ensures lock ordering 1741 * Get the agi buffer first. It ensures lock ordering
1803 * on the list. 1742 * on the list.
1804 */ 1743 */
1805 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, 1744 error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp);
1806 XFS_FSS_TO_BB(mp, 1), 0, &agibp);
1807 if (error) 1745 if (error)
1808 return error; 1746 return error;
1809
1810 /*
1811 * Validate the magic number of the agi block.
1812 */
1813 agi = XFS_BUF_TO_AGI(agibp); 1747 agi = XFS_BUF_TO_AGI(agibp);
1814 agi_ok = 1748
1815 be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
1816 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
1817 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK,
1818 XFS_RANDOM_IUNLINK))) {
1819 XFS_CORRUPTION_ERROR("xfs_iunlink", XFS_ERRLEVEL_LOW, mp, agi);
1820 xfs_trans_brelse(tp, agibp);
1821 return XFS_ERROR(EFSCORRUPTED);
1822 }
1823 /* 1749 /*
1824 * Get the index into the agi hash table for the 1750 * Get the index into the agi hash table for the
1825 * list this inode will go on. 1751 * list this inode will go on.
@@ -1837,14 +1763,14 @@ xfs_iunlink(
1837 * Here we put the head pointer into our next pointer, 1763 * Here we put the head pointer into our next pointer,
1838 * and then we fall through to point the head at us. 1764 * and then we fall through to point the head at us.
1839 */ 1765 */
1840 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 1766 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
1841 if (error) 1767 if (error)
1842 return error; 1768 return error;
1843 1769
1844 ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); 1770 ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
1845 /* both on-disk, don't endian flip twice */ 1771 /* both on-disk, don't endian flip twice */
1846 dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 1772 dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
1847 offset = ip->i_boffset + 1773 offset = ip->i_imap.im_boffset +
1848 offsetof(xfs_dinode_t, di_next_unlinked); 1774 offsetof(xfs_dinode_t, di_next_unlinked);
1849 xfs_trans_inode_buf(tp, ibp); 1775 xfs_trans_inode_buf(tp, ibp);
1850 xfs_trans_log_buf(tp, ibp, offset, 1776 xfs_trans_log_buf(tp, ibp, offset,
@@ -1879,7 +1805,6 @@ xfs_iunlink_remove(
1879 xfs_buf_t *agibp; 1805 xfs_buf_t *agibp;
1880 xfs_buf_t *ibp; 1806 xfs_buf_t *ibp;
1881 xfs_agnumber_t agno; 1807 xfs_agnumber_t agno;
1882 xfs_daddr_t agdaddr;
1883 xfs_agino_t agino; 1808 xfs_agino_t agino;
1884 xfs_agino_t next_agino; 1809 xfs_agino_t next_agino;
1885 xfs_buf_t *last_ibp; 1810 xfs_buf_t *last_ibp;
@@ -1887,45 +1812,20 @@ xfs_iunlink_remove(
1887 short bucket_index; 1812 short bucket_index;
1888 int offset, last_offset = 0; 1813 int offset, last_offset = 0;
1889 int error; 1814 int error;
1890 int agi_ok;
1891 1815
1892 /*
1893 * First pull the on-disk inode from the AGI unlinked list.
1894 */
1895 mp = tp->t_mountp; 1816 mp = tp->t_mountp;
1896
1897 agno = XFS_INO_TO_AGNO(mp, ip->i_ino); 1817 agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
1898 agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
1899 1818
1900 /* 1819 /*
1901 * Get the agi buffer first. It ensures lock ordering 1820 * Get the agi buffer first. It ensures lock ordering
1902 * on the list. 1821 * on the list.
1903 */ 1822 */
1904 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, 1823 error = xfs_read_agi(mp, tp, agno, &agibp);
1905 XFS_FSS_TO_BB(mp, 1), 0, &agibp); 1824 if (error)
1906 if (error) {
1907 cmn_err(CE_WARN,
1908 "xfs_iunlink_remove: xfs_trans_read_buf() returned an error %d on %s. Returning error.",
1909 error, mp->m_fsname);
1910 return error; 1825 return error;
1911 } 1826
1912 /*
1913 * Validate the magic number of the agi block.
1914 */
1915 agi = XFS_BUF_TO_AGI(agibp); 1827 agi = XFS_BUF_TO_AGI(agibp);
1916 agi_ok = 1828
1917 be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
1918 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
1919 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK_REMOVE,
1920 XFS_RANDOM_IUNLINK_REMOVE))) {
1921 XFS_CORRUPTION_ERROR("xfs_iunlink_remove", XFS_ERRLEVEL_LOW,
1922 mp, agi);
1923 xfs_trans_brelse(tp, agibp);
1924 cmn_err(CE_WARN,
1925 "xfs_iunlink_remove: XFS_TEST_ERROR() returned an error on %s. Returning EFSCORRUPTED.",
1926 mp->m_fsname);
1927 return XFS_ERROR(EFSCORRUPTED);
1928 }
1929 /* 1829 /*
1930 * Get the index into the agi hash table for the 1830 * Get the index into the agi hash table for the
1931 * list this inode will go on. 1831 * list this inode will go on.
@@ -1945,7 +1845,7 @@ xfs_iunlink_remove(
1945 * of dealing with the buffer when there is no need to 1845 * of dealing with the buffer when there is no need to
1946 * change it. 1846 * change it.
1947 */ 1847 */
1948 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 1848 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
1949 if (error) { 1849 if (error) {
1950 cmn_err(CE_WARN, 1850 cmn_err(CE_WARN,
1951 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1851 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -1956,7 +1856,7 @@ xfs_iunlink_remove(
1956 ASSERT(next_agino != 0); 1856 ASSERT(next_agino != 0);
1957 if (next_agino != NULLAGINO) { 1857 if (next_agino != NULLAGINO) {
1958 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 1858 dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
1959 offset = ip->i_boffset + 1859 offset = ip->i_imap.im_boffset +
1960 offsetof(xfs_dinode_t, di_next_unlinked); 1860 offsetof(xfs_dinode_t, di_next_unlinked);
1961 xfs_trans_inode_buf(tp, ibp); 1861 xfs_trans_inode_buf(tp, ibp);
1962 xfs_trans_log_buf(tp, ibp, offset, 1862 xfs_trans_log_buf(tp, ibp, offset,
@@ -1992,7 +1892,7 @@ xfs_iunlink_remove(
1992 } 1892 }
1993 next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); 1893 next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
1994 error = xfs_inotobp(mp, tp, next_ino, &last_dip, 1894 error = xfs_inotobp(mp, tp, next_ino, &last_dip,
1995 &last_ibp, &last_offset); 1895 &last_ibp, &last_offset, 0);
1996 if (error) { 1896 if (error) {
1997 cmn_err(CE_WARN, 1897 cmn_err(CE_WARN,
1998 "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", 1898 "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.",
@@ -2007,7 +1907,7 @@ xfs_iunlink_remove(
2007 * Now last_ibp points to the buffer previous to us on 1907 * Now last_ibp points to the buffer previous to us on
2008 * the unlinked list. Pull us from the list. 1908 * the unlinked list. Pull us from the list.
2009 */ 1909 */
2010 error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); 1910 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
2011 if (error) { 1911 if (error) {
2012 cmn_err(CE_WARN, 1912 cmn_err(CE_WARN,
2013 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1913 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -2019,7 +1919,7 @@ xfs_iunlink_remove(
2019 ASSERT(next_agino != agino); 1919 ASSERT(next_agino != agino);
2020 if (next_agino != NULLAGINO) { 1920 if (next_agino != NULLAGINO) {
2021 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 1921 dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
2022 offset = ip->i_boffset + 1922 offset = ip->i_imap.im_boffset +
2023 offsetof(xfs_dinode_t, di_next_unlinked); 1923 offsetof(xfs_dinode_t, di_next_unlinked);
2024 xfs_trans_inode_buf(tp, ibp); 1924 xfs_trans_inode_buf(tp, ibp);
2025 xfs_trans_log_buf(tp, ibp, offset, 1925 xfs_trans_log_buf(tp, ibp, offset,
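[Editor's note: the xfs_read_agi() calls above replace the open-coded xfs_trans_read_buf() plus magic-number validation that both unlinked-list functions used to carry. As a hedged sketch only — reconstructed from the deleted code itself, minus the per-call-site error-injection tags — such a helper would consolidate the pattern like this:

int
xfs_read_agi(
	struct xfs_mount	*mp,	/* file system mount structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	struct xfs_buf		**bpp)	/* allocation group header buffer */
{
	xfs_daddr_t	agdaddr;	/* disk address of the AGI block */
	struct xfs_agi	*agi;		/* allocation group header */
	int		agi_ok;		/* header passed validation */
	int		error;

	/* read the AGI block for this allocation group */
	agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr,
				   XFS_FSS_TO_BB(mp, 1), 0, bpp);
	if (error)
		return error;

	/* validate the magic number and version of the agi block */
	agi = XFS_BUF_TO_AGI(*bpp);
	agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
		 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
	if (unlikely(!agi_ok)) {
		XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW, mp, agi);
		xfs_trans_brelse(tp, *bpp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	return 0;
}

Centralizing the check means every AGI reader gets the corruption screening, not just the two unlink paths.]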
@@ -2160,9 +2060,9 @@ xfs_ifree_cluster(
 			iip = (xfs_inode_log_item_t *)lip;
 			ASSERT(iip->ili_logged == 1);
 			lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
-			spin_lock(&mp->m_ail_lock);
-			iip->ili_flush_lsn = iip->ili_item.li_lsn;
-			spin_unlock(&mp->m_ail_lock);
+			xfs_trans_ail_copy_lsn(mp->m_ail,
+						&iip->ili_flush_lsn,
+						&iip->ili_item.li_lsn);
 			xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
 			pre_flushed++;
 		}
@@ -2183,9 +2083,8 @@ xfs_ifree_cluster(
 		iip->ili_last_fields = iip->ili_format.ilf_fields;
 		iip->ili_format.ilf_fields = 0;
 		iip->ili_logged = 1;
-		spin_lock(&mp->m_ail_lock);
-		iip->ili_flush_lsn = iip->ili_item.li_lsn;
-		spin_unlock(&mp->m_ail_lock);
+		xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
+					&iip->ili_item.li_lsn);
 
 		xfs_buf_attach_iodone(bp,
 			(void(*)(xfs_buf_t*,xfs_log_item_t*))
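[Editor's note: both hunks replace an open-coded lock/copy/unlock of the flush LSN with xfs_trans_ail_copy_lsn(). A plausible sketch of that helper — assuming the struct xfs_ail spinlock is named xa_lock, as in the other hunks of this series:

void
xfs_trans_ail_copy_lsn(
	struct xfs_ail	*ailp,
	xfs_lsn_t	*dst,
	xfs_lsn_t	*src)
{
	ASSERT(sizeof(xfs_lsn_t) == 8);	/* don't lock if it shrinks */
	spin_lock(&ailp->xa_lock);
	*dst = *src;			/* copy is atomic under the AIL lock */
	spin_unlock(&ailp->xa_lock);
}

The point of the change is that the AIL lock now lives in struct xfs_ail rather than in the mount, so callers no longer reach into mp->m_ail_lock directly.]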
@@ -2263,7 +2162,7 @@ xfs_ifree(
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
+	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK);
 	if (error)
 		return error;
 
@@ -2279,7 +2178,7 @@ xfs_ifree(
 	 * This is a temporary hack that would require a proper fix
 	 * in the future.
 	 */
-	dip->di_core.di_mode = 0;
+	dip->di_mode = 0;
 
 	if (delete) {
 		xfs_ifree_cluster(ip, tp, first_ino);
@@ -2312,9 +2211,10 @@ xfs_iroot_realloc(
 	int		rec_diff,
 	int		whichfork)
 {
+	struct xfs_mount *mp = ip->i_mount;
 	int		cur_max;
 	xfs_ifork_t	*ifp;
-	xfs_bmbt_block_t *new_broot;
+	struct xfs_btree_block *new_broot;
 	int		new_max;
 	size_t		new_size;
 	char		*np;
@@ -2335,8 +2235,7 @@ xfs_iroot_realloc(
 	 */
 	if (ifp->if_broot_bytes == 0) {
 		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
-		ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size,
-							KM_SLEEP);
+		ifp->if_broot = kmem_alloc(new_size, KM_SLEEP);
 		ifp->if_broot_bytes = (int)new_size;
 		return;
 	}
@@ -2347,18 +2246,16 @@ xfs_iroot_realloc(
 	 * location.  The records don't change location because
 	 * they are kept butted up against the btree block header.
 	 */
-	cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
+	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
 	new_max = cur_max + rec_diff;
 	new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
-	ifp->if_broot = (xfs_bmbt_block_t *)
-		kmem_realloc(ifp->if_broot,
-			new_size,
+	ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
 			(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
 			KM_SLEEP);
-	op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+	op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
 						ifp->if_broot_bytes);
-	np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+	np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
 						(int)new_size);
 	ifp->if_broot_bytes = (int)new_size;
 	ASSERT(ifp->if_broot_bytes <=
 		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
@@ -2372,7 +2269,7 @@ xfs_iroot_realloc(
 	 * records, just get rid of the root and clear the status bit.
 	 */
 	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
-	cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes);
+	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
 	new_max = cur_max + rec_diff;
 	ASSERT(new_max >= 0);
 	if (new_max > 0)
@@ -2380,11 +2277,11 @@ xfs_iroot_realloc(
 	else
 		new_size = 0;
 	if (new_size > 0) {
-		new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP);
+		new_broot = kmem_alloc(new_size, KM_SLEEP);
 		/*
 		 * First copy over the btree block header.
 		 */
-		memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t));
+		memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
 	} else {
 		new_broot = NULL;
 		ifp->if_flags &= ~XFS_IFBROOT;
@@ -2397,18 +2294,16 @@ xfs_iroot_realloc(
 		/*
 		 * First copy the records.
 		 */
-		op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1,
-						     ifp->if_broot_bytes);
-		np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1,
-						     (int)new_size);
+		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
+		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
 		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
 
 		/*
 		 * Then copy the pointers.
 		 */
-		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1,
+		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
 						     ifp->if_broot_bytes);
-		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1,
+		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
 						     (int)new_size);
 		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
 	}
@@ -2511,64 +2406,6 @@ xfs_idata_realloc(
 	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
 }
 
-
-
-
-/*
- * Map inode to disk block and offset.
- *
- * mp -- the mount point structure for the current file system
- * tp -- the current transaction
- * ino -- the inode number of the inode to be located
- * imap -- this structure is filled in with the information necessary
- *	 to retrieve the given inode from disk
- * flags -- flags to pass to xfs_dilocate indicating whether or not
- *	 lookups in the inode btree were OK or not
- */
-int
-xfs_imap(
-	xfs_mount_t	*mp,
-	xfs_trans_t	*tp,
-	xfs_ino_t	ino,
-	xfs_imap_t	*imap,
-	uint		flags)
-{
-	xfs_fsblock_t	fsbno;
-	int		len;
-	int		off;
-	int		error;
-
-	fsbno = imap->im_blkno ?
-		XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK;
-	error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags);
-	if (error)
-		return error;
-
-	imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno);
-	imap->im_len = XFS_FSB_TO_BB(mp, len);
-	imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno);
-	imap->im_ioffset = (ushort)off;
-	imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog);
-
-	/*
-	 * If the inode number maps to a block outside the bounds
-	 * of the file system then return NULL rather than calling
-	 * read_buf and panicing when we get an error from the
-	 * driver.
-	 */
-	if ((imap->im_blkno + imap->im_len) >
-	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
-			"(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
-			" XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
-			(unsigned long long) imap->im_blkno,
-			(unsigned long long) imap->im_len,
-			XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
-		return EINVAL;
-	}
-	return 0;
-}
-
 void
 xfs_idestroy_fork(
 	xfs_inode_t	*ip,
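[Editor's note: the deleted xfs_imap() shows the inode-to-buffer arithmetic this series moves elsewhere: xfs_dilocate() yields the chunk's filesystem block, a length, and the inode's index within the chunk, and the index becomes a byte offset by shifting with sb_inodelog. A hedged illustration with made-up geometry:

/* Illustrative only: 256-byte inodes give sb_inodelog == 8,
 * so inode index 3 within its chunk sits 3 << 8 == 768 bytes
 * into the chunk's buffer. */
static unsigned short inode_byte_offset(int off, int sb_inodelog)
{
	return (unsigned short)(off << sb_inodelog);
}
]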
@@ -2613,70 +2450,6 @@ xfs_idestroy_fork(
 }
 
 /*
- * This is called free all the memory associated with an inode.
- * It must free the inode itself and any buffers allocated for
- * if_extents/if_data and if_broot.  It must also free the lock
- * associated with the inode.
- */
-void
-xfs_idestroy(
-	xfs_inode_t	*ip)
-{
-	switch (ip->i_d.di_mode & S_IFMT) {
-	case S_IFREG:
-	case S_IFDIR:
-	case S_IFLNK:
-		xfs_idestroy_fork(ip, XFS_DATA_FORK);
-		break;
-	}
-	if (ip->i_afp)
-		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-	mrfree(&ip->i_lock);
-	mrfree(&ip->i_iolock);
-
-#ifdef XFS_INODE_TRACE
-	ktrace_free(ip->i_trace);
-#endif
-#ifdef XFS_BMAP_TRACE
-	ktrace_free(ip->i_xtrace);
-#endif
-#ifdef XFS_BMBT_TRACE
-	ktrace_free(ip->i_btrace);
-#endif
-#ifdef XFS_RW_TRACE
-	ktrace_free(ip->i_rwtrace);
-#endif
-#ifdef XFS_ILOCK_TRACE
-	ktrace_free(ip->i_lock_trace);
-#endif
-#ifdef XFS_DIR2_TRACE
-	ktrace_free(ip->i_dir_trace);
-#endif
-	if (ip->i_itemp) {
-		/*
-		 * Only if we are shutting down the fs will we see an
-		 * inode still in the AIL. If it is there, we should remove
-		 * it to prevent a use-after-free from occurring.
-		 */
-		xfs_mount_t	*mp = ip->i_mount;
-		xfs_log_item_t	*lip = &ip->i_itemp->ili_item;
-
-		ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
-		       XFS_FORCED_SHUTDOWN(ip->i_mount));
-		if (lip->li_flags & XFS_LI_IN_AIL) {
-			spin_lock(&mp->m_ail_lock);
-			if (lip->li_flags & XFS_LI_IN_AIL)
-				xfs_trans_delete_ail(mp, lip);
-			else
-				spin_unlock(&mp->m_ail_lock);
-		}
-		xfs_inode_item_destroy(ip);
-	}
-	kmem_zone_free(xfs_inode_zone, ip);
-}
-
-
-/*
  * Increment the pin count of the given buffer.
  * This value is protected by ipinlock spinlock in the mount structure.
  */
@@ -2880,7 +2653,7 @@ xfs_iflush_fork(
 		ASSERT(ifp->if_broot_bytes <=
 		       (XFS_IFORK_SIZE(ip, whichfork) +
 			XFS_BROOT_SIZE_ADJ));
-		xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes,
+		xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
 				(xfs_bmdr_block_t *)cp,
 				XFS_DFORK_SIZE(dip, mp, whichfork));
 	}
@@ -2889,15 +2662,16 @@ xfs_iflush_fork(
 	case XFS_DINODE_FMT_DEV:
 		if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) {
 			ASSERT(whichfork == XFS_DATA_FORK);
-			dip->di_u.di_dev = cpu_to_be32(ip->i_df.if_u2.if_rdev);
+			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
 		}
 		break;
 
 	case XFS_DINODE_FMT_UUID:
 		if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) {
 			ASSERT(whichfork == XFS_DATA_FORK);
-			memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid,
-			       sizeof(uuid_t));
+			memcpy(XFS_DFORK_DPTR(dip),
+			       &ip->i_df.if_u2.if_uuid,
+			       sizeof(uuid_t));
 		}
 		break;
 
@@ -3030,7 +2804,6 @@ cluster_corrupt_out:
 		XFS_BUF_CLR_BDSTRAT_FUNC(bp);
 		XFS_BUF_UNDONE(bp);
 		XFS_BUF_STALE(bp);
-		XFS_BUF_SHUT(bp);
 		XFS_BUF_ERROR(bp,EIO);
 		xfs_biodone(bp);
 	} else {
@@ -3172,7 +2945,7 @@ xfs_iflush(
 	/*
 	 * Get the buffer containing the on-disk inode.
 	 */
-	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0,
+	error = xfs_itobp(mp, NULL, ip, &dip, &bp,
 			  noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
 	if (error || !bp) {
 		xfs_ifunlock(ip);
@@ -3253,7 +3026,7 @@ xfs_iflush_int(
 	}
 
 	/* set *dip = inode's place in the buffer */
-	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset);
+	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
 
 	/*
 	 * Clear i_update_core before copying out the data.
@@ -3275,11 +3048,11 @@ xfs_iflush_int(
 	 */
 	xfs_synchronize_atime(ip);
 
-	if (XFS_TEST_ERROR(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC,
+	if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
 			       mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
 		xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp,
 			"xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p",
-			ip->i_ino, be16_to_cpu(dip->di_core.di_magic), dip);
+			ip->i_ino, be16_to_cpu(dip->di_magic), dip);
 		goto corrupt_out;
 	}
 	if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
@@ -3342,7 +3115,7 @@ xfs_iflush_int(
 	 * because if the inode is dirty at all the core must
 	 * be.
 	 */
-	xfs_dinode_to_disk(&dip->di_core, &ip->i_d);
+	xfs_dinode_to_disk(dip, &ip->i_d);
 
 	/* Wrap, we never let the log put out DI_MAX_FLUSH */
 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
@@ -3354,28 +3127,27 @@ xfs_iflush_int(
 	 * convert back to the old inode format.  If the superblock version
 	 * has been updated, then make the conversion permanent.
 	 */
-	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
-	       xfs_sb_version_hasnlink(&mp->m_sb));
-	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
+	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
+	if (ip->i_d.di_version == 1) {
 		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
 			/*
 			 * Convert it back.
 			 */
 			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1);
-			dip->di_core.di_onlink = cpu_to_be16(ip->i_d.di_nlink);
+			dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink);
 		} else {
 			/*
 			 * The superblock version has already been bumped,
 			 * so just make the conversion to the new inode
 			 * format permanent.
 			 */
-			ip->i_d.di_version = XFS_DINODE_VERSION_2;
-			dip->di_core.di_version = XFS_DINODE_VERSION_2;
+			ip->i_d.di_version = 2;
+			dip->di_version = 2;
 			ip->i_d.di_onlink = 0;
-			dip->di_core.di_onlink = 0;
+			dip->di_onlink = 0;
 			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
-			memset(&(dip->di_core.di_pad[0]), 0,
-			       sizeof(dip->di_core.di_pad));
+			memset(&(dip->di_pad[0]), 0,
+			       sizeof(dip->di_pad));
 			ASSERT(ip->i_d.di_projid == 0);
 		}
 	}
@@ -3418,10 +3190,8 @@ xfs_iflush_int(
 		iip->ili_format.ilf_fields = 0;
 		iip->ili_logged = 1;
 
-		ASSERT(sizeof(xfs_lsn_t) == 8);	/* don't lock if it shrinks */
-		spin_lock(&mp->m_ail_lock);
-		iip->ili_flush_lsn = iip->ili_item.li_lsn;
-		spin_unlock(&mp->m_ail_lock);
+		xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
+					&iip->ili_item.li_lsn);
 
 		/*
 		 * Attach the function xfs_iflush_done to the inode's
@@ -3459,45 +3229,8 @@ corrupt_out:
 }
 
 
-/*
- * Flush all inactive inodes in mp.
- */
-void
-xfs_iflush_all(
-	xfs_mount_t	*mp)
-{
-	xfs_inode_t	*ip;
-
- again:
-	XFS_MOUNT_ILOCK(mp);
-	ip = mp->m_inodes;
-	if (ip == NULL)
-		goto out;
-
-	do {
-		/* Make sure we skip markers inserted by sync */
-		if (ip->i_mount == NULL) {
-			ip = ip->i_mnext;
-			continue;
-		}
-
-		if (!VFS_I(ip)) {
-			XFS_MOUNT_IUNLOCK(mp);
-			xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC);
-			goto again;
-		}
-
-		ASSERT(vn_count(VFS_I(ip)) == 0);
-
-		ip = ip->i_mnext;
-	} while (ip != mp->m_inodes);
- out:
-	XFS_MOUNT_IUNLOCK(mp);
-}
-
 #ifdef XFS_ILOCK_TRACE
-ktrace_t	*xfs_ilock_trace_buf;
-
 void
 xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra)
 {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 6be310d41daf..1f175fa34b22 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -19,8 +19,7 @@
 #define	__XFS_INODE_H__
 
 struct xfs_dinode;
-struct xfs_dinode_core;
-
+struct xfs_inode;
 
 /*
  * Fork identifiers.
@@ -63,7 +62,7 @@ typedef struct xfs_ext_irec {
 typedef struct xfs_ifork {
 	int			if_bytes;	/* bytes in if_u1 */
 	int			if_real_bytes;	/* bytes allocated in if_u1 */
-	xfs_bmbt_block_t	*if_broot;	/* file's incore btree root */
+	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
 	short			if_broot_bytes;	/* bytes allocated for root */
 	unsigned char		if_flags;	/* per-fork flags */
 	unsigned char		if_ext_max;	/* max # of extent records */
@@ -84,52 +83,14 @@ typedef struct xfs_ifork {
 } xfs_ifork_t;
 
 /*
- * Flags for xfs_ichgtime().
+ * Inode location information.  Stored in the inode and passed to
+ * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
  */
-#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */
-#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */
-
-/*
- * Per-fork incore inode flags.
- */
-#define	XFS_IFINLINE	0x01	/* Inline data is read in */
-#define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
-#define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
-#define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */
-
-/*
- * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate().
- */
-#define XFS_IMAP_LOOKUP		0x1
-#define XFS_IMAP_BULKSTAT	0x2
-
-#ifdef __KERNEL__
-struct bhv_desc;
-struct cred;
-struct ktrace;
-struct xfs_buf;
-struct xfs_bmap_free;
-struct xfs_bmbt_irec;
-struct xfs_bmbt_block;
-struct xfs_inode;
-struct xfs_inode_log_item;
-struct xfs_mount;
-struct xfs_trans;
-struct xfs_dquot;
-
-#if defined(XFS_ILOCK_TRACE)
-#define XFS_ILOCK_KTRACE_SIZE	32
-extern ktrace_t *xfs_ilock_trace_buf;
-extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
-#else
-#define	xfs_ilock_trace(i,n,f,ra)
-#endif
-
-typedef struct dm_attrs_s {
-	__uint32_t	da_dmevmask;	/* DMIG event mask */
-	__uint16_t	da_dmstate;	/* DMIG state info */
-	__uint16_t	da_pad;		/* DMIG extra padding */
-} dm_attrs_t;
+struct xfs_imap {
+	xfs_daddr_t	im_blkno;	/* starting BB of inode chunk */
+	ushort		im_len;		/* length in BBs of inode chunk */
+	ushort		im_boffset;	/* inode offset in block in bytes */
+};
 
 /*
  * This is the xfs in-core inode structure.
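[Editor's note: with the mapping cached in struct xfs_imap, a reader can hand ip->i_imap straight to the buffer layer. A hedged sketch of the read side, consistent with how i_imap.im_boffset is used in the xfs_iflush_int() hunk earlier in this patch:

	struct xfs_imap	*imap = &ip->i_imap;
	struct xfs_buf	*bp;
	xfs_dinode_t	*dip;
	int		error;

	/* read the whole inode chunk, then index to our inode */
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
				   (int)imap->im_len, XFS_BUF_LOCK, &bp);
	if (!error)
		dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap->im_boffset);

The xfs_imap_to_bp() helper named in the comment is presumably a wrapper around exactly this pattern.]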
@@ -160,7 +121,7 @@ typedef struct xfs_ictimestamp {
 } xfs_ictimestamp_t;
 
 /*
- * NOTE: This structure must be kept identical to struct xfs_dinode_core
+ * NOTE: This structure must be kept identical to struct xfs_dinode
  * in xfs_dinode.h except for the endianess annotations.
  */
 typedef struct xfs_icdinode {
@@ -191,27 +152,97 @@ typedef struct xfs_icdinode {
 	__uint32_t	di_gen;		/* generation number */
 } xfs_icdinode_t;
 
-typedef struct {
-	struct xfs_inode *ip_mnext;	/* next inode in mount list */
-	struct xfs_inode *ip_mprev;	/* ptr to prev inode */
-	struct xfs_mount *ip_mount;	/* fs mount struct ptr */
-} xfs_iptr_t;
+/*
+ * Flags for xfs_ichgtime().
+ */
+#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */
+#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */
+
+/*
+ * Per-fork incore inode flags.
+ */
+#define	XFS_IFINLINE	0x01	/* Inline data is read in */
+#define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
+#define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
+#define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */
+
+/*
+ * Fork handling.
+ */
+
+#define XFS_IFORK_Q(ip)			((ip)->i_d.di_forkoff != 0)
+#define XFS_IFORK_BOFF(ip)		((int)((ip)->i_d.di_forkoff << 3))
+
+#define XFS_IFORK_PTR(ip,w)		\
+	((w) == XFS_DATA_FORK ?		\
+		&(ip)->i_df :		\
+		(ip)->i_afp)
+#define XFS_IFORK_DSIZE(ip) \
+	(XFS_IFORK_Q(ip) ? \
+		XFS_IFORK_BOFF(ip) : \
+		XFS_LITINO((ip)->i_mount))
+#define XFS_IFORK_ASIZE(ip) \
+	(XFS_IFORK_Q(ip) ? \
+		XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
+		0)
+#define XFS_IFORK_SIZE(ip,w) \
+	((w) == XFS_DATA_FORK ? \
+		XFS_IFORK_DSIZE(ip) : \
+		XFS_IFORK_ASIZE(ip))
+#define XFS_IFORK_FORMAT(ip,w) \
+	((w) == XFS_DATA_FORK ? \
+		(ip)->i_d.di_format : \
+		(ip)->i_d.di_aformat)
+#define XFS_IFORK_FMT_SET(ip,w,n) \
+	((w) == XFS_DATA_FORK ? \
+		((ip)->i_d.di_format = (n)) : \
+		((ip)->i_d.di_aformat = (n)))
+#define XFS_IFORK_NEXTENTS(ip,w) \
+	((w) == XFS_DATA_FORK ? \
+		(ip)->i_d.di_nextents : \
+		(ip)->i_d.di_anextents)
+#define XFS_IFORK_NEXT_SET(ip,w,n) \
+	((w) == XFS_DATA_FORK ? \
+		((ip)->i_d.di_nextents = (n)) : \
+		((ip)->i_d.di_anextents = (n)))
+
+
+
+#ifdef __KERNEL__
+
+struct bhv_desc;
+struct cred;
+struct ktrace;
+struct xfs_buf;
+struct xfs_bmap_free;
+struct xfs_bmbt_irec;
+struct xfs_inode_log_item;
+struct xfs_mount;
+struct xfs_trans;
+struct xfs_dquot;
+
+#if defined(XFS_ILOCK_TRACE)
+#define XFS_ILOCK_KTRACE_SIZE	32
+extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *);
+#else
+#define	xfs_ilock_trace(i,n,f,ra)
+#endif
+
+typedef struct dm_attrs_s {
+	__uint32_t	da_dmevmask;	/* DMIG event mask */
+	__uint16_t	da_dmstate;	/* DMIG state info */
+	__uint16_t	da_pad;		/* DMIG extra padding */
+} dm_attrs_t;
 
 typedef struct xfs_inode {
 	/* Inode linking and identification information. */
-	struct xfs_inode	*i_mnext;	/* next inode in mount list */
-	struct xfs_inode	*i_mprev;	/* ptr to prev inode */
 	struct xfs_mount	*i_mount;	/* fs mount struct ptr */
-	struct list_head	i_reclaim;	/* reclaim list */
-	struct inode		*i_vnode;	/* vnode backpointer */
 	struct xfs_dquot	*i_udquot;	/* user dquot */
 	struct xfs_dquot	*i_gdquot;	/* group dquot */
 
 	/* Inode location stuff */
 	xfs_ino_t		i_ino;		/* inode number (agno/agino)*/
-	xfs_daddr_t		i_blkno;	/* blkno of inode buffer */
-	ushort			i_len;		/* len of inode buffer */
-	ushort			i_boffset;	/* off of inode in buffer */
+	struct xfs_imap		i_imap;		/* location for xfs_imap() */
 
 	/* Extent information. */
 	xfs_ifork_t		*i_afp;		/* attribute fork pointer */
@@ -230,7 +261,6 @@ typedef struct xfs_inode {
 	unsigned short		i_flags;	/* see defined flags below */
 	unsigned char		i_update_core;	/* timestamps/size is dirty */
 	unsigned char		i_update_size;	/* di_size field is dirty */
-	unsigned int		i_gen;		/* generation count */
 	unsigned int		i_delayed_blks;	/* count of delay alloc blks */
 
 	xfs_icdinode_t		i_d;		/* most of ondisk inode */
@@ -238,6 +268,10 @@ typedef struct xfs_inode {
 	xfs_fsize_t		i_size;		/* in-memory size */
 	xfs_fsize_t		i_new_size;	/* size when write completes */
 	atomic_t		i_iocount;	/* outstanding I/O count */
+
+	/* VFS inode */
+	struct inode		i_vnode;	/* embedded VFS inode */
+
 	/* Trace buffers per inode. */
 #ifdef XFS_INODE_TRACE
 	struct ktrace		*i_trace;	/* general inode trace */
@@ -245,7 +279,7 @@ typedef struct xfs_inode {
 #ifdef XFS_BMAP_TRACE
 	struct ktrace		*i_xtrace;	/* inode extent list trace */
 #endif
-#ifdef XFS_BMBT_TRACE
+#ifdef XFS_BTREE_TRACE
 	struct ktrace		*i_btrace;	/* inode bmap btree trace */
 #endif
 #ifdef XFS_RW_TRACE
@@ -265,13 +299,30 @@ typedef struct xfs_inode {
 /* Convert from vfs inode to xfs inode */
 static inline struct xfs_inode *XFS_I(struct inode *inode)
 {
-	return (struct xfs_inode *)inode->i_private;
+	return container_of(inode, struct xfs_inode, i_vnode);
 }
 
 /* convert from xfs inode to vfs inode */
 static inline struct inode *VFS_I(struct xfs_inode *ip)
 {
-	return (struct inode *)ip->i_vnode;
+	return &ip->i_vnode;
+}
+
+/*
+ * Get rid of a partially initialized inode.
+ *
+ * We have to go through destroy_inode to make sure allocations
+ * from init_inode_always like the security data are undone.
+ *
+ * We mark the inode bad so that it takes the short cut in
+ * the reclaim path instead of going through the flush path
+ * which doesn't make sense for an inode that has never seen the
+ * light of day.
+ */
+static inline void xfs_destroy_inode(struct xfs_inode *ip)
+{
+	make_bad_inode(VFS_I(ip));
+	return destroy_inode(VFS_I(ip));
 }
 
 /*
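[Editor's note: embedding the VFS inode lets XFS_I() recover the containing xfs_inode with plain pointer arithmetic instead of chasing i_private. A minimal self-contained sketch of the same container_of pattern, in user-space C for clarity:

#include <stddef.h>

struct inner { int x; };

struct outer {
	int		payload;
	struct inner	member;		/* embedded, like i_vnode */
};

/* recover the enclosing struct from a pointer to its member */
#define container_of(ptr, type, field) \
	((type *)((char *)(ptr) - offsetof(type, field)))

static struct outer *to_outer(struct inner *ip)
{
	return container_of(ip, struct outer, member);
}

Because the conversion is pure arithmetic, the two objects can no longer have separate lifetimes — which is exactly why xfs_idestroy() disappears elsewhere in this patch.]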
@@ -327,65 +378,36 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
 	spin_unlock(&ip->i_flags_lock);
 	return ret;
 }
-#endif	/* __KERNEL__ */
-
 
 /*
- * Fork handling.
+ * Manage the i_flush queue embedded in the inode.  This completion
+ * queue synchronizes processes attempting to flush the in-core
+ * inode back to disk.
 */
+static inline void xfs_iflock(xfs_inode_t *ip)
+{
+	wait_for_completion(&ip->i_flush);
+}
 
-#define XFS_IFORK_Q(ip)			((ip)->i_d.di_forkoff != 0)
-#define XFS_IFORK_BOFF(ip)		((int)((ip)->i_d.di_forkoff << 3))
-
-#define XFS_IFORK_PTR(ip,w)		\
-	((w) == XFS_DATA_FORK ?		\
-		&(ip)->i_df :		\
-		(ip)->i_afp)
-#define XFS_IFORK_DSIZE(ip) \
-	(XFS_IFORK_Q(ip) ? \
-		XFS_IFORK_BOFF(ip) : \
-		XFS_LITINO((ip)->i_mount))
-#define XFS_IFORK_ASIZE(ip) \
-	(XFS_IFORK_Q(ip) ? \
-		XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \
-		0)
-#define XFS_IFORK_SIZE(ip,w) \
-	((w) == XFS_DATA_FORK ? \
-		XFS_IFORK_DSIZE(ip) : \
-		XFS_IFORK_ASIZE(ip))
-#define XFS_IFORK_FORMAT(ip,w) \
-	((w) == XFS_DATA_FORK ? \
-		(ip)->i_d.di_format : \
-		(ip)->i_d.di_aformat)
-#define XFS_IFORK_FMT_SET(ip,w,n) \
-	((w) == XFS_DATA_FORK ? \
-		((ip)->i_d.di_format = (n)) : \
-		((ip)->i_d.di_aformat = (n)))
-#define XFS_IFORK_NEXTENTS(ip,w) \
-	((w) == XFS_DATA_FORK ? \
-		(ip)->i_d.di_nextents : \
-		(ip)->i_d.di_anextents)
-#define XFS_IFORK_NEXT_SET(ip,w,n) \
-	((w) == XFS_DATA_FORK ? \
-		((ip)->i_d.di_nextents = (n)) : \
-		((ip)->i_d.di_anextents = (n)))
+static inline int xfs_iflock_nowait(xfs_inode_t *ip)
+{
+	return try_wait_for_completion(&ip->i_flush);
+}
 
-#ifdef __KERNEL__
+static inline void xfs_ifunlock(xfs_inode_t *ip)
+{
+	complete(&ip->i_flush);
+}
 
 /*
  * In-core inode flags.
 */
-#define XFS_IGRIO	0x0001  /* inode used for guaranteed rate i/o */
-#define XFS_IUIOSZ	0x0002  /* inode i/o sizes have been explicitly set */
-#define XFS_IQUIESCE	0x0004  /* we have started quiescing for this inode */
-#define XFS_IRECLAIM	0x0008  /* we have started reclaiming this inode */
-#define XFS_ISTALE	0x0010	/* inode has been staled */
-#define XFS_IRECLAIMABLE 0x0020	/* inode can be reclaimed */
-#define XFS_INEW	0x0040
-#define XFS_IFILESTREAM	0x0080	/* inode is in a filestream directory */
-#define XFS_IMODIFIED	0x0100	/* XFS inode state possibly differs */
-				/* to the Linux inode state. */
-#define XFS_ITRUNCATED	0x0200	/* truncated down so flush-on-close */
+#define XFS_IRECLAIM	0x0001  /* we have started reclaiming this inode */
+#define XFS_ISTALE	0x0002	/* inode has been staled */
+#define XFS_IRECLAIMABLE 0x0004	/* inode can be reclaimed */
+#define XFS_INEW	0x0008	/* inode has just been allocated */
+#define XFS_IFILESTREAM	0x0010	/* inode is in a filestream directory */
+#define XFS_ITRUNCATED	0x0020	/* truncated down so flush-on-close */
 
 /*
  * Flags for inode locking.
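[Editor's note: the flush "lock" is now a struct completion: xfs_iflock() sleeps in wait_for_completion(), xfs_iflock_nowait() maps to try_wait_for_completion(), and xfs_ifunlock() releases the next waiter with complete(). A hedged sketch of the life cycle — the assumption that i_flush starts in the completed (unlocked) state, and the flush-work call, are illustrative only:

	init_completion(&ip->i_flush);
	complete(&ip->i_flush);		/* assumed initial state: unlocked */

	if (xfs_iflock_nowait(ip)) {	/* consume the completion: "locked" */
		do_flush_work(ip);	/* hypothetical flush of the inode */
		xfs_ifunlock(ip);	/* complete(): next flusher proceeds */
	}

Unlike a mutex, a completion may legally be "unlocked" by a different task than the one that "locked" it, which suits a flush handed off to I/O completion.]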
@@ -460,16 +482,8 @@
 	 ((pip)->i_d.di_mode & S_ISGID))
 
 /*
- * Flags for xfs_iget()
- */
-#define XFS_IGET_CREATE		0x1
-#define XFS_IGET_BULKSTAT	0x2
-
-/*
  * xfs_iget.c prototypes.
  */
-void		xfs_ihash_init(struct xfs_mount *);
-void		xfs_ihash_free(struct xfs_mount *);
 xfs_inode_t	*xfs_inode_incore(struct xfs_mount *, xfs_ino_t,
 				  struct xfs_trans *);
 int		xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
@@ -484,25 +498,13 @@
 uint		xfs_ilock_map_shared(xfs_inode_t *);
 void		xfs_iunlock_map_shared(xfs_inode_t *, uint);
 void		xfs_ireclaim(xfs_inode_t *);
-int		xfs_finish_reclaim(xfs_inode_t *, int, int);
-int		xfs_finish_reclaim_all(struct xfs_mount *, int);
 
 /*
  * xfs_inode.c prototypes.
  */
-int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
-			  xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
-			  xfs_daddr_t, uint, uint);
-int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
-			  xfs_inode_t **, xfs_daddr_t, uint);
-int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
 int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
 			   xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t,
 			   int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
-void		xfs_dinode_from_disk(struct xfs_icdinode *,
-				     struct xfs_dinode_core *);
-void		xfs_dinode_to_disk(struct xfs_dinode_core *,
-				   struct xfs_icdinode *);
 
 uint		xfs_ip2xflags(struct xfs_inode *);
 uint		xfs_dic2xflags(struct xfs_dinode *);
@@ -513,17 +515,10 @@
 				      xfs_fsize_t, int, int);
 int		xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 
-void		xfs_idestroy_fork(xfs_inode_t *, int);
-void		xfs_idestroy(xfs_inode_t *);
-void		xfs_idata_realloc(xfs_inode_t *, int, int);
-void		xfs_iextract(xfs_inode_t *);
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
-void		xfs_iroot_realloc(xfs_inode_t *, int, int);
 void		xfs_ipin(xfs_inode_t *);
 void		xfs_iunpin(xfs_inode_t *);
-int		xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int);
 int		xfs_iflush(xfs_inode_t *, uint);
-void		xfs_iflush_all(struct xfs_mount *);
 void		xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t	xfs_file_last_byte(xfs_inode_t *);
 void		xfs_lock_inodes(xfs_inode_t **, int, uint);
@@ -532,6 +527,77 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
 void		xfs_synchronize_atime(xfs_inode_t *);
 void		xfs_mark_inode_dirty_sync(xfs_inode_t *);
 
+#if defined(XFS_INODE_TRACE)
+
+#define	INODE_TRACE_SIZE	16		/* number of trace entries */
+#define	INODE_KTRACE_ENTRY	1
+#define	INODE_KTRACE_EXIT	2
+#define	INODE_KTRACE_HOLD	3
+#define	INODE_KTRACE_REF	4
+#define	INODE_KTRACE_RELE	5
+
+extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *);
+extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *);
+extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *);
+extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *);
+extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *);
+#define xfs_itrace_entry(ip)	\
+	_xfs_itrace_entry(ip, __func__, (inst_t *)__return_address)
+#define xfs_itrace_exit(ip)	\
+	_xfs_itrace_exit(ip, __func__, (inst_t *)__return_address)
+#define xfs_itrace_exit_tag(ip, tag)	\
+	_xfs_itrace_exit(ip, tag, (inst_t *)__return_address)
+#define xfs_itrace_ref(ip)	\
+	_xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address)
+
+#else
+#define	xfs_itrace_entry(a)
+#define	xfs_itrace_exit(a)
+#define	xfs_itrace_exit_tag(a, b)
+#define	xfs_itrace_hold(a, b, c, d)
+#define	xfs_itrace_ref(a)
+#define	xfs_itrace_rele(a, b, c, d)
+#endif
+
+#define IHOLD(ip) \
+do { \
+	ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
+	atomic_inc(&(VFS_I(ip)->i_count)); \
+	xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
+} while (0)
+
+#define IRELE(ip) \
+do { \
+	xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \
+	iput(VFS_I(ip)); \
+} while (0)
+
+#endif /* __KERNEL__ */
+
+/*
+ * Flags for xfs_iget()
+ */
+#define XFS_IGET_CREATE		0x1
+#define XFS_IGET_BULKSTAT	0x2
+
+int		xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
+			    xfs_ino_t, struct xfs_dinode **,
+			    struct xfs_buf **, int *, uint);
+int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
+			  struct xfs_inode *, struct xfs_dinode **,
+			  struct xfs_buf **, uint);
+int		xfs_iread(struct xfs_mount *, struct xfs_trans *,
+			  struct xfs_inode *, xfs_daddr_t, uint);
+void		xfs_dinode_from_disk(struct xfs_icdinode *,
+				     struct xfs_dinode *);
+void		xfs_dinode_to_disk(struct xfs_dinode *,
+				   struct xfs_icdinode *);
+void		xfs_idestroy_fork(struct xfs_inode *, int);
+void		xfs_idata_realloc(struct xfs_inode *, int, int);
+void		xfs_iroot_realloc(struct xfs_inode *, int, int);
+int		xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
+int		xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
+
 xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
 void		xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t,
 				xfs_bmbt_irec_t *);
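[Editor's note: IHOLD and IRELE now pin the inode through the embedded VFS inode's i_count rather than any private reference count, so inode lifetime follows the generic VFS rules. A hedged usage sketch — the background-work call is hypothetical:

	IHOLD(ip);			/* take a reference before async use */
	queue_background_work(ip);	/* hypothetical work outliving the caller */
	...
	IRELE(ip);			/* drop it; iput() may trigger reclaim */

Note that IHOLD asserts i_count > 0: it can only amplify an existing reference, never conjure the first one.]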
@@ -561,7 +627,8 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
 #define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
 
 #ifdef DEBUG
-void		xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t);
+void		xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
+				xfs_fsize_t);
 #else	/* DEBUG */
 #define xfs_isize_check(mp, ip, isize)
 #endif	/* DEBUG */
@@ -576,26 +643,4 @@ extern struct kmem_zone *xfs_ifork_zone;
 extern struct kmem_zone	*xfs_inode_zone;
 extern struct kmem_zone	*xfs_ili_zone;
 
-/*
- * Manage the i_flush queue embedded in the inode.  This completion
- * queue synchronizes processes attempting to flush the in-core
- * inode back to disk.
- */
-static inline void xfs_iflock(xfs_inode_t *ip)
-{
-	wait_for_completion(&ip->i_flush);
-}
-
-static inline int xfs_iflock_nowait(xfs_inode_t *ip)
-{
-	return try_wait_for_completion(&ip->i_flush);
-}
-
-static inline void xfs_ifunlock(xfs_inode_t *ip)
-{
-	complete(&ip->i_flush);
-}
-
-#endif	/* __KERNEL__ */
-
 #endif	/* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 97c7452e2620..977c4aec587e 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -281,7 +281,7 @@ xfs_inode_item_format(
 	xfs_mark_inode_dirty_sync(ip);
 
 	vecp->i_addr = (xfs_caddr_t)&ip->i_d;
-	vecp->i_len  = sizeof(xfs_dinode_core_t);
+	vecp->i_len  = sizeof(struct xfs_icdinode);
 	XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
 	vecp++;
 	nvecs++;
@@ -296,9 +296,8 @@ xfs_inode_item_format(
 	 * has a new version number, then we don't bother converting back.
 	 */
 	mp = ip->i_mount;
-	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
-	       xfs_sb_version_hasnlink(&mp->m_sb));
-	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
+	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb));
+	if (ip->i_d.di_version == 1) {
 		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
 			/*
 			 * Convert it back.
@@ -311,7 +310,7 @@ xfs_inode_item_format(
 			 * so just make the conversion to the new inode
 			 * format permanent.
 			 */
-			ip->i_d.di_version = XFS_DINODE_VERSION_2;
+			ip->i_d.di_version = 2;
 			ip->i_d.di_onlink = 0;
 			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 		}
@@ -932,6 +931,7 @@ xfs_inode_item_init(
 	iip->ili_item.li_type = XFS_LI_INODE;
 	iip->ili_item.li_ops = &xfs_inode_item_ops;
 	iip->ili_item.li_mountp = mp;
+	iip->ili_item.li_ailp = mp->m_ail;
 	iip->ili_inode = ip;
 
 	/*
@@ -942,9 +942,9 @@ xfs_inode_item_init(
 
 	iip->ili_format.ilf_type = XFS_LI_INODE;
 	iip->ili_format.ilf_ino = ip->i_ino;
-	iip->ili_format.ilf_blkno = ip->i_blkno;
-	iip->ili_format.ilf_len = ip->i_len;
-	iip->ili_format.ilf_boffset = ip->i_boffset;
+	iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
+	iip->ili_format.ilf_len = ip->i_imap.im_len;
+	iip->ili_format.ilf_boffset = ip->i_imap.im_boffset;
 }
 
 /*
@@ -976,9 +976,8 @@ xfs_iflush_done(
 	xfs_buf_t		*bp,
 	xfs_inode_log_item_t	*iip)
 {
-	xfs_inode_t	*ip;
-
-	ip = iip->ili_inode;
+	xfs_inode_t		*ip = iip->ili_inode;
+	struct xfs_ail		*ailp = iip->ili_item.li_ailp;
 
 	/*
 	 * We only want to pull the item from the AIL if it is
@@ -991,15 +990,12 @@ xfs_iflush_done(
 	 */
 	if (iip->ili_logged &&
 	    (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
-		spin_lock(&ip->i_mount->m_ail_lock);
+		spin_lock(&ailp->xa_lock);
 		if (iip->ili_item.li_lsn == iip->ili_flush_lsn) {
-			/*
-			 * xfs_trans_delete_ail() drops the AIL lock.
-			 */
-			xfs_trans_delete_ail(ip->i_mount,
-					     (xfs_log_item_t*)iip);
+			/* xfs_trans_ail_delete() drops the AIL lock. */
+			xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip);
 		} else {
-			spin_unlock(&ip->i_mount->m_ail_lock);
+			spin_unlock(&ailp->xa_lock);
 		}
 	}
 
@@ -1031,21 +1027,20 @@
 xfs_iflush_abort(
 	xfs_inode_t		*ip)
 {
-	xfs_inode_log_item_t	*iip;
+	xfs_inode_log_item_t	*iip = ip->i_itemp;
 	xfs_mount_t		*mp;
 
 	iip = ip->i_itemp;
 	mp = ip->i_mount;
 	if (iip) {
+		struct xfs_ail	*ailp = iip->ili_item.li_ailp;
 		if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
-			spin_lock(&mp->m_ail_lock);
+			spin_lock(&ailp->xa_lock);
 			if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
-				/*
-				 * xfs_trans_delete_ail() drops the AIL lock.
-				 */
-				xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip);
+				/* xfs_trans_ail_delete() drops the AIL lock. */
+				xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip);
 			} else
-				spin_unlock(&mp->m_ail_lock);
+				spin_unlock(&ailp->xa_lock);
 		}
 		iip->ili_logged = 0;
 		/*
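[Editor's note: both AIL-removal call sites keep the unlocked flag test followed by a locked re-test. The XFS_LI_IN_AIL bit can clear between the cheap check and acquiring xa_lock, so the pattern skips the lock entirely for clean items while staying race-free. In outline:

	if (lip->li_flags & XFS_LI_IN_AIL) {		/* unlocked fast-path check */
		spin_lock(&ailp->xa_lock);
		if (lip->li_flags & XFS_LI_IN_AIL)	/* re-check under the lock */
			xfs_trans_ail_delete(ailp, lip);/* drops xa_lock itself */
		else
			spin_unlock(&ailp->xa_lock);
	}

The asymmetric unlock (the delete helper drops the lock on the success path) is why the else branch must unlock explicitly.]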
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 40513077ab36..1ff04cc323ad 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -112,6 +112,24 @@ typedef struct xfs_inode_log_format_64 {
 #define	XFS_ILI_IOLOCKED_ANY   (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED)
 
 
+#define	XFS_ILOG_FBROOT(w)	xfs_ilog_fbroot(w)
+static inline int xfs_ilog_fbroot(int w)
+{
+	return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
+}
+
+#define	XFS_ILOG_FEXT(w)	xfs_ilog_fext(w)
+static inline int xfs_ilog_fext(int w)
+{
+	return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
+}
+
+#define	XFS_ILOG_FDATA(w)	xfs_ilog_fdata(w)
+static inline int xfs_ilog_fdata(int w)
+{
+	return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
+}
+
 #ifdef __KERNEL__
 
 struct xfs_buf;
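[Editor's note: hoisting xfs_ilog_fbroot() and friends out of the __KERNEL__ block makes the fork-to-log-flag mapping available outside kernel builds as well. A short usage sketch, using xfs_trans_log_inode() as seen earlier in this patch:

	/* log the inode core plus whichever fork's btree root changed;
	 * XFS_ILOG_FBROOT(whichfork) selects DBROOT or ABROOT */
	xfs_trans_log_inode(tp, ip,
			    XFS_ILOG_CORE | XFS_ILOG_FBROOT(XFS_DATA_FORK));
]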
@@ -148,26 +166,6 @@ typedef struct xfs_inode_log_item {
 } xfs_inode_log_item_t;
 
 
-#define	XFS_ILOG_FDATA(w)	xfs_ilog_fdata(w)
-static inline int xfs_ilog_fdata(int w)
-{
-	return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
-}
-
-#endif	/* __KERNEL__ */
-
-#define	XFS_ILOG_FBROOT(w)	xfs_ilog_fbroot(w)
-static inline int xfs_ilog_fbroot(int w)
-{
-	return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
-}
-
-#define	XFS_ILOG_FEXT(w)	xfs_ilog_fext(w)
-static inline int xfs_ilog_fext(int w)
-{
-	return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
-}
-
 static inline int xfs_inode_clean(xfs_inode_t *ip)
 {
 	return (!ip->i_itemp ||
@@ -175,9 +173,6 @@ static inline int xfs_inode_clean(xfs_inode_t *ip)
 		!ip->i_update_core;
 }
 
-
-#ifdef __KERNEL__
-
 extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
 extern void xfs_inode_item_destroy(struct xfs_inode *);
 extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 67f22b2b44b3..911062cf73a6 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -290,7 +290,6 @@ STATIC int
 xfs_iomap_eof_align_last_fsb(
 	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,
-	xfs_fsize_t	isize,
 	xfs_extlen_t	extsize,
 	xfs_fileoff_t	*last_fsb)
 {
@@ -306,14 +305,14 @@ xfs_iomap_eof_align_last_fsb(
 	 * stripe width and we are allocating past the allocation eof.
 	 */
 	else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
-		(isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
+		(ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
 		new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
 	/*
 	 * Roundup the allocation request to a stripe unit (m_dalign) boundary
 	 * if the file size is >= stripe unit size, and we are allocating past
 	 * the allocation eof.
 	 */
-	else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
+	else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
 		new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
 
 	/*
@@ -403,7 +402,6 @@ xfs_iomap_write_direct(
 	xfs_filblks_t	count_fsb, resaligned;
 	xfs_fsblock_t	firstfsb;
 	xfs_extlen_t	extsz, temp;
-	xfs_fsize_t	isize;
 	int		nimaps;
 	int		bmapi_flag;
 	int		quota_flag;
@@ -426,15 +424,10 @@ xfs_iomap_write_direct(
 	rt = XFS_IS_REALTIME_INODE(ip);
 	extsz = xfs_get_extsz_hint(ip);
 
-	isize = ip->i_size;
-	if (ip->i_new_size > isize)
-		isize = ip->i_new_size;
-
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-	if ((offset + count) > isize) {
-		error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz,
-							&last_fsb);
+	if ((offset + count) > ip->i_size) {
+		error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
 		if (error)
 			goto error_out;
 	} else {
@@ -559,7 +552,6 @@ STATIC int
559xfs_iomap_eof_want_preallocate( 552xfs_iomap_eof_want_preallocate(
560 xfs_mount_t *mp, 553 xfs_mount_t *mp,
561 xfs_inode_t *ip, 554 xfs_inode_t *ip,
562 xfs_fsize_t isize,
563 xfs_off_t offset, 555 xfs_off_t offset,
564 size_t count, 556 size_t count,
565 int ioflag, 557 int ioflag,
@@ -573,7 +565,7 @@ xfs_iomap_eof_want_preallocate(
573 int n, error, imaps; 565 int n, error, imaps;
574 566
575 *prealloc = 0; 567 *prealloc = 0;
576 if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize) 568 if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size)
577 return 0; 569 return 0;
578 570
579 /* 571 /*
@@ -617,7 +609,6 @@ xfs_iomap_write_delay(
617 xfs_fileoff_t ioalign; 609 xfs_fileoff_t ioalign;
618 xfs_fsblock_t firstblock; 610 xfs_fsblock_t firstblock;
619 xfs_extlen_t extsz; 611 xfs_extlen_t extsz;
620 xfs_fsize_t isize;
621 int nimaps; 612 int nimaps;
622 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; 613 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
623 int prealloc, fsynced = 0; 614 int prealloc, fsynced = 0;
@@ -637,11 +628,7 @@ xfs_iomap_write_delay(
637 offset_fsb = XFS_B_TO_FSBT(mp, offset); 628 offset_fsb = XFS_B_TO_FSBT(mp, offset);
638 629
639retry: 630retry:
640 isize = ip->i_size; 631 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
641 if (ip->i_new_size > isize)
642 isize = ip->i_new_size;
643
644 error = xfs_iomap_eof_want_preallocate(mp, ip, isize, offset, count,
645 ioflag, imap, XFS_WRITE_IMAPS, &prealloc); 632 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
646 if (error) 633 if (error)
647 return error; 634 return error;
@@ -655,8 +642,7 @@ retry:
655 } 642 }
656 643
657 if (prealloc || extsz) { 644 if (prealloc || extsz) {
658 error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz, 645 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
659 &last_fsb);
660 if (error) 646 if (error)
661 return error; 647 return error;
662 } 648 }
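The xfs_iomap.c hunks above drop the isize parameter from both helpers: callers no longer compute the larger of ip->i_size and ip->i_new_size by hand, and the alignment code consults ip->i_size directly. A minimal sketch of the resulting call pattern in xfs_iomap_write_direct(), using only names from this diff; the surrounding locals are illustrative, not part of the patch:

	/*
	 * Sketch only: EOF alignment after this patch. The isize
	 * local and the ip->i_new_size comparison are gone; the
	 * helper reads ip->i_size itself.
	 */
	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)(offset + count));
	if ((offset + count) > ip->i_size) {
		error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
		if (error)
			goto error_out;
	}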
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index cf6754a3c5b3..e19d0a8d5618 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -69,7 +69,7 @@ xfs_bulkstat_one_iget(
69 } 69 }
70 70
71 ASSERT(ip != NULL); 71 ASSERT(ip != NULL);
72 ASSERT(ip->i_blkno != (xfs_daddr_t)0); 72 ASSERT(ip->i_imap.im_blkno != 0);
73 73
74 dic = &ip->i_d; 74 dic = &ip->i_d;
75 75
@@ -125,13 +125,9 @@ STATIC void
125xfs_bulkstat_one_dinode( 125xfs_bulkstat_one_dinode(
126 xfs_mount_t *mp, /* mount point for filesystem */ 126 xfs_mount_t *mp, /* mount point for filesystem */
127 xfs_ino_t ino, /* inode number to get data for */ 127 xfs_ino_t ino, /* inode number to get data for */
128 xfs_dinode_t *dip, /* dinode inode pointer */ 128 xfs_dinode_t *dic, /* dinode inode pointer */
129 xfs_bstat_t *buf) /* return buffer */ 129 xfs_bstat_t *buf) /* return buffer */
130{ 130{
131 xfs_dinode_core_t *dic; /* dinode core info pointer */
132
133 dic = &dip->di_core;
134
135 /* 131 /*
136 * The inode format changed when we moved the link count and 132 * The inode format changed when we moved the link count and
137 * made it 32 bits long. If this is an old format inode, 133 * made it 32 bits long. If this is an old format inode,
@@ -143,7 +139,7 @@ xfs_bulkstat_one_dinode(
143 * the new format. We don't change the version number so that we 139 * the new format. We don't change the version number so that we
144 * can distinguish this from a real new format inode. 140 * can distinguish this from a real new format inode.
145 */ 141 */
146 if (dic->di_version == XFS_DINODE_VERSION_1) { 142 if (dic->di_version == 1) {
147 buf->bs_nlink = be16_to_cpu(dic->di_onlink); 143 buf->bs_nlink = be16_to_cpu(dic->di_onlink);
148 buf->bs_projid = 0; 144 buf->bs_projid = 0;
149 } else { 145 } else {
@@ -162,7 +158,7 @@ xfs_bulkstat_one_dinode(
162 buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); 158 buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
163 buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); 159 buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
164 buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); 160 buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
165 buf->bs_xflags = xfs_dic2xflags(dip); 161 buf->bs_xflags = xfs_dic2xflags(dic);
166 buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; 162 buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
167 buf->bs_extents = be32_to_cpu(dic->di_nextents); 163 buf->bs_extents = be32_to_cpu(dic->di_nextents);
168 buf->bs_gen = be32_to_cpu(dic->di_gen); 164 buf->bs_gen = be32_to_cpu(dic->di_gen);
@@ -173,7 +169,7 @@ xfs_bulkstat_one_dinode(
173 169
174 switch (dic->di_format) { 170 switch (dic->di_format) {
175 case XFS_DINODE_FMT_DEV: 171 case XFS_DINODE_FMT_DEV:
176 buf->bs_rdev = be32_to_cpu(dip->di_u.di_dev); 172 buf->bs_rdev = xfs_dinode_get_rdev(dic);
177 buf->bs_blksize = BLKDEV_IOSIZE; 173 buf->bs_blksize = BLKDEV_IOSIZE;
178 buf->bs_blocks = 0; 174 buf->bs_blocks = 0;
179 break; 175 break;
@@ -192,27 +188,34 @@ xfs_bulkstat_one_dinode(
192 } 188 }
193} 189}
194 190
191/* Return 0 on success or positive error */
195STATIC int 192STATIC int
196xfs_bulkstat_one_fmt( 193xfs_bulkstat_one_fmt(
197 void __user *ubuffer, 194 void __user *ubuffer,
195 int ubsize,
196 int *ubused,
198 const xfs_bstat_t *buffer) 197 const xfs_bstat_t *buffer)
199{ 198{
199 if (ubsize < sizeof(*buffer))
200 return XFS_ERROR(ENOMEM);
200 if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) 201 if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
201 return -EFAULT; 202 return XFS_ERROR(EFAULT);
202 return sizeof(*buffer); 203 if (ubused)
204 *ubused = sizeof(*buffer);
205 return 0;
203} 206}
204 207
205/* 208/*
206 * Return stat information for one inode. 209 * Return stat information for one inode.
207 * Return 0 if ok, else errno. 210 * Return 0 if ok, else errno.
208 */ 211 */
209int /* error status */ 212int /* error status */
210xfs_bulkstat_one( 213xfs_bulkstat_one_int(
211 xfs_mount_t *mp, /* mount point for filesystem */ 214 xfs_mount_t *mp, /* mount point for filesystem */
212 xfs_ino_t ino, /* inode number to get data for */ 215 xfs_ino_t ino, /* inode number to get data for */
213 void __user *buffer, /* buffer to place output in */ 216 void __user *buffer, /* buffer to place output in */
214 int ubsize, /* size of buffer */ 217 int ubsize, /* size of buffer */
215 void *private_data, /* my private data */ 218 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
216 xfs_daddr_t bno, /* starting bno of inode cluster */ 219 xfs_daddr_t bno, /* starting bno of inode cluster */
217 int *ubused, /* bytes used by me */ 220 int *ubused, /* bytes used by me */
218 void *dibuff, /* on-disk inode buffer */ 221 void *dibuff, /* on-disk inode buffer */
@@ -221,15 +224,12 @@ xfs_bulkstat_one(
221 xfs_bstat_t *buf; /* return buffer */ 224 xfs_bstat_t *buf; /* return buffer */
222 int error = 0; /* error value */ 225 int error = 0; /* error value */
223 xfs_dinode_t *dip; /* dinode inode pointer */ 226 xfs_dinode_t *dip; /* dinode inode pointer */
224 bulkstat_one_fmt_pf formatter = private_data ? : xfs_bulkstat_one_fmt;
225 227
226 dip = (xfs_dinode_t *)dibuff; 228 dip = (xfs_dinode_t *)dibuff;
227 *stat = BULKSTAT_RV_NOTHING; 229 *stat = BULKSTAT_RV_NOTHING;
228 230
229 if (!buffer || xfs_internal_inum(mp, ino)) 231 if (!buffer || xfs_internal_inum(mp, ino))
230 return XFS_ERROR(EINVAL); 232 return XFS_ERROR(EINVAL);
231 if (ubsize < sizeof(*buf))
232 return XFS_ERROR(ENOMEM);
233 233
234 buf = kmem_alloc(sizeof(*buf), KM_SLEEP); 234 buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
235 235
@@ -244,21 +244,34 @@ xfs_bulkstat_one(
244 xfs_bulkstat_one_dinode(mp, ino, dip, buf); 244 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
245 } 245 }
246 246
247 error = formatter(buffer, buf); 247 error = formatter(buffer, ubsize, ubused, buf);
248 if (error < 0) { 248 if (error)
249 error = EFAULT;
250 goto out_free; 249 goto out_free;
251 }
252 250
253 *stat = BULKSTAT_RV_DIDONE; 251 *stat = BULKSTAT_RV_DIDONE;
254 if (ubused)
255 *ubused = error;
256 252
257 out_free: 253 out_free:
258 kmem_free(buf); 254 kmem_free(buf);
259 return error; 255 return error;
260} 256}
261 257
258int
259xfs_bulkstat_one(
260 xfs_mount_t *mp, /* mount point for filesystem */
261 xfs_ino_t ino, /* inode number to get data for */
262 void __user *buffer, /* buffer to place output in */
263 int ubsize, /* size of buffer */
264 void *private_data, /* my private data */
265 xfs_daddr_t bno, /* starting bno of inode cluster */
266 int *ubused, /* bytes used by me */
267 void *dibuff, /* on-disk inode buffer */
268 int *stat) /* BULKSTAT_RV_... */
269{
270 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
271 xfs_bulkstat_one_fmt, bno,
272 ubused, dibuff, stat);
273}
274
262/* 275/*
263 * Test to see whether we can use the ondisk inode directly, based 276 * Test to see whether we can use the ondisk inode directly, based
264 * on the given bulkstat flags, filling in dipp accordingly. 277 * on the given bulkstat flags, filling in dipp accordingly.
@@ -287,19 +300,19 @@ xfs_bulkstat_use_dinode(
287 * to disk yet. This is a temporary hack that would require a proper 300 * to disk yet. This is a temporary hack that would require a proper
288 * fix in the future. 301 * fix in the future.
289 */ 302 */
290 if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC || 303 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
291 !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version) || 304 !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
292 !dip->di_core.di_mode) 305 !dip->di_mode)
293 return 0; 306 return 0;
294 if (flags & BULKSTAT_FG_QUICK) { 307 if (flags & BULKSTAT_FG_QUICK) {
295 *dipp = dip; 308 *dipp = dip;
296 return 1; 309 return 1;
297 } 310 }
298 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ 311 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
299 aformat = dip->di_core.di_aformat; 312 aformat = dip->di_aformat;
300 if ((XFS_DFORK_Q(dip) == 0) || 313 if ((XFS_DFORK_Q(dip) == 0) ||
301 (aformat == XFS_DINODE_FMT_LOCAL) || 314 (aformat == XFS_DINODE_FMT_LOCAL) ||
302 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) { 315 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
303 *dipp = dip; 316 *dipp = dip;
304 return 1; 317 return 1;
305 } 318 }
@@ -359,7 +372,6 @@ xfs_bulkstat(
359 int ubused; /* bytes used by formatter */ 372 int ubused; /* bytes used by formatter */
360 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ 373 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
361 xfs_dinode_t *dip; /* ptr into bp for specific inode */ 374 xfs_dinode_t *dip; /* ptr into bp for specific inode */
362 xfs_inode_t *ip; /* ptr to in-core inode struct */
363 375
364 /* 376 /*
365 * Get the last inode value, see if there's nothing to do. 377 * Get the last inode value, see if there's nothing to do.
@@ -416,8 +428,7 @@ xfs_bulkstat(
416 /* 428 /*
417 * Allocate and initialize a btree cursor for ialloc btree. 429 * Allocate and initialize a btree cursor for ialloc btree.
418 */ 430 */
419 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO, 431 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
420 (xfs_inode_t *)0, 0);
421 irbp = irbuf; 432 irbp = irbuf;
422 irbufend = irbuf + nirbuf; 433 irbufend = irbuf + nirbuf;
423 end_of_ag = 0; 434 end_of_ag = 0;
@@ -472,7 +483,7 @@ xfs_bulkstat(
472 * In any case, increment to the next record. 483 * In any case, increment to the next record.
473 */ 484 */
474 if (!error) 485 if (!error)
475 error = xfs_inobt_increment(cur, 0, &tmp); 486 error = xfs_btree_increment(cur, 0, &tmp);
476 } else { 487 } else {
477 /* 488 /*
478 * Start of ag. Lookup the first inode chunk. 489 * Start of ag. Lookup the first inode chunk.
@@ -539,7 +550,7 @@ xfs_bulkstat(
539 * Set agino to after this chunk and bump the cursor. 550 * Set agino to after this chunk and bump the cursor.
540 */ 551 */
541 agino = gino + XFS_INODES_PER_CHUNK; 552 agino = gino + XFS_INODES_PER_CHUNK;
542 error = xfs_inobt_increment(cur, 0, &tmp); 553 error = xfs_btree_increment(cur, 0, &tmp);
543 cond_resched(); 554 cond_resched();
544 } 555 }
545 /* 556 /*
@@ -586,6 +597,8 @@ xfs_bulkstat(
586 597
587 if (flags & (BULKSTAT_FG_QUICK | 598 if (flags & (BULKSTAT_FG_QUICK |
588 BULKSTAT_FG_INLINE)) { 599 BULKSTAT_FG_INLINE)) {
600 int offset;
601
589 ino = XFS_AGINO_TO_INO(mp, agno, 602 ino = XFS_AGINO_TO_INO(mp, agno,
590 agino); 603 agino);
591 bno = XFS_AGB_TO_DADDR(mp, agno, 604 bno = XFS_AGB_TO_DADDR(mp, agno,
@@ -594,21 +607,15 @@ xfs_bulkstat(
594 /* 607 /*
595 * Get the inode cluster buffer 608 * Get the inode cluster buffer
596 */ 609 */
597 ASSERT(xfs_inode_zone != NULL);
598 ip = kmem_zone_zalloc(xfs_inode_zone,
599 KM_SLEEP);
600 ip->i_ino = ino;
601 ip->i_mount = mp;
602 spin_lock_init(&ip->i_flags_lock);
603 if (bp) 610 if (bp)
604 xfs_buf_relse(bp); 611 xfs_buf_relse(bp);
605 error = xfs_itobp(mp, NULL, ip, 612
606 &dip, &bp, bno, 613 error = xfs_inotobp(mp, NULL, ino, &dip,
607 XFS_IMAP_BULKSTAT, 614 &bp, &offset,
608 XFS_BUF_LOCK); 615 XFS_IGET_BULKSTAT);
616
609 if (!error) 617 if (!error)
610 clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; 618 clustidx = offset / mp->m_sb.sb_inodesize;
611 kmem_zone_free(xfs_inode_zone, ip);
612 if (XFS_TEST_ERROR(error != 0, 619 if (XFS_TEST_ERROR(error != 0,
613 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, 620 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
614 XFS_RANDOM_BULKSTAT_READ_CHUNK)) { 621 XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
@@ -842,8 +849,7 @@ xfs_inumbers(
842 agino = 0; 849 agino = 0;
843 continue; 850 continue;
844 } 851 }
845 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, 852 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno);
846 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
847 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); 853 error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp);
848 if (error) { 854 if (error) {
849 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 855 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -887,7 +893,7 @@ xfs_inumbers(
887 bufidx = 0; 893 bufidx = 0;
888 } 894 }
889 if (left) { 895 if (left) {
890 error = xfs_inobt_increment(cur, 0, &tmp); 896 error = xfs_btree_increment(cur, 0, &tmp);
891 if (error) { 897 if (error) {
892 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 898 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
893 cur = NULL; 899 cur = NULL;
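The BULKSTAT_FG_QUICK/BULKSTAT_FG_INLINE path above no longer fabricates a throwaway xfs_inode just to locate the cluster buffer; xfs_inotobp() maps the inode number straight to its on-disk dinode and byte offset. A minimal sketch of the new mapping call, with arguments as shown in the hunk:

	/*
	 * Sketch: map "ino" to its dinode without allocating an
	 * in-core inode. "offset" is the byte offset of the inode
	 * within the returned cluster buffer.
	 */
	int offset;

	error = xfs_inotobp(mp, NULL, ino, &dip, &bp, &offset,
			    XFS_IGET_BULKSTAT);
	if (!error)
		clustidx = offset / mp->m_sb.sb_inodesize;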
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index a1f18fce9b70..1fb04e7deb61 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -71,9 +71,23 @@ xfs_bulkstat_single(
71 71
 72typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ 72typedef int (*bulkstat_one_fmt_pf)( /* returns 0 or positive error */
73 void __user *ubuffer, /* buffer to write to */ 73 void __user *ubuffer, /* buffer to write to */
74 int ubsize, /* remaining user buffer sz */
75 int *ubused, /* bytes used by formatter */
74 const xfs_bstat_t *buffer); /* buffer to read from */ 76 const xfs_bstat_t *buffer); /* buffer to read from */
75 77
76int 78int
79xfs_bulkstat_one_int(
80 xfs_mount_t *mp,
81 xfs_ino_t ino,
82 void __user *buffer,
83 int ubsize,
84 bulkstat_one_fmt_pf formatter,
85 xfs_daddr_t bno,
86 int *ubused,
87 void *dibuff,
88 int *stat);
89
90int
77xfs_bulkstat_one( 91xfs_bulkstat_one(
78 xfs_mount_t *mp, 92 xfs_mount_t *mp,
79 xfs_ino_t ino, 93 xfs_ino_t ino,
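With the widened bulkstat_one_fmt_pf signature, a caller can feed its own formatter to xfs_bulkstat_one_int() instead of smuggling one through private_data. A hedged sketch; my_bulkstat_fmt is a hypothetical formatter written against the new contract (0 on success, positive error, usage reported through *ubused), not code from this patch:

	/* Hypothetical formatter, mirroring xfs_bulkstat_one_fmt. */
	STATIC int
	my_bulkstat_fmt(
		void __user		*ubuffer,
		int			ubsize,
		int			*ubused,
		const xfs_bstat_t	*buffer)
	{
		if (ubsize < sizeof(*buffer))
			return XFS_ERROR(ENOMEM);
		if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
			return XFS_ERROR(EFAULT);
		if (ubused)
			*ubused = sizeof(*buffer);
		return 0;
	}

	/* Wired up through the new entry point: */
	error = xfs_bulkstat_one_int(mp, ino, ubuffer, ubsize,
				     my_bulkstat_fmt, bno, &ubused,
				     dibuff, &stat);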
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3608a0f0a5f6..f4726f702a9e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -100,12 +100,11 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
100 100
101 101
102/* local ticket functions */ 102/* local ticket functions */
103STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, 103STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log,
104 int unit_bytes, 104 int unit_bytes,
105 int count, 105 int count,
106 char clientid, 106 char clientid,
107 uint flags); 107 uint flags);
108STATIC void xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket);
109 108
110#if defined(DEBUG) 109#if defined(DEBUG)
111STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr); 110STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr);
@@ -360,7 +359,7 @@ xfs_log_done(xfs_mount_t *mp,
360 */ 359 */
361 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); 360 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
362 xlog_ungrant_log_space(log, ticket); 361 xlog_ungrant_log_space(log, ticket);
363 xlog_ticket_put(log, ticket); 362 xfs_log_ticket_put(ticket);
364 } else { 363 } else {
365 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 364 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
366 xlog_regrant_reserve_log_space(log, ticket); 365 xlog_regrant_reserve_log_space(log, ticket);
@@ -514,7 +513,7 @@ xfs_log_reserve(xfs_mount_t *mp,
514 retval = xlog_regrant_write_log_space(log, internal_ticket); 513 retval = xlog_regrant_write_log_space(log, internal_ticket);
515 } else { 514 } else {
516 /* may sleep if need to allocate more tickets */ 515 /* may sleep if need to allocate more tickets */
517 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, 516 internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
518 client, flags); 517 client, flags);
519 if (!internal_ticket) 518 if (!internal_ticket)
520 return XFS_ERROR(ENOMEM); 519 return XFS_ERROR(ENOMEM);
@@ -572,12 +571,12 @@ xfs_log_mount(
572 /* 571 /*
573 * Initialize the AIL now we have a log. 572 * Initialize the AIL now we have a log.
574 */ 573 */
575 spin_lock_init(&mp->m_ail_lock);
576 error = xfs_trans_ail_init(mp); 574 error = xfs_trans_ail_init(mp);
577 if (error) { 575 if (error) {
578 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); 576 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error);
579 goto error; 577 goto error;
580 } 578 }
579 mp->m_log->l_ailp = mp->m_ail;
581 580
582 /* 581 /*
583 * skip log recovery on a norecovery mount. pretend it all 582 * skip log recovery on a norecovery mount. pretend it all
@@ -730,8 +729,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
730 spin_lock(&log->l_icloglock); 729 spin_lock(&log->l_icloglock);
731 iclog = log->l_iclog; 730 iclog = log->l_iclog;
732 atomic_inc(&iclog->ic_refcnt); 731 atomic_inc(&iclog->ic_refcnt);
733 spin_unlock(&log->l_icloglock);
734 xlog_state_want_sync(log, iclog); 732 xlog_state_want_sync(log, iclog);
733 spin_unlock(&log->l_icloglock);
735 error = xlog_state_release_iclog(log, iclog); 734 error = xlog_state_release_iclog(log, iclog);
736 735
737 spin_lock(&log->l_icloglock); 736 spin_lock(&log->l_icloglock);
@@ -749,7 +748,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
749 if (tic) { 748 if (tic) {
750 xlog_trace_loggrant(log, tic, "unmount rec"); 749 xlog_trace_loggrant(log, tic, "unmount rec");
751 xlog_ungrant_log_space(log, tic); 750 xlog_ungrant_log_space(log, tic);
752 xlog_ticket_put(log, tic); 751 xfs_log_ticket_put(tic);
753 } 752 }
754 } else { 753 } else {
755 /* 754 /*
@@ -768,9 +767,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
768 spin_lock(&log->l_icloglock); 767 spin_lock(&log->l_icloglock);
769 iclog = log->l_iclog; 768 iclog = log->l_iclog;
770 atomic_inc(&iclog->ic_refcnt); 769 atomic_inc(&iclog->ic_refcnt);
771 spin_unlock(&log->l_icloglock);
772 770
773 xlog_state_want_sync(log, iclog); 771 xlog_state_want_sync(log, iclog);
772 spin_unlock(&log->l_icloglock);
774 error = xlog_state_release_iclog(log, iclog); 773 error = xlog_state_release_iclog(log, iclog);
775 774
776 spin_lock(&log->l_icloglock); 775 spin_lock(&log->l_icloglock);
@@ -906,7 +905,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
906int 905int
907xfs_log_need_covered(xfs_mount_t *mp) 906xfs_log_need_covered(xfs_mount_t *mp)
908{ 907{
909 int needed = 0, gen; 908 int needed = 0;
910 xlog_t *log = mp->m_log; 909 xlog_t *log = mp->m_log;
911 910
912 if (!xfs_fs_writable(mp)) 911 if (!xfs_fs_writable(mp))
@@ -915,7 +914,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
915 spin_lock(&log->l_icloglock); 914 spin_lock(&log->l_icloglock);
916 if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || 915 if (((log->l_covered_state == XLOG_STATE_COVER_NEED) ||
917 (log->l_covered_state == XLOG_STATE_COVER_NEED2)) 916 (log->l_covered_state == XLOG_STATE_COVER_NEED2))
918 && !xfs_trans_first_ail(mp, &gen) 917 && !xfs_trans_ail_tail(log->l_ailp)
919 && xlog_iclogs_empty(log)) { 918 && xlog_iclogs_empty(log)) {
920 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 919 if (log->l_covered_state == XLOG_STATE_COVER_NEED)
921 log->l_covered_state = XLOG_STATE_COVER_DONE; 920 log->l_covered_state = XLOG_STATE_COVER_DONE;
@@ -952,7 +951,7 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
952 xfs_lsn_t tail_lsn; 951 xfs_lsn_t tail_lsn;
953 xlog_t *log = mp->m_log; 952 xlog_t *log = mp->m_log;
954 953
955 tail_lsn = xfs_trans_tail_ail(mp); 954 tail_lsn = xfs_trans_ail_tail(mp->m_ail);
956 spin_lock(&log->l_grant_lock); 955 spin_lock(&log->l_grant_lock);
957 if (tail_lsn != 0) { 956 if (tail_lsn != 0) {
958 log->l_tail_lsn = tail_lsn; 957 log->l_tail_lsn = tail_lsn;
@@ -1030,12 +1029,6 @@ xlog_iodone(xfs_buf_t *bp)
1030 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2); 1029 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
1031 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1030 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1032 aborted = 0; 1031 aborted = 0;
1033
1034 /*
1035 * Some versions of cpp barf on the recursive definition of
1036 * ic_log -> hic_fields.ic_log and expand ic_log twice when
1037 * it is passed through two macros. Workaround broken cpp.
1038 */
1039 l = iclog->ic_log; 1032 l = iclog->ic_log;
1040 1033
1041 /* 1034 /*
@@ -1302,7 +1295,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1302 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); 1295 XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
1303 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); 1296 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1304 iclog->ic_bp = bp; 1297 iclog->ic_bp = bp;
1305 iclog->hic_data = bp->b_addr; 1298 iclog->ic_data = bp->b_addr;
1306#ifdef DEBUG 1299#ifdef DEBUG
1307 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); 1300 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
1308#endif 1301#endif
@@ -1322,7 +1315,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1322 atomic_set(&iclog->ic_refcnt, 0); 1315 atomic_set(&iclog->ic_refcnt, 0);
1323 spin_lock_init(&iclog->ic_callback_lock); 1316 spin_lock_init(&iclog->ic_callback_lock);
1324 iclog->ic_callback_tail = &(iclog->ic_callback); 1317 iclog->ic_callback_tail = &(iclog->ic_callback);
1325 iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; 1318 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
1326 1319
1327 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1320 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1328 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1321 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
@@ -1446,7 +1439,7 @@ xlog_grant_push_ail(xfs_mount_t *mp,
1446 */ 1439 */
1447 if (threshold_lsn && 1440 if (threshold_lsn &&
1448 !XLOG_FORCED_SHUTDOWN(log)) 1441 !XLOG_FORCED_SHUTDOWN(log))
1449 xfs_trans_push_ail(mp, threshold_lsn); 1442 xfs_trans_ail_push(log->l_ailp, threshold_lsn);
1450} /* xlog_grant_push_ail */ 1443} /* xlog_grant_push_ail */
1451 1444
1452 1445
@@ -1991,7 +1984,9 @@ xlog_write(xfs_mount_t * mp,
1991 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { 1984 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
1992 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); 1985 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1993 record_cnt = data_cnt = 0; 1986 record_cnt = data_cnt = 0;
1987 spin_lock(&log->l_icloglock);
1994 xlog_state_want_sync(log, iclog); 1988 xlog_state_want_sync(log, iclog);
1989 spin_unlock(&log->l_icloglock);
1995 if (commit_iclog) { 1990 if (commit_iclog) {
1996 ASSERT(flags & XLOG_COMMIT_TRANS); 1991 ASSERT(flags & XLOG_COMMIT_TRANS);
1997 *commit_iclog = iclog; 1992 *commit_iclog = iclog;
@@ -3200,7 +3195,7 @@ try_again:
3200STATIC void 3195STATIC void
3201xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) 3196xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3202{ 3197{
3203 spin_lock(&log->l_icloglock); 3198 ASSERT(spin_is_locked(&log->l_icloglock));
3204 3199
3205 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 3200 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3206 xlog_state_switch_iclogs(log, iclog, 0); 3201 xlog_state_switch_iclogs(log, iclog, 0);
@@ -3208,10 +3203,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3208 ASSERT(iclog->ic_state & 3203 ASSERT(iclog->ic_state &
3209 (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR)); 3204 (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR));
3210 } 3205 }
3211 3206}
3212 spin_unlock(&log->l_icloglock);
3213} /* xlog_state_want_sync */
3214
3215 3207
3216 3208
3217/***************************************************************************** 3209/*****************************************************************************
@@ -3222,22 +3214,33 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3222 */ 3214 */
3223 3215
3224/* 3216/*
3225 * Free a used ticket. 3217 * Free a used ticket when its refcount falls to zero.
3226 */ 3218 */
3227STATIC void 3219void
3228xlog_ticket_put(xlog_t *log, 3220xfs_log_ticket_put(
3229 xlog_ticket_t *ticket) 3221 xlog_ticket_t *ticket)
3230{ 3222{
3231 sv_destroy(&ticket->t_wait); 3223 ASSERT(atomic_read(&ticket->t_ref) > 0);
3232 kmem_zone_free(xfs_log_ticket_zone, ticket); 3224 if (atomic_dec_and_test(&ticket->t_ref)) {
3233} /* xlog_ticket_put */ 3225 sv_destroy(&ticket->t_wait);
3226 kmem_zone_free(xfs_log_ticket_zone, ticket);
3227 }
3228}
3234 3229
3230xlog_ticket_t *
3231xfs_log_ticket_get(
3232 xlog_ticket_t *ticket)
3233{
3234 ASSERT(atomic_read(&ticket->t_ref) > 0);
3235 atomic_inc(&ticket->t_ref);
3236 return ticket;
3237}
3235 3238
3236/* 3239/*
3237 * Allocate and initialise a new log ticket. 3240 * Allocate and initialise a new log ticket.
3238 */ 3241 */
3239STATIC xlog_ticket_t * 3242STATIC xlog_ticket_t *
3240xlog_ticket_get(xlog_t *log, 3243xlog_ticket_alloc(xlog_t *log,
3241 int unit_bytes, 3244 int unit_bytes,
3242 int cnt, 3245 int cnt,
3243 char client, 3246 char client,
@@ -3308,6 +3311,7 @@ xlog_ticket_get(xlog_t *log,
3308 unit_bytes += 2*BBSIZE; 3311 unit_bytes += 2*BBSIZE;
3309 } 3312 }
3310 3313
3314 atomic_set(&tic->t_ref, 1);
3311 tic->t_unit_res = unit_bytes; 3315 tic->t_unit_res = unit_bytes;
3312 tic->t_curr_res = unit_bytes; 3316 tic->t_curr_res = unit_bytes;
3313 tic->t_cnt = cnt; 3317 tic->t_cnt = cnt;
@@ -3323,7 +3327,7 @@ xlog_ticket_get(xlog_t *log,
3323 xlog_tic_reset_res(tic); 3327 xlog_tic_reset_res(tic);
3324 3328
3325 return tic; 3329 return tic;
3326} /* xlog_ticket_get */ 3330}
3327 3331
3328 3332
3329/****************************************************************************** 3333/******************************************************************************
@@ -3452,7 +3456,7 @@ xlog_verify_iclog(xlog_t *log,
3452 ptr = iclog->ic_datap; 3456 ptr = iclog->ic_datap;
3453 base_ptr = ptr; 3457 base_ptr = ptr;
3454 ophead = (xlog_op_header_t *)ptr; 3458 ophead = (xlog_op_header_t *)ptr;
3455 xhdr = (xlog_in_core_2_t *)&iclog->ic_header; 3459 xhdr = iclog->ic_data;
3456 for (i = 0; i < len; i++) { 3460 for (i = 0; i < len; i++) {
3457 ophead = (xlog_op_header_t *)ptr; 3461 ophead = (xlog_op_header_t *)ptr;
3458 3462
@@ -3558,7 +3562,8 @@ xfs_log_force_umount(
3558 if (!log || 3562 if (!log ||
3559 log->l_flags & XLOG_ACTIVE_RECOVERY) { 3563 log->l_flags & XLOG_ACTIVE_RECOVERY) {
3560 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 3564 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
3561 XFS_BUF_DONE(mp->m_sb_bp); 3565 if (mp->m_sb_bp)
3566 XFS_BUF_DONE(mp->m_sb_bp);
3562 return 0; 3567 return 0;
3563 } 3568 }
3564 3569
@@ -3579,7 +3584,9 @@ xfs_log_force_umount(
3579 spin_lock(&log->l_icloglock); 3584 spin_lock(&log->l_icloglock);
3580 spin_lock(&log->l_grant_lock); 3585 spin_lock(&log->l_grant_lock);
3581 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 3586 mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
3582 XFS_BUF_DONE(mp->m_sb_bp); 3587 if (mp->m_sb_bp)
3588 XFS_BUF_DONE(mp->m_sb_bp);
3589
3583 /* 3590 /*
3584 * This flag is sort of redundant because of the mount flag, but 3591 * This flag is sort of redundant because of the mount flag, but
3585 * it's good to maintain the separation between the log and the rest 3592 * it's good to maintain the separation between the log and the rest
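Both shutdown paths in xfs_log_force_umount() now tolerate a NULL superblock buffer, presumably because a forced shutdown can arrive before m_sb_bp is set up or after it is torn down. Condensed from the two hunks above:

	/* Sketch: guard m_sb_bp before marking it done. */
	mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
	if (mp->m_sb_bp)
		XFS_BUF_DONE(mp->m_sb_bp);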
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index d47b91f10822..8a3e84e900a3 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -134,6 +134,7 @@ typedef struct xfs_log_callback {
134#ifdef __KERNEL__ 134#ifdef __KERNEL__
135/* Log manager interfaces */ 135/* Log manager interfaces */
136struct xfs_mount; 136struct xfs_mount;
137struct xlog_ticket;
137xfs_lsn_t xfs_log_done(struct xfs_mount *mp, 138xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
138 xfs_log_ticket_t ticket, 139 xfs_log_ticket_t ticket,
139 void **iclog, 140 void **iclog,
@@ -177,6 +178,9 @@ int xfs_log_need_covered(struct xfs_mount *mp);
177 178
178void xlog_iodone(struct xfs_buf *); 179void xlog_iodone(struct xfs_buf *);
179 180
181struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket);
182void xfs_log_ticket_put(struct xlog_ticket *ticket);
183
180#endif 184#endif
181 185
182 186
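Log tickets are now reference counted: xlog_ticket_alloc() returns a ticket with t_ref of 1, each additional holder pairs xfs_log_ticket_get() with xfs_log_ticket_put(), and the final put destroys the wait queue and frees the ticket. A sketch of the lifetime rules implied by the hunks above (locals illustrative):

	tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, flags);
						/* t_ref == 1 */
	held = xfs_log_ticket_get(tic);		/* t_ref == 2 */
	/* ... both references used independently ... */
	xfs_log_ticket_put(held);		/* t_ref == 1 */
	xfs_log_ticket_put(tic);		/* t_ref == 0: sv_destroy + free */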
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index e7d8f84443fa..654167be0efb 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -245,6 +245,7 @@ typedef struct xlog_ticket {
245 struct xlog_ticket *t_next; /* :4|8 */ 245 struct xlog_ticket *t_next; /* :4|8 */
246 struct xlog_ticket *t_prev; /* :4|8 */ 246 struct xlog_ticket *t_prev; /* :4|8 */
247 xlog_tid_t t_tid; /* transaction identifier : 4 */ 247 xlog_tid_t t_tid; /* transaction identifier : 4 */
248 atomic_t t_ref; /* ticket reference count : 4 */
248 int t_curr_res; /* current reservation in bytes : 4 */ 249 int t_curr_res; /* current reservation in bytes : 4 */
249 int t_unit_res; /* unit reservation in bytes : 4 */ 250 int t_unit_res; /* unit reservation in bytes : 4 */
250 char t_ocnt; /* original count : 1 */ 251 char t_ocnt; /* original count : 1 */
@@ -309,6 +310,16 @@ typedef struct xlog_rec_ext_header {
309} xlog_rec_ext_header_t; 310} xlog_rec_ext_header_t;
310 311
311#ifdef __KERNEL__ 312#ifdef __KERNEL__
313
314/*
315 * Quite misnamed, because this union lays out the actual on-disk log buffer.
316 */
317typedef union xlog_in_core2 {
318 xlog_rec_header_t hic_header;
319 xlog_rec_ext_header_t hic_xheader;
320 char hic_sector[XLOG_HEADER_SIZE];
321} xlog_in_core_2_t;
322
312/* 323/*
313 * - A log record header is 512 bytes. There is plenty of room to grow the 324 * - A log record header is 512 bytes. There is plenty of room to grow the
314 * xlog_rec_header_t into the reserved space. 325 * xlog_rec_header_t into the reserved space.
@@ -338,7 +349,7 @@ typedef struct xlog_rec_ext_header {
338 * We'll put all the read-only and l_icloglock fields in the first cacheline, 349 * We'll put all the read-only and l_icloglock fields in the first cacheline,
339 * and move everything else out to subsequent cachelines. 350 * and move everything else out to subsequent cachelines.
340 */ 351 */
341typedef struct xlog_iclog_fields { 352typedef struct xlog_in_core {
342 sv_t ic_force_wait; 353 sv_t ic_force_wait;
343 sv_t ic_write_wait; 354 sv_t ic_write_wait;
344 struct xlog_in_core *ic_next; 355 struct xlog_in_core *ic_next;
@@ -361,41 +372,11 @@ typedef struct xlog_iclog_fields {
361 372
362 /* reference counts need their own cacheline */ 373 /* reference counts need their own cacheline */
363 atomic_t ic_refcnt ____cacheline_aligned_in_smp; 374 atomic_t ic_refcnt ____cacheline_aligned_in_smp;
364} xlog_iclog_fields_t; 375 xlog_in_core_2_t *ic_data;
365 376#define ic_header ic_data->hic_header
366typedef union xlog_in_core2 {
367 xlog_rec_header_t hic_header;
368 xlog_rec_ext_header_t hic_xheader;
369 char hic_sector[XLOG_HEADER_SIZE];
370} xlog_in_core_2_t;
371
372typedef struct xlog_in_core {
373 xlog_iclog_fields_t hic_fields;
374 xlog_in_core_2_t *hic_data;
375} xlog_in_core_t; 377} xlog_in_core_t;
376 378
377/* 379/*
378 * Defines to save our code from this glop.
379 */
380#define ic_force_wait hic_fields.ic_force_wait
381#define ic_write_wait hic_fields.ic_write_wait
382#define ic_next hic_fields.ic_next
383#define ic_prev hic_fields.ic_prev
384#define ic_bp hic_fields.ic_bp
385#define ic_log hic_fields.ic_log
386#define ic_callback hic_fields.ic_callback
387#define ic_callback_lock hic_fields.ic_callback_lock
388#define ic_callback_tail hic_fields.ic_callback_tail
389#define ic_trace hic_fields.ic_trace
390#define ic_size hic_fields.ic_size
391#define ic_offset hic_fields.ic_offset
392#define ic_refcnt hic_fields.ic_refcnt
393#define ic_bwritecnt hic_fields.ic_bwritecnt
394#define ic_state hic_fields.ic_state
395#define ic_datap hic_fields.ic_datap
396#define ic_header hic_data->hic_header
397
398/*
399 * The reservation head lsn is not made up of a cycle number and block number. 380 * The reservation head lsn is not made up of a cycle number and block number.
400 * Instead, it uses a cycle number and byte number. Logs don't expect to 381 * Instead, it uses a cycle number and byte number. Logs don't expect to
401 * overflow 31 bits worth of byte offset, so using a byte number will mean 382 * overflow 31 bits worth of byte offset, so using a byte number will mean
@@ -404,6 +385,7 @@ typedef struct xlog_in_core {
404typedef struct log { 385typedef struct log {
405 /* The following fields don't need locking */ 386 /* The following fields don't need locking */
406 struct xfs_mount *l_mp; /* mount point */ 387 struct xfs_mount *l_mp; /* mount point */
388 struct xfs_ail *l_ailp; /* AIL log is working with */
407 struct xfs_buf *l_xbuf; /* extra buffer for log 389 struct xfs_buf *l_xbuf; /* extra buffer for log
408 * wrapping */ 390 * wrapping */
409 struct xfs_buftarg *l_targ; /* buftarg of log */ 391 struct xfs_buftarg *l_targ; /* buftarg of log */
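The hic_fields wrapper and its block of #define glop are gone: xlog_in_core_t carries its fields directly and reaches the on-disk buffer through ic_data, with ic_header retained as the one compatibility macro. A sketch of what an access now expands to:

	xlog_in_core_t		*iclog = log->l_iclog;
	/* ic_header is now #defined as ic_data->hic_header */
	xlog_rec_header_t	*head = &iclog->ic_header;
	/* payload starts one header's worth into the buffer */
	char			*datap = (char *)iclog->ic_data +
					 log->l_iclog_hsize;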
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 70e3ba32e6be..35cca98bd94c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -36,7 +36,6 @@
36#include "xfs_dinode.h" 36#include "xfs_dinode.h"
37#include "xfs_inode.h" 37#include "xfs_inode.h"
38#include "xfs_inode_item.h" 38#include "xfs_inode_item.h"
39#include "xfs_imap.h"
40#include "xfs_alloc.h" 39#include "xfs_alloc.h"
41#include "xfs_ialloc.h" 40#include "xfs_ialloc.h"
42#include "xfs_log_priv.h" 41#include "xfs_log_priv.h"
@@ -54,10 +53,8 @@ STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q,
54 xlog_recover_item_t *item); 53 xlog_recover_item_t *item);
55#if defined(DEBUG) 54#if defined(DEBUG)
56STATIC void xlog_recover_check_summary(xlog_t *); 55STATIC void xlog_recover_check_summary(xlog_t *);
57STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int);
58#else 56#else
59#define xlog_recover_check_summary(log) 57#define xlog_recover_check_summary(log)
60#define xlog_recover_check_ail(mp, lip, gen)
61#endif 58#endif
62 59
63 60
@@ -270,21 +267,16 @@ STATIC void
270xlog_recover_iodone( 267xlog_recover_iodone(
271 struct xfs_buf *bp) 268 struct xfs_buf *bp)
272{ 269{
273 xfs_mount_t *mp;
274
275 ASSERT(XFS_BUF_FSPRIVATE(bp, void *));
276
277 if (XFS_BUF_GETERROR(bp)) { 270 if (XFS_BUF_GETERROR(bp)) {
278 /* 271 /*
279 * We're not going to bother about retrying 272 * We're not going to bother about retrying
280 * this during recovery. One strike! 273 * this during recovery. One strike!
281 */ 274 */
282 mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *);
283 xfs_ioerror_alert("xlog_recover_iodone", 275 xfs_ioerror_alert("xlog_recover_iodone",
284 mp, bp, XFS_BUF_ADDR(bp)); 276 bp->b_mount, bp, XFS_BUF_ADDR(bp));
285 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 277 xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
286 } 278 }
287 XFS_BUF_SET_FSPRIVATE(bp, NULL); 279 bp->b_mount = NULL;
288 XFS_BUF_CLR_IODONE_FUNC(bp); 280 XFS_BUF_CLR_IODONE_FUNC(bp);
289 xfs_biodone(bp); 281 xfs_biodone(bp);
290} 282}
@@ -2228,9 +2220,8 @@ xlog_recover_do_buffer_trans(
2228 XFS_BUF_STALE(bp); 2220 XFS_BUF_STALE(bp);
2229 error = xfs_bwrite(mp, bp); 2221 error = xfs_bwrite(mp, bp);
2230 } else { 2222 } else {
2231 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2223 ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
2232 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2224 bp->b_mount = mp;
2233 XFS_BUF_SET_FSPRIVATE(bp, mp);
2234 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2225 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2235 xfs_bdwrite(mp, bp); 2226 xfs_bdwrite(mp, bp);
2236 } 2227 }
@@ -2247,7 +2238,6 @@ xlog_recover_do_inode_trans(
2247 xfs_inode_log_format_t *in_f; 2238 xfs_inode_log_format_t *in_f;
2248 xfs_mount_t *mp; 2239 xfs_mount_t *mp;
2249 xfs_buf_t *bp; 2240 xfs_buf_t *bp;
2250 xfs_imap_t imap;
2251 xfs_dinode_t *dip; 2241 xfs_dinode_t *dip;
2252 xfs_ino_t ino; 2242 xfs_ino_t ino;
2253 int len; 2243 int len;
@@ -2275,54 +2265,35 @@ xlog_recover_do_inode_trans(
2275 } 2265 }
2276 ino = in_f->ilf_ino; 2266 ino = in_f->ilf_ino;
2277 mp = log->l_mp; 2267 mp = log->l_mp;
2278 if (ITEM_TYPE(item) == XFS_LI_INODE) {
2279 imap.im_blkno = (xfs_daddr_t)in_f->ilf_blkno;
2280 imap.im_len = in_f->ilf_len;
2281 imap.im_boffset = in_f->ilf_boffset;
2282 } else {
2283 /*
2284 * It's an old inode format record. We don't know where
2285 * its cluster is located on disk, and we can't allow
2286 * xfs_imap() to figure it out because the inode btrees
2287 * are not ready to be used. Therefore do not pass the
2288 * XFS_IMAP_LOOKUP flag to xfs_imap(). This will give
2289 * us only the single block in which the inode lives
2290 * rather than its cluster, so we must make sure to
2291 * invalidate the buffer when we write it out below.
2292 */
2293 imap.im_blkno = 0;
2294 error = xfs_imap(log->l_mp, NULL, ino, &imap, 0);
2295 if (error)
2296 goto error;
2297 }
2298 2268
2299 /* 2269 /*
2300 * Inode buffers can be freed, look out for it, 2270 * Inode buffers can be freed, look out for it,
2301 * and do not replay the inode. 2271 * and do not replay the inode.
2302 */ 2272 */
2303 if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) { 2273 if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
2274 in_f->ilf_len, 0)) {
2304 error = 0; 2275 error = 0;
2305 goto error; 2276 goto error;
2306 } 2277 }
2307 2278
2308 bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len, 2279 bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno,
2309 XFS_BUF_LOCK); 2280 in_f->ilf_len, XFS_BUF_LOCK);
2310 if (XFS_BUF_ISERROR(bp)) { 2281 if (XFS_BUF_ISERROR(bp)) {
2311 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, 2282 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
2312 bp, imap.im_blkno); 2283 bp, in_f->ilf_blkno);
2313 error = XFS_BUF_GETERROR(bp); 2284 error = XFS_BUF_GETERROR(bp);
2314 xfs_buf_relse(bp); 2285 xfs_buf_relse(bp);
2315 goto error; 2286 goto error;
2316 } 2287 }
2317 error = 0; 2288 error = 0;
2318 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); 2289 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
2319 dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); 2290 dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
2320 2291
2321 /* 2292 /*
2322 * Make sure the place we're flushing out to really looks 2293 * Make sure the place we're flushing out to really looks
2323 * like an inode! 2294 * like an inode!
2324 */ 2295 */
2325 if (unlikely(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC)) { 2296 if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
2326 xfs_buf_relse(bp); 2297 xfs_buf_relse(bp);
2327 xfs_fs_cmn_err(CE_ALERT, mp, 2298 xfs_fs_cmn_err(CE_ALERT, mp,
2328 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", 2299 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld",
@@ -2345,12 +2316,12 @@ xlog_recover_do_inode_trans(
2345 } 2316 }
2346 2317
2347 /* Skip replay when the on disk inode is newer than the log one */ 2318 /* Skip replay when the on disk inode is newer than the log one */
2348 if (dicp->di_flushiter < be16_to_cpu(dip->di_core.di_flushiter)) { 2319 if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
2349 /* 2320 /*
2350 * Deal with the wrap case, DI_MAX_FLUSH is less 2321 * Deal with the wrap case, DI_MAX_FLUSH is less
2351 * than smaller numbers 2322 * than smaller numbers
2352 */ 2323 */
2353 if (be16_to_cpu(dip->di_core.di_flushiter) == DI_MAX_FLUSH && 2324 if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
2354 dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { 2325 dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
2355 /* do nothing */ 2326 /* do nothing */
2356 } else { 2327 } else {
@@ -2410,7 +2381,7 @@ xlog_recover_do_inode_trans(
2410 error = EFSCORRUPTED; 2381 error = EFSCORRUPTED;
2411 goto error; 2382 goto error;
2412 } 2383 }
2413 if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) { 2384 if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) {
2414 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", 2385 XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)",
2415 XFS_ERRLEVEL_LOW, mp, dicp); 2386 XFS_ERRLEVEL_LOW, mp, dicp);
2416 xfs_buf_relse(bp); 2387 xfs_buf_relse(bp);
@@ -2422,23 +2393,24 @@ xlog_recover_do_inode_trans(
2422 } 2393 }
2423 2394
2424 /* The core is in in-core format */ 2395 /* The core is in in-core format */
2425 xfs_dinode_to_disk(&dip->di_core, 2396 xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr);
2426 (xfs_icdinode_t *)item->ri_buf[1].i_addr);
2427 2397
2428 /* the rest is in on-disk format */ 2398 /* the rest is in on-disk format */
2429 if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) { 2399 if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) {
2430 memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t), 2400 memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode),
2431 item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t), 2401 item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode),
2432 item->ri_buf[1].i_len - sizeof(xfs_dinode_core_t)); 2402 item->ri_buf[1].i_len - sizeof(struct xfs_icdinode));
2433 } 2403 }
2434 2404
2435 fields = in_f->ilf_fields; 2405 fields = in_f->ilf_fields;
2436 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { 2406 switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
2437 case XFS_ILOG_DEV: 2407 case XFS_ILOG_DEV:
2438 dip->di_u.di_dev = cpu_to_be32(in_f->ilf_u.ilfu_rdev); 2408 xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
2439 break; 2409 break;
2440 case XFS_ILOG_UUID: 2410 case XFS_ILOG_UUID:
2441 dip->di_u.di_muuid = in_f->ilf_u.ilfu_uuid; 2411 memcpy(XFS_DFORK_DPTR(dip),
2412 &in_f->ilf_u.ilfu_uuid,
2413 sizeof(uuid_t));
2442 break; 2414 break;
2443 } 2415 }
2444 2416
@@ -2454,12 +2426,12 @@ xlog_recover_do_inode_trans(
2454 switch (fields & XFS_ILOG_DFORK) { 2426 switch (fields & XFS_ILOG_DFORK) {
2455 case XFS_ILOG_DDATA: 2427 case XFS_ILOG_DDATA:
2456 case XFS_ILOG_DEXT: 2428 case XFS_ILOG_DEXT:
2457 memcpy(&dip->di_u, src, len); 2429 memcpy(XFS_DFORK_DPTR(dip), src, len);
2458 break; 2430 break;
2459 2431
2460 case XFS_ILOG_DBROOT: 2432 case XFS_ILOG_DBROOT:
2461 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, 2433 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
2462 &(dip->di_u.di_bmbt), 2434 (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip),
2463 XFS_DFORK_DSIZE(dip, mp)); 2435 XFS_DFORK_DSIZE(dip, mp));
2464 break; 2436 break;
2465 2437
@@ -2496,8 +2468,8 @@ xlog_recover_do_inode_trans(
2496 2468
2497 case XFS_ILOG_ABROOT: 2469 case XFS_ILOG_ABROOT:
2498 dest = XFS_DFORK_APTR(dip); 2470 dest = XFS_DFORK_APTR(dip);
2499 xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, 2471 xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src,
2500 (xfs_bmdr_block_t*)dest, 2472 len, (xfs_bmdr_block_t*)dest,
2501 XFS_DFORK_ASIZE(dip, mp)); 2473 XFS_DFORK_ASIZE(dip, mp));
2502 break; 2474 break;
2503 2475
@@ -2512,9 +2484,8 @@ xlog_recover_do_inode_trans(
2512 2484
2513write_inode_buffer: 2485write_inode_buffer:
2514 if (ITEM_TYPE(item) == XFS_LI_INODE) { 2486 if (ITEM_TYPE(item) == XFS_LI_INODE) {
2515 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2487 ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
2516 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2488 bp->b_mount = mp;
2517 XFS_BUF_SET_FSPRIVATE(bp, mp);
2518 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2489 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2519 xfs_bdwrite(mp, bp); 2490 xfs_bdwrite(mp, bp);
2520 } else { 2491 } else {
@@ -2645,9 +2616,8 @@ xlog_recover_do_dquot_trans(
2645 memcpy(ddq, recddq, item->ri_buf[1].i_len); 2616 memcpy(ddq, recddq, item->ri_buf[1].i_len);
2646 2617
2647 ASSERT(dq_f->qlf_size == 2); 2618 ASSERT(dq_f->qlf_size == 2);
2648 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || 2619 ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
2649 XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); 2620 bp->b_mount = mp;
2650 XFS_BUF_SET_FSPRIVATE(bp, mp);
2651 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2621 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
2652 xfs_bdwrite(mp, bp); 2622 xfs_bdwrite(mp, bp);
2653 2623
@@ -2689,11 +2659,11 @@ xlog_recover_do_efi_trans(
2689 efip->efi_next_extent = efi_formatp->efi_nextents; 2659 efip->efi_next_extent = efi_formatp->efi_nextents;
2690 efip->efi_flags |= XFS_EFI_COMMITTED; 2660 efip->efi_flags |= XFS_EFI_COMMITTED;
2691 2661
2692 spin_lock(&mp->m_ail_lock); 2662 spin_lock(&log->l_ailp->xa_lock);
2693 /* 2663 /*
2694 * xfs_trans_update_ail() drops the AIL lock. 2664 * xfs_trans_ail_update() drops the AIL lock.
2695 */ 2665 */
2696 xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn); 2666 xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn);
2697 return 0; 2667 return 0;
2698} 2668}
2699 2669
@@ -2712,12 +2682,12 @@ xlog_recover_do_efd_trans(
2712 xlog_recover_item_t *item, 2682 xlog_recover_item_t *item,
2713 int pass) 2683 int pass)
2714{ 2684{
2715 xfs_mount_t *mp;
2716 xfs_efd_log_format_t *efd_formatp; 2685 xfs_efd_log_format_t *efd_formatp;
2717 xfs_efi_log_item_t *efip = NULL; 2686 xfs_efi_log_item_t *efip = NULL;
2718 xfs_log_item_t *lip; 2687 xfs_log_item_t *lip;
2719 int gen;
2720 __uint64_t efi_id; 2688 __uint64_t efi_id;
2689 struct xfs_ail_cursor cur;
2690 struct xfs_ail *ailp = log->l_ailp;
2721 2691
2722 if (pass == XLOG_RECOVER_PASS1) { 2692 if (pass == XLOG_RECOVER_PASS1) {
2723 return; 2693 return;
@@ -2734,25 +2704,26 @@ xlog_recover_do_efd_trans(
2734 * Search for the efi with the id in the efd format structure 2704 * Search for the efi with the id in the efd format structure
2735 * in the AIL. 2705 * in the AIL.
2736 */ 2706 */
2737 mp = log->l_mp; 2707 spin_lock(&ailp->xa_lock);
2738 spin_lock(&mp->m_ail_lock); 2708 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
2739 lip = xfs_trans_first_ail(mp, &gen);
2740 while (lip != NULL) { 2709 while (lip != NULL) {
2741 if (lip->li_type == XFS_LI_EFI) { 2710 if (lip->li_type == XFS_LI_EFI) {
2742 efip = (xfs_efi_log_item_t *)lip; 2711 efip = (xfs_efi_log_item_t *)lip;
2743 if (efip->efi_format.efi_id == efi_id) { 2712 if (efip->efi_format.efi_id == efi_id) {
2744 /* 2713 /*
2745 * xfs_trans_delete_ail() drops the 2714 * xfs_trans_ail_delete() drops the
2746 * AIL lock. 2715 * AIL lock.
2747 */ 2716 */
2748 xfs_trans_delete_ail(mp, lip); 2717 xfs_trans_ail_delete(ailp, lip);
2749 xfs_efi_item_free(efip); 2718 xfs_efi_item_free(efip);
2750 return; 2719 spin_lock(&ailp->xa_lock);
2720 break;
2751 } 2721 }
2752 } 2722 }
2753 lip = xfs_trans_next_ail(mp, lip, &gen, NULL); 2723 lip = xfs_trans_ail_cursor_next(ailp, &cur);
2754 } 2724 }
2755 spin_unlock(&mp->m_ail_lock); 2725 xfs_trans_ail_cursor_done(ailp, &cur);
2726 spin_unlock(&ailp->xa_lock);
2756} 2727}
2757 2728
2758/* 2729/*
@@ -3036,33 +3007,6 @@ abort_error:
3036} 3007}
3037 3008
3038/* 3009/*
3039 * Verify that once we've encountered something other than an EFI
3040 * in the AIL that there are no more EFIs in the AIL.
3041 */
3042#if defined(DEBUG)
3043STATIC void
3044xlog_recover_check_ail(
3045 xfs_mount_t *mp,
3046 xfs_log_item_t *lip,
3047 int gen)
3048{
3049 int orig_gen = gen;
3050
3051 do {
3052 ASSERT(lip->li_type != XFS_LI_EFI);
3053 lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
3054 /*
3055 * The check will be bogus if we restart from the
3056 * beginning of the AIL, so ASSERT that we don't.
3057 * We never should since we're holding the AIL lock
3058 * the entire time.
3059 */
3060 ASSERT(gen == orig_gen);
3061 } while (lip != NULL);
3062}
3063#endif /* DEBUG */
3064
3065/*
3066 * When this is called, all of the EFIs which did not have 3010 * When this is called, all of the EFIs which did not have
3067 * corresponding EFDs should be in the AIL. What we do now 3011 * corresponding EFDs should be in the AIL. What we do now
3068 * is free the extents associated with each one. 3012 * is free the extents associated with each one.
@@ -3086,20 +3030,23 @@ xlog_recover_process_efis(
3086{ 3030{
3087 xfs_log_item_t *lip; 3031 xfs_log_item_t *lip;
3088 xfs_efi_log_item_t *efip; 3032 xfs_efi_log_item_t *efip;
3089 int gen;
3090 xfs_mount_t *mp;
3091 int error = 0; 3033 int error = 0;
3034 struct xfs_ail_cursor cur;
3035 struct xfs_ail *ailp;
3092 3036
3093 mp = log->l_mp; 3037 ailp = log->l_ailp;
3094 spin_lock(&mp->m_ail_lock); 3038 spin_lock(&ailp->xa_lock);
3095 3039 lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
3096 lip = xfs_trans_first_ail(mp, &gen);
3097 while (lip != NULL) { 3040 while (lip != NULL) {
3098 /* 3041 /*
3099 * We're done when we see something other than an EFI. 3042 * We're done when we see something other than an EFI.
3043 * There should be no EFIs left in the AIL now.
3100 */ 3044 */
3101 if (lip->li_type != XFS_LI_EFI) { 3045 if (lip->li_type != XFS_LI_EFI) {
3102 xlog_recover_check_ail(mp, lip, gen); 3046#ifdef DEBUG
3047 for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
3048 ASSERT(lip->li_type != XFS_LI_EFI);
3049#endif
3103 break; 3050 break;
3104 } 3051 }
3105 3052
@@ -3108,18 +3055,20 @@ xlog_recover_process_efis(
3108 */ 3055 */
3109 efip = (xfs_efi_log_item_t *)lip; 3056 efip = (xfs_efi_log_item_t *)lip;
3110 if (efip->efi_flags & XFS_EFI_RECOVERED) { 3057 if (efip->efi_flags & XFS_EFI_RECOVERED) {
3111 lip = xfs_trans_next_ail(mp, lip, &gen, NULL); 3058 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3112 continue; 3059 continue;
3113 } 3060 }
3114 3061
3115 spin_unlock(&mp->m_ail_lock); 3062 spin_unlock(&ailp->xa_lock);
3116 error = xlog_recover_process_efi(mp, efip); 3063 error = xlog_recover_process_efi(log->l_mp, efip);
3064 spin_lock(&ailp->xa_lock);
3117 if (error) 3065 if (error)
3118 return error; 3066 goto out;
3119 spin_lock(&mp->m_ail_lock); 3067 lip = xfs_trans_ail_cursor_next(ailp, &cur);
3120 lip = xfs_trans_next_ail(mp, lip, &gen, NULL);
3121 } 3068 }
3122 spin_unlock(&mp->m_ail_lock); 3069out:
3070 xfs_trans_ail_cursor_done(ailp, &cur);
3071 spin_unlock(&ailp->xa_lock);
3123 return error; 3072 return error;
3124} 3073}
3125 3074
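The generation-counted AIL walk is replaced by a cursor that stays valid when xa_lock is dropped for blocking work, which is what lets xlog_recover_process_efis() call xlog_recover_process_efi() safely mid-traversal. The general shape of the cursor pattern, as used in the hunks above:

	struct xfs_ail_cursor	cur;
	xfs_log_item_t		*lip;

	spin_lock(&ailp->xa_lock);
	for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
	     lip != NULL;
	     lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
		/*
		 * Work on lip; xa_lock may be dropped and retaken
		 * here without invalidating the cursor.
		 */
	}
	xfs_trans_ail_cursor_done(ailp, &cur);
	spin_unlock(&ailp->xa_lock);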
@@ -3140,19 +3089,16 @@ xlog_recover_clear_agi_bucket(
3140 int error; 3089 int error;
3141 3090
3142 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); 3091 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
3143 error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); 3092 error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp),
3144 if (!error) 3093 0, 0, 0);
3145 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
3146 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
3147 XFS_FSS_TO_BB(mp, 1), 0, &agibp);
3148 if (error) 3094 if (error)
3149 goto out_abort; 3095 goto out_abort;
3150 3096
3151 error = EINVAL; 3097 error = xfs_read_agi(mp, tp, agno, &agibp);
3152 agi = XFS_BUF_TO_AGI(agibp); 3098 if (error)
3153 if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC)
3154 goto out_abort; 3099 goto out_abort;
3155 3100
3101 agi = XFS_BUF_TO_AGI(agibp);
3156 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 3102 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
3157 offset = offsetof(xfs_agi_t, agi_unlinked) + 3103 offset = offsetof(xfs_agi_t, agi_unlinked) +
3158 (sizeof(xfs_agino_t) * bucket); 3104 (sizeof(xfs_agino_t) * bucket);
@@ -3172,6 +3118,62 @@ out_error:
3172 return; 3118 return;
3173} 3119}
3174 3120
3121STATIC xfs_agino_t
3122xlog_recover_process_one_iunlink(
3123 struct xfs_mount *mp,
3124 xfs_agnumber_t agno,
3125 xfs_agino_t agino,
3126 int bucket)
3127{
3128 struct xfs_buf *ibp;
3129 struct xfs_dinode *dip;
3130 struct xfs_inode *ip;
3131 xfs_ino_t ino;
3132 int error;
3133
3134 ino = XFS_AGINO_TO_INO(mp, agno, agino);
3135 error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
3136 if (error)
3137 goto fail;
3138
3139 /*
3140 * Get the on disk inode to find the next inode in the bucket.
3141 */
3142 error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK);
3143 if (error)
3144 goto fail_iput;
3145
3146 ASSERT(ip->i_d.di_nlink == 0);
3147 ASSERT(ip->i_d.di_mode != 0);
3148
3149 /* setup for the next pass */
3150 agino = be32_to_cpu(dip->di_next_unlinked);
3151 xfs_buf_relse(ibp);
3152
3153 /*
3154 * Prevent any DMAPI event from being sent when the reference on
3155 * the inode is dropped.
3156 */
3157 ip->i_d.di_dmevmask = 0;
3158
3159 IRELE(ip);
3160 return agino;
3161
3162 fail_iput:
3163 IRELE(ip);
3164 fail:
3165 /*
3166 * We can't read in the inode this bucket points to, or this inode
3167 * is messed up. Just ditch this bucket of inodes. We will lose
3168 * some inodes and space, but at least we won't hang.
3169 *
3170 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
3171 * clear the inode pointer in the bucket.
3172 */
3173 xlog_recover_clear_agi_bucket(mp, agno, bucket);
3174 return NULLAGINO;
3175}
3176
3175/* 3177/*
3176 * xlog_iunlink_recover 3178 * xlog_iunlink_recover
3177 * 3179 *
@@ -3192,11 +3194,7 @@ xlog_recover_process_iunlinks(
3192 xfs_agnumber_t agno; 3194 xfs_agnumber_t agno;
3193 xfs_agi_t *agi; 3195 xfs_agi_t *agi;
3194 xfs_buf_t *agibp; 3196 xfs_buf_t *agibp;
3195 xfs_buf_t *ibp;
3196 xfs_dinode_t *dip;
3197 xfs_inode_t *ip;
3198 xfs_agino_t agino; 3197 xfs_agino_t agino;
3199 xfs_ino_t ino;
3200 int bucket; 3198 int bucket;
3201 int error; 3199 int error;
3202 uint mp_dmevmask; 3200 uint mp_dmevmask;
@@ -3213,22 +3211,21 @@ xlog_recover_process_iunlinks(
3213 /* 3211 /*
3214 * Find the agi for this ag. 3212 * Find the agi for this ag.
3215 */ 3213 */
3216 agibp = xfs_buf_read(mp->m_ddev_targp, 3214 error = xfs_read_agi(mp, NULL, agno, &agibp);
3217 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 3215 if (error) {
3218 XFS_FSS_TO_BB(mp, 1), 0); 3216 /*
3219 if (XFS_BUF_ISERROR(agibp)) { 3217 * AGI is b0rked. Don't process it.
3220 xfs_ioerror_alert("xlog_recover_process_iunlinks(#1)", 3218 *
3221 log->l_mp, agibp, 3219 * We should probably mark the filesystem as corrupt
3222 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp))); 3220 * after we've recovered all the ag's we can....
3221 */
3222 continue;
3223 } 3223 }
3224 agi = XFS_BUF_TO_AGI(agibp); 3224 agi = XFS_BUF_TO_AGI(agibp);
3225 ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agi->agi_magicnum));
3226 3225
3227 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { 3226 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
3228
3229 agino = be32_to_cpu(agi->agi_unlinked[bucket]); 3227 agino = be32_to_cpu(agi->agi_unlinked[bucket]);
3230 while (agino != NULLAGINO) { 3228 while (agino != NULLAGINO) {
3231
3232 /* 3229 /*
3233 * Release the agi buffer so that it can 3230 * Release the agi buffer so that it can
3234 * be acquired in the normal course of the 3231 * be acquired in the normal course of the
@@ -3236,87 +3233,17 @@ xlog_recover_process_iunlinks(
3236 */ 3233 */
3237 xfs_buf_relse(agibp); 3234 xfs_buf_relse(agibp);
3238 3235
3239 ino = XFS_AGINO_TO_INO(mp, agno, agino); 3236 agino = xlog_recover_process_one_iunlink(mp,
3240 error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); 3237 agno, agino, bucket);
3241 ASSERT(error || (ip != NULL));
3242
3243 if (!error) {
3244 /*
3245 * Get the on disk inode to find the
3246 * next inode in the bucket.
3247 */
3248 error = xfs_itobp(mp, NULL, ip, &dip,
3249 &ibp, 0, 0,
3250 XFS_BUF_LOCK);
3251 ASSERT(error || (dip != NULL));
3252 }
3253
3254 if (!error) {
3255 ASSERT(ip->i_d.di_nlink == 0);
3256
3257 /* setup for the next pass */
3258 agino = be32_to_cpu(
3259 dip->di_next_unlinked);
3260 xfs_buf_relse(ibp);
3261 /*
3262 * Prevent any DMAPI event from
3263 * being sent when the
3264 * reference on the inode is
3265 * dropped.
3266 */
3267 ip->i_d.di_dmevmask = 0;
3268
3269 /*
3270 * If this is a new inode, handle
3271 * it specially. Otherwise,
3272 * just drop our reference to the
3273 * inode. If there are no
3274 * other references, this will
3275 * send the inode to
3276 * xfs_inactive() which will
3277 * truncate the file and free
3278 * the inode.
3279 */
3280 if (ip->i_d.di_mode == 0)
3281 xfs_iput_new(ip, 0);
3282 else
3283 IRELE(ip);
3284 } else {
3285 /*
3286 * We can't read in the inode
3287 * this bucket points to, or
3288 * this inode is messed up. Just
3289 * ditch this bucket of inodes. We
3290 * will lose some inodes and space,
3291 * but at least we won't hang. Call
3292 * xlog_recover_clear_agi_bucket()
3293 * to perform a transaction to clear
3294 * the inode pointer in the bucket.
3295 */
3296 xlog_recover_clear_agi_bucket(mp, agno,
3297 bucket);
3298
3299 agino = NULLAGINO;
3300 }
3301 3238
3302 /* 3239 /*
3303 * Reacquire the agibuffer and continue around 3240 * Reacquire the agibuffer and continue around
3304 * the loop. 3241 * the loop. This should never fail as we know
3242 * the buffer was good earlier on.
3305 */ 3243 */
3306 agibp = xfs_buf_read(mp->m_ddev_targp, 3244 error = xfs_read_agi(mp, NULL, agno, &agibp);
3307 XFS_AG_DADDR(mp, agno, 3245 ASSERT(error == 0);
3308 XFS_AGI_DADDR(mp)),
3309 XFS_FSS_TO_BB(mp, 1), 0);
3310 if (XFS_BUF_ISERROR(agibp)) {
3311 xfs_ioerror_alert(
3312 "xlog_recover_process_iunlinks(#2)",
3313 log->l_mp, agibp,
3314 XFS_AG_DADDR(mp, agno,
3315 XFS_AGI_DADDR(mp)));
3316 }
3317 agi = XFS_BUF_TO_AGI(agibp); 3246 agi = XFS_BUF_TO_AGI(agibp);
3318 ASSERT(XFS_AGI_MAGIC == be32_to_cpu(
3319 agi->agi_magicnum));
3320 } 3247 }
3321 } 3248 }
3322 3249
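
The two hunks above collapse the open-coded bucket walk into xlog_recover_process_one_iunlink(), which returns the next agino in the chain and NULLAGINO once the chain ends or a bad bucket has been ditched. A minimal sketch of the resulting walk, using only names visible in these hunks (the AG-loop bookkeeping and the final buffer release are assumptions, not shown here):

    for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
            if (xfs_read_agi(mp, NULL, agno, &agibp))
                    continue;               /* AGI unreadable: skip this AG */
            agi = XFS_BUF_TO_AGI(agibp);
            for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
                    agino = be32_to_cpu(agi->agi_unlinked[bucket]);
                    while (agino != NULLAGINO) {
                            /* drop the AGI buffer while the inode is processed */
                            xfs_buf_relse(agibp);
                            agino = xlog_recover_process_one_iunlink(mp, agno,
                                            agino, bucket);
                            /* reacquire; the buffer was good earlier, so this
                             * is not expected to fail */
                            error = xfs_read_agi(mp, NULL, agno, &agibp);
                            ASSERT(error == 0);
                            agi = XFS_BUF_TO_AGI(agibp);
                    }
            }
            xfs_buf_relse(agibp);           /* assumed: release before next AG */
    }
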
@@ -3367,7 +3294,6 @@ xlog_pack_data(
3367 int size = iclog->ic_offset + roundoff; 3294 int size = iclog->ic_offset + roundoff;
3368 __be32 cycle_lsn; 3295 __be32 cycle_lsn;
3369 xfs_caddr_t dp; 3296 xfs_caddr_t dp;
3370 xlog_in_core_2_t *xhdr;
3371 3297
3372 xlog_pack_data_checksum(log, iclog, size); 3298 xlog_pack_data_checksum(log, iclog, size);
3373 3299
@@ -3382,7 +3308,8 @@ xlog_pack_data(
3382 } 3308 }
3383 3309
3384 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3310 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3385 xhdr = (xlog_in_core_2_t *)&iclog->ic_header; 3311 xlog_in_core_2_t *xhdr = iclog->ic_data;
3312
3386 for ( ; i < BTOBB(size); i++) { 3313 for ( ; i < BTOBB(size); i++) {
3387 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3314 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3388 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3315 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3440,7 +3367,6 @@ xlog_unpack_data(
3440 xlog_t *log) 3367 xlog_t *log)
3441{ 3368{
3442 int i, j, k; 3369 int i, j, k;
3443 xlog_in_core_2_t *xhdr;
3444 3370
3445 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && 3371 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
3446 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { 3372 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
@@ -3449,7 +3375,7 @@ xlog_unpack_data(
3449 } 3375 }
3450 3376
3451 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 3377 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3452 xhdr = (xlog_in_core_2_t *)rhead; 3378 xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
3453 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { 3379 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
3454 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3380 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3455 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3381 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -4003,11 +3929,8 @@ xlog_recover_check_summary(
4003{ 3929{
4004 xfs_mount_t *mp; 3930 xfs_mount_t *mp;
4005 xfs_agf_t *agfp; 3931 xfs_agf_t *agfp;
4006 xfs_agi_t *agip;
4007 xfs_buf_t *agfbp; 3932 xfs_buf_t *agfbp;
4008 xfs_buf_t *agibp; 3933 xfs_buf_t *agibp;
4009 xfs_daddr_t agfdaddr;
4010 xfs_daddr_t agidaddr;
4011 xfs_buf_t *sbbp; 3934 xfs_buf_t *sbbp;
4012#ifdef XFS_LOUD_RECOVERY 3935#ifdef XFS_LOUD_RECOVERY
4013 xfs_sb_t *sbp; 3936 xfs_sb_t *sbp;
@@ -4016,6 +3939,7 @@ xlog_recover_check_summary(
4016 __uint64_t freeblks; 3939 __uint64_t freeblks;
4017 __uint64_t itotal; 3940 __uint64_t itotal;
4018 __uint64_t ifree; 3941 __uint64_t ifree;
3942 int error;
4019 3943
4020 mp = log->l_mp; 3944 mp = log->l_mp;
4021 3945
@@ -4023,37 +3947,27 @@ xlog_recover_check_summary(
4023 itotal = 0LL; 3947 itotal = 0LL;
4024 ifree = 0LL; 3948 ifree = 0LL;
4025 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 3949 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
4026 agfdaddr = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)); 3950 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
4027 agfbp = xfs_buf_read(mp->m_ddev_targp, agfdaddr, 3951 if (error) {
4028 XFS_FSS_TO_BB(mp, 1), 0); 3952 xfs_fs_cmn_err(CE_ALERT, mp,
4029 if (XFS_BUF_ISERROR(agfbp)) { 3953 "xlog_recover_check_summary(agf): "
4030 xfs_ioerror_alert("xlog_recover_check_summary(agf)", 3954 "agf read failed agno %d error %d",
4031 mp, agfbp, agfdaddr); 3955 agno, error);
4032 } 3956 } else {
4033 agfp = XFS_BUF_TO_AGF(agfbp); 3957 agfp = XFS_BUF_TO_AGF(agfbp);
4034 ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum)); 3958 freeblks += be32_to_cpu(agfp->agf_freeblks) +
4035 ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum))); 3959 be32_to_cpu(agfp->agf_flcount);
4036 ASSERT(be32_to_cpu(agfp->agf_seqno) == agno); 3960 xfs_buf_relse(agfbp);
4037
4038 freeblks += be32_to_cpu(agfp->agf_freeblks) +
4039 be32_to_cpu(agfp->agf_flcount);
4040 xfs_buf_relse(agfbp);
4041
4042 agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp));
4043 agibp = xfs_buf_read(mp->m_ddev_targp, agidaddr,
4044 XFS_FSS_TO_BB(mp, 1), 0);
4045 if (XFS_BUF_ISERROR(agibp)) {
4046 xfs_ioerror_alert("xlog_recover_check_summary(agi)",
4047 mp, agibp, agidaddr);
4048 } 3961 }
4049 agip = XFS_BUF_TO_AGI(agibp);
4050 ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum));
4051 ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum)));
4052 ASSERT(be32_to_cpu(agip->agi_seqno) == agno);
4053 3962
4054 itotal += be32_to_cpu(agip->agi_count); 3963 error = xfs_read_agi(mp, NULL, agno, &agibp);
4055 ifree += be32_to_cpu(agip->agi_freecount); 3964 if (!error) {
4056 xfs_buf_relse(agibp); 3965 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp);
3966
3967 itotal += be32_to_cpu(agi->agi_count);
3968 ifree += be32_to_cpu(agi->agi_freecount);
3969 xfs_buf_relse(agibp);
3970 }
4057 } 3971 }
4058 3972
4059 sbbp = xfs_getsb(mp, 0); 3973 sbbp = xfs_getsb(mp, 0);
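
Throughout this file the manual xfs_buf_read() calls plus magic-number ASSERTs are replaced by xfs_read_agf()/xfs_read_agi(), whose bodies are not part of this diff. A plausible shape for the AGI variant, reconstructed from the callers above purely as a sketch (the transaction path and the exact error codes are assumptions):

    int
    xfs_read_agi(
            struct xfs_mount        *mp,
            struct xfs_trans        *tp,    /* NULL in these callers; trans path omitted */
            xfs_agnumber_t          agno,
            struct xfs_buf          **bpp)
    {
            struct xfs_buf  *bp;
            struct xfs_agi  *agi;

            bp = xfs_buf_read(mp->m_ddev_targp,
                            XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
                            XFS_FSS_TO_BB(mp, 1), 0);
            if (XFS_BUF_ISERROR(bp))
                    return XFS_ERROR(EIO);          /* assumed errno */
            agi = XFS_BUF_TO_AGI(bp);
            if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) {
                    xfs_buf_relse(bp);
                    return XFS_ERROR(EFSCORRUPTED); /* assumed errno */
            }
            *bpp = bp;
            return 0;
    }

Centralising the validation is what lets the callers above drop their per-site ASSERT(XFS_AGI_MAGIC == ...) checks and handle a plain error return instead.
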
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 15f5dd22fbb2..3c97c6463a4e 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -567,8 +567,6 @@ xfs_readsb(xfs_mount_t *mp, int flags)
567STATIC void 567STATIC void
568xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) 568xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
569{ 569{
570 int i;
571
572 mp->m_agfrotor = mp->m_agirotor = 0; 570 mp->m_agfrotor = mp->m_agirotor = 0;
573 spin_lock_init(&mp->m_agirotor_lock); 571 spin_lock_init(&mp->m_agirotor_lock);
574 mp->m_maxagi = mp->m_sb.sb_agcount; 572 mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -577,12 +575,10 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
577 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 575 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
578 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 576 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
579 mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 577 mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
580 mp->m_litino = sbp->sb_inodesize - 578 mp->m_litino = sbp->sb_inodesize - sizeof(struct xfs_dinode);
581 ((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
582 mp->m_blockmask = sbp->sb_blocksize - 1; 579 mp->m_blockmask = sbp->sb_blocksize - 1;
583 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 580 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
584 mp->m_blockwmask = mp->m_blockwsize - 1; 581 mp->m_blockwmask = mp->m_blockwsize - 1;
585 INIT_LIST_HEAD(&mp->m_del_inodes);
586 582
587 /* 583 /*
588 * Setup for attributes, in case they get created. 584 * Setup for attributes, in case they get created.
@@ -605,24 +601,20 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
605 } 601 }
606 ASSERT(mp->m_attroffset < XFS_LITINO(mp)); 602 ASSERT(mp->m_attroffset < XFS_LITINO(mp));
607 603
608 for (i = 0; i < 2; i++) { 604 mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
609 mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 605 mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
610 xfs_alloc, i == 0); 606 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
611 mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 607 mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
612 xfs_alloc, i == 0); 608
613 } 609 mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
614 for (i = 0; i < 2; i++) { 610 mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
615 mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 611 mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
616 xfs_bmbt, i == 0); 612 mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
617 mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 613
618 xfs_bmbt, i == 0); 614 mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
619 } 615 mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
620 for (i = 0; i < 2; i++) { 616 mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
621 mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 617 mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
622 xfs_inobt, i == 0);
623 mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
624 xfs_inobt, i == 0);
625 }
626 618
627 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 619 mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
628 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, 620 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
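
The unrolled assignments above replace the generic XFS_BTREE_BLOCK_MAXRECS/MINRECS macros with per-btree helpers, and the hunk itself shows the invariant that minrecs is always maxrecs/2. The geometry those helpers encode is roughly the following; the header, record, key, and pointer sizes here are illustrative assumptions, not values taken from this diff:

    /* records fill a leaf block; key/pointer pairs fill a node block */
    static unsigned int
    btree_maxrecs(unsigned int blocklen, unsigned int hdrlen,
                  unsigned int recsize, unsigned int keysize,
                  unsigned int ptrsize, int leaf)
    {
            blocklen -= hdrlen;
            if (leaf)
                    return blocklen / recsize;
            return blocklen / (keysize + ptrsize);
    }

    /* e.g. mp->m_alloc_mxr[0] ~ btree_maxrecs(blocksize, hdr, 8, 8, 4, 1),
     * and mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2, as in the hunk. */
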
@@ -1228,6 +1220,16 @@ xfs_unmountfs(
1228 __uint64_t resblks; 1220 __uint64_t resblks;
1229 int error; 1221 int error;
1230 1222
1223 /*
1224 * Release the dquots that the rootinode, rbmino and rsumino might be holding,
1225 * and release the quota inodes.
1226 */
1227 XFS_QM_UNMOUNT(mp);
1228
1229 if (mp->m_rbmip)
1230 IRELE(mp->m_rbmip);
1231 if (mp->m_rsumip)
1232 IRELE(mp->m_rsumip);
1231 IRELE(mp->m_rootip); 1233 IRELE(mp->m_rootip);
1232 1234
1233 /* 1235 /*
@@ -1241,7 +1243,7 @@ xfs_unmountfs(
1241 * need to force the log first. 1243 * need to force the log first.
1242 */ 1244 */
1243 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1245 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
1244 xfs_iflush_all(mp); 1246 xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
1245 1247
1246 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 1248 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
1247 1249
@@ -1288,11 +1290,6 @@ xfs_unmountfs(
1288 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1290 xfs_unmountfs_wait(mp); /* wait for async bufs */
1289 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1291 xfs_log_unmount(mp); /* Done! No more fs ops. */
1290 1292
1291 /*
1292 * All inodes from this mount point should be freed.
1293 */
1294 ASSERT(mp->m_inodes == NULL);
1295
1296 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1293 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
1297 uuid_table_remove(&mp->m_sb.sb_uuid); 1294 uuid_table_remove(&mp->m_sb.sb_uuid);
1298 1295
@@ -1365,24 +1362,6 @@ xfs_log_sbcount(
1365 return error; 1362 return error;
1366} 1363}
1367 1364
1368STATIC void
1369xfs_mark_shared_ro(
1370 xfs_mount_t *mp,
1371 xfs_buf_t *bp)
1372{
1373 xfs_dsb_t *sb = XFS_BUF_TO_SBP(bp);
1374 __uint16_t version;
1375
1376 if (!(sb->sb_flags & XFS_SBF_READONLY))
1377 sb->sb_flags |= XFS_SBF_READONLY;
1378
1379 version = be16_to_cpu(sb->sb_versionnum);
1380 if ((version & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4 ||
1381 !(version & XFS_SB_VERSION_SHAREDBIT))
1382 version |= XFS_SB_VERSION_SHAREDBIT;
1383 sb->sb_versionnum = cpu_to_be16(version);
1384}
1385
1386int 1365int
1387xfs_unmountfs_writesb(xfs_mount_t *mp) 1366xfs_unmountfs_writesb(xfs_mount_t *mp)
1388{ 1367{
@@ -1398,12 +1377,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1398 1377
1399 sbp = xfs_getsb(mp, 0); 1378 sbp = xfs_getsb(mp, 0);
1400 1379
1401 /*
1402 * mark shared-readonly if desired
1403 */
1404 if (mp->m_mk_sharedro)
1405 xfs_mark_shared_ro(mp, sbp);
1406
1407 XFS_BUF_UNDONE(sbp); 1380 XFS_BUF_UNDONE(sbp);
1408 XFS_BUF_UNREAD(sbp); 1381 XFS_BUF_UNREAD(sbp);
1409 XFS_BUF_UNDELAYWRITE(sbp); 1382 XFS_BUF_UNDELAYWRITE(sbp);
@@ -1415,8 +1388,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
1415 if (error) 1388 if (error)
1416 xfs_ioerror_alert("xfs_unmountfs_writesb", 1389 xfs_ioerror_alert("xfs_unmountfs_writesb",
1417 mp, sbp, XFS_BUF_ADDR(sbp)); 1390 mp, sbp, XFS_BUF_ADDR(sbp));
1418 if (error && mp->m_mk_sharedro)
1419 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
1420 xfs_buf_relse(sbp); 1391 xfs_buf_relse(sbp);
1421 } 1392 }
1422 return error; 1393 return error;
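
Taken together, the xfs_unmountfs() hunks change the teardown order: quota state is torn down first (XFS_QM_UNMOUNT now returns void, per the xfs_qmunmount_t change below), the realtime bitmap and summary inodes are released alongside the root inode, and xfs_iflush_all() gives way to a real reclaim pass. Condensed from the interleaved hunks, the sequence now runs:

    XFS_QM_UNMOUNT(mp);                     /* drop quota inode references */
    if (mp->m_rbmip)
            IRELE(mp->m_rbmip);             /* rt bitmap inode */
    if (mp->m_rsumip)
            IRELE(mp->m_rsumip);            /* rt summary inode */
    IRELE(mp->m_rootip);

    xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
    xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);    /* was xfs_iflush_all() */
    XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
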
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f3c1024b1241..c1e028467327 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,7 +18,6 @@
18#ifndef __XFS_MOUNT_H__ 18#ifndef __XFS_MOUNT_H__
19#define __XFS_MOUNT_H__ 19#define __XFS_MOUNT_H__
20 20
21
22typedef struct xfs_trans_reservations { 21typedef struct xfs_trans_reservations {
23 uint tr_write; /* extent alloc trans */ 22 uint tr_write; /* extent alloc trans */
24 uint tr_itruncate; /* truncate trans */ 23 uint tr_itruncate; /* truncate trans */
@@ -44,14 +43,16 @@ typedef struct xfs_trans_reservations {
44} xfs_trans_reservations_t; 43} xfs_trans_reservations_t;
45 44
46#ifndef __KERNEL__ 45#ifndef __KERNEL__
47/* 46
48 * Moved here from xfs_ag.h to avoid reordering header files
49 */
50#define XFS_DADDR_TO_AGNO(mp,d) \ 47#define XFS_DADDR_TO_AGNO(mp,d) \
51 ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) 48 ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))
52#define XFS_DADDR_TO_AGBNO(mp,d) \ 49#define XFS_DADDR_TO_AGBNO(mp,d) \
53 ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks)) 50 ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
54#else 51
52#else /* __KERNEL__ */
53
54#include "xfs_sync.h"
55
55struct cred; 56struct cred;
56struct log; 57struct log;
57struct xfs_mount_args; 58struct xfs_mount_args;
@@ -62,6 +63,7 @@ struct xfs_extdelta;
62struct xfs_swapext; 63struct xfs_swapext;
63struct xfs_mru_cache; 64struct xfs_mru_cache;
64struct xfs_nameops; 65struct xfs_nameops;
66struct xfs_ail;
65 67
66/* 68/*
67 * Prototypes and functions for the Data Migration subsystem. 69 * Prototypes and functions for the Data Migration subsystem.
@@ -115,7 +117,7 @@ struct xfs_quotainfo;
115 117
116typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); 118typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
117typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); 119typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
118typedef int (*xfs_qmunmount_t)(struct xfs_mount *); 120typedef void (*xfs_qmunmount_t)(struct xfs_mount *);
119typedef void (*xfs_qmdone_t)(struct xfs_mount *); 121typedef void (*xfs_qmdone_t)(struct xfs_mount *);
120typedef void (*xfs_dqrele_t)(struct xfs_dquot *); 122typedef void (*xfs_dqrele_t)(struct xfs_dquot *);
121typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); 123typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint);
@@ -132,7 +134,7 @@ typedef struct xfs_dquot * (*xfs_dqvopchown_t)(
132 struct xfs_dquot **, struct xfs_dquot *); 134 struct xfs_dquot **, struct xfs_dquot *);
133typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *, 135typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
134 struct xfs_dquot *, struct xfs_dquot *, uint); 136 struct xfs_dquot *, struct xfs_dquot *, uint);
135typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, bhv_statvfs_t *); 137typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
136typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); 138typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags);
137typedef int (*xfs_quotactl_t)(struct xfs_mount *, int, int, xfs_caddr_t); 139typedef int (*xfs_quotactl_t)(struct xfs_mount *, int, int, xfs_caddr_t);
138 140
@@ -223,18 +225,10 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
223#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) 225#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
224#endif 226#endif
225 227
226typedef struct xfs_ail {
227 struct list_head xa_ail;
228 uint xa_gen;
229 struct task_struct *xa_task;
230 xfs_lsn_t xa_target;
231} xfs_ail_t;
232
233typedef struct xfs_mount { 228typedef struct xfs_mount {
234 struct super_block *m_super; 229 struct super_block *m_super;
235 xfs_tid_t m_tid; /* next unused tid for fs */ 230 xfs_tid_t m_tid; /* next unused tid for fs */
236 spinlock_t m_ail_lock; /* fs AIL mutex */ 231 struct xfs_ail *m_ail; /* fs active log item list */
237 xfs_ail_t m_ail; /* fs active log item list */
238 xfs_sb_t m_sb; /* copy of fs superblock */ 232 xfs_sb_t m_sb; /* copy of fs superblock */
239 spinlock_t m_sb_lock; /* sb counter lock */ 233 spinlock_t m_sb_lock; /* sb counter lock */
240 struct xfs_buf *m_sb_bp; /* buffer for superblock */ 234 struct xfs_buf *m_sb_bp; /* buffer for superblock */
@@ -247,10 +241,6 @@ typedef struct xfs_mount {
247 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ 241 xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
248 spinlock_t m_agirotor_lock;/* .. and lock protecting it */ 242 spinlock_t m_agirotor_lock;/* .. and lock protecting it */
249 xfs_agnumber_t m_maxagi; /* highest inode alloc group */ 243 xfs_agnumber_t m_maxagi; /* highest inode alloc group */
250 struct xfs_inode *m_inodes; /* active inode list */
251 struct list_head m_del_inodes; /* inodes to reclaim */
252 mutex_t m_ilock; /* inode list mutex */
253 uint m_ireclaims; /* count of calls to reclaim*/
254 uint m_readio_log; /* min read size log bytes */ 244 uint m_readio_log; /* min read size log bytes */
255 uint m_readio_blocks; /* min read size blocks */ 245 uint m_readio_blocks; /* min read size blocks */
256 uint m_writeio_log; /* min write size log bytes */ 246 uint m_writeio_log; /* min write size log bytes */
@@ -267,7 +257,6 @@ typedef struct xfs_mount {
267 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ 257 xfs_buftarg_t *m_ddev_targp; /* saves taking the address */
268 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ 258 xfs_buftarg_t *m_logdev_targp;/* ptr to log device */
269 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ 259 xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */
270 __uint8_t m_dircook_elog; /* log d-cookie entry bits */
271 __uint8_t m_blkbit_log; /* blocklog + NBBY */ 260 __uint8_t m_blkbit_log; /* blocklog + NBBY */
272 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ 261 __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
273 __uint8_t m_agno_log; /* log #ag's */ 262 __uint8_t m_agno_log; /* log #ag's */
@@ -276,12 +265,12 @@ typedef struct xfs_mount {
276 uint m_blockmask; /* sb_blocksize-1 */ 265 uint m_blockmask; /* sb_blocksize-1 */
277 uint m_blockwsize; /* sb_blocksize in words */ 266 uint m_blockwsize; /* sb_blocksize in words */
278 uint m_blockwmask; /* blockwsize-1 */ 267 uint m_blockwmask; /* blockwsize-1 */
279 uint m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */ 268 uint m_alloc_mxr[2]; /* max alloc btree records */
280 uint m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */ 269 uint m_alloc_mnr[2]; /* min alloc btree records */
281 uint m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */ 270 uint m_bmap_dmxr[2]; /* max bmap btree records */
282 uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */ 271 uint m_bmap_dmnr[2]; /* min bmap btree records */
283 uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */ 272 uint m_inobt_mxr[2]; /* max inobt btree records */
284 uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */ 273 uint m_inobt_mnr[2]; /* min inobt btree records */
285 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 274 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
286 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 275 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
287 uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ 276 uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */
@@ -312,9 +301,6 @@ typedef struct xfs_mount {
312 int m_sinoalign; /* stripe unit inode alignment */ 301 int m_sinoalign; /* stripe unit inode alignment */
313 int m_attr_magicpct;/* 37% of the blocksize */ 302 int m_attr_magicpct;/* 37% of the blocksize */
314 int m_dir_magicpct; /* 37% of the dir blocksize */ 303 int m_dir_magicpct; /* 37% of the dir blocksize */
315 __uint8_t m_mk_sharedro; /* mark shared ro on unmount */
316 __uint8_t m_inode_quiesce;/* call quiesce on new inodes.
317 field governed by m_ilock */
318 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 304 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
319 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ 305 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
320 int m_dirblksize; /* directory block sz--bytes */ 306 int m_dirblksize; /* directory block sz--bytes */
@@ -362,7 +348,6 @@ typedef struct xfs_mount {
362#define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ 348#define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */
363#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ 349#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */
364#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ 350#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
365#define XFS_MOUNT_SHARED (1ULL << 11) /* shared mount */
366#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */ 351#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */
367#define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */ 352#define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */
368 /* osyncisdsync is now default*/ 353 /* osyncisdsync is now default*/
@@ -439,6 +424,16 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
439#define xfs_force_shutdown(m,f) \ 424#define xfs_force_shutdown(m,f) \
440 xfs_do_force_shutdown(m, f, __FILE__, __LINE__) 425 xfs_do_force_shutdown(m, f, __FILE__, __LINE__)
441 426
427#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
428#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
429#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */
430#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */
431#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
432#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
433
434#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen)
435#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l))
436
442/* 437/*
443 * Flags for xfs_mountfs 438 * Flags for xfs_mountfs
444 */ 439 */
@@ -508,14 +503,12 @@ typedef struct xfs_mod_sb {
508#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) 503#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
509#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) 504#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
510 505
511extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
512extern int xfs_log_sbcount(xfs_mount_t *, uint); 506extern int xfs_log_sbcount(xfs_mount_t *, uint);
513extern int xfs_mountfs(xfs_mount_t *mp); 507extern int xfs_mountfs(xfs_mount_t *mp);
514extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 508extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
515 509
516extern void xfs_unmountfs(xfs_mount_t *); 510extern void xfs_unmountfs(xfs_mount_t *);
517extern int xfs_unmountfs_writesb(xfs_mount_t *); 511extern int xfs_unmountfs_writesb(xfs_mount_t *);
518extern int xfs_unmount_flush(xfs_mount_t *, int);
519extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 512extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
520extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, 513extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
521 int64_t, int); 514 int64_t, int);
@@ -525,20 +518,20 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
525extern int xfs_readsb(xfs_mount_t *, int); 518extern int xfs_readsb(xfs_mount_t *, int);
526extern void xfs_freesb(xfs_mount_t *); 519extern void xfs_freesb(xfs_mount_t *);
527extern int xfs_fs_writable(xfs_mount_t *); 520extern int xfs_fs_writable(xfs_mount_t *);
528extern int xfs_syncsub(xfs_mount_t *, int, int *);
529extern int xfs_sync_inodes(xfs_mount_t *, int, int *);
530extern xfs_agnumber_t xfs_initialize_perag(xfs_mount_t *, xfs_agnumber_t);
531extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
532extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
533extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); 521extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
534 522
535extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *); 523extern int xfs_dmops_get(struct xfs_mount *);
536extern void xfs_dmops_put(struct xfs_mount *); 524extern void xfs_dmops_put(struct xfs_mount *);
537extern int xfs_qmops_get(struct xfs_mount *, struct xfs_mount_args *); 525extern int xfs_qmops_get(struct xfs_mount *);
538extern void xfs_qmops_put(struct xfs_mount *); 526extern void xfs_qmops_put(struct xfs_mount *);
539 527
540extern struct xfs_dmops xfs_dmcore_xfs; 528extern struct xfs_dmops xfs_dmcore_xfs;
541 529
542#endif /* __KERNEL__ */ 530#endif /* __KERNEL__ */
543 531
532extern void xfs_mod_sb(struct xfs_trans *, __int64_t);
533extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t);
534extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
535extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
536
544#endif /* __XFS_MOUNT_H__ */ 537#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index a294e58db8dd..27f80581520a 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -28,7 +28,6 @@
28#include "xfs_mount.h" 28#include "xfs_mount.h"
29#include "xfs_quota.h" 29#include "xfs_quota.h"
30#include "xfs_error.h" 30#include "xfs_error.h"
31#include "xfs_clnt.h"
32 31
33 32
34STATIC struct xfs_dquot * 33STATIC struct xfs_dquot *
@@ -131,9 +130,9 @@ static struct xfs_qmops xfs_qmcore_stub = {
131}; 130};
132 131
133int 132int
134xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) 133xfs_qmops_get(struct xfs_mount *mp)
135{ 134{
136 if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) { 135 if (XFS_IS_QUOTA_RUNNING(mp)) {
137#ifdef CONFIG_XFS_QUOTA 136#ifdef CONFIG_XFS_QUOTA
138 mp->m_qm_ops = &xfs_qmcore_xfs; 137 mp->m_qm_ops = &xfs_qmcore_xfs;
139#else 138#else
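
The quota-ops selection now keys off the mount's own flags instead of the parsed mount arguments. Filling in the lines the hunk elides, the function plausibly reads as follows; the !CONFIG_XFS_QUOTA branch and the function's tail are reconstructions, not shown in this diff:

    int
    xfs_qmops_get(struct xfs_mount *mp)
    {
            if (XFS_IS_QUOTA_RUNNING(mp)) {
    #ifdef CONFIG_XFS_QUOTA
                    mp->m_qm_ops = &xfs_qmcore_xfs;
    #else
                    /* quota requested but not built in: refuse the mount */
                    return XFS_ERROR(EINVAL);       /* assumed return */
    #endif
            } else {
                    mp->m_qm_ops = &xfs_qmcore_stub;
            }
            return 0;
    }
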
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 12c4ec775af8..48965ecaa155 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -84,11 +84,9 @@ typedef struct xfs_dqblk {
84#define XFS_DQ_USER 0x0001 /* a user quota */ 84#define XFS_DQ_USER 0x0001 /* a user quota */
85#define XFS_DQ_PROJ 0x0002 /* project quota */ 85#define XFS_DQ_PROJ 0x0002 /* project quota */
86#define XFS_DQ_GROUP 0x0004 /* a group quota */ 86#define XFS_DQ_GROUP 0x0004 /* a group quota */
87#define XFS_DQ_FLOCKED 0x0008 /* flush lock taken */ 87#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
88#define XFS_DQ_DIRTY 0x0010 /* dquot is dirty */ 88#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */
89#define XFS_DQ_WANT 0x0020 /* for lookup/reclaim race */ 89#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */
90#define XFS_DQ_INACTIVE 0x0040 /* dq off mplist & hashlist */
91#define XFS_DQ_MARKER 0x0080 /* sentinel */
92 90
93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) 91#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
94 92
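
With XFS_DQ_FLOCKED and XFS_DQ_MARKER gone, the remaining dquot flags are repacked into a dense mask; the type bits stay in the low three positions, so XFS_DQ_ALLTYPES is unchanged. A quick standalone check of the new values shown in the hunk:

    #include <assert.h>

    #define XFS_DQ_USER     0x0001
    #define XFS_DQ_PROJ     0x0002
    #define XFS_DQ_GROUP    0x0004
    #define XFS_DQ_DIRTY    0x0008  /* was 0x0010 */
    #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)

    int main(void)
    {
            /* a dquot carries exactly one type bit plus state bits */
            unsigned int dq_flags = XFS_DQ_GROUP | XFS_DQ_DIRTY;

            assert((dq_flags & XFS_DQ_ALLTYPES) == XFS_DQ_GROUP);
            return 0;
    }
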
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index c903130be7fd..86471bb40fd4 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -42,31 +42,6 @@
42 42
43 43
44/* 44/*
45 * Given an array of up to 4 inode pointers, unlock the pointed to inodes.
46 * If there are fewer than 4 entries in the array, the empty entries will
47 * be at the end and will have NULL pointers in them.
48 */
49STATIC void
50xfs_rename_unlock4(
51 xfs_inode_t **i_tab,
52 uint lock_mode)
53{
54 int i;
55
56 xfs_iunlock(i_tab[0], lock_mode);
57 for (i = 1; i < 4; i++) {
58 if (i_tab[i] == NULL)
59 break;
60
61 /*
62 * Watch out for duplicate entries in the table.
63 */
64 if (i_tab[i] != i_tab[i-1])
65 xfs_iunlock(i_tab[i], lock_mode);
66 }
67}
68
69/*
70 * Enter all inodes for a rename transaction into a sorted array. 45 * Enter all inodes for a rename transaction into a sorted array.
71 */ 46 */
72STATIC void 47STATIC void
@@ -205,19 +180,6 @@ xfs_rename(
205 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); 180 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
206 181
207 /* 182 /*
208 * If we are using project inheritance, we only allow renames
209 * into our tree when the project IDs are the same; else the
210 * tree quota mechanism would be circumvented.
211 */
212 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
213 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
214 error = XFS_ERROR(EXDEV);
215 xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL);
216 xfs_trans_cancel(tp, cancel_flags);
217 goto std_return;
218 }
219
220 /*
221 * Join all the inodes to the transaction. From this point on, 183 * Join all the inodes to the transaction. From this point on,
222 * we can rely on either trans_commit or trans_cancel to unlock 184 * we can rely on either trans_commit or trans_cancel to unlock
223 * them. Note that we need to add a vnode reference to the 185 * them. Note that we need to add a vnode reference to the
@@ -242,6 +204,17 @@ xfs_rename(
242 } 204 }
243 205
244 /* 206 /*
207 * If we are using project inheritance, we only allow renames
208 * into our tree when the project IDs are the same; else the
209 * tree quota mechanism would be circumvented.
210 */
211 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
212 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
213 error = XFS_ERROR(EXDEV);
214 goto error_return;
215 }
216
217 /*
245 * Set up the target. 218 * Set up the target.
246 */ 219 */
247 if (target_ip == NULL) { 220 if (target_ip == NULL) {
@@ -367,19 +340,11 @@ xfs_rename(
367 &first_block, &free_list, spaceres); 340 &first_block, &free_list, spaceres);
368 if (error) 341 if (error)
369 goto abort_return; 342 goto abort_return;
370 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
371 343
372 /* 344 xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
373 * Update the generation counts on all the directory inodes
374 * that we're modifying.
375 */
376 src_dp->i_gen++;
377 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); 345 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
378 346 if (new_parent)
379 if (new_parent) {
380 target_dp->i_gen++;
381 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); 347 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
382 }
383 348
384 /* 349 /*
385 * If this is a synchronous mount, make sure that the 350 * If this is a synchronous mount, make sure that the
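
Moving the project-inheritance check below the point where all four inodes are joined to the transaction is what makes xfs_rename_unlock4() dead code: once joined, xfs_trans_cancel() unlocks everything, so the failure path shrinks to the usual goto. A sketch of the resulting shape (the label bodies are assumptions; they are not shown in these hunks):

    /* every post-join failure funnels through one label */
    if (post_join_check_fails) {            /* e.g. the PROJINHERIT test */
            error = XFS_ERROR(EXDEV);
            goto error_return;
    }
    ...
    error_return:                           /* assumed label body */
            xfs_trans_cancel(tp, cancel_flags);     /* unlocks joined inodes */
    std_return:
            return error;
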
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index e2f68de16159..edf12c7b834c 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -85,7 +85,6 @@ xfs_growfs_rt_alloc(
85{ 85{
86 xfs_fileoff_t bno; /* block number in file */ 86 xfs_fileoff_t bno; /* block number in file */
87 xfs_buf_t *bp; /* temporary buffer for zeroing */ 87 xfs_buf_t *bp; /* temporary buffer for zeroing */
88 int cancelflags; /* flags for xfs_trans_cancel */
89 int committed; /* transaction committed flag */ 88 int committed; /* transaction committed flag */
90 xfs_daddr_t d; /* disk block address */ 89 xfs_daddr_t d; /* disk block address */
91 int error; /* error return value */ 90 int error; /* error return value */
@@ -96,15 +95,16 @@ xfs_growfs_rt_alloc(
96 xfs_bmbt_irec_t map; /* block map output */ 95 xfs_bmbt_irec_t map; /* block map output */
97 int nmap; /* number of block maps */ 96 int nmap; /* number of block maps */
98 int resblks; /* space reservation */ 97 int resblks; /* space reservation */
99 xfs_trans_t *tp; /* transaction pointer */
100 98
101 /* 99 /*
102 * Allocate space to the file, as necessary. 100 * Allocate space to the file, as necessary.
103 */ 101 */
104 while (oblocks < nblocks) { 102 while (oblocks < nblocks) {
103 int cancelflags = 0;
104 xfs_trans_t *tp;
105
105 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); 106 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
106 resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); 107 resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
107 cancelflags = 0;
108 /* 108 /*
109 * Reserve space & log for one extent added to the file. 109 * Reserve space & log for one extent added to the file.
110 */ 110 */
@@ -171,7 +171,9 @@ xfs_growfs_rt_alloc(
171 mp->m_bsize, 0); 171 mp->m_bsize, 0);
172 if (bp == NULL) { 172 if (bp == NULL) {
173 error = XFS_ERROR(EIO); 173 error = XFS_ERROR(EIO);
174 goto error_cancel; 174error_cancel:
175 xfs_trans_cancel(tp, cancelflags);
176 goto error;
175 } 177 }
176 memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); 178 memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
177 xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); 179 xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
@@ -188,8 +190,6 @@ xfs_growfs_rt_alloc(
188 oblocks = map.br_startoff + map.br_blockcount; 190 oblocks = map.br_startoff + map.br_blockcount;
189 } 191 }
190 return 0; 192 return 0;
191error_cancel:
192 xfs_trans_cancel(tp, cancelflags);
193error: 193error:
194 return error; 194 return error;
195} 195}
@@ -1856,7 +1856,6 @@ xfs_growfs_rt(
1856{ 1856{
1857 xfs_rtblock_t bmbno; /* bitmap block number */ 1857 xfs_rtblock_t bmbno; /* bitmap block number */
1858 xfs_buf_t *bp; /* temporary buffer */ 1858 xfs_buf_t *bp; /* temporary buffer */
1859 int cancelflags; /* flags for xfs_trans_cancel */
1860 int error; /* error return value */ 1859 int error; /* error return value */
1861 xfs_inode_t *ip; /* bitmap inode, used as lock */ 1860 xfs_inode_t *ip; /* bitmap inode, used as lock */
1862 xfs_mount_t *nmp; /* new (fake) mount structure */ 1861 xfs_mount_t *nmp; /* new (fake) mount structure */
@@ -1872,13 +1871,13 @@ xfs_growfs_rt(
1872 xfs_extlen_t rsumblocks; /* current number of rt summary blks */ 1871 xfs_extlen_t rsumblocks; /* current number of rt summary blks */
1873 xfs_sb_t *sbp; /* old superblock */ 1872 xfs_sb_t *sbp; /* old superblock */
1874 xfs_fsblock_t sumbno; /* summary block number */ 1873 xfs_fsblock_t sumbno; /* summary block number */
1875 xfs_trans_t *tp; /* transaction pointer */
1876 1874
1877 sbp = &mp->m_sb; 1875 sbp = &mp->m_sb;
1878 cancelflags = 0;
1879 /* 1876 /*
1880 * Initial error checking. 1877 * Initial error checking.
1881 */ 1878 */
1879 if (!capable(CAP_SYS_ADMIN))
1880 return XFS_ERROR(EPERM);
1882 if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || 1881 if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
1883 (nrblocks = in->newblocks) <= sbp->sb_rblocks || 1882 (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
1884 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) 1883 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
@@ -1942,6 +1941,9 @@ xfs_growfs_rt(
1942 ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); 1941 ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
1943 bmbno < nrbmblocks; 1942 bmbno < nrbmblocks;
1944 bmbno++) { 1943 bmbno++) {
1944 xfs_trans_t *tp;
1945 int cancelflags = 0;
1946
1945 *nmp = *mp; 1947 *nmp = *mp;
1946 nsbp = &nmp->m_sb; 1948 nsbp = &nmp->m_sb;
1947 /* 1949 /*
@@ -1967,16 +1969,15 @@ xfs_growfs_rt(
1967 * Start a transaction, get the log reservation. 1969 * Start a transaction, get the log reservation.
1968 */ 1970 */
1969 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE); 1971 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE);
1970 cancelflags = 0;
1971 if ((error = xfs_trans_reserve(tp, 0, 1972 if ((error = xfs_trans_reserve(tp, 0,
1972 XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0))) 1973 XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0)))
1973 break; 1974 goto error_cancel;
1974 /* 1975 /*
1975 * Lock out other callers by grabbing the bitmap inode lock. 1976 * Lock out other callers by grabbing the bitmap inode lock.
1976 */ 1977 */
1977 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 1978 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
1978 XFS_ILOCK_EXCL, &ip))) 1979 XFS_ILOCK_EXCL, &ip)))
1979 break; 1980 goto error_cancel;
1980 ASSERT(ip == mp->m_rbmip); 1981 ASSERT(ip == mp->m_rbmip);
1981 /* 1982 /*
1982 * Update the bitmap inode's size. 1983 * Update the bitmap inode's size.
@@ -1990,7 +1991,7 @@ xfs_growfs_rt(
1990 */ 1991 */
1991 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 1992 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0,
1992 XFS_ILOCK_EXCL, &ip))) 1993 XFS_ILOCK_EXCL, &ip)))
1993 break; 1994 goto error_cancel;
1994 ASSERT(ip == mp->m_rsumip); 1995 ASSERT(ip == mp->m_rsumip);
1995 /* 1996 /*
1996 * Update the summary inode's size. 1997 * Update the summary inode's size.
@@ -2005,7 +2006,7 @@ xfs_growfs_rt(
2005 mp->m_rsumlevels != nmp->m_rsumlevels) { 2006 mp->m_rsumlevels != nmp->m_rsumlevels) {
2006 error = xfs_rtcopy_summary(mp, nmp, tp); 2007 error = xfs_rtcopy_summary(mp, nmp, tp);
2007 if (error) 2008 if (error)
2008 break; 2009 goto error_cancel;
2009 } 2010 }
2010 /* 2011 /*
2011 * Update superblock fields. 2012 * Update superblock fields.
@@ -2031,8 +2032,11 @@ xfs_growfs_rt(
2031 bp = NULL; 2032 bp = NULL;
2032 error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents, 2033 error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents,
2033 nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); 2034 nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
2034 if (error) 2035 if (error) {
2036error_cancel:
2037 xfs_trans_cancel(tp, cancelflags);
2035 break; 2038 break;
2039 }
2036 /* 2040 /*
2037 * Mark more blocks free in the superblock. 2041 * Mark more blocks free in the superblock.
2038 */ 2042 */
@@ -2045,15 +2049,10 @@ xfs_growfs_rt(
2045 mp->m_rsumsize = nrsumsize; 2049 mp->m_rsumsize = nrsumsize;
2046 2050
2047 error = xfs_trans_commit(tp, 0); 2051 error = xfs_trans_commit(tp, 0);
2048 if (error) { 2052 if (error)
2049 tp = NULL;
2050 break; 2053 break;
2051 }
2052 } 2054 }
2053 2055
2054 if (error && tp)
2055 xfs_trans_cancel(tp, cancelflags);
2056
2057 /* 2056 /*
2058 * Free the fake mp structure. 2057 * Free the fake mp structure.
2059 */ 2058 */
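
Both growfs paths now declare the transaction and cancel flags per loop iteration and funnel every failure through a single error_cancel label that sits inside an earlier if block; C allows a goto to target a label anywhere in the same function, including inside another compound statement. A standalone demonstration of the pattern, with hypothetical step functions standing in for the buffer and transaction calls:

    #include <stdio.h>

    static int acquire(int i) { return i < 3; }             /* fails at i == 3 */
    static int use(int i)     { return i == 2 ? -1 : 0; }   /* fails at i == 2 */

    int main(void)
    {
            int i, error = 0;

            for (i = 0; i < 5; i++) {
                    if (!acquire(i)) {
                            error = -1;
    error_cancel:
                            printf("cancel at step %d (error %d)\n", i, error);
                            goto out;
                    }
                    error = use(i);
                    if (error)
                            goto error_cancel;  /* reuse cleanup in the if block */
            }
    out:
            return error ? 1 : 0;
    }
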
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 3a82576dde9a..36f3a21c54d2 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -406,7 +406,7 @@ xfs_bwrite(
406 * XXXsup how does this work for quotas. 406 * XXXsup how does this work for quotas.
407 */ 407 */
408 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); 408 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
409 XFS_BUF_SET_FSPRIVATE3(bp, mp); 409 bp->b_mount = mp;
410 XFS_BUF_WRITE(bp); 410 XFS_BUF_WRITE(bp);
411 411
412 if ((error = XFS_bwrite(bp))) { 412 if ((error = XFS_bwrite(bp))) {
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 3f8cf1587f4c..1ed71916e4c9 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -79,6 +79,7 @@ struct xfs_mount;
79#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */ 79#define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */
80#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 80#define XFS_SB_VERSION2_RESERVED4BIT 0x00000004
81#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ 81#define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */
82#define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */
82 83
83#define XFS_SB_VERSION2_OKREALFBITS \ 84#define XFS_SB_VERSION2_OKREALFBITS \
84 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ 85 (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
@@ -296,30 +297,34 @@ typedef enum {
296 297
297#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) 298#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
298 299
299#ifdef __KERNEL__
300static inline int xfs_sb_good_version(xfs_sb_t *sbp) 300static inline int xfs_sb_good_version(xfs_sb_t *sbp)
301{ 301{
302 return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \ 302 /* We always support version 1-3 */
303 (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \ 303 if (sbp->sb_versionnum >= XFS_SB_VERSION_1 &&
304 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 304 sbp->sb_versionnum <= XFS_SB_VERSION_3)
305 !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \ 305 return 1;
306 ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \ 306
307 (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \ 307 /* We support version 4 if all feature bits are supported */
308 (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN))); 308 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) {
309} 309 if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) ||
310 ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) &&
311 (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS)))
312 return 0;
313
314#ifdef __KERNEL__
315 if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
316 return 0;
310#else 317#else
311static inline int xfs_sb_good_version(xfs_sb_t *sbp) 318 if ((sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) &&
312{ 319 sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
313 return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \ 320 return 0;
314 (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \ 321#endif
315 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 322
316 !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \ 323 return 1;
317 ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \ 324 }
318 (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \ 325
319 (!(sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) || \ 326 return 0;
320 (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN))));
321} 327}
322#endif /* __KERNEL__ */
323 328
324/* 329/*
325 * Detect a mismatched features2 field. Older kernels read/wrote 330 * Detect a mismatched features2 field. Older kernels read/wrote
@@ -332,123 +337,127 @@ static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp)
332 337
333static inline unsigned xfs_sb_version_tonew(unsigned v) 338static inline unsigned xfs_sb_version_tonew(unsigned v)
334{ 339{
335 return ((((v) == XFS_SB_VERSION_1) ? \ 340 if (v == XFS_SB_VERSION_1)
336 0 : \ 341 return XFS_SB_VERSION_4;
337 (((v) == XFS_SB_VERSION_2) ? \ 342
338 XFS_SB_VERSION_ATTRBIT : \ 343 if (v == XFS_SB_VERSION_2)
339 (XFS_SB_VERSION_ATTRBIT | XFS_SB_VERSION_NLINKBIT))) | \ 344 return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
340 XFS_SB_VERSION_4); 345
346 return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT |
347 XFS_SB_VERSION_NLINKBIT;
341} 348}
342 349
343static inline unsigned xfs_sb_version_toold(unsigned v) 350static inline unsigned xfs_sb_version_toold(unsigned v)
344{ 351{
345 return (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \ 352 if (v & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT))
346 0 : \ 353 return 0;
347 (((v) & XFS_SB_VERSION_NLINKBIT) ? \ 354 if (v & XFS_SB_VERSION_NLINKBIT)
348 XFS_SB_VERSION_3 : \ 355 return XFS_SB_VERSION_3;
349 (((v) & XFS_SB_VERSION_ATTRBIT) ? \ 356 if (v & XFS_SB_VERSION_ATTRBIT)
350 XFS_SB_VERSION_2 : \ 357 return XFS_SB_VERSION_2;
351 XFS_SB_VERSION_1))); 358 return XFS_SB_VERSION_1;
352} 359}
353 360
354static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp) 361static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
355{ 362{
356 return ((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \ 363 return sbp->sb_versionnum == XFS_SB_VERSION_2 ||
357 ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ 364 sbp->sb_versionnum == XFS_SB_VERSION_3 ||
358 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 365 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
359 ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); 366 (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
360} 367}
361 368
362static inline void xfs_sb_version_addattr(xfs_sb_t *sbp) 369static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
363{ 370{
364 (sbp)->sb_versionnum = (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \ 371 if (sbp->sb_versionnum == XFS_SB_VERSION_1)
365 XFS_SB_VERSION_2 : \ 372 sbp->sb_versionnum = XFS_SB_VERSION_2;
366 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) ? \ 373 else if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
367 ((sbp)->sb_versionnum | XFS_SB_VERSION_ATTRBIT) : \ 374 sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;
368 (XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT))); 375 else
376 sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT;
369} 377}
370 378
371static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) 379static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
372{ 380{
373 return ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ 381 return sbp->sb_versionnum == XFS_SB_VERSION_3 ||
374 ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 382 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
375 ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); 383 (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
376} 384}
377 385
378static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp) 386static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
379{ 387{
380 (sbp)->sb_versionnum = ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \ 388 if (sbp->sb_versionnum <= XFS_SB_VERSION_2)
381 XFS_SB_VERSION_3 : \ 389 sbp->sb_versionnum = XFS_SB_VERSION_3;
382 ((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT)); 390 else
391 sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
383} 392}
384 393
385static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) 394static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
386{ 395{
387 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 396 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
388 ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT); 397 (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
389} 398}
390 399
391static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) 400static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
392{ 401{
393 (sbp)->sb_versionnum = \ 402 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4)
394 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \ 403 sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;
395 ((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \ 404 else
396 (xfs_sb_version_tonew((sbp)->sb_versionnum) | \ 405 sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) |
397 XFS_SB_VERSION_QUOTABIT)); 406 XFS_SB_VERSION_QUOTABIT;
398} 407}
399 408
400static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) 409static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
401{ 410{
402 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 411 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
403 ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT); 412 (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT);
404} 413}
405 414
406static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) 415static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
407{ 416{
408 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 417 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
409 ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); 418 (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
410} 419}
411 420
412static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp) 421static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
413{ 422{
414 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 423 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
415 ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); 424 (sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT);
416} 425}
417 426
418static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) 427static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
419{ 428{
420 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 429 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
421 ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT); 430 (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
422} 431}
423 432
424static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) 433static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
425{ 434{
426 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 435 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
427 ((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); 436 (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
428} 437}
429 438
430static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) 439static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
431{ 440{
432 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 441 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
433 ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); 442 (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
434} 443}
435 444
436static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) 445static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
437{ 446{
438 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 447 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
439 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT); 448 (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
440} 449}
441 450
442static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) 451static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
443{ 452{
444 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 453 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
445 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); 454 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
446} 455}
447 456
448static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) 457static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
449{ 458{
450 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 459 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 &&
451 ((sbp)->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); 460 (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
452} 461}
453 462
454/* 463/*
@@ -463,22 +472,20 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
463 472
464static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) 473static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
465{ 474{
466 return (xfs_sb_version_hasmorebits(sbp) && \ 475 return xfs_sb_version_hasmorebits(sbp) &&
467 ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); 476 (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT);
468} 477}
469 478
470static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) 479static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
471{ 480{
472 return (xfs_sb_version_hasmorebits(sbp)) && \ 481 return xfs_sb_version_hasmorebits(sbp) &&
473 ((sbp)->sb_features2 & XFS_SB_VERSION2_ATTR2BIT); 482 (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT);
474} 483}
475 484
476static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) 485static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
477{ 486{
478 ((sbp)->sb_versionnum = \ 487 sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
479 ((sbp)->sb_versionnum | XFS_SB_VERSION_MOREBITSBIT), \ 488 sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT;
480 ((sbp)->sb_features2 = \
481 ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
482} 489}
483 490
484static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) 491static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 4e1c22a23be5..8570b826fedd 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -290,7 +290,7 @@ xfs_trans_dup(
290 ASSERT(tp->t_ticket != NULL); 290 ASSERT(tp->t_ticket != NULL);
291 291
292 ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); 292 ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE);
293 ntp->t_ticket = tp->t_ticket; 293 ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
294 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; 294 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
295 tp->t_blk_res = tp->t_blk_res_used; 295 tp->t_blk_res = tp->t_blk_res_used;
296 ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; 296 ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
@@ -1260,6 +1260,13 @@ xfs_trans_roll(
1260 trans = *tpp; 1260 trans = *tpp;
1261 1261
1262 /* 1262 /*
1263 * transaction commit worked ok so we can drop the extra ticket
1264 * reference that we gained in xfs_trans_dup()
1265 */
1266 xfs_log_ticket_put(trans->t_ticket);
1267
1268
1269 /*
1263 * Reserve space in the log for the next transaction. 1270
1264 * This also pushes items in the "AIL", the list of logged items, 1271 * This also pushes items in the "AIL", the list of logged items,
1265 * out to disk if they are taking up space at the tail of the log 1272 * out to disk if they are taking up space at the tail of the log
@@ -1383,11 +1390,12 @@ xfs_trans_chunk_committed(
1383 xfs_log_item_desc_t *lidp; 1390 xfs_log_item_desc_t *lidp;
1384 xfs_log_item_t *lip; 1391 xfs_log_item_t *lip;
1385 xfs_lsn_t item_lsn; 1392 xfs_lsn_t item_lsn;
1386 struct xfs_mount *mp;
1387 int i; 1393 int i;
1388 1394
1389 lidp = licp->lic_descs; 1395 lidp = licp->lic_descs;
1390 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1396 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1397 struct xfs_ail *ailp;
1398
1391 if (xfs_lic_isfree(licp, i)) { 1399 if (xfs_lic_isfree(licp, i)) {
1392 continue; 1400 continue;
1393 } 1401 }
@@ -1424,19 +1432,19 @@ xfs_trans_chunk_committed(
1424 * This would cause the earlier transaction to fail 1432 * This would cause the earlier transaction to fail
1425 * the test below. 1433 * the test below.
1426 */ 1434 */
1427 mp = lip->li_mountp; 1435 ailp = lip->li_ailp;
1428 spin_lock(&mp->m_ail_lock); 1436 spin_lock(&ailp->xa_lock);
1429 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { 1437 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
1430 /* 1438 /*
1431 * This will set the item's lsn to item_lsn 1439 * This will set the item's lsn to item_lsn
1432 * and update the position of the item in 1440 * and update the position of the item in
1433 * the AIL. 1441 * the AIL.
1434 * 1442 *
1435 * xfs_trans_update_ail() drops the AIL lock. 1443 * xfs_trans_ail_update() drops the AIL lock.
1436 */ 1444 */
1437 xfs_trans_update_ail(mp, lip, item_lsn); 1445 xfs_trans_ail_update(ailp, lip, item_lsn);
1438 } else { 1446 } else {
1439 spin_unlock(&mp->m_ail_lock); 1447 spin_unlock(&ailp->xa_lock);
1440 } 1448 }
1441 1449
1442 /* 1450 /*
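
The xfs_trans.c hunks close a ticket lifetime hole: xfs_trans_dup() now takes its own reference on the log ticket via xfs_log_ticket_get(), and xfs_trans_roll() drops it with xfs_log_ticket_put() once the commit has succeeded, so the ticket cannot be freed while the duplicated transaction still points at it. A minimal userspace model of the pairing (xlog_ticket internals are assumptions, not part of this diff):

    #include <stdlib.h>

    struct ticket { int refcount; };

    static struct ticket *ticket_get(struct ticket *t)
    {
            t->refcount++;          /* duplicate transaction shares the ticket */
            return t;
    }

    static void ticket_put(struct ticket *t)
    {
            if (--t->refcount == 0)
                    free(t);        /* last reference frees the ticket */
    }

    /* dup:  ntp->t_ticket = ticket_get(tp->t_ticket);
     * roll: ticket_put(trans->t_ticket) after a successful commit. */
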
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 74c80bd2b0ec..d6fe4a88d79f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -18,6 +18,8 @@
18#ifndef __XFS_TRANS_H__ 18#ifndef __XFS_TRANS_H__
19#define __XFS_TRANS_H__ 19#define __XFS_TRANS_H__
20 20
21struct xfs_log_item;
22
21/* 23/*
22 * This is the structure written in the log at the head of 24 * This is the structure written in the log at the head of
23 * every transaction. It identifies the type and id of the 25 * every transaction. It identifies the type and id of the
@@ -98,76 +100,6 @@ typedef struct xfs_trans_header {
98#define XFS_TRANS_TYPE_MAX 41 100#define XFS_TRANS_TYPE_MAX 41
99/* new transaction types need to be reflected in xfs_logprint(8) */ 101/* new transaction types need to be reflected in xfs_logprint(8) */
100 102
101
102#ifdef __KERNEL__
103struct xfs_buf;
104struct xfs_buftarg;
105struct xfs_efd_log_item;
106struct xfs_efi_log_item;
107struct xfs_inode;
108struct xfs_item_ops;
109struct xfs_log_iovec;
110struct xfs_log_item;
111struct xfs_log_item_desc;
112struct xfs_mount;
113struct xfs_trans;
114struct xfs_dquot_acct;
115
116typedef struct xfs_log_item {
117 struct list_head li_ail; /* AIL pointers */
118 xfs_lsn_t li_lsn; /* last on-disk lsn */
119 struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
120 struct xfs_mount *li_mountp; /* ptr to fs mount */
121 uint li_type; /* item type */
122 uint li_flags; /* misc flags */
123 struct xfs_log_item *li_bio_list; /* buffer item list */
124 void (*li_cb)(struct xfs_buf *,
125 struct xfs_log_item *);
126 /* buffer item iodone */
127 /* callback func */
128 struct xfs_item_ops *li_ops; /* function list */
129} xfs_log_item_t;
130
131#define XFS_LI_IN_AIL 0x1
132#define XFS_LI_ABORTED 0x2
133
134typedef struct xfs_item_ops {
135 uint (*iop_size)(xfs_log_item_t *);
136 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
137 void (*iop_pin)(xfs_log_item_t *);
138 void (*iop_unpin)(xfs_log_item_t *, int);
139 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
140 uint (*iop_trylock)(xfs_log_item_t *);
141 void (*iop_unlock)(xfs_log_item_t *);
142 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
143 void (*iop_push)(xfs_log_item_t *);
144 void (*iop_pushbuf)(xfs_log_item_t *);
145 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
146} xfs_item_ops_t;
147
148#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
149#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
150#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
151#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags)
152#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
153#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
154#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
155#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
156#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
157#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
158#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
159
160/*
161 * Return values for the IOP_TRYLOCK() routines.
162 */
163#define XFS_ITEM_SUCCESS 0
164#define XFS_ITEM_PINNED 1
165#define XFS_ITEM_LOCKED 2
166#define XFS_ITEM_FLUSHING 3
167#define XFS_ITEM_PUSHBUF 4
168
169#endif /* __KERNEL__ */
170
171/* 103/*
172 * This structure is used to track log items associated with 104 * This structure is used to track log items associated with
173 * a transaction. It points to the log item and keeps some 105 * a transaction. It points to the log item and keeps some
@@ -176,7 +108,7 @@ typedef struct xfs_item_ops {
176 * once we get to commit processing (see xfs_trans_commit()). 108 * once we get to commit processing (see xfs_trans_commit()).
177 */ 109 */
178typedef struct xfs_log_item_desc { 110typedef struct xfs_log_item_desc {
179 xfs_log_item_t *lid_item; 111 struct xfs_log_item *lid_item;
180 ushort lid_size; 112 ushort lid_size;
181 unsigned char lid_flags; 113 unsigned char lid_flags;
182 unsigned char lid_index; 114 unsigned char lid_index;
@@ -276,94 +208,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
276 (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs)); 208 (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
277} 209}
278 210
279#ifdef __KERNEL__
280/*
281 * This structure is used to maintain a list of block ranges that have been
282 * freed in the transaction. The ranges are listed in the perag[] busy list
283 * between when they're freed and the transaction is committed to disk.
284 */
285
286typedef struct xfs_log_busy_slot {
287 xfs_agnumber_t lbc_ag;
288 ushort lbc_idx; /* index in perag.busy[] */
289} xfs_log_busy_slot_t;
290
291#define XFS_LBC_NUM_SLOTS 31
292typedef struct xfs_log_busy_chunk {
293 struct xfs_log_busy_chunk *lbc_next;
294 uint lbc_free; /* free slots bitmask */
295 ushort lbc_unused; /* first unused */
296 xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
297} xfs_log_busy_chunk_t;
298
299#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
300#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
301
302#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
303#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
304#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
305#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
306#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
307
308/*
309 * This is the type of function which can be given to xfs_trans_callback()
310 * to be called upon the transaction's commit to disk.
311 */
312typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
313
314/*
315 * This is the structure maintained for every active transaction.
316 */
317typedef struct xfs_trans {
318 unsigned int t_magic; /* magic number */
319 xfs_log_callback_t t_logcb; /* log callback struct */
320 unsigned int t_type; /* transaction type */
321 unsigned int t_log_res; /* amt of log space resvd */
322 unsigned int t_log_count; /* count for perm log res */
323 unsigned int t_blk_res; /* # of blocks resvd */
324 unsigned int t_blk_res_used; /* # of resvd blocks used */
325 unsigned int t_rtx_res; /* # of rt extents resvd */
326 unsigned int t_rtx_res_used; /* # of resvd rt extents used */
327 xfs_log_ticket_t t_ticket; /* log mgr ticket */
328 xfs_lsn_t t_lsn; /* log seq num of start of
329 * transaction. */
330 xfs_lsn_t t_commit_lsn; /* log seq num of end of
331 * transaction. */
332 struct xfs_mount *t_mountp; /* ptr to fs mount struct */
333 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
334 xfs_trans_callback_t t_callback; /* transaction callback */
335 void *t_callarg; /* callback arg */
336 unsigned int t_flags; /* misc flags */
337 int64_t t_icount_delta; /* superblock icount change */
338 int64_t t_ifree_delta; /* superblock ifree change */
339 int64_t t_fdblocks_delta; /* superblock fdblocks chg */
340 int64_t t_res_fdblocks_delta; /* on-disk only chg */
341 int64_t t_frextents_delta;/* superblock freextents chg*/
342 int64_t t_res_frextents_delta; /* on-disk only chg */
343#ifdef DEBUG
344 int64_t t_ag_freeblks_delta; /* debugging counter */
345 int64_t t_ag_flist_delta; /* debugging counter */
346 int64_t t_ag_btree_delta; /* debugging counter */
347#endif
348 int64_t t_dblocks_delta;/* superblock dblocks change */
349 int64_t t_agcount_delta;/* superblock agcount change */
350 int64_t t_imaxpct_delta;/* superblock imaxpct change */
351 int64_t t_rextsize_delta;/* superblock rextsize chg */
352 int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
353 int64_t t_rblocks_delta;/* superblock rblocks change */
354 int64_t t_rextents_delta;/* superblocks rextents chg */
355 int64_t t_rextslog_delta;/* superblocks rextslog chg */
356 unsigned int t_items_free; /* log item descs free */
357 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
358 xfs_trans_header_t t_header; /* header for in-log trans */
359 unsigned int t_busy_free; /* busy descs free */
360 xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
361 unsigned long t_pflags; /* saved process flags state */
362} xfs_trans_t;
363
364#endif /* __KERNEL__ */
365
366
367#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ 211#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */
368/* 212/*
369 * Values for t_flags. 213 * Values for t_flags.
@@ -906,6 +750,157 @@ typedef struct xfs_trans {
906#define XFS_DQUOT_REF 1 750#define XFS_DQUOT_REF 1
907 751
908#ifdef __KERNEL__ 752#ifdef __KERNEL__
753
754struct xfs_buf;
755struct xfs_buftarg;
756struct xfs_efd_log_item;
757struct xfs_efi_log_item;
758struct xfs_inode;
759struct xfs_item_ops;
760struct xfs_log_iovec;
761struct xfs_log_item_desc;
762struct xfs_mount;
763struct xfs_trans;
764struct xfs_dquot_acct;
765
766typedef struct xfs_log_item {
767 struct list_head li_ail; /* AIL pointers */
768 xfs_lsn_t li_lsn; /* last on-disk lsn */
769 struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
770 struct xfs_mount *li_mountp; /* ptr to fs mount */
771 struct xfs_ail *li_ailp; /* ptr to AIL */
772 uint li_type; /* item type */
773 uint li_flags; /* misc flags */
774 struct xfs_log_item *li_bio_list; /* buffer item list */
775 void (*li_cb)(struct xfs_buf *,
776 struct xfs_log_item *);
777 /* buffer item iodone */
778 /* callback func */
779 struct xfs_item_ops *li_ops; /* function list */
780} xfs_log_item_t;
781
782#define XFS_LI_IN_AIL 0x1
783#define XFS_LI_ABORTED 0x2
784
785typedef struct xfs_item_ops {
786 uint (*iop_size)(xfs_log_item_t *);
787 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
788 void (*iop_pin)(xfs_log_item_t *);
789 void (*iop_unpin)(xfs_log_item_t *, int);
790 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
791 uint (*iop_trylock)(xfs_log_item_t *);
792 void (*iop_unlock)(xfs_log_item_t *);
793 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
794 void (*iop_push)(xfs_log_item_t *);
795 void (*iop_pushbuf)(xfs_log_item_t *);
796 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
797} xfs_item_ops_t;
798
799#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
800#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
801#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
802#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags)
803#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
804#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
805#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
806#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
807#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
808#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
809#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
810
811/*
812 * Return values for the IOP_TRYLOCK() routines.
813 */
814#define XFS_ITEM_SUCCESS 0
815#define XFS_ITEM_PINNED 1
816#define XFS_ITEM_LOCKED 2
817#define XFS_ITEM_FLUSHING 3
818#define XFS_ITEM_PUSHBUF 4
819
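
[Editor's note: as a compressed, hypothetical sketch of how a pusher dispatches on these trylock results, modelled loosely on xfsaild_push() in xfs_trans_ail.c later in this diff. The helper function and its counters are illustrative, not part of the patch.]

	static void
	push_one_item_sketch(xfs_log_item_t *lip, int *flush_log, int *stuck)
	{
		/* hypothetical helper; the real code drops the AIL lock first */
		switch (IOP_TRYLOCK(lip)) {
		case XFS_ITEM_SUCCESS:
			IOP_PUSH(lip);		/* locked: write the item back */
			break;
		case XFS_ITEM_PUSHBUF:
			IOP_PUSHBUF(lip);	/* push the backing buffer first */
			break;
		case XFS_ITEM_PINNED:
			(*flush_log)++;		/* pinned in the log: force it later */
			(*stuck)++;
			break;
		case XFS_ITEM_LOCKED:		/* held by someone else */
		case XFS_ITEM_FLUSHING:		/* already being written out */
			(*stuck)++;
			break;
		}
	}
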
820/*
821 * This structure is used to maintain a list of block ranges that have been
822 * freed in the transaction. The ranges are listed in the perag[] busy list
823 * between when they're freed and the transaction is committed to disk.
824 */
825
826typedef struct xfs_log_busy_slot {
827 xfs_agnumber_t lbc_ag;
828 ushort lbc_idx; /* index in perag.busy[] */
829} xfs_log_busy_slot_t;
830
831#define XFS_LBC_NUM_SLOTS 31
832typedef struct xfs_log_busy_chunk {
833 struct xfs_log_busy_chunk *lbc_next;
834 uint lbc_free; /* free slots bitmask */
835 ushort lbc_unused; /* first unused */
836 xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
837} xfs_log_busy_chunk_t;
838
839#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
840#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
841
842#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
843#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
844#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
845#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
846#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
847
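
[Editor's note: for illustration, the masks above compose into a simple find-and-claim loop. The helper below is a hedged sketch, not the kernel's xfs_trans_add_busy(), although that function uses the same macros.]

	static xfs_log_busy_slot_t *
	lbc_claim_first_free(xfs_log_busy_chunk_t *lbcp)
	{
		int	i;

		if (!XFS_LBC_VACANCY(lbcp))
			return NULL;			/* all 31 slots taken */
		for (i = 0; i <= XFS_LBC_MAX_SLOT; i++) {
			if (XFS_LBC_ISFREE(lbcp, i)) {
				XFS_LBC_CLAIM(lbcp, i);	/* clear the free bit */
				if (i >= lbcp->lbc_unused)
					lbcp->lbc_unused = i + 1;
				return XFS_LBC_SLOT(lbcp, i);
			}
		}
		return NULL;				/* not reached: vacancy was set */
	}
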
848/*
849 * This is the type of function which can be given to xfs_trans_callback()
850 * to be called upon the transaction's commit to disk.
851 */
852typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
853
854/*
855 * This is the structure maintained for every active transaction.
856 */
857typedef struct xfs_trans {
858 unsigned int t_magic; /* magic number */
859 xfs_log_callback_t t_logcb; /* log callback struct */
860 unsigned int t_type; /* transaction type */
861 unsigned int t_log_res; /* amt of log space resvd */
862 unsigned int t_log_count; /* count for perm log res */
863 unsigned int t_blk_res; /* # of blocks resvd */
864 unsigned int t_blk_res_used; /* # of resvd blocks used */
865 unsigned int t_rtx_res; /* # of rt extents resvd */
866 unsigned int t_rtx_res_used; /* # of resvd rt extents used */
867 xfs_log_ticket_t t_ticket; /* log mgr ticket */
868 xfs_lsn_t t_lsn; /* log seq num of start of
869 * transaction. */
870 xfs_lsn_t t_commit_lsn; /* log seq num of end of
871 * transaction. */
872 struct xfs_mount *t_mountp; /* ptr to fs mount struct */
873 struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
874 xfs_trans_callback_t t_callback; /* transaction callback */
875 void *t_callarg; /* callback arg */
876 unsigned int t_flags; /* misc flags */
877 int64_t t_icount_delta; /* superblock icount change */
878 int64_t t_ifree_delta; /* superblock ifree change */
879 int64_t t_fdblocks_delta; /* superblock fdblocks chg */
880 int64_t t_res_fdblocks_delta; /* on-disk only chg */
881 int64_t t_frextents_delta;/* superblock freextents chg*/
882 int64_t t_res_frextents_delta; /* on-disk only chg */
883#ifdef DEBUG
884 int64_t t_ag_freeblks_delta; /* debugging counter */
885 int64_t t_ag_flist_delta; /* debugging counter */
886 int64_t t_ag_btree_delta; /* debugging counter */
887#endif
888 int64_t t_dblocks_delta;/* superblock dblocks change */
889 int64_t t_agcount_delta;/* superblock agcount change */
890 int64_t t_imaxpct_delta;/* superblock imaxpct change */
891 int64_t t_rextsize_delta;/* superblock rextsize chg */
892 int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
893 int64_t t_rblocks_delta;/* superblock rblocks change */
894 int64_t t_rextents_delta;/* superblocks rextents chg */
895 int64_t t_rextslog_delta;/* superblocks rextslog chg */
896 unsigned int t_items_free; /* log item descs free */
897 xfs_log_item_chunk_t t_items; /* first log item desc chunk */
898 xfs_trans_header_t t_header; /* header for in-log trans */
899 unsigned int t_busy_free; /* busy descs free */
900 xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
901 unsigned long t_pflags; /* saved process flags state */
902} xfs_trans_t;
903
909/* 904/*
910 * XFS transaction mechanism exported interfaces that are 905 * XFS transaction mechanism exported interfaces that are
911 * actually macros. 906 * actually macros.
@@ -928,7 +923,6 @@ typedef struct xfs_trans {
928/* 923/*
929 * XFS transaction mechanism exported interfaces. 924 * XFS transaction mechanism exported interfaces.
930 */ 925 */
931void xfs_trans_init(struct xfs_mount *);
932xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); 926xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
933xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); 927xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint);
934xfs_trans_t *xfs_trans_dup(xfs_trans_t *); 928xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
@@ -975,13 +969,8 @@ int _xfs_trans_commit(xfs_trans_t *,
975 int *); 969 int *);
976#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) 970#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
977void xfs_trans_cancel(xfs_trans_t *, int); 971void xfs_trans_cancel(xfs_trans_t *, int);
978int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
979int xfs_trans_ail_init(struct xfs_mount *); 972int xfs_trans_ail_init(struct xfs_mount *);
980void xfs_trans_ail_destroy(struct xfs_mount *); 973void xfs_trans_ail_destroy(struct xfs_mount *);
981void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
982xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *);
983void xfs_trans_unlocked_item(struct xfs_mount *,
984 xfs_log_item_t *);
985xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, 974xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
986 xfs_agnumber_t ag, 975 xfs_agnumber_t ag,
987 xfs_extlen_t idx); 976 xfs_extlen_t idx);
@@ -990,4 +979,7 @@ extern kmem_zone_t *xfs_trans_zone;
990 979
991#endif /* __KERNEL__ */ 980#endif /* __KERNEL__ */
992 981
982void xfs_trans_init(struct xfs_mount *);
983int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
984
993#endif /* __XFS_TRANS_H__ */ 985#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 1f77c00af566..2d47f10f8bed 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * Copyright (c) 2008 Dave Chinner
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -28,13 +29,13 @@
28#include "xfs_trans_priv.h" 29#include "xfs_trans_priv.h"
29#include "xfs_error.h" 30#include "xfs_error.h"
30 31
31STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *); 32STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *);
32STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *); 33STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *); 34STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
34STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *); 35STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
35 36
36#ifdef DEBUG 37#ifdef DEBUG
37STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *); 38STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *);
38#else 39#else
39#define xfs_ail_check(a,l) 40#define xfs_ail_check(a,l)
40#endif /* DEBUG */ 41#endif /* DEBUG */
@@ -50,20 +51,20 @@ STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *);
50 * lsn of the last item in the AIL. 51 * lsn of the last item in the AIL.
51 */ 52 */
52xfs_lsn_t 53xfs_lsn_t
53xfs_trans_tail_ail( 54xfs_trans_ail_tail(
54 xfs_mount_t *mp) 55 struct xfs_ail *ailp)
55{ 56{
56 xfs_lsn_t lsn; 57 xfs_lsn_t lsn;
57 xfs_log_item_t *lip; 58 xfs_log_item_t *lip;
58 59
59 spin_lock(&mp->m_ail_lock); 60 spin_lock(&ailp->xa_lock);
60 lip = xfs_ail_min(&mp->m_ail); 61 lip = xfs_ail_min(ailp);
61 if (lip == NULL) { 62 if (lip == NULL) {
62 lsn = (xfs_lsn_t)0; 63 lsn = (xfs_lsn_t)0;
63 } else { 64 } else {
64 lsn = lip->li_lsn; 65 lsn = lip->li_lsn;
65 } 66 }
66 spin_unlock(&mp->m_ail_lock); 67 spin_unlock(&ailp->xa_lock);
67 68
68 return lsn; 69 return lsn;
69} 70}
@@ -85,16 +86,125 @@ xfs_trans_tail_ail(
85 * any of the objects, so the lock is not needed. 86 * any of the objects, so the lock is not needed.
86 */ 87 */
87void 88void
88xfs_trans_push_ail( 89xfs_trans_ail_push(
89 xfs_mount_t *mp, 90 struct xfs_ail *ailp,
90 xfs_lsn_t threshold_lsn) 91 xfs_lsn_t threshold_lsn)
91{ 92{
92 xfs_log_item_t *lip; 93 xfs_log_item_t *lip;
94
95 lip = xfs_ail_min(ailp);
96 if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
97 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0)
98 xfsaild_wakeup(ailp, threshold_lsn);
99 }
100}
101
102/*
103 * AIL traversal cursor initialisation.
104 *
105 * The cursor keeps track of where our current traversal is up
106 * to by tracking the next item in the list for us. However, for
107 * this to be safe, removing an object from the AIL needs to invalidate
108 * any cursor that points to it. Hence the traversal cursor needs to
109 * be linked to the struct xfs_ail so that deletion can search all the
110 * active cursors for invalidation.
111 *
112 * We don't link the push cursor because it is embedded in the struct
113 * xfs_ail and hence easily findable.
114 */
115STATIC void
116xfs_trans_ail_cursor_init(
117 struct xfs_ail *ailp,
118 struct xfs_ail_cursor *cur)
119{
120 cur->item = NULL;
121 if (cur == &ailp->xa_cursors)
122 return;
123
124 cur->next = ailp->xa_cursors.next;
125 ailp->xa_cursors.next = cur;
126}
127
128/*
129 * Set the cursor to the next item, because when we look
130 * up the cursor the current item may have been freed.
131 */
132STATIC void
133xfs_trans_ail_cursor_set(
134 struct xfs_ail *ailp,
135 struct xfs_ail_cursor *cur,
136 struct xfs_log_item *lip)
137{
138 if (lip)
139 cur->item = xfs_ail_next(ailp, lip);
140}
141
142/*
143 * Get the next item in the traversal and advance the cursor.
144 * If the cursor was invalidated (indicated by the low bit set in lip),
145 * restart the traversal.
146 */
147struct xfs_log_item *
148xfs_trans_ail_cursor_next(
149 struct xfs_ail *ailp,
150 struct xfs_ail_cursor *cur)
151{
152 struct xfs_log_item *lip = cur->item;
153
154 if ((__psint_t)lip & 1)
155 lip = xfs_ail_min(ailp);
156 xfs_trans_ail_cursor_set(ailp, cur, lip);
157 return lip;
158}
159
160/*
161 * Now that the traversal is complete, we need to remove the cursor
162 * from the list of traversing cursors. Avoid removing the embedded
163 * push cursor, but use the fact it is always present to make the
164 * list deletion simple.
165 */
166void
167xfs_trans_ail_cursor_done(
168 struct xfs_ail *ailp,
169 struct xfs_ail_cursor *done)
170{
171 struct xfs_ail_cursor *prev = NULL;
172 struct xfs_ail_cursor *cur;
173
174 done->item = NULL;
175 if (done == &ailp->xa_cursors)
176 return;
177 prev = &ailp->xa_cursors;
178 for (cur = prev->next; cur; prev = cur, cur = prev->next) {
179 if (cur == done) {
180 prev->next = cur->next;
181 break;
182 }
183 }
184 ASSERT(cur);
185}
186
187/*
188 * Invalidate any cursor that is pointing to this item. This is
189 * called when an item is removed from the AIL. Any cursor pointing
190 * to this object is now invalid and the traversal needs to be
191 * terminated so it doesn't reference a freed object. We set the
192 * low bit of the cursor item pointer so we can distinguish between an
193 * invalidation and the end of the list when getting the next item
194 * from the cursor.
195 */
196STATIC void
197xfs_trans_ail_cursor_clear(
198 struct xfs_ail *ailp,
199 struct xfs_log_item *lip)
200{
201 struct xfs_ail_cursor *cur;
93 202
94 lip = xfs_ail_min(&mp->m_ail); 203 /* need to search all cursors */
95 if (lip && !XFS_FORCED_SHUTDOWN(mp)) { 204 for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
96 if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) 205 if (cur->item == lip)
97 xfsaild_wakeup(mp, threshold_lsn); 206 cur->item = (struct xfs_log_item *)
207 ((__psint_t)cur->item | 1);
98 } 208 }
99} 209}
100 210
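
[Editor's note: the invalidation handshake above relies on log item pointers being at least word aligned, so bit 0 is free to act as a tag. A minimal sketch of the two halves follows; the helper names are hypothetical, and the real work is done by xfs_trans_ail_cursor_clear() and xfs_trans_ail_cursor_next() above.]

	static inline void
	ail_cursor_invalidate(struct xfs_ail_cursor *cur)
	{
		/* tag the stored pointer, as cursor_clear() does on deletion */
		cur->item = (struct xfs_log_item *)((__psint_t)cur->item | 1);
	}

	static inline int
	ail_cursor_was_invalidated(struct xfs_log_item *lip)
	{
		/* tested by cursor_next() before dereferencing the pointer */
		return (int)((__psint_t)lip & 1);
	}
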
@@ -103,25 +213,27 @@ xfs_trans_push_ail(
103 * Return the current tree generation number for use 213 * Return the current tree generation number for use
104 * in calls to xfs_trans_next_ail(). 214 * in calls to xfs_trans_next_ail().
105 */ 215 */
106STATIC xfs_log_item_t * 216xfs_log_item_t *
107xfs_trans_first_push_ail( 217xfs_trans_ail_cursor_first(
108 xfs_mount_t *mp, 218 struct xfs_ail *ailp,
109 int *gen, 219 struct xfs_ail_cursor *cur,
110 xfs_lsn_t lsn) 220 xfs_lsn_t lsn)
111{ 221{
112 xfs_log_item_t *lip; 222 xfs_log_item_t *lip;
113 223
114 lip = xfs_ail_min(&mp->m_ail); 224 xfs_trans_ail_cursor_init(ailp, cur);
115 *gen = (int)mp->m_ail.xa_gen; 225 lip = xfs_ail_min(ailp);
116 if (lsn == 0) 226 if (lsn == 0)
117 return lip; 227 goto out;
118 228
119 list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) { 229 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
120 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) 230 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
121 return lip; 231 goto out;
122 } 232 }
123 233 lip = NULL;
124 return NULL; 234out:
235 xfs_trans_ail_cursor_set(ailp, cur, lip);
236 return lip;
125} 237}
126 238
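
[Editor's note: taken together, the cursor calls above form a small traversal API. A minimal, hypothetical walk using it might look like the sketch below; the locking discipline follows xfsaild_push() further down, and the per-item work is elided.]

	STATIC void
	xfs_ail_walk_sketch(
		struct xfs_ail		*ailp,
		xfs_lsn_t		start_lsn)
	{
		struct xfs_ail_cursor	cur;	/* stack cursor, linked by _first() */
		struct xfs_log_item	*lip;

		spin_lock(&ailp->xa_lock);
		for (lip = xfs_trans_ail_cursor_first(ailp, &cur, start_lsn);
		     lip != NULL;
		     lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
			/* inspect lip->li_lsn or lip->li_flags here */
		}
		xfs_trans_ail_cursor_done(ailp, &cur);	/* unlink the cursor */
		spin_unlock(&ailp->xa_lock);
	}
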
127/* 239/*
@@ -129,29 +241,29 @@ xfs_trans_first_push_ail(
129 */ 241 */
130long 242long
131xfsaild_push( 243xfsaild_push(
132 xfs_mount_t *mp, 244 struct xfs_ail *ailp,
133 xfs_lsn_t *last_lsn) 245 xfs_lsn_t *last_lsn)
134{ 246{
135 long tout = 1000; /* milliseconds */ 247 long tout = 1000; /* milliseconds */
136 xfs_lsn_t last_pushed_lsn = *last_lsn; 248 xfs_lsn_t last_pushed_lsn = *last_lsn;
137 xfs_lsn_t target = mp->m_ail.xa_target; 249 xfs_lsn_t target = ailp->xa_target;
138 xfs_lsn_t lsn; 250 xfs_lsn_t lsn;
139 xfs_log_item_t *lip; 251 xfs_log_item_t *lip;
140 int gen;
141 int restarts;
142 int flush_log, count, stuck; 252 int flush_log, count, stuck;
253 xfs_mount_t *mp = ailp->xa_mount;
254 struct xfs_ail_cursor *cur = &ailp->xa_cursors;
143 255
144#define XFS_TRANS_PUSH_AIL_RESTARTS 10 256 spin_lock(&ailp->xa_lock);
145 257 xfs_trans_ail_cursor_init(ailp, cur);
146 spin_lock(&mp->m_ail_lock); 258 lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn);
147 lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn);
148 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 259 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
149 /* 260 /*
150 * AIL is empty or our push has reached the end. 261 * AIL is empty or our push has reached the end.
151 */ 262 */
152 spin_unlock(&mp->m_ail_lock); 263 xfs_trans_ail_cursor_done(ailp, cur);
264 spin_unlock(&ailp->xa_lock);
153 last_pushed_lsn = 0; 265 last_pushed_lsn = 0;
154 goto out; 266 return tout;
155 } 267 }
156 268
157 XFS_STATS_INC(xs_push_ail); 269 XFS_STATS_INC(xs_push_ail);
@@ -169,7 +281,7 @@ xfsaild_push(
169 */ 281 */
170 tout = 10; 282 tout = 10;
171 lsn = lip->li_lsn; 283 lsn = lip->li_lsn;
172 flush_log = stuck = count = restarts = 0; 284 flush_log = stuck = count = 0;
173 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { 285 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
174 int lock_result; 286 int lock_result;
175 /* 287 /*
@@ -184,7 +296,7 @@ xfsaild_push(
184 * skip to the next item in the list. 296 * skip to the next item in the list.
185 */ 297 */
186 lock_result = IOP_TRYLOCK(lip); 298 lock_result = IOP_TRYLOCK(lip);
187 spin_unlock(&mp->m_ail_lock); 299 spin_unlock(&ailp->xa_lock);
188 switch (lock_result) { 300 switch (lock_result) {
189 case XFS_ITEM_SUCCESS: 301 case XFS_ITEM_SUCCESS:
190 XFS_STATS_INC(xs_push_ail_success); 302 XFS_STATS_INC(xs_push_ail_success);
@@ -221,7 +333,7 @@ xfsaild_push(
221 break; 333 break;
222 } 334 }
223 335
224 spin_lock(&mp->m_ail_lock); 336 spin_lock(&ailp->xa_lock);
225 /* should we bother continuing? */ 337 /* should we bother continuing? */
226 if (XFS_FORCED_SHUTDOWN(mp)) 338 if (XFS_FORCED_SHUTDOWN(mp))
227 break; 339 break;
@@ -244,14 +356,13 @@ xfsaild_push(
244 if (stuck > 100) 356 if (stuck > 100)
245 break; 357 break;
246 358
247 lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); 359 lip = xfs_trans_ail_cursor_next(ailp, cur);
248 if (lip == NULL) 360 if (lip == NULL)
249 break; 361 break;
250 if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS)
251 break;
252 lsn = lip->li_lsn; 362 lsn = lip->li_lsn;
253 } 363 }
254 spin_unlock(&mp->m_ail_lock); 364 xfs_trans_ail_cursor_done(ailp, cur);
365 spin_unlock(&ailp->xa_lock);
255 366
256 if (flush_log) { 367 if (flush_log) {
257 /* 368 /*
@@ -274,8 +385,7 @@ xfsaild_push(
274 */ 385 */
275 tout += 20; 386 tout += 20;
276 last_pushed_lsn = 0; 387 last_pushed_lsn = 0;
277 } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) || 388 } else if ((stuck * 100) / count > 90) {
278 ((stuck * 100) / count > 90)) {
279 /* 389 /*
280 * Either there is a lot of contention on the AIL or we 390 * Either there is a lot of contention on the AIL or we
281 * are stuck due to operations in progress. "Stuck" in this 391 * are stuck due to operations in progress. "Stuck" in this
@@ -287,7 +397,6 @@ xfsaild_push(
287 */ 397 */
288 tout += 10; 398 tout += 10;
289 } 399 }
290out:
291 *last_lsn = last_pushed_lsn; 400 *last_lsn = last_pushed_lsn;
292 return tout; 401 return tout;
293} /* xfsaild_push */ 402} /* xfsaild_push */
@@ -303,7 +412,7 @@ out:
303 */ 412 */
304void 413void
305xfs_trans_unlocked_item( 414xfs_trans_unlocked_item(
306 xfs_mount_t *mp, 415 struct xfs_ail *ailp,
307 xfs_log_item_t *lip) 416 xfs_log_item_t *lip)
308{ 417{
309 xfs_log_item_t *min_lip; 418 xfs_log_item_t *min_lip;
@@ -315,7 +424,7 @@ xfs_trans_unlocked_item(
315 * over some potentially valid data. 424 * over some potentially valid data.
316 */ 425 */
317 if (!(lip->li_flags & XFS_LI_IN_AIL) || 426 if (!(lip->li_flags & XFS_LI_IN_AIL) ||
318 XFS_FORCED_SHUTDOWN(mp)) { 427 XFS_FORCED_SHUTDOWN(ailp->xa_mount)) {
319 return; 428 return;
320 } 429 }
321 430
@@ -331,10 +440,10 @@ xfs_trans_unlocked_item(
331 * the call to xfs_log_move_tail() doesn't do anything if there's 440 * the call to xfs_log_move_tail() doesn't do anything if there's
332 * not enough free space to wake people up so we're safe calling it. 441 * not enough free space to wake people up so we're safe calling it.
333 */ 442 */
334 min_lip = xfs_ail_min(&mp->m_ail); 443 min_lip = xfs_ail_min(ailp);
335 444
336 if (min_lip == lip) 445 if (min_lip == lip)
337 xfs_log_move_tail(mp, 1); 446 xfs_log_move_tail(ailp->xa_mount, 1);
338} /* xfs_trans_unlocked_item */ 447} /* xfs_trans_unlocked_item */
339 448
340 449
@@ -347,41 +456,37 @@ xfs_trans_unlocked_item(
347 * we move in the AIL is the minimum one, update the tail lsn in the 456 * we move in the AIL is the minimum one, update the tail lsn in the
348 * log manager. 457 * log manager.
349 * 458 *
350 * Increment the AIL's generation count to indicate that the tree
351 * has changed.
352 *
353 * This function must be called with the AIL lock held. The lock 459 * This function must be called with the AIL lock held. The lock
354 * is dropped before returning. 460 * is dropped before returning.
355 */ 461 */
356void 462void
357xfs_trans_update_ail( 463xfs_trans_ail_update(
358 xfs_mount_t *mp, 464 struct xfs_ail *ailp,
359 xfs_log_item_t *lip, 465 xfs_log_item_t *lip,
360 xfs_lsn_t lsn) __releases(mp->m_ail_lock) 466 xfs_lsn_t lsn) __releases(ailp->xa_lock)
361{ 467{
362 xfs_log_item_t *dlip=NULL; 468 xfs_log_item_t *dlip = NULL;
363 xfs_log_item_t *mlip; /* ptr to minimum lip */ 469 xfs_log_item_t *mlip; /* ptr to minimum lip */
364 470
365 mlip = xfs_ail_min(&mp->m_ail); 471 mlip = xfs_ail_min(ailp);
366 472
367 if (lip->li_flags & XFS_LI_IN_AIL) { 473 if (lip->li_flags & XFS_LI_IN_AIL) {
368 dlip = xfs_ail_delete(&mp->m_ail, lip); 474 dlip = xfs_ail_delete(ailp, lip);
369 ASSERT(dlip == lip); 475 ASSERT(dlip == lip);
476 xfs_trans_ail_cursor_clear(ailp, dlip);
370 } else { 477 } else {
371 lip->li_flags |= XFS_LI_IN_AIL; 478 lip->li_flags |= XFS_LI_IN_AIL;
372 } 479 }
373 480
374 lip->li_lsn = lsn; 481 lip->li_lsn = lsn;
375 482 xfs_ail_insert(ailp, lip);
376 xfs_ail_insert(&mp->m_ail, lip);
377 mp->m_ail.xa_gen++;
378 483
379 if (mlip == dlip) { 484 if (mlip == dlip) {
380 mlip = xfs_ail_min(&mp->m_ail); 485 mlip = xfs_ail_min(ailp);
381 spin_unlock(&mp->m_ail_lock); 486 spin_unlock(&ailp->xa_lock);
382 xfs_log_move_tail(mp, mlip->li_lsn); 487 xfs_log_move_tail(ailp->xa_mount, mlip->li_lsn);
383 } else { 488 } else {
384 spin_unlock(&mp->m_ail_lock); 489 spin_unlock(&ailp->xa_lock);
385 } 490 }
386 491
387 492
@@ -403,29 +508,30 @@ xfs_trans_update_ail(
403 * is dropped before returning. 508 * is dropped before returning.
404 */ 509 */
405void 510void
406xfs_trans_delete_ail( 511xfs_trans_ail_delete(
407 xfs_mount_t *mp, 512 struct xfs_ail *ailp,
408 xfs_log_item_t *lip) __releases(mp->m_ail_lock) 513 xfs_log_item_t *lip) __releases(ailp->xa_lock)
409{ 514{
410 xfs_log_item_t *dlip; 515 xfs_log_item_t *dlip;
411 xfs_log_item_t *mlip; 516 xfs_log_item_t *mlip;
412 517
413 if (lip->li_flags & XFS_LI_IN_AIL) { 518 if (lip->li_flags & XFS_LI_IN_AIL) {
414 mlip = xfs_ail_min(&mp->m_ail); 519 mlip = xfs_ail_min(ailp);
415 dlip = xfs_ail_delete(&mp->m_ail, lip); 520 dlip = xfs_ail_delete(ailp, lip);
416 ASSERT(dlip == lip); 521 ASSERT(dlip == lip);
522 xfs_trans_ail_cursor_clear(ailp, dlip);
417 523
418 524
419 lip->li_flags &= ~XFS_LI_IN_AIL; 525 lip->li_flags &= ~XFS_LI_IN_AIL;
420 lip->li_lsn = 0; 526 lip->li_lsn = 0;
421 mp->m_ail.xa_gen++;
422 527
423 if (mlip == dlip) { 528 if (mlip == dlip) {
424 mlip = xfs_ail_min(&mp->m_ail); 529 mlip = xfs_ail_min(ailp);
425 spin_unlock(&mp->m_ail_lock); 530 spin_unlock(&ailp->xa_lock);
426 xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); 531 xfs_log_move_tail(ailp->xa_mount,
532 (mlip ? mlip->li_lsn : 0));
427 } else { 533 } else {
428 spin_unlock(&mp->m_ail_lock); 534 spin_unlock(&ailp->xa_lock);
429 } 535 }
430 } 536 }
431 else { 537 else {
@@ -433,13 +539,13 @@ xfs_trans_delete_ail(
433 * If the file system is not being shutdown, we are in 539 * If the file system is not being shutdown, we are in
434 * serious trouble if we get to this stage. 540 * serious trouble if we get to this stage.
435 */ 541 */
436 if (XFS_FORCED_SHUTDOWN(mp)) 542 struct xfs_mount *mp = ailp->xa_mount;
437 spin_unlock(&mp->m_ail_lock); 543
438 else { 544 spin_unlock(&ailp->xa_lock);
545 if (!XFS_FORCED_SHUTDOWN(mp)) {
439 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 546 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
440 "%s: attempting to delete a log item that is not in the AIL", 547 "%s: attempting to delete a log item that is not in the AIL",
441 __func__); 548 __func__);
442 spin_unlock(&mp->m_ail_lock);
443 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 549 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
444 } 550 }
445 } 551 }
@@ -448,56 +554,6 @@ xfs_trans_delete_ail(
448 554
449 555
450/* 556/*
451 * Return the item in the AIL with the smallest lsn.
452 * Return the current tree generation number for use
453 * in calls to xfs_trans_next_ail().
454 */
455xfs_log_item_t *
456xfs_trans_first_ail(
457 xfs_mount_t *mp,
458 int *gen)
459{
460 xfs_log_item_t *lip;
461
462 lip = xfs_ail_min(&mp->m_ail);
463 *gen = (int)mp->m_ail.xa_gen;
464
465 return lip;
466}
467
468/*
469 * If the generation count of the tree has not changed since the
470 * caller last took something from the AIL, then return the elmt
471 * in the tree which follows the one given. If the count has changed,
472 * then return the minimum elmt of the AIL and bump the restarts counter
473 * if one is given.
474 */
475xfs_log_item_t *
476xfs_trans_next_ail(
477 xfs_mount_t *mp,
478 xfs_log_item_t *lip,
479 int *gen,
480 int *restarts)
481{
482 xfs_log_item_t *nlip;
483
484 ASSERT(mp && lip && gen);
485 if (mp->m_ail.xa_gen == *gen) {
486 nlip = xfs_ail_next(&mp->m_ail, lip);
487 } else {
488 nlip = xfs_ail_min(&mp->m_ail);
489 *gen = (int)mp->m_ail.xa_gen;
490 if (restarts != NULL) {
491 XFS_STATS_INC(xs_push_ail_restarts);
492 (*restarts)++;
493 }
494 }
495
496 return (nlip);
497}
498
499
500/*
501 * The active item list (AIL) is a doubly linked list of log 557 * The active item list (AIL) is a doubly linked list of log
502 * items sorted by ascending lsn. The base of the list is 558 * items sorted by ascending lsn. The base of the list is
503 * a forw/back pointer pair embedded in the xfs mount structure. 559 * a forw/back pointer pair embedded in the xfs mount structure.
@@ -515,15 +571,35 @@ int
515xfs_trans_ail_init( 571xfs_trans_ail_init(
516 xfs_mount_t *mp) 572 xfs_mount_t *mp)
517{ 573{
518 INIT_LIST_HEAD(&mp->m_ail.xa_ail); 574 struct xfs_ail *ailp;
519 return xfsaild_start(mp); 575 int error;
576
577 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
578 if (!ailp)
579 return ENOMEM;
580
581 ailp->xa_mount = mp;
582 INIT_LIST_HEAD(&ailp->xa_ail);
583 spin_lock_init(&ailp->xa_lock);
584 error = xfsaild_start(ailp);
585 if (error)
586 goto out_free_ailp;
587 mp->m_ail = ailp;
588 return 0;
589
590out_free_ailp:
591 kmem_free(ailp);
592 return error;
520} 593}
521 594
522void 595void
523xfs_trans_ail_destroy( 596xfs_trans_ail_destroy(
524 xfs_mount_t *mp) 597 xfs_mount_t *mp)
525{ 598{
526 xfsaild_stop(mp); 599 struct xfs_ail *ailp = mp->m_ail;
600
601 xfsaild_stop(ailp);
602 kmem_free(ailp);
527} 603}
528 604
529/* 605/*
@@ -534,7 +610,7 @@ xfs_trans_ail_destroy(
534 */ 610 */
535STATIC void 611STATIC void
536xfs_ail_insert( 612xfs_ail_insert(
537 xfs_ail_t *ailp, 613 struct xfs_ail *ailp,
538 xfs_log_item_t *lip) 614 xfs_log_item_t *lip)
539/* ARGSUSED */ 615/* ARGSUSED */
540{ 616{
@@ -568,7 +644,7 @@ xfs_ail_insert(
568/*ARGSUSED*/ 644/*ARGSUSED*/
569STATIC xfs_log_item_t * 645STATIC xfs_log_item_t *
570xfs_ail_delete( 646xfs_ail_delete(
571 xfs_ail_t *ailp, 647 struct xfs_ail *ailp,
572 xfs_log_item_t *lip) 648 xfs_log_item_t *lip)
573/* ARGSUSED */ 649/* ARGSUSED */
574{ 650{
@@ -585,7 +661,7 @@ xfs_ail_delete(
585 */ 661 */
586STATIC xfs_log_item_t * 662STATIC xfs_log_item_t *
587xfs_ail_min( 663xfs_ail_min(
588 xfs_ail_t *ailp) 664 struct xfs_ail *ailp)
589/* ARGSUSED */ 665/* ARGSUSED */
590{ 666{
591 if (list_empty(&ailp->xa_ail)) 667 if (list_empty(&ailp->xa_ail))
@@ -601,7 +677,7 @@ xfs_ail_min(
601 */ 677 */
602STATIC xfs_log_item_t * 678STATIC xfs_log_item_t *
603xfs_ail_next( 679xfs_ail_next(
604 xfs_ail_t *ailp, 680 struct xfs_ail *ailp,
605 xfs_log_item_t *lip) 681 xfs_log_item_t *lip)
606/* ARGSUSED */ 682/* ARGSUSED */
607{ 683{
@@ -617,7 +693,7 @@ xfs_ail_next(
617 */ 693 */
618STATIC void 694STATIC void
619xfs_ail_check( 695xfs_ail_check(
620 xfs_ail_t *ailp, 696 struct xfs_ail *ailp,
621 xfs_log_item_t *lip) 697 xfs_log_item_t *lip)
622{ 698{
623 xfs_log_item_t *prev_lip; 699 xfs_log_item_t *prev_lip;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 4e855b5ced66..8ee2f8c8b0a6 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -527,9 +527,8 @@ xfs_trans_brelse(xfs_trans_t *tp,
527 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 527 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
528 if (lip->li_type == XFS_LI_BUF) { 528 if (lip->li_type == XFS_LI_BUF) {
529 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); 529 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
530 xfs_trans_unlocked_item( 530 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
531 bip->bli_item.li_mountp, 531 lip);
532 lip);
533 } 532 }
534 } 533 }
535 xfs_buf_relse(bp); 534 xfs_buf_relse(bp);
@@ -626,7 +625,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
626 * tell the AIL that the buffer is being unlocked. 625 * tell the AIL that the buffer is being unlocked.
627 */ 626 */
628 if (bip != NULL) { 627 if (bip != NULL) {
629 xfs_trans_unlocked_item(bip->bli_item.li_mountp, 628 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
630 (xfs_log_item_t*)bip); 629 (xfs_log_item_t*)bip);
631 } 630 }
632 631
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 2a1c0f071f91..23d276af2e0c 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -85,7 +85,6 @@ xfs_trans_iget(
85{ 85{
86 int error; 86 int error;
87 xfs_inode_t *ip; 87 xfs_inode_t *ip;
88 xfs_inode_log_item_t *iip;
89 88
90 /* 89 /*
91 * If the transaction pointer is NULL, just call the normal 90 * If the transaction pointer is NULL, just call the normal
@@ -138,34 +137,7 @@ xfs_trans_iget(
138 } 137 }
139 ASSERT(ip != NULL); 138 ASSERT(ip != NULL);
140 139
141 /* 140 xfs_trans_ijoin(tp, ip, lock_flags);
142 * Get a log_item_desc to point at the new item.
143 */
144 if (ip->i_itemp == NULL)
145 xfs_inode_item_init(ip, mp);
146 iip = ip->i_itemp;
147 (void) xfs_trans_add_item(tp, (xfs_log_item_t *)(iip));
148
149 xfs_trans_inode_broot_debug(ip);
150
151 /*
152 * If the IO lock has been acquired, mark that in
153 * the inode log item so we'll know to unlock it
154 * when the transaction commits.
155 */
156 ASSERT(iip->ili_flags == 0);
157 if (lock_flags & XFS_IOLOCK_EXCL) {
158 iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL;
159 } else if (lock_flags & XFS_IOLOCK_SHARED) {
160 iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED;
161 }
162
163 /*
164 * Initialize i_transp so we can find it with xfs_inode_incore()
165 * above.
166 */
167 ip->i_transp = tp;
168
169 *ipp = ip; 141 *ipp = ip;
170 return 0; 142 return 0;
171} 143}
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 3c666e8317f8..e110bf57d7f4 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -22,6 +22,14 @@
22#include "xfs_inum.h" 22#include "xfs_inum.h"
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_trans_priv.h" 24#include "xfs_trans_priv.h"
25/* XXX: from here down needed until struct xfs_trans has its own ailp */
26#include "xfs_bit.h"
27#include "xfs_buf_item.h"
28#include "xfs_sb.h"
29#include "xfs_ag.h"
30#include "xfs_dir2.h"
31#include "xfs_dmapi.h"
32#include "xfs_mount.h"
25 33
26STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *, 34STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *,
27 int, int, xfs_lsn_t); 35 int, int, xfs_lsn_t);
@@ -79,6 +87,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
79 lidp->lid_size = 0; 87 lidp->lid_size = 0;
80 lip->li_desc = lidp; 88 lip->li_desc = lidp;
81 lip->li_mountp = tp->t_mountp; 89 lip->li_mountp = tp->t_mountp;
90 lip->li_ailp = tp->t_mountp->m_ail;
82 return lidp; 91 return lidp;
83 } 92 }
84 93
@@ -120,6 +129,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip)
120 lidp->lid_size = 0; 129 lidp->lid_size = 0;
121 lip->li_desc = lidp; 130 lip->li_desc = lidp;
122 lip->li_mountp = tp->t_mountp; 131 lip->li_mountp = tp->t_mountp;
132 lip->li_ailp = tp->t_mountp->m_ail;
123 return lidp; 133 return lidp;
124} 134}
125 135
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 3c748c456ed4..73e2ad397432 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -44,25 +44,93 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
44 xfs_extlen_t idx); 44 xfs_extlen_t idx);
45 45
46/* 46/*
47 * From xfs_trans_ail.c 47 * AIL traversal cursor.
48 *
49 * Rather than using a generation number for detecting changes in the ail, use
50 * a cursor that is protected by the ail lock. The aild cursor exists in the
51 * struct xfs_ail, but other traversals can declare it on the stack and link it
52 * to the ail list.
53 *
54 * When an object is deleted from or moved in the AIL, the cursor list is
55 * searched to see if the object is a designated cursor item. If it is, it is
56 * deleted from the cursor so that the next time the cursor is used, traversal
57 * will return to the start.
58 *
59 * This means a traversal colliding with a removal will cause a restart of the
60 * list scan, rather than any insertion or deletion anywhere in the list. The
61 * low bit of the item pointer is set if the cursor has been invalidated so
62 * that we can tell the difference between invalidation and reaching the end
63 * of the list to trigger traversal restarts.
48 */ 64 */
49void xfs_trans_update_ail(struct xfs_mount *mp, 65struct xfs_ail_cursor {
50 struct xfs_log_item *lip, xfs_lsn_t lsn) 66 struct xfs_ail_cursor *next;
51 __releases(mp->m_ail_lock); 67 struct xfs_log_item *item;
52void xfs_trans_delete_ail(struct xfs_mount *mp, 68};
53 struct xfs_log_item *lip)
54 __releases(mp->m_ail_lock);
55struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *);
56struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *,
57 struct xfs_log_item *, int *, int *);
58 69
70/*
71 * Private AIL structures.
72 *
73 * Eventually we need to drive the locking in here as well.
74 */
75struct xfs_ail {
76 struct xfs_mount *xa_mount;
77 struct list_head xa_ail;
78 uint xa_gen;
79 struct task_struct *xa_task;
80 xfs_lsn_t xa_target;
81 struct xfs_ail_cursor xa_cursors;
82 spinlock_t xa_lock;
83};
59 84
60/* 85/*
61 * AIL push thread support 86 * From xfs_trans_ail.c
62 */ 87 */
63long xfsaild_push(struct xfs_mount *, xfs_lsn_t *); 88void xfs_trans_ail_update(struct xfs_ail *ailp,
64void xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t); 89 struct xfs_log_item *lip, xfs_lsn_t lsn)
65int xfsaild_start(struct xfs_mount *); 90 __releases(ailp->xa_lock);
66void xfsaild_stop(struct xfs_mount *); 91void xfs_trans_ail_delete(struct xfs_ail *ailp,
92 struct xfs_log_item *lip)
93 __releases(ailp->xa_lock);
94void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t);
95void xfs_trans_unlocked_item(struct xfs_ail *,
96 xfs_log_item_t *);
97
98xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp);
99
100struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
101 struct xfs_ail_cursor *cur,
102 xfs_lsn_t lsn);
103struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
104 struct xfs_ail_cursor *cur);
105void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
106 struct xfs_ail_cursor *cur);
107
108long xfsaild_push(struct xfs_ail *, xfs_lsn_t *);
109void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t);
110int xfsaild_start(struct xfs_ail *);
111void xfsaild_stop(struct xfs_ail *);
67 112
113#if BITS_PER_LONG != 64
114static inline void
115xfs_trans_ail_copy_lsn(
116 struct xfs_ail *ailp,
117 xfs_lsn_t *dst,
118 xfs_lsn_t *src)
119{
120 ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */
121 spin_lock(&ailp->xa_lock);
122 *dst = *src;
123 spin_unlock(&ailp->xa_lock);
124}
125#else
126static inline void
127xfs_trans_ail_copy_lsn(
128 struct xfs_ail *ailp,
129 xfs_lsn_t *dst,
130 xfs_lsn_t *src)
131{
132 ASSERT(sizeof(xfs_lsn_t) == 8);
133 *dst = *src;
134}
135#endif
68#endif /* __XFS_TRANS_PRIV_H__ */ 136#endif /* __XFS_TRANS_PRIV_H__ */
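
[Editor's note: one remark on xfs_trans_ail_copy_lsn() above. On 32-bit builds a 64-bit LSN copy is two loads, so an unlocked reader could observe a torn value while the aild updates it; the lock exists only to close that window. A hedged usage sketch, in which the caller and the shared LSN are hypothetical:]

	static xfs_lsn_t
	ail_lsn_snapshot(struct xfs_ail *ailp, xfs_lsn_t *shared_lsn)
	{
		xfs_lsn_t	snapshot;

		/* takes xa_lock on 32-bit only; a plain copy on 64-bit */
		xfs_trans_ail_copy_lsn(ailp, &snapshot, shared_lsn);
		return snapshot;
	}
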
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 35d4d414bcc2..fcc2285d03ed 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -172,6 +172,12 @@ xfs_dir_ialloc(
172 *ipp = NULL; 172 *ipp = NULL;
173 return code; 173 return code;
174 } 174 }
175
176 /*
177 * transaction commit worked ok so we can drop the extra ticket
178 * reference that we gained in xfs_trans_dup()
179 */
180 xfs_log_ticket_put(tp->t_ticket);
175 code = xfs_trans_reserve(tp, 0, log_res, 0, 181 code = xfs_trans_reserve(tp, 0, log_res, 0,
176 XFS_TRANS_PERM_LOG_RES, log_count); 182 XFS_TRANS_PERM_LOG_RES, log_count);
177 /* 183 /*
@@ -268,9 +274,9 @@ xfs_bump_ino_vers2(
268 xfs_mount_t *mp; 274 xfs_mount_t *mp;
269 275
270 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 276 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
271 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); 277 ASSERT(ip->i_d.di_version == 1);
272 278
273 ip->i_d.di_version = XFS_DINODE_VERSION_2; 279 ip->i_d.di_version = 2;
274 ip->i_d.di_onlink = 0; 280 ip->i_d.di_onlink = 0;
275 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); 281 memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
276 mp = tp->t_mountp; 282 mp = tp->t_mountp;
@@ -302,7 +308,7 @@ xfs_bumplink(
302 ASSERT(ip->i_d.di_nlink > 0); 308 ASSERT(ip->i_d.di_nlink > 0);
303 ip->i_d.di_nlink++; 309 ip->i_d.di_nlink++;
304 inc_nlink(VFS_I(ip)); 310 inc_nlink(VFS_I(ip));
305 if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && 311 if ((ip->i_d.di_version == 1) &&
306 (ip->i_d.di_nlink > XFS_MAXLINK_1)) { 312 (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
307 /* 313 /*
308 * The inode has increased its number of links beyond 314 * The inode has increased its number of links beyond
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
deleted file mode 100644
index 439dd3939dda..000000000000
--- a/fs/xfs/xfs_vfsops.c
+++ /dev/null
@@ -1,757 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_bit.h"
22#include "xfs_log.h"
23#include "xfs_inum.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h"
30#include "xfs_da_btree.h"
31#include "xfs_bmap_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_alloc_btree.h"
34#include "xfs_dir2_sf.h"
35#include "xfs_attr_sf.h"
36#include "xfs_dinode.h"
37#include "xfs_inode.h"
38#include "xfs_inode_item.h"
39#include "xfs_btree.h"
40#include "xfs_alloc.h"
41#include "xfs_ialloc.h"
42#include "xfs_quota.h"
43#include "xfs_error.h"
44#include "xfs_bmap.h"
45#include "xfs_rw.h"
46#include "xfs_buf_item.h"
47#include "xfs_log_priv.h"
48#include "xfs_dir2_trace.h"
49#include "xfs_extfree_item.h"
50#include "xfs_acl.h"
51#include "xfs_attr.h"
52#include "xfs_clnt.h"
53#include "xfs_mru_cache.h"
54#include "xfs_filestream.h"
55#include "xfs_fsops.h"
56#include "xfs_vnodeops.h"
57#include "xfs_vfsops.h"
58#include "xfs_utils.h"
59
60
61STATIC void
62xfs_quiesce_fs(
63 xfs_mount_t *mp)
64{
65 int count = 0, pincount;
66
67 xfs_flush_buftarg(mp->m_ddev_targp, 0);
68 xfs_finish_reclaim_all(mp, 0);
69
70 /* This loop must run at least twice.
71 * The first pass of the loop will flush
72 * most metadata, but that will generate more
73 * metadata (typically directory updates),
74 * which must then be flushed and logged before
75 * we can write the unmount record.
76 */
77 do {
78 xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);
79 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
80 if (!pincount) {
81 delay(50);
82 count++;
83 }
84 } while (count < 2);
85}
86
87/*
88 * Second stage of a quiesce. The data is already synced; now we have to take
89 * care of the metadata. New transactions are already blocked, so we need to
90 * wait for any remaining transactions to drain out before proceeding.
91 */
92void
93xfs_attr_quiesce(
94 xfs_mount_t *mp)
95{
96 int error = 0;
97
98 /* wait for all modifications to complete */
99 while (atomic_read(&mp->m_active_trans) > 0)
100 delay(100);
101
102 /* flush inodes and push all remaining buffers out to disk */
103 xfs_quiesce_fs(mp);
104
105 ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
106
107 /* Push the superblock and write an unmount record */
108 error = xfs_log_sbcount(mp, 1);
109 if (error)
110 xfs_fs_cmn_err(CE_WARN, mp,
111 "xfs_attr_quiesce: failed to log sb changes. "
112 "Frozen image may not be consistent.");
113 xfs_log_unmount_write(mp);
114 xfs_unmountfs_writesb(mp);
115}
116
117/*
118 * xfs_unmount_flush implements a set of flush operations on special
119 * inodes, which are needed as a separate set of operations so that
120 * they can be called as part of the relocation process.
121 */
122int
123xfs_unmount_flush(
124 xfs_mount_t *mp, /* Mount structure we are getting
125 rid of. */
126 int relocation) /* Called from vfs relocation. */
127{
128 xfs_inode_t *rip = mp->m_rootip;
129 xfs_inode_t *rbmip;
130 xfs_inode_t *rsumip = NULL;
131 int error;
132
133 xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
134 xfs_iflock(rip);
135
136 /*
137 * Flush out the real time inodes.
138 */
139 if ((rbmip = mp->m_rbmip) != NULL) {
140 xfs_ilock(rbmip, XFS_ILOCK_EXCL);
141 xfs_iflock(rbmip);
142 error = xfs_iflush(rbmip, XFS_IFLUSH_SYNC);
143 xfs_iunlock(rbmip, XFS_ILOCK_EXCL);
144
145 if (error == EFSCORRUPTED)
146 goto fscorrupt_out;
147
148 ASSERT(vn_count(VFS_I(rbmip)) == 1);
149
150 rsumip = mp->m_rsumip;
151 xfs_ilock(rsumip, XFS_ILOCK_EXCL);
152 xfs_iflock(rsumip);
153 error = xfs_iflush(rsumip, XFS_IFLUSH_SYNC);
154 xfs_iunlock(rsumip, XFS_ILOCK_EXCL);
155
156 if (error == EFSCORRUPTED)
157 goto fscorrupt_out;
158
159 ASSERT(vn_count(VFS_I(rsumip)) == 1);
160 }
161
162 /*
163 * Synchronously flush root inode to disk
164 */
165 error = xfs_iflush(rip, XFS_IFLUSH_SYNC);
166 if (error == EFSCORRUPTED)
167 goto fscorrupt_out2;
168
169 if (vn_count(VFS_I(rip)) != 1 && !relocation) {
170 xfs_iunlock(rip, XFS_ILOCK_EXCL);
171 return XFS_ERROR(EBUSY);
172 }
173
174 /*
175 * Release dquot that rootinode, rbmino and rsumino might be holding,
176 * flush and purge the quota inodes.
177 */
178 error = XFS_QM_UNMOUNT(mp);
179 if (error == EFSCORRUPTED)
180 goto fscorrupt_out2;
181
182 if (rbmip) {
183 IRELE(rbmip);
184 IRELE(rsumip);
185 }
186
187 xfs_iunlock(rip, XFS_ILOCK_EXCL);
188 return 0;
189
190fscorrupt_out:
191 xfs_ifunlock(rip);
192
193fscorrupt_out2:
194 xfs_iunlock(rip, XFS_ILOCK_EXCL);
195
196 return XFS_ERROR(EFSCORRUPTED);
197}
198
199/*
200 * xfs_sync flushes any pending I/O to file system vfsp.
201 *
202 * This routine is called by vfs_sync() to make sure that things make it
203 * out to disk eventually, on sync() system calls to flush out everything,
204 * and when the file system is unmounted. For the vfs_sync() case, all
205 * we really need to do is sync out the log to make all of our meta-data
206 * updates permanent (except for timestamps). For calls from pflushd(),
207 * dirty pages are kept moving by calling pdflush() on the inodes
208 * containing them. We also flush the inodes that we can lock without
209 * sleeping and the superblock if we can lock it without sleeping from
210 * vfs_sync() so that items at the tail of the log are always moving out.
211 *
212 * Flags:
213 * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want
214 * to sleep if we can help it. All we really need
215 * to do is ensure that the log is synced at least
216 * periodically. We also push the inodes and
217 * superblock if we can lock them without sleeping
218 * and they are not pinned.
219 * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not
220 * set, then we really want to lock each inode and flush
221 * it.
222 * SYNC_WAIT - All the flushes that take place in this call should
223 * be synchronous.
224 * SYNC_DELWRI - This tells us to push dirty pages associated with
225 * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to
226 * determine if they should be flushed sync, async, or
227 * delwri.
228 * SYNC_CLOSE - This flag is passed when the system is being
229 * unmounted. We should sync and invalidate everything.
230 * SYNC_FSDATA - This indicates that the caller would like to make
231 * sure the superblock is safe on disk. We can ensure
232 * this by simply making sure the log gets flushed
233 * if SYNC_BDFLUSH is set, and by actually writing it
234 * out otherwise.
235 * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete
236 * before we return (including direct I/O). Forms the drain
237 * side of the write barrier needed to safely quiesce the
238 * filesystem.
239 *
240 */
241int
242xfs_sync(
243 xfs_mount_t *mp,
244 int flags)
245{
246 int error;
247
248 /*
249 * Get the Quota Manager to flush the dquots.
250 *
251 * If XFS quota support is not enabled or this filesystem
252 * instance does not use quotas XFS_QM_DQSYNC will always
253 * return zero.
254 */
255 error = XFS_QM_DQSYNC(mp, flags);
256 if (error) {
257 /*
258 * If we got an IO error, we will be shutting down.
259 * So, there's nothing more for us to do here.
260 */
261 ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp));
262 if (XFS_FORCED_SHUTDOWN(mp))
263 return XFS_ERROR(error);
264 }
265
266 if (flags & SYNC_IOWAIT)
267 xfs_filestream_flush(mp);
268
269 return xfs_syncsub(mp, flags, NULL);
270}
271
272/*
273 * xfs sync routine for internal use
274 *
275 * This routine supports all of the flags defined for the generic vfs_sync
276 * interface as explained above under xfs_sync.
277 *
278 */
279int
280xfs_sync_inodes(
281 xfs_mount_t *mp,
282 int flags,
283 int *bypassed)
284{
285 xfs_inode_t *ip = NULL;
286 struct inode *vp = NULL;
287 int error;
288 int last_error;
289 uint64_t fflag;
290 uint lock_flags;
291 uint base_lock_flags;
292 boolean_t mount_locked;
293 boolean_t vnode_refed;
294 int preempt;
295 xfs_iptr_t *ipointer;
296#ifdef DEBUG
297 boolean_t ipointer_in = B_FALSE;
298
299#define IPOINTER_SET ipointer_in = B_TRUE
300#define IPOINTER_CLR ipointer_in = B_FALSE
301#else
302#define IPOINTER_SET
303#define IPOINTER_CLR
304#endif
305
306
307/* Insert a marker record into the inode list after inode ip. The list
308 * must be locked when this is called. After the call the list will no
309 * longer be locked.
310 */
311#define IPOINTER_INSERT(ip, mp) { \
312 ASSERT(ipointer_in == B_FALSE); \
313 ipointer->ip_mnext = ip->i_mnext; \
314 ipointer->ip_mprev = ip; \
315 ip->i_mnext = (xfs_inode_t *)ipointer; \
316 ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
317 preempt = 0; \
318 XFS_MOUNT_IUNLOCK(mp); \
319 mount_locked = B_FALSE; \
320 IPOINTER_SET; \
321 }
322
323/* Remove the marker from the inode list. If the marker was the only item
324 * in the list then there are no remaining inodes and we should zero out
325 * the whole list. If we are the current head of the list then move the head
326 * past us.
327 */
328#define IPOINTER_REMOVE(ip, mp) { \
329 ASSERT(ipointer_in == B_TRUE); \
330 if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
331 ip = ipointer->ip_mnext; \
332 ip->i_mprev = ipointer->ip_mprev; \
333 ipointer->ip_mprev->i_mnext = ip; \
334 if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
335 mp->m_inodes = ip; \
336 } \
337 } else { \
338 ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
339 mp->m_inodes = NULL; \
340 ip = NULL; \
341 } \
342 IPOINTER_CLR; \
343 }
344
345#define XFS_PREEMPT_MASK 0x7f
346
347 ASSERT(!(flags & SYNC_BDFLUSH));
348
349 if (bypassed)
350 *bypassed = 0;
351 if (mp->m_flags & XFS_MOUNT_RDONLY)
352 return 0;
353 error = 0;
354 last_error = 0;
355 preempt = 0;
356
357 /* Allocate a reference marker */
358 ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
359
360 fflag = XFS_B_ASYNC; /* default is don't wait */
361 if (flags & SYNC_DELWRI)
362 fflag = XFS_B_DELWRI;
363 if (flags & SYNC_WAIT)
364 fflag = 0; /* synchronous overrides all */
365
366 base_lock_flags = XFS_ILOCK_SHARED;
367 if (flags & (SYNC_DELWRI | SYNC_CLOSE)) {
368 /*
369 * We need the I/O lock if we're going to call any of
370 * the flush/inval routines.
371 */
372 base_lock_flags |= XFS_IOLOCK_SHARED;
373 }
374
375 XFS_MOUNT_ILOCK(mp);
376
377 ip = mp->m_inodes;
378
379 mount_locked = B_TRUE;
380 vnode_refed = B_FALSE;
381
382 IPOINTER_CLR;
383
384 do {
385 ASSERT(ipointer_in == B_FALSE);
386 ASSERT(vnode_refed == B_FALSE);
387
388 lock_flags = base_lock_flags;
389
390 /*
391 * There were no inodes in the list, just break out
392 * of the loop.
393 */
394 if (ip == NULL) {
395 break;
396 }
397
398 /*
399 * We found another sync thread marker - skip it
400 */
401 if (ip->i_mount == NULL) {
402 ip = ip->i_mnext;
403 continue;
404 }
405
406 vp = VFS_I(ip);
407
408 /*
409 * If the vnode is gone then this is being torn down,
410 * call reclaim if it is flushed, else let regular flush
411 * code deal with it later in the loop.
412 */
413
414 if (vp == NULL) {
415 /* Skip ones already in reclaim */
416 if (ip->i_flags & XFS_IRECLAIM) {
417 ip = ip->i_mnext;
418 continue;
419 }
420 if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
421 ip = ip->i_mnext;
422 } else if ((xfs_ipincount(ip) == 0) &&
423 xfs_iflock_nowait(ip)) {
424 IPOINTER_INSERT(ip, mp);
425
426 xfs_finish_reclaim(ip, 1,
427 XFS_IFLUSH_DELWRI_ELSE_ASYNC);
428
429 XFS_MOUNT_ILOCK(mp);
430 mount_locked = B_TRUE;
431 IPOINTER_REMOVE(ip, mp);
432 } else {
433 xfs_iunlock(ip, XFS_ILOCK_EXCL);
434 ip = ip->i_mnext;
435 }
436 continue;
437 }
438
439 if (VN_BAD(vp)) {
440 ip = ip->i_mnext;
441 continue;
442 }
443
444 if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
445 XFS_MOUNT_IUNLOCK(mp);
446 kmem_free(ipointer);
447 return 0;
448 }
449
450 /*
451 * Try to lock without sleeping.  We're out of order with
452 * the inode list lock here, so if we fail we need to drop
453 * the mount lock and try again.  (The SYNC_BDFLUSH case
454 * was asserted away above, so we never get here from
455 * bdflush().)
456 *
457 * The inode lock here actually coordinates with the almost
458 * spurious inode lock in xfs_ireclaim() to prevent the
459 * vnode, which we handle here without a reference, from
460 * being freed while we use it.  If we lock the inode while
461 * it's on the mount list, then the spurious inode lock in
462 * xfs_ireclaim() after the inode is pulled from the mount
463 * list will sleep until we release it here, keeping the
464 * vnode alive for as long as we reference it.
465 */
466 if (xfs_ilock_nowait(ip, lock_flags) == 0) {
467 if (vp == NULL) {
468 ip = ip->i_mnext;
469 continue;
470 }
471
472 vp = vn_grab(vp);
473 if (vp == NULL) {
474 ip = ip->i_mnext;
475 continue;
476 }
477
478 IPOINTER_INSERT(ip, mp);
479 xfs_ilock(ip, lock_flags);
480
481 ASSERT(vp == VFS_I(ip));
482 ASSERT(ip->i_mount == mp);
483
484 vnode_refed = B_TRUE;
485 }
486
487 /* From here on in the loop we may have a marker record
488 * in the inode list.
489 */
490
491 /*
492 * If we have to flush data or wait for I/O completion
493 * we need to drop the ilock that we currently hold.
494 * If we need to drop the lock, insert a marker if we
495 * have not already done so.
496 */
497 if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
498 ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
499 if (mount_locked) {
500 IPOINTER_INSERT(ip, mp);
501 }
502 xfs_iunlock(ip, XFS_ILOCK_SHARED);
503
504 if (flags & SYNC_CLOSE) {
505 /* Shutdown case. Flush and invalidate. */
506 if (XFS_FORCED_SHUTDOWN(mp))
507 xfs_tosspages(ip, 0, -1,
508 FI_REMAPF);
509 else
510 error = xfs_flushinval_pages(ip,
511 0, -1, FI_REMAPF);
512 } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
513 error = xfs_flush_pages(ip, 0,
514 -1, fflag, FI_NONE);
515 }
516
517 /*
518 * When freezing, we need to wait for all I/O (including
519 * direct I/O) to complete so that no further data
520 * modification can take place after this point.
521 */
522 if (flags & SYNC_IOWAIT)
523 vn_iowait(ip);
524
525 xfs_ilock(ip, XFS_ILOCK_SHARED);
526 }
527
528 if ((flags & SYNC_ATTR) &&
529 (ip->i_update_core ||
530 (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) {
531 if (mount_locked)
532 IPOINTER_INSERT(ip, mp);
533
534 if (flags & SYNC_WAIT) {
535 xfs_iflock(ip);
536 error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
537
538 /*
539 * If we can't acquire the flush lock, then the inode
540 * is already being flushed so don't bother waiting.
541 *
542 * If we can lock it then do a delwri flush so we can
543 * combine multiple inode flushes in each disk write.
544 */
545 } else if (xfs_iflock_nowait(ip)) {
546 error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
547 } else if (bypassed) {
548 (*bypassed)++;
549 }
550 }
551
552 if (lock_flags != 0) {
553 xfs_iunlock(ip, lock_flags);
554 }
555
556 if (vnode_refed) {
557 /*
558 * If we had to take a reference on the vnode
559 * above, then wait until after we've unlocked
560 * the inode to release the reference. This is
561 * because we can be already holding the inode
562 * lock when IRELE() calls xfs_inactive().
563 *
564 * Make sure to drop the mount lock before calling
565 * IRELE() so that we don't trip over ourselves if
566 * we have to go for the mount lock again in the
567 * inactive code.
568 */
569 if (mount_locked) {
570 IPOINTER_INSERT(ip, mp);
571 }
572
573 IRELE(ip);
574
575 vnode_refed = B_FALSE;
576 }
577
578 if (error) {
579 last_error = error;
580 }
581
582 /*
583 * bail out if the filesystem is corrupted.
584 */
585 if (error == EFSCORRUPTED) {
586 if (!mount_locked) {
587 XFS_MOUNT_ILOCK(mp);
588 IPOINTER_REMOVE(ip, mp);
589 }
590 XFS_MOUNT_IUNLOCK(mp);
591 ASSERT(ipointer_in == B_FALSE);
592 kmem_free(ipointer);
593 return XFS_ERROR(error);
594 }
595
596 /* Let other threads have a chance at the mount lock
597 * if we have looped many times without dropping the
598 * lock.
599 */
600 if ((++preempt & XFS_PREEMPT_MASK) == 0) {
601 if (mount_locked) {
602 IPOINTER_INSERT(ip, mp);
603 }
604 }
605
606 if (mount_locked == B_FALSE) {
607 XFS_MOUNT_ILOCK(mp);
608 mount_locked = B_TRUE;
609 IPOINTER_REMOVE(ip, mp);
610 continue;
611 }
612
613 ASSERT(ipointer_in == B_FALSE);
614 ip = ip->i_mnext;
615
616 } while (ip != mp->m_inodes);
617
618 XFS_MOUNT_IUNLOCK(mp);
619
620 ASSERT(ipointer_in == B_FALSE);
621
622 kmem_free(ipointer);
623 return XFS_ERROR(last_error);
624}
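A note on the preemption logic near the bottom of the loop above: XFS_PREEMPT_MASK is 0x7f, so the (++preempt & XFS_PREEMPT_MASK) == 0 test fires once every 128 inodes visited, at which point the walker parks its marker and cedes the mount lock to contending threads. A tiny self-contained check of that cadence (illustrative, not XFS code):

#include <assert.h>

int main(void)
{
	int preempt = 0, drops = 0, i;

	for (i = 0; i < 1024; i++)
		if ((++preempt & 0x7f) == 0)
			drops++;		/* the lock would be ceded here */
	assert(drops == 1024 / 128);		/* 8 cessions over 1024 visits */
	return 0;
}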
625
626/*
627 * xfs_syncsub -- the main sync worker routine, for internal use.
628 *
629 * This routine supports all of the flags defined for the generic
630 * vfs_sync interface as explained above under xfs_sync; it forces the
631 * log, syncs the inodes and writes back the superblock as requested.
632 */
633int
634xfs_syncsub(
635 xfs_mount_t *mp,
636 int flags,
637 int *bypassed)
638{
639 int error = 0;
640 int last_error = 0;
641 uint log_flags = XFS_LOG_FORCE;
642 xfs_buf_t *bp;
643 xfs_buf_log_item_t *bip;
644
645 /*
646 * Sync out the log. This ensures that the log is periodically
647 * flushed even if there is not enough activity to fill it up.
648 */
649 if (flags & SYNC_WAIT)
650 log_flags |= XFS_LOG_SYNC;
651
652 xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
653
654 if (flags & (SYNC_ATTR|SYNC_DELWRI)) {
655 if (flags & SYNC_BDFLUSH)
656 xfs_finish_reclaim_all(mp, 1);
657 else
658 error = xfs_sync_inodes(mp, flags, bypassed);
659 }
660
661 /*
662 * Flushing out dirty data above probably generated more
663 * log activity, so if this isn't vfs_sync() then flush
664 * the log again.
665 */
666 if (flags & SYNC_DELWRI) {
667 xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
668 }
669
670 if (flags & SYNC_FSDATA) {
671 /*
672 * If this is vfs_sync() then only sync the superblock
673 * if we can lock it without sleeping and it is not pinned.
674 */
675 if (flags & SYNC_BDFLUSH) {
676 bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
677 if (bp != NULL) {
678 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
679 if ((bip != NULL) &&
680 xfs_buf_item_dirty(bip)) {
681 if (!(XFS_BUF_ISPINNED(bp))) {
682 XFS_BUF_ASYNC(bp);
683 error = xfs_bwrite(mp, bp);
684 } else {
685 xfs_buf_relse(bp);
686 }
687 } else {
688 xfs_buf_relse(bp);
689 }
690 }
691 } else {
692 bp = xfs_getsb(mp, 0);
693 /*
694 * If the buffer is pinned then push on the log so
695 * we won't get stuck waiting in the write for
696 * someone, maybe ourselves, to flush the log.
697 * Even though we just pushed the log above, we
698 * did not have the superblock buffer locked at
699 * that point so it can become pinned in between
700 * there and here.
701 */
702 if (XFS_BUF_ISPINNED(bp))
703 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
704 if (flags & SYNC_WAIT)
705 XFS_BUF_UNASYNC(bp);
706 else
707 XFS_BUF_ASYNC(bp);
708 error = xfs_bwrite(mp, bp);
709 }
710 if (error) {
711 last_error = error;
712 }
713 }
714
715 /*
716 * Now check to see if the log needs a "dummy" transaction.
717 */
718 if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) {
719 xfs_trans_t *tp;
720 xfs_inode_t *ip;
721
722 /*
723 * Put a dummy transaction in the log to tell
724 * recovery that all others are OK.
725 */
726 tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
727 if ((error = xfs_trans_reserve(tp, 0,
728 XFS_ICHANGE_LOG_RES(mp),
729 0, 0, 0))) {
730 xfs_trans_cancel(tp, 0);
731 return error;
732 }
733
734 ip = mp->m_rootip;
735 xfs_ilock(ip, XFS_ILOCK_EXCL);
736
737 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
738 xfs_trans_ihold(tp, ip);
739 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
740 error = xfs_trans_commit(tp, 0);
741 xfs_iunlock(ip, XFS_ILOCK_EXCL);
742 xfs_log_force(mp, (xfs_lsn_t)0, log_flags);
743 }
744
745 /*
746 * When shutting down, we need to ensure that the AIL is pushed
747 * to disk or the filesystem can appear corrupt from the PROM.
748 */
749 if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) {
750 XFS_bflush(mp->m_ddev_targp);
751 if (mp->m_rtdev_targp) {
752 XFS_bflush(mp->m_rtdev_targp);
753 }
754 }
755
756 return XFS_ERROR(last_error);
757}
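For orientation, here is a hedged sketch of how a caller might drive the routines above through the xfs_sync() entry point declared in xfs_vfsops.h (whose removal follows). The flag combinations are assumptions chosen to exercise the paths shown, not the kernel's actual call sites:

/* Illustrative only: assumed flag combinations, not real call sites. */
static int example_background_sync(struct xfs_mount *mp)
{
	/* opportunistic writeback: fflag stays XFS_B_DELWRI, no waiting */
	return xfs_sync(mp, SYNC_DELWRI | SYNC_ATTR);
}

static int example_data_integrity_sync(struct xfs_mount *mp)
{
	/*
	 * SYNC_WAIT overrides the async default in xfs_sync_inodes(),
	 * and SYNC_IOWAIT additionally drains direct I/O via vn_iowait().
	 */
	return xfs_sync(mp, SYNC_DELWRI | SYNC_ATTR | SYNC_WAIT | SYNC_IOWAIT);
}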
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
deleted file mode 100644
index a74b05087da4..000000000000
--- a/fs/xfs/xfs_vfsops.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _XFS_VFSOPS_H
-#define _XFS_VFSOPS_H 1
-
-struct cred;
-struct xfs_fid;
-struct inode;
-struct kstatfs;
-struct xfs_mount;
-struct xfs_mount_args;
-
-int xfs_sync(struct xfs_mount *mp, int flags);
-void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
-		int lnnum);
-void xfs_attr_quiesce(struct xfs_mount *mp);
-
-#endif /* _XFS_VFSOPS_H */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 8b6812f66a15..f07bf8768c3a 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -54,33 +54,10 @@
 #include "xfs_vnodeops.h"
 
 int
-xfs_open(
-	xfs_inode_t	*ip)
-{
-	int		mode;
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return XFS_ERROR(EIO);
-
-	/*
-	 * If it's a directory with any blocks, read-ahead block 0
-	 * as we're almost certain to have the next operation be a read there.
-	 */
-	if (S_ISDIR(ip->i_d.di_mode) && ip->i_d.di_nextents > 0) {
-		mode = xfs_ilock_map_shared(ip);
-		if (ip->i_d.di_nextents > 0)
-			(void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
-		xfs_iunlock(ip, mode);
-	}
-	return 0;
-}
-
-int
 xfs_setattr(
 	struct xfs_inode	*ip,
 	struct iattr		*iattr,
-	int			flags,
-	cred_t			*credp)
+	int			flags)
 {
 	xfs_mount_t		*mp = ip->i_mount;
 	struct inode		*inode = VFS_I(ip);
@@ -93,7 +70,6 @@ xfs_setattr(
 	gid_t			gid=0, igid=0;
 	int			timeflags = 0;
 	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
-	int			file_owner;
 	int			need_iolock = 1;
 
 	xfs_itrace_entry(ip);
@@ -104,6 +80,10 @@ xfs_setattr(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
+	code = -inode_change_ok(inode, iattr);
+	if (code)
+		return code;
+
 	olddquot1 = olddquot2 = NULL;
 	udqp = gdqp = NULL;
 
@@ -181,62 +161,8 @@ xfs_setattr(
 
 	xfs_ilock(ip, lock_flags);
 
-	/* boolean: are we the file owner? */
-	file_owner = (current_fsuid() == ip->i_d.di_uid);
-
-	/*
-	 * Change various properties of a file.
-	 * Only the owner or users with CAP_FOWNER
-	 * capability may do these things.
-	 */
-	if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
-		/*
-		 * CAP_FOWNER overrides the following restrictions:
-		 *
-		 * The user ID of the calling process must be equal
-		 * to the file owner ID, except in cases where the
-		 * CAP_FSETID capability is applicable.
-		 */
-		if (!file_owner && !capable(CAP_FOWNER)) {
-			code = XFS_ERROR(EPERM);
-			goto error_return;
-		}
-
-		/*
-		 * CAP_FSETID overrides the following restrictions:
-		 *
-		 * The effective user ID of the calling process shall match
-		 * the file owner when setting the set-user-ID and
-		 * set-group-ID bits on that file.
-		 *
-		 * The effective group ID or one of the supplementary group
-		 * IDs of the calling process shall match the group owner of
-		 * the file when setting the set-group-ID bit on that file
-		 */
-		if (mask & ATTR_MODE) {
-			mode_t m = 0;
-
-			if ((iattr->ia_mode & S_ISUID) && !file_owner)
-				m |= S_ISUID;
-			if ((iattr->ia_mode & S_ISGID) &&
-			    !in_group_p((gid_t)ip->i_d.di_gid))
-				m |= S_ISGID;
-#if 0
-			/* Linux allows this, Irix doesn't. */
-			if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
-				m |= S_ISVTX;
-#endif
-			if (m && !capable(CAP_FSETID))
-				iattr->ia_mode &= ~m;
-		}
-	}
-
 	/*
 	 * Change file ownership.  Must be the owner or privileged.
-	 * If the system was configured with the "restricted_chown"
-	 * option, the owner is not permitted to give away the file,
-	 * and can change the group id only to a group of which he
-	 * or she is a member.
 	 */
 	if (mask & (ATTR_UID|ATTR_GID)) {
 		/*
@@ -251,23 +177,6 @@ xfs_setattr(
 		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
 
 		/*
-		 * CAP_CHOWN overrides the following restrictions:
-		 *
-		 * If _POSIX_CHOWN_RESTRICTED is defined, this capability
-		 * shall override the restriction that a process cannot
-		 * change the user ID of a file it owns and the restriction
-		 * that the group ID supplied to the chown() function
-		 * shall be equal to either the group ID or one of the
-		 * supplementary group IDs of the calling process.
-		 */
-		if (restricted_chown &&
-		    (iuid != uid || (igid != gid &&
-				     !in_group_p((gid_t)gid))) &&
-		    !capable(CAP_CHOWN)) {
-			code = XFS_ERROR(EPERM);
-			goto error_return;
-		}
-		/*
 		 * Do a quota reservation only if uid/gid is actually
 		 * going to change.
 		 */
@@ -304,36 +213,22 @@ xfs_setattr(
 			code = XFS_ERROR(EINVAL);
 			goto error_return;
 		}
+
 		/*
 		 * Make sure that the dquots are attached to the inode.
 		 */
-		if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
+		code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+		if (code)
 			goto error_return;
-	}
-
-	/*
-	 * Change file access or modified times.
-	 */
-	if (mask & (ATTR_ATIME|ATTR_MTIME)) {
-		if (!file_owner) {
-			if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
-			    !capable(CAP_FOWNER)) {
-				code = XFS_ERROR(EPERM);
-				goto error_return;
-			}
-		}
-	}
 
 	/*
 	 * Now we can make the changes.  Before we join the inode
 	 * to the transaction, if ATTR_SIZE is set then take care of
 	 * the part of the truncation that must be done without the
 	 * inode lock.  This needs to be done before joining the inode
 	 * to the transaction, because the inode cannot be unlocked
 	 * once it is a part of the transaction.
 	 */
-	if (mask & ATTR_SIZE) {
-		code = 0;
 		if (iattr->ia_size > ip->i_size) {
 			/*
 			 * Do the first part of growing a file: zero any data
@@ -366,7 +261,7 @@ xfs_setattr(
 		}
 
 		/* wait for all I/O to complete */
-		vn_iowait(ip);
+		xfs_ioend_wait(ip);
 
 		if (!code)
 			code = xfs_itruncate_data(ip, iattr->ia_size);
@@ -388,17 +283,10 @@ xfs_setattr(
 		}
 		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-	}
 
-	if (tp) {
 		xfs_trans_ijoin(tp, ip, lock_flags);
 		xfs_trans_ihold(tp, ip);
-	}
 
-	/*
-	 * Truncate file.  Must have write permission and not be a directory.
-	 */
-	if (mask & ATTR_SIZE) {
 		/*
 		 * Only change the c/mtime if we are changing the size
 		 * or we are explicitly asked to change it.  This handles
@@ -438,28 +326,13 @@ xfs_setattr(
 		 */
 		xfs_iflags_set(ip, XFS_ITRUNCATED);
 	}
-	}
-
-	/*
-	 * Change file access modes.
-	 */
-	if (mask & ATTR_MODE) {
-		ip->i_d.di_mode &= S_IFMT;
-		ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
-
-		inode->i_mode &= S_IFMT;
-		inode->i_mode |= iattr->ia_mode & ~S_IFMT;
-
-		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
-		timeflags |= XFS_ICHGTIME_CHG;
-	}
+	} else if (tp) {
+		xfs_trans_ijoin(tp, ip, lock_flags);
+		xfs_trans_ihold(tp, ip);
+	}
 
 	/*
 	 * Change file ownership.  Must be the owner or privileged.
-	 * If the system was configured with the "restricted_chown"
-	 * option, the owner is not permitted to give away the file,
-	 * and can change the group id only to a group of which he
-	 * or she is a member.
 	 */
 	if (mask & (ATTR_UID|ATTR_GID)) {
 		/*
@@ -503,6 +376,24 @@ xfs_setattr(
 		timeflags |= XFS_ICHGTIME_CHG;
 	}
 
+	/*
+	 * Change file access modes.
+	 */
+	if (mask & ATTR_MODE) {
+		umode_t mode = iattr->ia_mode;
+
+		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+			mode &= ~S_ISGID;
+
+		ip->i_d.di_mode &= S_IFMT;
+		ip->i_d.di_mode |= mode & ~S_IFMT;
+
+		inode->i_mode &= S_IFMT;
+		inode->i_mode |= mode & ~S_IFMT;
+
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+		timeflags |= XFS_ICHGTIME_CHG;
+	}
 
 	/*
 	 * Change file access or modified times.
@@ -713,7 +604,7 @@ xfs_fsync(
 		return XFS_ERROR(EIO);
 
 	/* capture size updates in I/O completion before writing the inode. */
-	error = filemap_fdatawait(VFS_I(ip)->i_mapping);
+	error = xfs_wait_on_pages(ip, 0, -1);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -1029,6 +920,12 @@ xfs_inactive_symlink_rmt(
 		goto error0;
 	}
 	/*
+	 * transaction commit worked ok so we can drop the extra ticket
+	 * reference that we gained in xfs_trans_dup()
+	 */
+	xfs_log_ticket_put(tp->t_ticket);
+
+	/*
 	 * Remove the memory for extent descriptions (just bookkeeping).
 	 */
 	if (ip->i_df.if_bytes)
@@ -1625,8 +1522,6 @@ xfs_create(
 		xfs_trans_set_sync(tp);
 	}
 
-	dp->i_gen++;
-
 	/*
 	 * Attach the dquot(s) to the inodes and modify them incore.
 	 * These ids of the inode couldn't have changed since the new
@@ -1993,13 +1888,6 @@ xfs_remove(
 	}
 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
-	/*
-	 * Bump the in memory generation count on the parent
-	 * directory so that other can know that it has changed.
-	 */
-	dp->i_gen++;
-	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-
 	if (is_dir) {
 		/*
 		 * Drop the link from ip's "..".
@@ -2009,7 +1897,7 @@
 			goto out_bmap_cancel;
 
 		/*
-		 * Drop the link from dp to ip.
+		 * Drop the "." link from ip to self.
 		 */
 		error = xfs_droplink(tp, ip);
 		if (error)
@@ -2017,14 +1905,14 @@
 	} else {
 		/*
 		 * When removing a non-directory we need to log the parent
-		 * inode here for the i_gen update.  For a directory this is
-		 * done implicitly by the xfs_droplink call for the ".." entry.
+		 * inode here.  For a directory this is done implicitly
+		 * by the xfs_droplink call for the ".." entry.
 		 */
 		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 	}
 
 	/*
-	 * Drop the "." link from ip to self.
+	 * Drop the link from dp to ip.
 	 */
 	error = xfs_droplink(tp, ip);
 	if (error)
@@ -2178,7 +2066,6 @@ xfs_link(
 	if (error)
 		goto abort_return;
 	xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-	tdp->i_gen++;
 	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
 
 	error = xfs_bumplink(tp, sip);
@@ -2355,18 +2242,10 @@ xfs_mkdir(
 	}
 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
-	/*
-	 * Bump the in memory version number of the parent directory
-	 * so that other processes accessing it will recognize that
-	 * the directory has changed.
-	 */
-	dp->i_gen++;
-
 	error = xfs_dir_init(tp, cdp, dp);
 	if (error)
 		goto error2;
 
-	cdp->i_gen = 1;
 	error = xfs_bumplink(tp, dp);
 	if (error)
 		goto error2;
@@ -2653,13 +2532,6 @@ xfs_symlink(
 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
 	/*
-	 * Bump the in memory version number of the parent directory
-	 * so that other processes accessing it will recognize that
-	 * the directory has changed.
-	 */
-	dp->i_gen++;
-
-	/*
 	 * If this is a synchronous mount, make sure that the
 	 * symlink transaction goes to disk before returning to
 	 * the user.
@@ -2809,7 +2681,7 @@ xfs_reclaim(
 		return 0;
 	}
 
-	vn_iowait(ip);
+	xfs_ioend_wait(ip);
 
 	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
 
@@ -2833,122 +2705,10 @@ xfs_reclaim(
 	if (!ip->i_update_core && (ip->i_itemp == NULL)) {
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		xfs_iflock(ip);
-		return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
-	} else {
-		xfs_mount_t	*mp = ip->i_mount;
-
-		/* Protect sync and unpin from us */
-		XFS_MOUNT_ILOCK(mp);
-		spin_lock(&ip->i_flags_lock);
-		__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-		VFS_I(ip)->i_private = NULL;
-		ip->i_vnode = NULL;
-		spin_unlock(&ip->i_flags_lock);
-		list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
-		XFS_MOUNT_IUNLOCK(mp);
-	}
-	return 0;
-}
-
-int
-xfs_finish_reclaim(
-	xfs_inode_t	*ip,
-	int		locked,
-	int		sync_mode)
-{
-	xfs_perag_t	*pag = xfs_get_perag(ip->i_mount, ip->i_ino);
-	struct inode	*vp = VFS_I(ip);
-
-	if (vp && VN_BAD(vp))
-		goto reclaim;
-
-	/* The hash lock here protects a thread in xfs_iget_core from
-	 * racing with us on linking the inode back with a vnode.
-	 * Once we have the XFS_IRECLAIM flag set it will not touch
-	 * us.
-	 */
-	write_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-	if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
-	    (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
-		spin_unlock(&ip->i_flags_lock);
-		write_unlock(&pag->pag_ici_lock);
-		if (locked) {
-			xfs_ifunlock(ip);
-			xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		}
-		return 1;
-	}
-	__xfs_iflags_set(ip, XFS_IRECLAIM);
-	spin_unlock(&ip->i_flags_lock);
-	write_unlock(&pag->pag_ici_lock);
-	xfs_put_perag(ip->i_mount, pag);
-
-	/*
-	 * If the inode is still dirty, then flush it out.  If the inode
-	 * is not in the AIL, then it will be OK to flush it delwri as
-	 * long as xfs_iflush() does not keep any references to the inode.
-	 * We leave that decision up to xfs_iflush() since it has the
-	 * knowledge of whether it's OK to simply do a delwri flush of
-	 * the inode or whether we need to wait until the inode is
-	 * pulled from the AIL.
-	 * We get the flush lock regardless, though, just to make sure
-	 * we don't free it while it is being flushed.
-	 */
-	if (!locked) {
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_iflock(ip);
+		xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+		return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
 	}
-
-	/*
-	 * In the case of a forced shutdown we rely on xfs_iflush() to
-	 * wait for the inode to be unpinned before returning an error.
-	 */
-	if (xfs_iflush(ip, sync_mode) == 0) {
-		/* synchronize with xfs_iflush_done */
-		xfs_iflock(ip);
-		xfs_ifunlock(ip);
-	}
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- reclaim:
-	xfs_ireclaim(ip);
-	return 0;
-}
-
-int
-xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
-{
-	int		purged;
-	xfs_inode_t	*ip, *n;
-	int		done = 0;
-
-	while (!done) {
-		purged = 0;
-		XFS_MOUNT_ILOCK(mp);
-		list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
-			if (noblock) {
-				if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
-					continue;
-				if (xfs_ipincount(ip) ||
-				    !xfs_iflock_nowait(ip)) {
-					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-					continue;
-				}
-			}
-			XFS_MOUNT_IUNLOCK(mp);
-			if (xfs_finish_reclaim(ip, noblock,
-					XFS_IFLUSH_DELWRI_ELSE_ASYNC))
-				delay(1);
-			purged = 1;
-			break;
-		}
-
-		done = !purged;
-	}
-
-	XFS_MOUNT_IUNLOCK(mp);
+	xfs_inode_set_reclaim_tag(ip);
 	return 0;
 }
 
@@ -3197,6 +2957,8 @@ xfs_zero_remaining_bytes(
 	bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
 				XFS_IS_REALTIME_INODE(ip) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp);
+	if (!bp)
+		return XFS_ERROR(ENOMEM);
 
 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
 		offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -3312,7 +3074,8 @@ xfs_free_file_space(
 		need_iolock = 0;
 	if (need_iolock) {
 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-		vn_iowait(ip);	/* wait for the completion of any pending DIOs */
+		/* wait for the completion of any pending DIOs */
+		xfs_ioend_wait(ip);
 	}
 
 	rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
@@ -3474,7 +3237,6 @@ xfs_change_file_space(
 	int		cmd,
 	xfs_flock64_t	*bf,
 	xfs_off_t	offset,
-	cred_t		*credp,
 	int		attr_flags)
 {
 	xfs_mount_t	*mp = ip->i_mount;
@@ -3562,7 +3324,7 @@ xfs_change_file_space(
 		iattr.ia_valid = ATTR_SIZE;
 		iattr.ia_size = startoffset;
 
-		error = xfs_setattr(ip, &iattr, attr_flags, credp);
+		error = xfs_setattr(ip, &iattr, attr_flags);
 
 		if (error)
 			return error;
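The xfs_setattr() hunks above replace the open-coded owner and capability checks (CAP_FOWNER, CAP_FSETID, restricted_chown) with a single up-front call to the generic VFS helper; the result is negated because XFS at this point still uses positive error codes internally. A reduced, hypothetical sketch of the shape the function converges on; inode_change_ok() is the real helper from fs/attr.c, everything else here is illustrative:

static int setattr_shape(struct xfs_inode *ip, struct iattr *iattr)
{
	struct inode	*inode = VFS_I(ip);
	int		code;

	/* generic owner/capability checks, previously open-coded */
	code = -inode_change_ok(inode, iattr);
	if (code)
		return code;

	/*
	 * ...size, ownership, mode and time changes follow, under the
	 * same transaction and quota framework as before...
	 */
	return 0;
}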
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 7b0c2ab88333..76df328c61b4 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -14,9 +14,7 @@ struct xfs_inode;
 struct xfs_iomap;
 
 
-int xfs_open(struct xfs_inode *ip);
-int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
-		cred_t *credp);
+int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
 #define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
 #define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if operation would block */
 #define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
@@ -44,8 +42,7 @@ int xfs_inode_flush(struct xfs_inode *ip, int flags);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
 int xfs_reclaim(struct xfs_inode *ip);
 int xfs_change_file_space(struct xfs_inode *ip, int cmd,
-		xfs_flock64_t *bf, xfs_off_t offset,
-		cred_t *credp, int attr_flags);
+		xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
 int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
 		struct xfs_inode *src_ip, struct xfs_inode *target_dp,
 		struct xfs_name *target_name, struct xfs_inode *target_ip);
@@ -56,8 +53,6 @@ int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
 int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
 int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
 		int flags, struct attrlist_cursor_kern *cursor);
-int xfs_ioctl(struct xfs_inode *ip, struct file *filp,
-		int ioflags, unsigned int cmd, void __user *arg);
 ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
 		const struct iovec *iovp, unsigned int segs,
 		loff_t *offset, int ioflags);
@@ -78,5 +73,6 @@ int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first,
 		xfs_off_t last, int fiopt);
 int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
 		xfs_off_t last, uint64_t flags, int fiopt);
+int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
 
 #endif /* _XFS_VNODEOPS_H */