author    David Woodhouse <dwmw2@shinybook.infradead.org>  2005-05-03 03:14:09 -0400
committer David Woodhouse <dwmw2@shinybook.infradead.org>  2005-05-03 03:14:09 -0400
commit    27b030d58c8e72fc7a95187a791bd9406e350f02 (patch)
tree      ab3bab7f39a5ce5bab65578a7e08fa4dfdeb198c /fs
parent    79d20b14a0d651f15b0ef9a22b6cf12d284a6d38 (diff)
parent    6628465e33ca694bd8fd5c3cf4eb7ff9177bc694 (diff)
Merge with master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/file.c            |   3
-rw-r--r--  fs/aio.c                 |  59
-rw-r--r--  fs/autofs4/autofs_i.h    |  15
-rw-r--r--  fs/autofs4/expire.c      |  16
-rw-r--r--  fs/autofs4/inode.c       |   1
-rw-r--r--  fs/autofs4/waitq.c       |  22
-rw-r--r--  fs/bio.c                 |   2
-rw-r--r--  fs/buffer.c              |  22
-rw-r--r--  fs/cifs/CHANGES          |   4
-rw-r--r--  fs/cifs/TODO             |  12
-rw-r--r--  fs/cifs/cifssmb.c        |   6
-rw-r--r--  fs/ext3/inode.c          | 144
-rw-r--r--  fs/fcntl.c               |   3
-rw-r--r--  fs/fs-writeback.c        |   4
-rw-r--r--  fs/hfs/mdb.c             |   5
-rw-r--r--  fs/hfs/super.c           |   8
-rw-r--r--  fs/hfsplus/super.c       |   6
-rw-r--r--  fs/hostfs/hostfs_kern.c  |   2
-rw-r--r--  fs/jffs2/file.c          |   3
-rw-r--r--  fs/jfs/inode.c           |  35
-rw-r--r--  fs/jfs/jfs_dmap.c        |  12
-rw-r--r--  fs/jfs/jfs_dtree.c       |   6
-rw-r--r--  fs/jfs/jfs_imap.c        |  84
-rw-r--r--  fs/jfs/jfs_incore.h      |   1
-rw-r--r--  fs/jfs/jfs_logmgr.c      | 150
-rw-r--r--  fs/jfs/jfs_logmgr.h      |   9
-rw-r--r--  fs/jfs/jfs_metapage.c    | 908
-rw-r--r--  fs/jfs/jfs_metapage.h    |  80
-rw-r--r--  fs/jfs/jfs_mount.c       |   5
-rw-r--r--  fs/jfs/jfs_txnmgr.c      | 166
-rw-r--r--  fs/jfs/jfs_umount.c      |  16
-rw-r--r--  fs/jfs/jfs_xtree.c       |  61
-rw-r--r--  fs/jfs/resize.c          |   3
-rw-r--r--  fs/jfs/super.c           |  37
-rw-r--r--  fs/mpage.c               |  94
-rw-r--r--  fs/proc/base.c           |  32
-rw-r--r--  fs/reiserfs/bitmap.c     |  11
-rw-r--r--  fs/reiserfs/dir.c        |   8
-rw-r--r--  fs/reiserfs/file.c       |  13
-rw-r--r--  fs/reiserfs/inode.c      |  14
-rw-r--r--  fs/reiserfs/item_ops.c   |   5
-rw-r--r--  fs/reiserfs/journal.c    |  14
-rw-r--r--  fs/reiserfs/objectid.c   |  18
-rw-r--r--  fs/reiserfs/procfs.c     |   4
-rw-r--r--  fs/reiserfs/stree.c      |  51
-rw-r--r--  fs/reiserfs/super.c      |  35
-rw-r--r--  fs/seq_file.c            |   9
-rw-r--r--  fs/sysfs/file.c          |   4
-rw-r--r--  fs/udf/file.c            |   6
-rw-r--r--  fs/udf/inode.c           |   4
50 files changed, 1335 insertions, 897 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 6b6bb7c8abf..23c12512802 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -131,8 +131,7 @@ static int afs_file_readpage(struct file *file, struct page *page)
 
 	vnode = AFS_FS_I(inode);
 
-	if (!PageLocked(page))
-		PAGE_BUG(page);
+	BUG_ON(!PageLocked(page));
 
 	ret = -ESTALE;
 	if (vnode->flags & AFS_VNODE_DELETED)
diff --git a/fs/aio.c b/fs/aio.c
index a82214d2e46..7afa222f680 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -40,9 +40,6 @@
 #define dprintk(x...)	do { ; } while (0)
 #endif
 
-static long aio_run = 0; /* for testing only */
-static long aio_wakeups = 0; /* for testing only */
-
 /*------ sysctl variables----*/
 atomic_t aio_nr = ATOMIC_INIT(0);	/* current system wide number of aio requests */
 unsigned aio_max_nr = 0x10000;	/* system wide maximum number of aio requests */
@@ -405,7 +402,6 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
 	req->ki_ctx = ctx;
 	req->ki_cancel = NULL;
 	req->ki_retry = NULL;
-	req->ki_obj.user = NULL;
 	req->ki_dtor = NULL;
 	req->private = NULL;
 	INIT_LIST_HEAD(&req->ki_run_list);
@@ -451,11 +447,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 {
 	if (req->ki_dtor)
 		req->ki_dtor(req);
-	req->ki_ctx = NULL;
-	req->ki_filp = NULL;
-	req->ki_obj.user = NULL;
-	req->ki_dtor = NULL;
-	req->private = NULL;
 	kmem_cache_free(kiocb_cachep, req);
 	ctx->reqs_active--;
 
@@ -623,7 +614,6 @@ static inline int __queue_kicked_iocb(struct kiocb *iocb)
 	if (list_empty(&iocb->ki_run_list)) {
 		list_add_tail(&iocb->ki_run_list,
 			&ctx->run_list);
-		iocb->ki_queued++;
 		return 1;
 	}
 	return 0;
@@ -664,10 +654,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	}
 
 	if (!(iocb->ki_retried & 0xff)) {
-		pr_debug("%ld retry: %d of %d (kick %ld, Q %ld run %ld, wake %ld)\n",
-			iocb->ki_retried,
-			iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes,
-			iocb->ki_kicked, iocb->ki_queued, aio_run, aio_wakeups);
+		pr_debug("%ld retry: %d of %d\n", iocb->ki_retried,
+			iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
 	}
 
 	if (!(retry = iocb->ki_retry)) {
@@ -774,7 +762,6 @@ out:
 static int __aio_run_iocbs(struct kioctx *ctx)
 {
 	struct kiocb *iocb;
-	int count = 0;
 	LIST_HEAD(run_list);
 
 	list_splice_init(&ctx->run_list, &run_list);
@@ -789,9 +776,7 @@ static int __aio_run_iocbs(struct kioctx *ctx)
 		aio_run_iocb(iocb);
 		if (__aio_put_req(ctx, iocb))  /* drop extra ref */
 			put_ioctx(ctx);
-		count++;
 	}
-	aio_run++;
 	if (!list_empty(&ctx->run_list))
 		return 1;
 	return 0;
@@ -890,10 +875,8 @@ static void queue_kicked_iocb(struct kiocb *iocb)
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
 	run = __queue_kicked_iocb(iocb);
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-	if (run) {
+	if (run)
 		aio_queue_work(ctx);
-		aio_wakeups++;
-	}
 }
 
 /*
@@ -913,7 +896,6 @@ void fastcall kick_iocb(struct kiocb *iocb)
 		return;
 	}
 
-	iocb->ki_kicked++;
 	/* If its already kicked we shouldn't queue it again */
 	if (!kiocbTryKick(iocb)) {
 		queue_kicked_iocb(iocb);
@@ -984,7 +966,8 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
 
 	tail = info->tail;
 	event = aio_ring_event(info, tail, KM_IRQ0);
-	tail = (tail + 1) % info->nr;
+	if (++tail >= info->nr)
+		tail = 0;
 
 	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
 	event->data = iocb->ki_user_data;
@@ -1008,10 +991,8 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
 
 	pr_debug("added to ring %p at [%lu]\n", iocb, tail);
 
-	pr_debug("%ld retries: %d of %d (kicked %ld, Q %ld run %ld wake %ld)\n",
-		iocb->ki_retried,
-		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes,
-		iocb->ki_kicked, iocb->ki_queued, aio_run, aio_wakeups);
+	pr_debug("%ld retries: %d of %d\n", iocb->ki_retried,
+		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
 put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
@@ -1119,7 +1100,6 @@ static int read_events(struct kioctx *ctx,
 	int i = 0;
 	struct io_event ent;
 	struct aio_timeout to;
-	int event_loop = 0; /* testing only */
 	int retry = 0;
 
 	/* needed to zero any padding within an entry (there shouldn't be
@@ -1186,7 +1166,6 @@ retry:
 			if (to.timed_out)	/* Only check after read evt */
 				break;
 			schedule();
-			event_loop++;
 			if (signal_pending(tsk)) {
 				ret = -EINTR;
 				break;
@@ -1214,9 +1193,6 @@ retry:
 	if (timeout)
 		clear_timeout(&to);
 out:
-	pr_debug("event loop executed %d times\n", event_loop);
-	pr_debug("aio_run %ld\n", aio_run);
-	pr_debug("aio_wakeups %ld\n", aio_wakeups);
 	return i ? i : ret;
 }
 
@@ -1515,8 +1491,7 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	}
 
 	req->ki_filp = file;
-	iocb->aio_key = req->ki_key;
-	ret = put_user(iocb->aio_key, &user_iocb->aio_key);
+	ret = put_user(req->ki_key, &user_iocb->aio_key);
 	if (unlikely(ret)) {
 		dprintk("EFAULT: aio_key\n");
 		goto out_put_req;
@@ -1531,13 +1506,7 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_opcode = iocb->aio_lio_opcode;
 	init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
 	INIT_LIST_HEAD(&req->ki_wait.task_list);
-	req->ki_run_list.next = req->ki_run_list.prev = NULL;
-	req->ki_retry = NULL;
 	req->ki_retried = 0;
-	req->ki_kicked = 0;
-	req->ki_queued = 0;
-	aio_run = 0;
-	aio_wakeups = 0;
 
 	ret = aio_setup_iocb(req);
 
@@ -1545,10 +1514,14 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 
 	spin_lock_irq(&ctx->ctx_lock);
-	list_add_tail(&req->ki_run_list, &ctx->run_list);
-	/* drain the run list */
-	while (__aio_run_iocbs(ctx))
-		;
+	if (likely(list_empty(&ctx->run_list))) {
+		aio_run_iocb(req);
+	} else {
+		list_add_tail(&req->ki_run_list, &ctx->run_list);
+		/* drain the run list */
+		while (__aio_run_iocbs(ctx))
+			;
+	}
 	spin_unlock_irq(&ctx->ctx_lock);
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
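
Aside: the aio_complete() hunk above swaps a modulo for a compare-and-reset when advancing the ring tail, which avoids a division on the completion fast path. A minimal user-space sketch of the same ring-index idiom (hypothetical helper, not the kernel code):

#include <stddef.h>

/* Advance a ring index without the division that '%' implies.
 * Valid as long as the index is always < nr before the increment. */
static size_t ring_advance(size_t tail, size_t nr)
{
	if (++tail >= nr)
		tail = 0;
	return tail;
}
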
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index f5a52c87172..c7b2b889018 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -84,6 +84,7 @@ struct autofs_wait_queue {
 	char *name;
 	/* This is for status reporting upon return */
 	int status;
+	atomic_t notified;
 	atomic_t wait_ctr;
 };
 
@@ -101,6 +102,7 @@ struct autofs_sb_info {
 	int needs_reghost;
 	struct super_block *sb;
 	struct semaphore wq_sem;
+	spinlock_t fs_lock;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
 };
 
@@ -126,9 +128,18 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
 static inline int autofs4_ispending(struct dentry *dentry)
 {
 	struct autofs_info *inf = autofs4_dentry_ino(dentry);
+	int pending = 0;
 
-	return (dentry->d_flags & DCACHE_AUTOFS_PENDING) ||
-		(inf != NULL && inf->flags & AUTOFS_INF_EXPIRING);
+	if (dentry->d_flags & DCACHE_AUTOFS_PENDING)
+		return 1;
+
+	if (inf) {
+		spin_lock(&inf->sbi->fs_lock);
+		pending = inf->flags & AUTOFS_INF_EXPIRING;
+		spin_unlock(&inf->sbi->fs_lock);
+	}
+
+	return pending;
 }
 
 static inline void autofs4_copy_atime(struct file *src, struct file *dst)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 31540a6404d..500425e24fb 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -99,6 +99,10 @@ static int autofs4_check_tree(struct vfsmount *mnt,
 	if (!autofs4_can_expire(top, timeout, do_now))
 		return 0;
 
+	/* Is someone visiting anywhere in the tree ? */
+	if (may_umount_tree(mnt))
+		return 0;
+
 	spin_lock(&dcache_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
@@ -270,10 +274,18 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 
 	/* Case 2: tree mount, expire iff entire tree is not busy */
 	if (!exp_leaves) {
+		/* Lock the tree as we must expire as a whole */
+		spin_lock(&sbi->fs_lock);
 		if (autofs4_check_tree(mnt, dentry, timeout, do_now)) {
-			expired = dentry;
-			break;
+			struct autofs_info *inf = autofs4_dentry_ino(dentry);
+
+			/* Set this flag early to catch sys_chdir and the like */
+			inf->flags |= AUTOFS_INF_EXPIRING;
+			spin_unlock(&sbi->fs_lock);
+			expired = dentry;
+			break;
 		}
+		spin_unlock(&sbi->fs_lock);
 	/* Case 3: direct mount, expire individual leaves */
 	} else {
 		expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index a5256074662..4bb14cc6804 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -206,6 +206,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->version = 0;
 	sbi->sub_version = 0;
 	init_MUTEX(&sbi->wq_sem);
+	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 1ab24a662e0..5a40d36e5a5 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -210,17 +210,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->len = len;
 		wq->status = -EINTR; /* Status return if interrupted */
 		atomic_set(&wq->wait_ctr, 2);
+		atomic_set(&wq->notified, 1);
 		up(&sbi->wq_sem);
-
-		DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d",
-			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
-		/* autofs4_notify_daemon() may block */
-		if (notify != NFY_NONE) {
-			autofs4_notify_daemon(sbi,wq,
-					notify == NFY_MOUNT ?
-						autofs_ptype_missing :
-						autofs_ptype_expire_multi);
-		}
 	} else {
 		atomic_inc(&wq->wait_ctr);
 		up(&sbi->wq_sem);
@@ -229,6 +220,17 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
 	}
 
+	if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) {
+		int type = (notify == NFY_MOUNT ?
+			autofs_ptype_missing : autofs_ptype_expire_multi);
+
+		DPRINTK(("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
+			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify));
+
+		/* autofs4_notify_daemon() may block */
+		autofs4_notify_daemon(sbi, wq, type);
+	}
+
 	/* wq->name is NULL if and only if the lock is already released */
 
 	if ( sbi->catatonic ) {
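
The new wq->notified counter above is a claim-once guard: it starts at 1, and only the caller whose atomic_dec_and_test() takes it to zero sends the daemon packet, so racing waiters cannot notify twice. A hedged C11 sketch of the same idiom (hypothetical names, not the kernel API):

#include <stdatomic.h>
#include <stdbool.h>

/* One-shot claim: the counter starts at 1; exactly one thread sees
 * the 1 -> 0 transition and wins the right to do the side effect. */
static bool claim_once(atomic_int *notified)
{
	return atomic_fetch_sub(notified, 1) == 1;
}
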
diff --git a/fs/bio.c b/fs/bio.c
index e5349e83456..3a1472acc36 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -140,6 +140,7 @@ inline void bio_init(struct bio *bio)
  * bio_alloc_bioset - allocate a bio for I/O
  * @gfp_mask: the GFP_ mask given to the slab allocator
  * @nr_iovecs: number of iovecs to pre-allocate
+ * @bs: the bio_set to allocate from
  *
  * Description:
  *   bio_alloc_bioset will first try it's on mempool to satisfy the allocation.
@@ -629,6 +630,7 @@ out:
 
 /**
  * bio_map_user - map user address into bio
+ * @q: the request_queue_t for the bio
  * @bdev: destination block device
  * @uaddr: start of user address
  * @len: length in bytes
diff --git a/fs/buffer.c b/fs/buffer.c
index 3b12cf947ab..5f525b3c6d9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -218,7 +218,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 	sb = get_super(bdev);
 	if (sb && !(sb->s_flags & MS_RDONLY)) {
 		sb->s_frozen = SB_FREEZE_WRITE;
-		wmb();
+		smp_wmb();
 
 		sync_inodes_sb(sb, 0);
 		DQUOT_SYNC(sb);
@@ -235,7 +235,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 		sync_inodes_sb(sb, 1);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
-		wmb();
+		smp_wmb();
 
 		sync_blockdev(sb->s_bdev);
 
@@ -263,7 +263,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
 		if (sb->s_op->unlockfs)
 			sb->s_op->unlockfs(sb);
 		sb->s_frozen = SB_UNFROZEN;
-		wmb();
+		smp_wmb();
 		wake_up(&sb->s_wait_unfrozen);
 		drop_super(sb);
 	}
@@ -774,15 +774,14 @@ repeat:
 /**
  * sync_mapping_buffers - write out and wait upon a mapping's "associated"
  *                        buffers
- * @buffer_mapping - the mapping which backs the buffers' data
- * @mapping - the mapping which wants those buffers written
+ * @mapping: the mapping which wants those buffers written
  *
  * Starts I/O against the buffers at mapping->private_list, and waits upon
  * that I/O.
  *
- * Basically, this is a convenience function for fsync().  @buffer_mapping is
- * the blockdev which "owns" the buffers and @mapping is a file or directory
- * which needs those buffers to be written for a successful fsync().
+ * Basically, this is a convenience function for fsync().
+ * @mapping is a file or directory which needs those buffers to be written for
+ * a successful fsync().
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
@@ -1263,6 +1262,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
 
 /**
  * mark_buffer_dirty - mark a buffer_head as needing writeout
+ * @bh: the buffer_head to mark dirty
  *
  * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
  * backing page dirty, then tag the page as dirty in its address_space's radix
@@ -1501,6 +1501,7 @@ EXPORT_SYMBOL(__breadahead);
 
 /**
  *  __bread() - reads a specified block and returns the bh
+ *  @bdev: the block_device to read from
  *  @block: number of block
  *  @size: size (in bytes) to read
  *
@@ -2078,8 +2079,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	int nr, i;
 	int fully_mapped = 1;
 
-	if (!PageLocked(page))
-		PAGE_BUG(page);
+	BUG_ON(!PageLocked(page));
 	blocksize = 1 << inode->i_blkbits;
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
@@ -2917,7 +2917,7 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
 
 	bh = head;
 	do {
-		if (buffer_write_io_error(bh))
+		if (buffer_write_io_error(bh) && page->mapping)
 			set_bit(AS_EIO, &page->mapping->flags);
 		if (buffer_busy(bh))
 			goto failed;
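
On the wmb() -> smp_wmb() change earlier in this file: wmb() always emits a hardware write barrier, while smp_wmb() compiles away on uniprocessor builds; since s_frozen is only observed by other CPUs, the cheaper barrier suffices. A rough C11 analogue of the store-then-publish ordering (a sketch, not the kernel primitives):

#include <stdatomic.h>

static int frozen_state;	/* payload written first */
static atomic_int published;	/* flag other threads poll */

void publish_freeze(int state)
{
	frozen_state = state;
	/* release store: the payload above is visible before the
	 * flag flips, mirroring the store + smp_wmb() pairing */
	atomic_store_explicit(&published, 1, memory_order_release);
}
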
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 4d2404305ab..95483baab70 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -4,7 +4,9 @@ Fix error mapping of the TOO_MANY_LINKS (hardlinks) case.
 Do not oops if root user kills cifs oplock kernel thread or
 kills the cifsd thread (NB: killing the cifs kernel threads is not
 recommended, unmount and rmmod cifs will kill them when they are
-no longer needed).
+no longer needed).  Fix readdir to ASCII servers (ie older servers
+which do not support Unicode) and also require asterik.
+
 
 Version 1.33
 ------------
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 1e8490ed694..8cc881694e2 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -1,4 +1,4 @@
-version 1.32 April 3, 2005
+version 1.34 April 29, 2005
 
 A Partial List of Missing Features
 ==================================
@@ -70,7 +70,15 @@ r) Implement O_DIRECT flag on open (already supported on mount)
 s) Allow remapping of last remaining character (\) to +0xF000 which
 (this character is valid for POSIX but not for Windows)
 
-KNOWN BUGS (updated April 3, 2005)
+t) Create UID mapping facility so server UIDs can be mapped on a per
+mount or a per server basis to client UIDs or nobody if no mapping
+exists. This is helpful when Unix extensions are negotiated to
+allow better permission checking when UIDs differ on the server
+and client. Add new protocol request to the CIFS protocol
+standard for asking the server for the corresponding name of a
+particular uid.
+
+KNOWN BUGS (updated April 29, 2005)
 ====================================
 See http://bugzilla.samba.org - search on product "CifsVFS" for
 current bug list.
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b004fef0a42..741ff0c69f3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2451,12 +2451,14 @@ findFirstRetry:
 		name_len += 2;
 	} else {	/* BB add check for overrun of SMB buf BB */
 		name_len = strnlen(searchName, PATH_MAX);
-		name_len++;	/* trailing null */
 /* BB fix here and in unicode clause above ie
    if(name_len > buffersize-header)
       free buffer exit; BB */
 		strncpy(pSMB->FileName, searchName, name_len);
-		pSMB->FileName[name_len] = 0;	/* just in case */
+		pSMB->FileName[name_len] = '\\';
+		pSMB->FileName[name_len+1] = '*';
+		pSMB->FileName[name_len+2] = 0;
+		name_len += 3;
 	}
 
 	params = 12 + name_len /* includes null */ ;
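
The cifssmb.c hunk fixes FindFirst against non-Unicode servers, which return nothing unless the path carries an explicit wildcard: the client now appends a backslash and asterisk after the name instead of just a trailing null. A standalone sketch of that suffix handling, including the buffer-overrun check the BB comment still asks for (hypothetical helper, not the cifs code):

#include <string.h>

/* Append "\*" plus a terminating NUL to a FindFirst path.
 * Returns the total length written (including the NUL), or -1
 * if the result would not fit in buf. */
static int append_find_wildcard(char *buf, size_t bufsize, const char *path)
{
	size_t len = strlen(path);

	if (len + 3 > bufsize)		/* '\\' + '*' + NUL */
		return -1;
	memcpy(buf, path, len);
	buf[len] = '\\';
	buf[len + 1] = '*';
	buf[len + 2] = '\0';
	return (int)(len + 3);
}
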
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 040eb288bb1..ea5888688f9 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -455,12 +455,11 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
  * @goal: place to store the result.
  *
  * Normally this function find the prefered place for block allocation,
- * stores it in *@goal and returns zero. If the branch had been changed
- * under us we return -EAGAIN.
+ * stores it in *@goal and returns zero.
  */
 
-static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
-			  Indirect *partial, unsigned long *goal)
+static unsigned long ext3_find_goal(struct inode *inode, long block,
+		Indirect chain[4], Indirect *partial)
 {
 	struct ext3_block_alloc_info *block_i = EXT3_I(inode)->i_block_alloc_info;
 
@@ -470,15 +469,10 @@ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
 	 */
 	if (block_i && (block == block_i->last_alloc_logical_block + 1)
 		&& (block_i->last_alloc_physical_block != 0)) {
-		*goal = block_i->last_alloc_physical_block + 1;
-		return 0;
+		return block_i->last_alloc_physical_block + 1;
 	}
 
-	if (verify_chain(chain, partial)) {
-		*goal = ext3_find_near(inode, partial);
-		return 0;
-	}
-	return -EAGAIN;
+	return ext3_find_near(inode, partial);
 }
 
 /**
@@ -582,12 +576,9 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
  * @where: location of missing link
  * @num: number of blocks we are adding
  *
- * This function verifies that chain (up to the missing link) had not
- * changed, fills the missing link and does all housekeeping needed in
+ * This function fills the missing link and does all housekeeping needed in
  * inode (->i_blocks, etc.). In case of success we end up with the full
- * chain to new block and return 0. Otherwise (== chain had been changed)
- * we free the new blocks (forgetting their buffer_heads, indeed) and
- * return -EAGAIN.
+ * chain to new block and return 0.
  */
 
 static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
@@ -608,12 +599,6 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 		if (err)
 			goto err_out;
 	}
-	/* Verify that place we are splicing to is still there and vacant */
-
-	if (!verify_chain(chain, where-1) || *where->p)
-		/* Writer: end */
-		goto changed;
-
 	/* That's it */
 
 	*where->p = where->key;
@@ -657,26 +642,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 	}
 	return err;
 
-changed:
-	/*
-	 * AKPM: if where[i].bh isn't part of the current updating
-	 * transaction then we explode nastily. Test this code path.
-	 */
-	jbd_debug(1, "the chain changed: try again\n");
-	err = -EAGAIN;
-
 err_out:
 	for (i = 1; i < num; i++) {
 		BUFFER_TRACE(where[i].bh, "call journal_forget");
 		ext3_journal_forget(handle, where[i].bh);
 	}
-	/* For the normal collision cleanup case, we free up the blocks.
-	 * On genuine filesystem errors we don't even think about doing
-	 * that. */
-	if (err == -EAGAIN)
-		for (i = 0; i < num; i++)
-			ext3_free_blocks(handle, inode,
-					 le32_to_cpu(where[i].key), 1);
 	return err;
 }
 
@@ -708,7 +678,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	unsigned long goal;
 	int left;
 	int boundary = 0;
-	int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
+	const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
 	struct ext3_inode_info *ei = EXT3_I(inode);
 
 	J_ASSERT(handle != NULL || create == 0);
@@ -716,54 +686,55 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	if (depth == 0)
 		goto out;
 
-reread:
 	partial = ext3_get_branch(inode, depth, offsets, chain, &err);
 
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		clear_buffer_new(bh_result);
-got_it:
-		map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
-		if (boundary)
-			set_buffer_boundary(bh_result);
-		/* Clean up and exit */
-		partial = chain+depth-1; /* the whole chain */
-		goto cleanup;
+		goto got_it;
 	}
 
 	/* Next simple case - plain lookup or failed read of indirect block */
-	if (!create || err == -EIO) {
-cleanup:
+	if (!create || err == -EIO)
+		goto cleanup;
+
+	down(&ei->truncate_sem);
+
+	/*
+	 * If the indirect block is missing while we are reading
+	 * the chain(ext3_get_branch() returns -EAGAIN err), or
+	 * if the chain has been changed after we grab the semaphore,
+	 * (either because another process truncated this branch, or
+	 * another get_block allocated this branch) re-grab the chain to see if
+	 * the request block has been allocated or not.
+	 *
+	 * Since we already block the truncate/other get_block
+	 * at this point, we will have the current copy of the chain when we
+	 * splice the branch into the tree.
+	 */
+	if (err == -EAGAIN || !verify_chain(chain, partial)) {
 		while (partial > chain) {
-			BUFFER_TRACE(partial->bh, "call brelse");
 			brelse(partial->bh);
 			partial--;
 		}
-		BUFFER_TRACE(bh_result, "returned");
-out:
-		return err;
+		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+		if (!partial) {
+			up(&ei->truncate_sem);
+			if (err)
+				goto cleanup;
+			clear_buffer_new(bh_result);
+			goto got_it;
+		}
 	}
 
 	/*
-	 * Indirect block might be removed by truncate while we were
-	 * reading it. Handling of that case (forget what we've got and
-	 * reread) is taken out of the main path.
-	 */
-	if (err == -EAGAIN)
-		goto changed;
-
-	goal = 0;
-	down(&ei->truncate_sem);
-
-	/* lazy initialize the block allocation info here if necessary */
-	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) {
+	 * Okay, we need to do block allocation.  Lazily initialize the block
+	 * allocation info here if necessary
+	 */
+	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
 		ext3_init_block_alloc_info(inode);
-	}
 
-	if (ext3_find_goal(inode, iblock, chain, partial, &goal) < 0) {
-		up(&ei->truncate_sem);
-		goto changed;
-	}
+	goal = ext3_find_goal(inode, iblock, chain, partial);
 
 	left = (chain + depth) - partial;
 
@@ -771,38 +742,45 @@ out:
 	 * Block out ext3_truncate while we alter the tree
 	 */
 	err = ext3_alloc_branch(handle, inode, left, goal,
-				offsets+(partial-chain), partial);
+				offsets + (partial - chain), partial);
 
-	/* The ext3_splice_branch call will free and forget any buffers
+	/*
+	 * The ext3_splice_branch call will free and forget any buffers
 	 * on the new chain if there is a failure, but that risks using
 	 * up transaction credits, especially for bitmaps where the
 	 * credits cannot be returned.  Can we handle this somehow?  We
-	 * may need to return -EAGAIN upwards in the worst case. --sct */
+	 * may need to return -EAGAIN upwards in the worst case. --sct
+	 */
 	if (!err)
 		err = ext3_splice_branch(handle, inode, iblock, chain,
 					 partial, left);
-	/* i_disksize growing is protected by truncate_sem
-	 * don't forget to protect it if you're about to implement
-	 * concurrent ext3_get_block() -bzzz */
+	/*
+	 * i_disksize growing is protected by truncate_sem.  Don't forget to
+	 * protect it if you're about to implement concurrent
+	 * ext3_get_block() -bzzz
+	 */
 	if (!err && extend_disksize && inode->i_size > ei->i_disksize)
 		ei->i_disksize = inode->i_size;
 	up(&ei->truncate_sem);
-	if (err == -EAGAIN)
-		goto changed;
 	if (err)
 		goto cleanup;
 
 	set_buffer_new(bh_result);
-	goto got_it;
-
-changed:
+got_it:
+	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	if (boundary)
+		set_buffer_boundary(bh_result);
+	/* Clean up and exit */
+	partial = chain + depth - 1;	/* the whole chain */
+cleanup:
 	while (partial > chain) {
-		jbd_debug(1, "buffer chain changed, retrying\n");
-		BUFFER_TRACE(partial->bh, "brelsing");
+		BUFFER_TRACE(partial->bh, "call brelse");
 		brelse(partial->bh);
 		partial--;
 	}
-	goto reread;
+	BUFFER_TRACE(bh_result, "returned");
+out:
+	return err;
 }
 
 static int ext3_get_block(struct inode *inode, sector_t iblock,
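
The ext3_get_block_handle() rewrite above drops the optimistic reread/changed retry loop: the fast path still does a lock-free lookup, but on a miss the code now takes truncate_sem once and re-reads the branch under the semaphore, where it can no longer go stale. A schematic sketch of that check, lock, re-check pattern (hypothetical types and callbacks, not the ext3 code):

#include <pthread.h>

struct cache {
	pthread_mutex_t lock;
	int (*lookup)(struct cache *c, long key);	/* 0 = found */
	int (*build)(struct cache *c, long key);	/* fill the entry */
};

static int get_or_build(struct cache *c, long key)
{
	if (c->lookup(c, key) == 0)	/* fast path, no lock */
		return 0;

	pthread_mutex_lock(&c->lock);
	/* re-check: another thread may have built it while we waited */
	if (c->lookup(c, key) != 0)
		c->build(c, key);	/* safe: writers are excluded */
	pthread_mutex_unlock(&c->lock);
	return 0;
}
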
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3e7ab16ed15..286a9f8f3d4 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/signal.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -308,7 +309,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		break;
 	case F_SETSIG:
 		/* arg == 0 restores default behaviour. */
-		if (arg < 0 || arg > _NSIG) {
+		if (!valid_signal(arg)) {
 			break;
 		}
 		err = 0;
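
On the F_SETSIG change: arg is an unsigned long, so the old `arg < 0` arm could never fire, and the open-coded `arg > _NSIG` duplicated signal-range knowledge at each call site; valid_signal() from <linux/signal.h> centralizes the check. An equivalent sketch (MY_NSIG is an assumed stand-in for the arch's _NSIG):

#define MY_NSIG 64	/* assumption: x86-like _NSIG */

/* 0 means "reset to default", so it is accepted; an unsigned
 * argument needs only the upper-bound test. */
static inline int my_valid_signal(unsigned long sig)
{
	return sig <= MY_NSIG;
}
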
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d6efb36cab2..8e050fa5821 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -512,7 +512,8 @@ restart:
 }
 
 /**
- * sync_inodes
+ * sync_inodes - writes all inodes to disk
+ * @wait: wait for completion
  *
  * sync_inodes() goes through each super block's dirty inode list, writes the
  * inodes out, waits on the writeout and puts the inodes back on the normal
@@ -604,6 +605,7 @@ EXPORT_SYMBOL(sync_inode);
 /**
  * generic_osync_inode - flush all dirty data for a given inode to disk
  * @inode: inode to write
+ * @mapping: the address_space that should be flushed
  * @what:  what to write and wait upon
  *
  * This can be called by file_write functions for files which have the
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 4efb640c4d0..217e32f37e0 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -333,6 +333,8 @@ void hfs_mdb_close(struct super_block *sb)
  * Release the resources associated with the in-core MDB. */
 void hfs_mdb_put(struct super_block *sb)
 {
+	if (!HFS_SB(sb))
+		return;
 	/* free the B-trees */
 	hfs_btree_close(HFS_SB(sb)->ext_tree);
 	hfs_btree_close(HFS_SB(sb)->cat_tree);
@@ -340,4 +342,7 @@ void hfs_mdb_put(struct super_block *sb)
 	/* free the buffers holding the primary and alternate MDBs */
 	brelse(HFS_SB(sb)->mdb_bh);
 	brelse(HFS_SB(sb)->alt_mdb_bh);
+
+	kfree(HFS_SB(sb));
+	sb->s_fs_info = NULL;
 }
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 1e2c193134c..ab783f6afa3 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -297,7 +297,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 	res = -EINVAL;
 	if (!parse_options((char *)data, sbi)) {
 		hfs_warn("hfs_fs: unable to parse mount options.\n");
-		goto bail3;
+		goto bail;
 	}
 
 	sb->s_op = &hfs_super_operations;
@@ -310,7 +310,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 		hfs_warn("VFS: Can't find a HFS filesystem on dev %s.\n",
 			hfs_mdb_name(sb));
 		res = -EINVAL;
-		goto bail2;
+		goto bail;
 	}
 
 	/* try to get the root inode */
@@ -340,10 +340,8 @@ bail_iput:
 	iput(root_inode);
 bail_no_root:
 	hfs_warn("hfs_fs: get root inode failed.\n");
+bail:
 	hfs_mdb_put(sb);
-bail2:
-bail3:
-	kfree(sbi);
 	return res;
 }
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 5f8044664a3..d55ad67b8e4 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -208,7 +208,9 @@ static void hfsplus_write_super(struct super_block *sb)
 static void hfsplus_put_super(struct super_block *sb)
 {
 	dprint(DBG_SUPER, "hfsplus_put_super\n");
-	if (!(sb->s_flags & MS_RDONLY)) {
+	if (!sb->s_fs_info)
+		return;
+	if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) {
 		struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
 		vhdr->modify_date = hfsp_now2mt();
@@ -226,6 +228,8 @@ static void hfsplus_put_super(struct super_block *sb)
 	brelse(HFSPLUS_SB(sb).s_vhbh);
 	if (HFSPLUS_SB(sb).nls)
 		unload_nls(HFSPLUS_SB(sb).nls);
+	kfree(sb->s_fs_info);
+	sb->s_fs_info = NULL;
 }
 
 static int hfsplus_statfs(struct super_block *sb, struct kstatfs *buf)
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index a88ad292485..e6c63d9cac7 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -521,7 +521,7 @@ int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
 static struct address_space_operations hostfs_aops = {
 	.writepage	= hostfs_writepage,
 	.readpage	= hostfs_readpage,
-/*	.set_page_dirty	= __set_page_dirty_nobuffers, */
+	.set_page_dirty	= __set_page_dirty_nobuffers,
 	.prepare_write	= hostfs_prepare_write,
 	.commit_write	= hostfs_commit_write
 };
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 0c607c1388f..771a554701d 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -79,8 +79,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
 
 	D2(printk(KERN_DEBUG "jffs2_do_readpage_nolock(): ino #%lu, page at offset 0x%lx\n", inode->i_ino, pg->index << PAGE_CACHE_SHIFT));
 
-	if (!PageLocked(pg))
-		PAGE_BUG(pg);
+	BUG_ON(!PageLocked(pg));
 
 	pg_buf = kmap(pg);
 	/* FIXME: Can kmap fail? */
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 7bc906677b0..24a689179af 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -175,31 +175,22 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 {
 	s64 lblock64 = lblock;
 	int rc = 0;
-	int take_locks;
 	xad_t xad;
 	s64 xaddr;
 	int xflag;
-	s32 xlen;
-
-	/*
-	 * If this is a special inode (imap, dmap)
-	 * the lock should already be taken
-	 */
-	take_locks = (JFS_IP(ip)->fileset != AGGREGATE_I);
+	s32 xlen = max_blocks;
 
 	/*
 	 * Take appropriate lock on inode
 	 */
-	if (take_locks) {
-		if (create)
-			IWRITE_LOCK(ip);
-		else
-			IREAD_LOCK(ip);
-	}
+	if (create)
+		IWRITE_LOCK(ip);
+	else
+		IREAD_LOCK(ip);
 
 	if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
-	    (xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)
-	     == 0) && xlen) {
+	    (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) &&
+	    xaddr) {
 		if (xflag & XAD_NOTRECORDED) {
 			if (!create)
 				/*
@@ -238,7 +229,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 #ifdef _JFS_4K
 	if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
 		goto unlock;
-	rc = extAlloc(ip, max_blocks, lblock64, &xad, FALSE);
+	rc = extAlloc(ip, xlen, lblock64, &xad, FALSE);
 	if (rc)
 		goto unlock;
 
@@ -258,12 +249,10 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 	/*
 	 * Release lock on inode
 	 */
-	if (take_locks) {
-		if (create)
-			IWRITE_UNLOCK(ip);
-		else
-			IREAD_UNLOCK(ip);
-	}
+	if (create)
+		IWRITE_UNLOCK(ip);
+	else
+		IREAD_UNLOCK(ip);
 	return rc;
 }
 
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index d86e467c6e4..69007fd546e 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -471,6 +471,7 @@ dbUpdatePMap(struct inode *ipbmap,
 	struct metapage *mp;
 	struct jfs_log *log;
 	int lsn, difft, diffp;
+	unsigned long flags;
 
 	/* the blocks better be within the mapsize. */
 	if (blkno + nblocks > bmp->db_mapsize) {
@@ -504,6 +505,7 @@ dbUpdatePMap(struct inode *ipbmap,
 					   0);
 			if (mp == NULL)
 				return -EIO;
+			metapage_wait_for_io(mp);
 		}
 		dp = (struct dmap *) mp->data;
 
@@ -578,34 +580,32 @@ dbUpdatePMap(struct inode *ipbmap,
 		if (mp->lsn != 0) {
 			/* inherit older/smaller lsn */
 			logdiff(diffp, mp->lsn, log);
+			LOGSYNC_LOCK(log, flags);
 			if (difft < diffp) {
 				mp->lsn = lsn;
 
 				/* move bp after tblock in logsync list */
-				LOGSYNC_LOCK(log);
 				list_move(&mp->synclist, &tblk->synclist);
-				LOGSYNC_UNLOCK(log);
 			}
 
 			/* inherit younger/larger clsn */
-			LOGSYNC_LOCK(log);
 			logdiff(difft, tblk->clsn, log);
 			logdiff(diffp, mp->clsn, log);
 			if (difft > diffp)
 				mp->clsn = tblk->clsn;
-			LOGSYNC_UNLOCK(log);
+			LOGSYNC_UNLOCK(log, flags);
 		} else {
 			mp->log = log;
 			mp->lsn = lsn;
 
 			/* insert bp after tblock in logsync list */
-			LOGSYNC_LOCK(log);
+			LOGSYNC_LOCK(log, flags);
 
 			log->count++;
 			list_add(&mp->synclist, &tblk->synclist);
 
 			mp->clsn = tblk->clsn;
-			LOGSYNC_UNLOCK(log);
+			LOGSYNC_UNLOCK(log, flags);
 		}
 	}
 
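
The LOGSYNC_LOCK/LOGSYNC_UNLOCK calls now carry a flags argument because the logsync list is also manipulated from I/O completion: a lock shared with interrupt context must use the irqsave variant, which disables local interrupts and restores their prior state on unlock. A minimal kernel-style sketch of the pattern (illustrative only, not the JFS macro):

#include <linux/list.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(logsync_lock);

static void logsync_list_add(struct list_head *entry, struct list_head *head)
{
	unsigned long flags;

	/* irqsave variant: safe even if an interrupt handler on this
	 * CPU also takes logsync_lock; plain spin_lock() could deadlock */
	spin_lock_irqsave(&logsync_lock, flags);
	list_add(entry, head);
	spin_unlock_irqrestore(&logsync_lock, flags);
}
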
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index e357890adfb..ac41f72d6d5 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -212,7 +212,7 @@ static struct metapage *read_index_page(struct inode *inode, s64 blkno)
 	s32 xlen;
 
 	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
-	if (rc || (xlen == 0))
+	if (rc || (xaddr == 0))
 		return NULL;
 
 	return read_metapage(inode, xaddr, PSIZE, 1);
@@ -231,7 +231,7 @@ static struct metapage *get_index_page(struct inode *inode, s64 blkno)
 	s32 xlen;
 
 	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
-	if (rc || (xlen == 0))
+	if (rc || (xaddr == 0))
 		return NULL;
 
 	return get_metapage(inode, xaddr, PSIZE, 1);
@@ -3181,7 +3181,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			d = (struct ldtentry *) & p->slot[stbl[i]];
 
 			if (((long) jfs_dirent + d->namlen + 1) >
-			    (dirent_buf + PSIZE)) {
+			    (dirent_buf + PAGE_SIZE)) {
 				/* DBCS codepages could overrun dirent_buf */
 				index = i;
 				overflow = 1;
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 78383130162..7acff2ce3c8 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -502,7 +502,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 
 	}
 
-	ip->i_mapping->a_ops = &jfs_aops;
+	ip->i_mapping->a_ops = &jfs_metapage_aops;
 	mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
 
 	/* Allocations to metadata inodes should not affect quotas */
@@ -2573,9 +2573,18 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 		goto out;
 	}
 
-	/* assign a buffer for the page */
-	mp = get_metapage(ipimap, xaddr, PSIZE, 1);
-	if (!mp) {
+	/*
+	 * start transaction of update of the inode map
+	 * addressing structure pointing to the new iag page;
+	 */
+	tid = txBegin(sb, COMMIT_FORCE);
+	down(&JFS_IP(ipimap)->commit_sem);
+
+	/* update the inode map addressing structure to point to it */
+	if ((rc =
+	     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
+		txEnd(tid);
+		up(&JFS_IP(ipimap)->commit_sem);
 		/* Free the blocks allocated for the iag since it was
 		 * not successfully added to the inode map
 		 */
@@ -2584,6 +2593,29 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 		/* release the inode map lock */
 		IWRITE_UNLOCK(ipimap);
 
+		goto out;
+	}
+
+	/* update the inode map's inode to reflect the extension */
+	ipimap->i_size += PSIZE;
+	inode_add_bytes(ipimap, PSIZE);
+
+	/* assign a buffer for the page */
+	mp = get_metapage(ipimap, blkno, PSIZE, 0);
+	if (!mp) {
+		/*
+		 * This is very unlikely since we just created the
+		 * extent, but let's try to handle it correctly
+		 */
+		xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
+			   COMMIT_PWMAP);
+
+		txAbort(tid, 0);
+		txEnd(tid);
+
+		/* release the inode map lock */
+		IWRITE_UNLOCK(ipimap);
+
 		rc = -EIO;
 		goto out;
 	}
@@ -2605,41 +2637,11 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 		iagp->inosmap[i] = cpu_to_le32(ONES);
 
 	/*
-	 * Invalidate the page after writing and syncing it.
-	 * After it's initialized, we access it in a different
-	 * address space
+	 * Write and sync the metapage
 	 */
-	set_bit(META_discard, &mp->flag);
 	flush_metapage(mp);
 
 	/*
-	 * start tyransaction of update of the inode map
-	 * addressing structure pointing to the new iag page;
-	 */
-	tid = txBegin(sb, COMMIT_FORCE);
-	down(&JFS_IP(ipimap)->commit_sem);
-
-	/* update the inode map addressing structure to point to it */
-	if ((rc =
-	     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
-		txEnd(tid);
-		up(&JFS_IP(ipimap)->commit_sem);
-		/* Free the blocks allocated for the iag since it was
-		 * not successfully added to the inode map
-		 */
-		dbFree(ipimap, xaddr, (s64) xlen);
-
-		/* release the inode map lock */
-		IWRITE_UNLOCK(ipimap);
-
-		goto out;
-	}
-
-	/* update the inode map's inode to reflect the extension */
-	ipimap->i_size += PSIZE;
-	inode_add_bytes(ipimap, PSIZE);
-
-	/*
 	 * txCommit(COMMIT_FORCE) will synchronously write address
 	 * index pages and inode after commit in careful update order
 	 * of address index pages (right to left, bottom up);
@@ -2789,6 +2791,7 @@ diUpdatePMap(struct inode *ipimap,
 	u32 mask;
 	struct jfs_log *log;
 	int lsn, difft, diffp;
+	unsigned long flags;
 
 	imap = JFS_IP(ipimap)->i_imap;
 	/* get the iag number containing the inode */
@@ -2805,6 +2808,7 @@ diUpdatePMap(struct inode *ipimap,
 	IREAD_UNLOCK(ipimap);
 	if (rc)
 		return (rc);
+	metapage_wait_for_io(mp);
 	iagp = (struct iag *) mp->data;
 	/* get the inode number and extent number of the inode within
 	 * the iag and the inode number within the extent.
@@ -2868,30 +2872,28 @@ diUpdatePMap(struct inode *ipimap,
 		/* inherit older/smaller lsn */
 		logdiff(difft, lsn, log);
 		logdiff(diffp, mp->lsn, log);
+		LOGSYNC_LOCK(log, flags);
 		if (difft < diffp) {
 			mp->lsn = lsn;
 			/* move mp after tblock in logsync list */
-			LOGSYNC_LOCK(log);
 			list_move(&mp->synclist, &tblk->synclist);
-			LOGSYNC_UNLOCK(log);
 		}
 		/* inherit younger/larger clsn */
-		LOGSYNC_LOCK(log);
 		assert(mp->clsn);
 		logdiff(difft, tblk->clsn, log);
 		logdiff(diffp, mp->clsn, log);
 		if (difft > diffp)
 			mp->clsn = tblk->clsn;
-		LOGSYNC_UNLOCK(log);
+		LOGSYNC_UNLOCK(log, flags);
 	} else {
 		mp->log = log;
 		mp->lsn = lsn;
 		/* insert mp after tblock in logsync list */
-		LOGSYNC_LOCK(log);
+		LOGSYNC_LOCK(log, flags);
 		log->count++;
 		list_add(&mp->synclist, &tblk->synclist);
 		mp->clsn = tblk->clsn;
-		LOGSYNC_UNLOCK(log);
+		LOGSYNC_UNLOCK(log, flags);
 	}
 	write_metapage(mp);
 	return (0);
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index ebd77c1bed6..c0fd7b3eadc 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -165,6 +165,7 @@ struct jfs_sb_info {
 	/* Formerly in ipbmap */
 	struct bmap *bmap;		/* incore bmap descriptor	*/
 	struct nls_table *nls_tab;	/* current codepage		*/
+	struct inode *direct_inode;	/* metadata inode		*/
 	uint state;			/* mount/recovery state		*/
 	unsigned long flag;		/* mount time flags		*/
 	uint p_state;			/* state prior to going no integrity */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index b6a6869ebb4..dfa1200daa6 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -234,6 +234,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
234 int lsn; 234 int lsn;
235 int diffp, difft; 235 int diffp, difft;
236 struct metapage *mp = NULL; 236 struct metapage *mp = NULL;
237 unsigned long flags;
237 238
238 jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", 239 jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
239 log, tblk, lrd, tlck); 240 log, tblk, lrd, tlck);
@@ -254,7 +255,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
254 */ 255 */
255 lsn = log->lsn; 256 lsn = log->lsn;
256 257
257 LOGSYNC_LOCK(log); 258 LOGSYNC_LOCK(log, flags);
258 259
259 /* 260 /*
260 * initialize page lsn if first log write of the page 261 * initialize page lsn if first log write of the page
@@ -310,7 +311,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
310 } 311 }
311 } 312 }
312 313
313 LOGSYNC_UNLOCK(log); 314 LOGSYNC_UNLOCK(log, flags);
314 315
315 /* 316 /*
316 * write the log record 317 * write the log record
@@ -334,7 +335,6 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
334 return lsn; 335 return lsn;
335} 336}
336 337
337
338/* 338/*
339 * NAME: lmWriteRecord() 339 * NAME: lmWriteRecord()
340 * 340 *
@@ -927,9 +927,8 @@ static void lmPostGC(struct lbuf * bp)
927 * calculate new value of i_nextsync which determines when 927 * calculate new value of i_nextsync which determines when
928 * this code is called again. 928 * this code is called again.
929 * 929 *
930 * this is called only from lmLog(). 930 * PARAMETERS: log - log structure
931 * 931 * nosyncwait - 1 if called asynchronously
932 * PARAMETER: ip - pointer to logs inode.
933 * 932 *
934 * RETURN: 0 933 * RETURN: 0
935 * 934 *
@@ -945,6 +944,15 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
945 struct lrd lrd; 944 struct lrd lrd;
946 int lsn; 945 int lsn;
947 struct logsyncblk *lp; 946 struct logsyncblk *lp;
947 struct jfs_sb_info *sbi;
948 unsigned long flags;
949
950 /* push dirty metapages out to disk */
951 list_for_each_entry(sbi, &log->sb_list, log_list) {
952 filemap_flush(sbi->ipbmap->i_mapping);
953 filemap_flush(sbi->ipimap->i_mapping);
954 filemap_flush(sbi->direct_inode->i_mapping);
955 }
948 956
949 /* 957 /*
950 * forward syncpt 958 * forward syncpt
@@ -954,10 +962,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
954 */ 962 */
955 963
956 if (log->sync == log->syncpt) { 964 if (log->sync == log->syncpt) {
957 LOGSYNC_LOCK(log); 965 LOGSYNC_LOCK(log, flags);
958 /* ToDo: push dirty metapages out to disk */
959// bmLogSync(log);
960
961 if (list_empty(&log->synclist)) 966 if (list_empty(&log->synclist))
962 log->sync = log->lsn; 967 log->sync = log->lsn;
963 else { 968 else {
@@ -965,7 +970,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
965 struct logsyncblk, synclist); 970 struct logsyncblk, synclist);
966 log->sync = lp->lsn; 971 log->sync = lp->lsn;
967 } 972 }
968 LOGSYNC_UNLOCK(log); 973 LOGSYNC_UNLOCK(log, flags);
969 974
970 } 975 }
971 976
@@ -974,27 +979,6 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
974 * reset syncpt = sync 979 * reset syncpt = sync
975 */ 980 */
976 if (log->sync != log->syncpt) { 981 if (log->sync != log->syncpt) {
977 struct jfs_sb_info *sbi;
978
979 /*
980 * We need to make sure all of the "written" metapages
981 * actually make it to disk
982 */
983 list_for_each_entry(sbi, &log->sb_list, log_list) {
984 if (sbi->flag & JFS_NOINTEGRITY)
985 continue;
986 filemap_fdatawrite(sbi->ipbmap->i_mapping);
987 filemap_fdatawrite(sbi->ipimap->i_mapping);
988 filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping);
989 }
990 list_for_each_entry(sbi, &log->sb_list, log_list) {
991 if (sbi->flag & JFS_NOINTEGRITY)
992 continue;
993 filemap_fdatawait(sbi->ipbmap->i_mapping);
994 filemap_fdatawait(sbi->ipimap->i_mapping);
995 filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping);
996 }
997
998 lrd.logtid = 0; 982 lrd.logtid = 0;
999 lrd.backchain = 0; 983 lrd.backchain = 0;
1000 lrd.type = cpu_to_le16(LOG_SYNCPT); 984 lrd.type = cpu_to_le16(LOG_SYNCPT);
@@ -1066,6 +1050,18 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
1066 return lsn; 1050 return lsn;
1067} 1051}
1068 1052
1053/*
1054 * NAME: jfs_syncpt
1055 *
1056 * FUNCTION: write log SYNCPT record for specified log
1057 *
1058 * PARAMETERS: log - log structure
1059 */
1060void jfs_syncpt(struct jfs_log *log)
1061{ LOG_LOCK(log);
1062 lmLogSync(log, 1);
1063 LOG_UNLOCK(log);
1064}
1069 1065
1070/* 1066/*
1071 * NAME: lmLogOpen() 1067 * NAME: lmLogOpen()
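The new jfs_syncpt above simply serializes lmLogSync under LOG_LOCK so a syncpoint can be forced from outside the log manager. A minimal sketch of the intended call shape, mirroring the txEnd hunk in jfs_txnmgr.c further down (the transaction lock is dropped first because lmLogSync may sleep):

	/* sketch only; condensed from the txEnd change below */
	if (test_bit(log_SYNCBARRIER, &log->flag)) {
		TXN_UNLOCK();		/* lmLogSync may sleep */
		jfs_syncpt(log);	/* LOG_LOCK + lmLogSync(log, 1) */
	}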
@@ -1547,6 +1543,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
1547{ 1543{
1548 int i; 1544 int i;
1549 struct tblock *target = NULL; 1545 struct tblock *target = NULL;
1546 struct jfs_sb_info *sbi;
1550 1547
1551 /* jfs_write_inode may call us during read-only mount */ 1548 /* jfs_write_inode may call us during read-only mount */
1552 if (!log) 1549 if (!log)
@@ -1608,12 +1605,18 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
1608 if (wait < 2) 1605 if (wait < 2)
1609 return; 1606 return;
1610 1607
1608 list_for_each_entry(sbi, &log->sb_list, log_list) {
1609 filemap_fdatawrite(sbi->ipbmap->i_mapping);
1610 filemap_fdatawrite(sbi->ipimap->i_mapping);
1611 filemap_fdatawrite(sbi->direct_inode->i_mapping);
1612 }
1613
1611 /* 1614 /*
1612 * If there was recent activity, we may need to wait 1615 * If there was recent activity, we may need to wait
1613 * for the lazycommit thread to catch up 1616 * for the lazycommit thread to catch up
1614 */ 1617 */
1615 if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { 1618 if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1616 for (i = 0; i < 800; i++) { /* Too much? */ 1619 for (i = 0; i < 200; i++) { /* Too much? */
1617 msleep(250); 1620 msleep(250);
1618 if (list_empty(&log->cqueue) && 1621 if (list_empty(&log->cqueue) &&
1619 list_empty(&log->synclist)) 1622 list_empty(&log->synclist))
@@ -1621,7 +1624,24 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
1621 } 1624 }
1622 } 1625 }
1623 assert(list_empty(&log->cqueue)); 1626 assert(list_empty(&log->cqueue));
1624 assert(list_empty(&log->synclist)); 1627 if (!list_empty(&log->synclist)) {
1628 struct logsyncblk *lp;
1629
1630 list_for_each_entry(lp, &log->synclist, synclist) {
1631 if (lp->xflag & COMMIT_PAGE) {
1632 struct metapage *mp = (struct metapage *)lp;
1633 dump_mem("orphan metapage", lp,
1634 sizeof(struct metapage));
1635 dump_mem("page", mp->page, sizeof(struct page));
1636 }
1637 else
1638 dump_mem("orphan tblock", lp,
1639 sizeof(struct tblock));
1640 }
1641// current->state = TASK_INTERRUPTIBLE;
1642// schedule();
1643 }
1644 //assert(list_empty(&log->synclist));
1625 clear_bit(log_FLUSH, &log->flag); 1645 clear_bit(log_FLUSH, &log->flag);
1626} 1646}
1627 1647
@@ -1669,6 +1689,7 @@ int lmLogShutdown(struct jfs_log * log)
1669 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 1689 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1670 lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); 1690 lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1671 lbmIOWait(log->bp, lbmFREE); 1691 lbmIOWait(log->bp, lbmFREE);
1692 log->bp = NULL;
1672 1693
1673 /* 1694 /*
1674 * synchronous update log superblock 1695 * synchronous update log superblock
@@ -1819,20 +1840,34 @@ static int lbmLogInit(struct jfs_log * log)
1819 1840
1820 log->lbuf_free = NULL; 1841 log->lbuf_free = NULL;
1821 1842
1822 for (i = 0; i < LOGPAGES; i++) { 1843 for (i = 0; i < LOGPAGES;) {
1823 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); 1844 char *buffer;
1824 if (lbuf == 0) 1845 uint offset;
1825 goto error; 1846 struct page *page;
1826 lbuf->l_ldata = (char *) get_zeroed_page(GFP_KERNEL); 1847
1827 if (lbuf->l_ldata == 0) { 1848 buffer = (char *) get_zeroed_page(GFP_KERNEL);
1828 kfree(lbuf); 1849 if (buffer == NULL)
1829 goto error; 1850 goto error;
1851 page = virt_to_page(buffer);
1852 for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1853 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1854 if (lbuf == NULL) {
1855 if (offset == 0)
1856 free_page((unsigned long) buffer);
1857 goto error;
1858 }
1859 if (offset) /* we already have one reference */
1860 get_page(page);
1861 lbuf->l_offset = offset;
1862 lbuf->l_ldata = buffer + offset;
1863 lbuf->l_page = page;
1864 lbuf->l_log = log;
1865 init_waitqueue_head(&lbuf->l_ioevent);
1866
1867 lbuf->l_freelist = log->lbuf_free;
1868 log->lbuf_free = lbuf;
1869 i++;
1830 } 1870 }
1831 lbuf->l_log = log;
1832 init_waitqueue_head(&lbuf->l_ioevent);
1833
1834 lbuf->l_freelist = log->lbuf_free;
1835 log->lbuf_free = lbuf;
1836 } 1871 }
1837 1872
1838 return (0); 1873 return (0);
@@ -1857,12 +1892,10 @@ static void lbmLogShutdown(struct jfs_log * log)
1857 lbuf = log->lbuf_free; 1892 lbuf = log->lbuf_free;
1858 while (lbuf) { 1893 while (lbuf) {
1859 struct lbuf *next = lbuf->l_freelist; 1894 struct lbuf *next = lbuf->l_freelist;
1860 free_page((unsigned long) lbuf->l_ldata); 1895 __free_page(lbuf->l_page);
1861 kfree(lbuf); 1896 kfree(lbuf);
1862 lbuf = next; 1897 lbuf = next;
1863 } 1898 }
1864
1865 log->bp = NULL;
1866} 1899}
1867 1900
1868 1901
@@ -1974,9 +2007,9 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1974 2007
1975 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 2008 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
1976 bio->bi_bdev = log->bdev; 2009 bio->bi_bdev = log->bdev;
1977 bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata); 2010 bio->bi_io_vec[0].bv_page = bp->l_page;
1978 bio->bi_io_vec[0].bv_len = LOGPSIZE; 2011 bio->bi_io_vec[0].bv_len = LOGPSIZE;
1979 bio->bi_io_vec[0].bv_offset = 0; 2012 bio->bi_io_vec[0].bv_offset = bp->l_offset;
1980 2013
1981 bio->bi_vcnt = 1; 2014 bio->bi_vcnt = 1;
1982 bio->bi_idx = 0; 2015 bio->bi_idx = 0;
@@ -2115,9 +2148,9 @@ static void lbmStartIO(struct lbuf * bp)
2115 bio = bio_alloc(GFP_NOFS, 1); 2148 bio = bio_alloc(GFP_NOFS, 1);
2116 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 2149 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
2117 bio->bi_bdev = log->bdev; 2150 bio->bi_bdev = log->bdev;
2118 bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata); 2151 bio->bi_io_vec[0].bv_page = bp->l_page;
2119 bio->bi_io_vec[0].bv_len = LOGPSIZE; 2152 bio->bi_io_vec[0].bv_len = LOGPSIZE;
2120 bio->bi_io_vec[0].bv_offset = 0; 2153 bio->bi_io_vec[0].bv_offset = bp->l_offset;
2121 2154
2122 bio->bi_vcnt = 1; 2155 bio->bi_vcnt = 1;
2123 bio->bi_idx = 0; 2156 bio->bi_idx = 0;
@@ -2127,16 +2160,13 @@ static void lbmStartIO(struct lbuf * bp)
2127 bio->bi_private = bp; 2160 bio->bi_private = bp;
2128 2161
2129 /* check if journaling to disk has been disabled */ 2162 /* check if journaling to disk has been disabled */
2130 if (!log->no_integrity) { 2163 if (log->no_integrity) {
2164 bio->bi_size = 0;
2165 lbmIODone(bio, 0, 0);
2166 } else {
2131 submit_bio(WRITE_SYNC, bio); 2167 submit_bio(WRITE_SYNC, bio);
2132 INCREMENT(lmStat.submitted); 2168 INCREMENT(lmStat.submitted);
2133 } 2169 }
2134 else {
2135 bio->bi_size = 0;
2136 lbmIODone(bio, 0, 0); /* 2nd argument appears to not be used => 0
2137 * 3rd argument appears to not be used => 0
2138 */
2139 }
2140} 2170}
2141 2171
2142 2172
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 141ad74010c..51291fbc420 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -463,9 +463,10 @@ struct lbuf {
463 463
464 s64 l_blkno; /* 8: log page block number */ 464 s64 l_blkno; /* 8: log page block number */
465 caddr_t l_ldata; /* 4: data page */ 465 caddr_t l_ldata; /* 4: data page */
466 struct page *l_page; /* The page itself */
467 uint l_offset; /* Offset of l_ldata within the page */
466 468
467 wait_queue_head_t l_ioevent; /* 4: i/o done event */ 469 wait_queue_head_t l_ioevent; /* 4: i/o done event */
468 struct page *l_page; /* The page itself */
469}; 470};
470 471
471/* Reuse l_freelist for redrive list */ 472/* Reuse l_freelist for redrive list */
@@ -489,8 +490,9 @@ struct logsyncblk {
489 */ 490 */
490 491
491#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) 492#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
492#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock) 493#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
493#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock) 494#define LOGSYNC_UNLOCK(log, flags) \
495 spin_unlock_irqrestore(&(log)->synclock, flags)
494 496
495/* compute the difference in bytes of lsn from sync point */ 497/* compute the difference in bytes of lsn from sync point */
496#define logdiff(diff, lsn, log)\ 498#define logdiff(diff, lsn, log)\
@@ -506,5 +508,6 @@ extern int lmLogShutdown(struct jfs_log * log);
506extern int lmLogInit(struct jfs_log * log); 508extern int lmLogInit(struct jfs_log * log);
507extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize); 509extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
508extern void jfs_flush_journal(struct jfs_log * log, int wait); 510extern void jfs_flush_journal(struct jfs_log * log, int wait);
511extern void jfs_syncpt(struct jfs_log *log);
509 512
510#endif /* _H_JFS_LOGMGR */ 513#endif /* _H_JFS_LOGMGR */
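Since LOGSYNC_LOCK now expands to spin_lock_irqsave, every caller declares a local to carry the saved interrupt state, as the jfs_logmgr.c, jfs_imap.c and jfs_txnmgr.c hunks all do. A minimal caller sketch (the helper name is illustrative, not from the source):

	static void logsync_set_lsn(struct jfs_log *log, struct metapage *mp,
				    int lsn)
	{
		unsigned long flags;		/* irq state saved by the macro */

		LOGSYNC_LOCK(log, flags);	/* spin_lock_irqsave(&log->synclock, flags) */
		mp->lsn = lsn;			/* fields guarded by synclock */
		LOGSYNC_UNLOCK(log, flags);	/* spin_unlock_irqrestore */
	}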
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 4c0a3ac75c0..41bf078dce0 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) International Business Machines Corp., 2000-2003 2 * Copyright (C) International Business Machines Corp., 2000-2005
3 * Portions Copyright (C) Christoph Hellwig, 2001-2002 3 * Portions Copyright (C) Christoph Hellwig, 2001-2002
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
@@ -18,10 +18,11 @@
18 */ 18 */
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/mm.h>
22#include <linux/bio.h>
21#include <linux/init.h> 23#include <linux/init.h>
22#include <linux/buffer_head.h> 24#include <linux/buffer_head.h>
23#include <linux/mempool.h> 25#include <linux/mempool.h>
24#include <linux/delay.h>
25#include "jfs_incore.h" 26#include "jfs_incore.h"
26#include "jfs_superblock.h" 27#include "jfs_superblock.h"
27#include "jfs_filsys.h" 28#include "jfs_filsys.h"
@@ -29,8 +30,6 @@
29#include "jfs_txnmgr.h" 30#include "jfs_txnmgr.h"
30#include "jfs_debug.h" 31#include "jfs_debug.h"
31 32
32static DEFINE_SPINLOCK(meta_lock);
33
34#ifdef CONFIG_JFS_STATISTICS 33#ifdef CONFIG_JFS_STATISTICS
35static struct { 34static struct {
36 uint pagealloc; /* # of page allocations */ 35 uint pagealloc; /* # of page allocations */
@@ -39,22 +38,8 @@ static struct {
39} mpStat; 38} mpStat;
40#endif 39#endif
41 40
42 41#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
43#define HASH_BITS 10 /* This makes hash_table 1 4K page */ 42#define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag)
44#define HASH_SIZE (1 << HASH_BITS)
45static struct metapage **hash_table = NULL;
46static unsigned long hash_order;
47
48
49static inline int metapage_locked(struct metapage *mp)
50{
51 return test_bit(META_locked, &mp->flag);
52}
53
54static inline int trylock_metapage(struct metapage *mp)
55{
56 return test_and_set_bit(META_locked, &mp->flag);
57}
58 43
59static inline void unlock_metapage(struct metapage *mp) 44static inline void unlock_metapage(struct metapage *mp)
60{ 45{
@@ -62,26 +47,26 @@ static inline void unlock_metapage(struct metapage *mp)
62 wake_up(&mp->wait); 47 wake_up(&mp->wait);
63} 48}
64 49
65static void __lock_metapage(struct metapage *mp) 50static inline void __lock_metapage(struct metapage *mp)
66{ 51{
67 DECLARE_WAITQUEUE(wait, current); 52 DECLARE_WAITQUEUE(wait, current);
68
69 INCREMENT(mpStat.lockwait); 53 INCREMENT(mpStat.lockwait);
70
71 add_wait_queue_exclusive(&mp->wait, &wait); 54 add_wait_queue_exclusive(&mp->wait, &wait);
72 do { 55 do {
73 set_current_state(TASK_UNINTERRUPTIBLE); 56 set_current_state(TASK_UNINTERRUPTIBLE);
74 if (metapage_locked(mp)) { 57 if (metapage_locked(mp)) {
75 spin_unlock(&meta_lock); 58 unlock_page(mp->page);
76 schedule(); 59 schedule();
77 spin_lock(&meta_lock); 60 lock_page(mp->page);
78 } 61 }
79 } while (trylock_metapage(mp)); 62 } while (trylock_metapage(mp));
80 __set_current_state(TASK_RUNNING); 63 __set_current_state(TASK_RUNNING);
81 remove_wait_queue(&mp->wait, &wait); 64 remove_wait_queue(&mp->wait, &wait);
82} 65}
83 66
84/* needs meta_lock */ 67/*
68 * Must have mp->page locked
69 */
85static inline void lock_metapage(struct metapage *mp) 70static inline void lock_metapage(struct metapage *mp)
86{ 71{
87 if (trylock_metapage(mp)) 72 if (trylock_metapage(mp))
@@ -92,6 +77,110 @@ static inline void lock_metapage(struct metapage *mp)
92static kmem_cache_t *metapage_cache; 77static kmem_cache_t *metapage_cache;
93static mempool_t *metapage_mempool; 78static mempool_t *metapage_mempool;
94 79
80#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
81
82#if MPS_PER_PAGE > 1
83
84struct meta_anchor {
85 int mp_count;
86 atomic_t io_count;
87 struct metapage *mp[MPS_PER_PAGE];
88};
89#define mp_anchor(page) ((struct meta_anchor *)page->private)
90
91static inline struct metapage *page_to_mp(struct page *page, uint offset)
92{
93 if (!PagePrivate(page))
94 return NULL;
95 return mp_anchor(page)->mp[offset >> L2PSIZE];
96}
97
98static inline int insert_metapage(struct page *page, struct metapage *mp)
99{
100 struct meta_anchor *a;
101 int index;
102 int l2mp_blocks; /* log2 blocks per metapage */
103
104 if (PagePrivate(page))
105 a = mp_anchor(page);
106 else {
107 a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS);
108 if (!a)
109 return -ENOMEM;
110 memset(a, 0, sizeof(struct meta_anchor));
111 page->private = (unsigned long)a;
112 SetPagePrivate(page);
113 kmap(page);
114 }
115
116 if (mp) {
117 l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
118 index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
119 a->mp_count++;
120 a->mp[index] = mp;
121 }
122
123 return 0;
124}
125
126static inline void remove_metapage(struct page *page, struct metapage *mp)
127{
128 struct meta_anchor *a = mp_anchor(page);
129 int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
130 int index;
131
132 index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
133
134 BUG_ON(a->mp[index] != mp);
135
136 a->mp[index] = NULL;
137 if (--a->mp_count == 0) {
138 kfree(a);
139 page->private = 0;
140 ClearPagePrivate(page);
141 kunmap(page);
142 }
143}
144
145static inline void inc_io(struct page *page)
146{
147 atomic_inc(&mp_anchor(page)->io_count);
148}
149
150static inline void dec_io(struct page *page, void (*handler) (struct page *))
151{
152 if (atomic_dec_and_test(&mp_anchor(page)->io_count))
153 handler(page);
154}
155
156#else
157static inline struct metapage *page_to_mp(struct page *page, uint offset)
158{
159 return PagePrivate(page) ? (struct metapage *)page->private : NULL;
160}
161
162static inline int insert_metapage(struct page *page, struct metapage *mp)
163{
164 if (mp) {
165 page->private = (unsigned long)mp;
166 SetPagePrivate(page);
167 kmap(page);
168 }
169 return 0;
170}
171
172static inline void remove_metapage(struct page *page, struct metapage *mp)
173{
174 page->private = 0;
175 ClearPagePrivate(page);
176 kunmap(page);
177}
178
179#define inc_io(page) do {} while(0)
180#define dec_io(page, handler) handler(page)
181
182#endif
183
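With PSIZE smaller than the page size, one cache page holds several metapages; the meta_anchor hung off page->private maps each PSIZE-aligned offset back to its metapage, and io_count tracks in-flight I/O for the whole page. Callers walk a page at PSIZE steps with page_to_mp, as last_write_complete and metapage_releasepage do below; a minimal sketch of that walk:

	/* sketch: visit every metapage that shares one cache page */
	unsigned int offset;
	struct metapage *mp;

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);
		if (!mp)
			continue;
		/* per-metapage work goes here */
	}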
95static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) 184static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
96{ 185{
97 struct metapage *mp = (struct metapage *)foo; 186 struct metapage *mp = (struct metapage *)foo;
@@ -139,16 +228,6 @@ int __init metapage_init(void)
139 kmem_cache_destroy(metapage_cache); 228 kmem_cache_destroy(metapage_cache);
140 return -ENOMEM; 229 return -ENOMEM;
141 } 230 }
142 /*
143 * Now the hash list
144 */
145 for (hash_order = 0;
146 ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
147 hash_order++);
148 hash_table =
149 (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
150 assert(hash_table);
151 memset(hash_table, 0, PAGE_SIZE << hash_order);
152 231
153 return 0; 232 return 0;
154} 233}
@@ -159,73 +238,388 @@ void metapage_exit(void)
159 kmem_cache_destroy(metapage_cache); 238 kmem_cache_destroy(metapage_cache);
160} 239}
161 240
241static inline void drop_metapage(struct page *page, struct metapage *mp)
242{
243 if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
244 test_bit(META_io, &mp->flag))
245 return;
246 remove_metapage(page, mp);
247 INCREMENT(mpStat.pagefree);
248 free_metapage(mp);
249}
250
162/* 251/*
163 * Basically same hash as in pagemap.h, but using our hash table 252 * Metapage address space operations
164 */ 253 */
165static struct metapage **meta_hash(struct address_space *mapping, 254
166 unsigned long index) 255static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
256 unsigned int *len)
167{ 257{
168#define i (((unsigned long)mapping)/ \ 258 int rc = 0;
169 (sizeof(struct inode) & ~(sizeof(struct inode) -1 ))) 259 int xflag;
170#define s(x) ((x) + ((x) >> HASH_BITS)) 260 s64 xaddr;
171 return hash_table + (s(i + index) & (HASH_SIZE - 1)); 261 sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >>
172#undef i 262 inode->i_blkbits;
173#undef s 263
264 if (lblock >= file_blocks)
265 return 0;
266 if (lblock + *len > file_blocks)
267 *len = file_blocks - lblock;
268
269 if (inode->i_ino) {
270 rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
271 if ((rc == 0) && *len)
272 lblock = (sector_t)xaddr;
273 else
274 lblock = 0;
275 } /* else no mapping */
276
277 return lblock;
174} 278}
175 279
176static struct metapage *search_hash(struct metapage ** hash_ptr, 280static void last_read_complete(struct page *page)
177 struct address_space *mapping,
178 unsigned long index)
179{ 281{
180 struct metapage *ptr; 282 if (!PageError(page))
283 SetPageUptodate(page);
284 unlock_page(page);
285}
286
287static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
288 int err)
289{
290 struct page *page = bio->bi_private;
291
292 if (bio->bi_size)
293 return 1;
181 294
182 for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) { 295 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
183 if ((ptr->mapping == mapping) && (ptr->index == index)) 296 printk(KERN_ERR "metapage_read_end_io: I/O error\n");
184 return ptr; 297 SetPageError(page);
185 } 298 }
186 299
187 return NULL; 300 dec_io(page, last_read_complete);
301 bio_put(bio);
302
303 return 0;
188} 304}
189 305
190static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr) 306static void remove_from_logsync(struct metapage *mp)
191{ 307{
192 if (*hash_ptr) 308 struct jfs_log *log = mp->log;
193 (*hash_ptr)->hash_prev = mp; 309 unsigned long flags;
310/*
311 * This can race. Recheck that log hasn't been set to null, and after
312 * acquiring logsync lock, recheck lsn
313 */
314 if (!log)
315 return;
316
317 LOGSYNC_LOCK(log, flags);
318 if (mp->lsn) {
319 mp->log = NULL;
320 mp->lsn = 0;
321 mp->clsn = 0;
322 log->count--;
323 list_del(&mp->synclist);
324 }
325 LOGSYNC_UNLOCK(log, flags);
326}
194 327
195 mp->hash_prev = NULL; 328static void last_write_complete(struct page *page)
196 mp->hash_next = *hash_ptr; 329{
197 *hash_ptr = mp; 330 struct metapage *mp;
331 unsigned int offset;
332
333 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
334 mp = page_to_mp(page, offset);
335 if (mp && test_bit(META_io, &mp->flag)) {
336 if (mp->lsn)
337 remove_from_logsync(mp);
338 clear_bit(META_io, &mp->flag);
339 }
340 /*
341 * I'd like to call drop_metapage here, but I don't think it's
342 * safe unless I have the page locked
343 */
344 }
345 end_page_writeback(page);
198} 346}
199 347
200static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr) 348static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done,
349 int err)
201{ 350{
202 if (mp->hash_prev) 351 struct page *page = bio->bi_private;
203 mp->hash_prev->hash_next = mp->hash_next; 352
204 else { 353 BUG_ON(!PagePrivate(page));
205 assert(*hash_ptr == mp); 354
206 *hash_ptr = mp->hash_next; 355 if (bio->bi_size)
356 return 1;
357
358 if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
359 printk(KERN_ERR "metapage_write_end_io: I/O error\n");
360 SetPageError(page);
361 }
362 dec_io(page, last_write_complete);
363 bio_put(bio);
364 return 0;
365}
366
367static int metapage_writepage(struct page *page, struct writeback_control *wbc)
368{
369 struct bio *bio = NULL;
370 unsigned int block_offset; /* block offset of mp within page */
371 struct inode *inode = page->mapping->host;
372 unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
373 unsigned int len;
374 unsigned int xlen;
375 struct metapage *mp;
376 int redirty = 0;
377 sector_t lblock;
378 sector_t pblock;
379 sector_t next_block = 0;
380 sector_t page_start;
381 unsigned long bio_bytes = 0;
382 unsigned long bio_offset = 0;
383 unsigned int offset;
384
385 page_start = (sector_t)page->index <<
386 (PAGE_CACHE_SHIFT - inode->i_blkbits);
387 BUG_ON(!PageLocked(page));
388 BUG_ON(PageWriteback(page));
389
390 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
391 mp = page_to_mp(page, offset);
392
393 if (!mp || !test_bit(META_dirty, &mp->flag))
394 continue;
395
396 if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
397 redirty = 1;
398 continue;
399 }
400
401 clear_bit(META_dirty, &mp->flag);
402 block_offset = offset >> inode->i_blkbits;
403 lblock = page_start + block_offset;
404 if (bio) {
405 if (xlen && lblock == next_block) {
406 /* Contiguous, in memory & on disk */
407 len = min(xlen, blocks_per_mp);
408 xlen -= len;
409 bio_bytes += len << inode->i_blkbits;
410 set_bit(META_io, &mp->flag);
411 continue;
412 }
413 /* Not contiguous */
414 if (bio_add_page(bio, page, bio_bytes, bio_offset) <
415 bio_bytes)
416 goto add_failed;
417 /*
418 * Increment counter before submitting i/o to keep
419 * count from hitting zero before we're through
420 */
421 inc_io(page);
422 if (!bio->bi_size)
423 goto dump_bio;
424 submit_bio(WRITE, bio);
425 bio = NULL;
426 } else {
427 set_page_writeback(page);
428 inc_io(page);
429 }
430 xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
431 pblock = metapage_get_blocks(inode, lblock, &xlen);
432 if (!pblock) {
433 /* Need better error handling */
434 printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
435 dec_io(page, last_write_complete);
436 continue;
437 }
438 set_bit(META_io, &mp->flag);
439 len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage);
440
441 bio = bio_alloc(GFP_NOFS, 1);
442 bio->bi_bdev = inode->i_sb->s_bdev;
443 bio->bi_sector = pblock << (inode->i_blkbits - 9);
444 bio->bi_end_io = metapage_write_end_io;
445 bio->bi_private = page;
446
447 /* Don't call bio_add_page yet, we may add to this vec */
448 bio_offset = offset;
449 bio_bytes = len << inode->i_blkbits;
450
451 xlen -= len;
452 next_block = lblock + len;
453 }
454 if (bio) {
455 if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
456 goto add_failed;
457 if (!bio->bi_size)
458 goto dump_bio;
459
460 submit_bio(WRITE, bio);
461 }
462 if (redirty)
463 redirty_page_for_writepage(wbc, page);
464
465 unlock_page(page);
466
467 return 0;
468add_failed:
469 /* We should never reach here, since we're only adding one vec */
470 printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
471 goto skip;
472dump_bio:
473 dump_mem("bio", bio, sizeof(*bio));
474skip:
475 bio_put(bio);
476 unlock_page(page);
477 dec_io(page, last_write_complete);
478
479 return -EIO;
480}
481
482static int metapage_readpage(struct file *fp, struct page *page)
483{
484 struct inode *inode = page->mapping->host;
485 struct bio *bio = NULL;
486 unsigned int block_offset;
487 unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
488 sector_t page_start; /* address of page in fs blocks */
489 sector_t pblock;
490 unsigned int xlen;
491 unsigned int len;
492 unsigned int offset;
493
494 BUG_ON(!PageLocked(page));
495 page_start = (sector_t)page->index <<
496 (PAGE_CACHE_SHIFT - inode->i_blkbits);
497
498 block_offset = 0;
499 while (block_offset < blocks_per_page) {
500 xlen = blocks_per_page - block_offset;
501 pblock = metapage_get_blocks(inode, page_start + block_offset,
502 &xlen);
503 if (pblock) {
504 if (!PagePrivate(page))
505 insert_metapage(page, NULL);
506 inc_io(page);
507 if (bio)
508 submit_bio(READ, bio);
509
510 bio = bio_alloc(GFP_NOFS, 1);
511 bio->bi_bdev = inode->i_sb->s_bdev;
512 bio->bi_sector = pblock << (inode->i_blkbits - 9);
513 bio->bi_end_io = metapage_read_end_io;
514 bio->bi_private = page;
515 len = xlen << inode->i_blkbits;
516 offset = block_offset << inode->i_blkbits;
517 if (bio_add_page(bio, page, len, offset) < len)
518 goto add_failed;
519 block_offset += xlen;
520 } else
521 block_offset++;
207 } 522 }
523 if (bio)
524 submit_bio(READ, bio);
525 else
526 unlock_page(page);
527
528 return 0;
208 529
209 if (mp->hash_next) 530add_failed:
210 mp->hash_next->hash_prev = mp->hash_prev; 531 printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
532 bio_put(bio);
533 dec_io(page, last_read_complete);
534 return -EIO;
211} 535}
212 536
537static int metapage_releasepage(struct page *page, int gfp_mask)
538{
539 struct metapage *mp;
540 int busy = 0;
541 unsigned int offset;
542
543 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
544 mp = page_to_mp(page, offset);
545
546 if (!mp)
547 continue;
548
549 jfs_info("metapage_releasepage: mp = 0x%p", mp);
550 if (mp->count || mp->nohomeok) {
551 jfs_info("count = %ld, nohomeok = %d", mp->count,
552 mp->nohomeok);
553 busy = 1;
554 continue;
555 }
556 wait_on_page_writeback(page);
557 //WARN_ON(test_bit(META_dirty, &mp->flag));
558 if (test_bit(META_dirty, &mp->flag)) {
559 dump_mem("dirty mp in metapage_releasepage", mp,
560 sizeof(struct metapage));
561 dump_mem("page", page, sizeof(struct page));
562 dump_stack();
563 }
564 WARN_ON(mp->lsn);
565 if (mp->lsn)
566 remove_from_logsync(mp);
567 remove_metapage(page, mp);
568 INCREMENT(mpStat.pagefree);
569 free_metapage(mp);
570 }
571 if (busy)
572 return -1;
573
574 return 0;
575}
576
577static int metapage_invalidatepage(struct page *page, unsigned long offset)
578{
579 BUG_ON(offset);
580
581 if (PageWriteback(page))
582 return 0;
583
584 return metapage_releasepage(page, 0);
585}
586
587struct address_space_operations jfs_metapage_aops = {
588 .readpage = metapage_readpage,
589 .writepage = metapage_writepage,
590 .sync_page = block_sync_page,
591 .releasepage = metapage_releasepage,
592 .invalidatepage = metapage_invalidatepage,
593 .set_page_dirty = __set_page_dirty_nobuffers,
594};
595
213struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, 596struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
214 unsigned int size, int absolute, 597 unsigned int size, int absolute,
215 unsigned long new) 598 unsigned long new)
216{ 599{
217 struct metapage **hash_ptr;
218 int l2BlocksPerPage; 600 int l2BlocksPerPage;
219 int l2bsize; 601 int l2bsize;
220 struct address_space *mapping; 602 struct address_space *mapping;
221 struct metapage *mp; 603 struct metapage *mp = NULL;
604 struct page *page;
222 unsigned long page_index; 605 unsigned long page_index;
223 unsigned long page_offset; 606 unsigned long page_offset;
224 607
225 jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock); 608 jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
226 609 inode->i_ino, lblock, absolute);
610
611 l2bsize = inode->i_blkbits;
612 l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
613 page_index = lblock >> l2BlocksPerPage;
614 page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
615 if ((page_offset + size) > PAGE_CACHE_SIZE) {
616 jfs_err("MetaData crosses page boundary!!");
617 jfs_err("lblock = %lx, size = %d", lblock, size);
618 dump_stack();
619 return NULL;
620 }
227 if (absolute) 621 if (absolute)
228 mapping = inode->i_sb->s_bdev->bd_inode->i_mapping; 622 mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
229 else { 623 else {
230 /* 624 /*
231 * If an nfs client tries to read an inode that is larger 625 * If an nfs client tries to read an inode that is larger
@@ -237,312 +631,212 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
237 mapping = inode->i_mapping; 631 mapping = inode->i_mapping;
238 } 632 }
239 633
240 hash_ptr = meta_hash(mapping, lblock); 634 if (new && (PSIZE == PAGE_CACHE_SIZE)) {
241again: 635 page = grab_cache_page(mapping, page_index);
242 spin_lock(&meta_lock); 636 if (!page) {
243 mp = search_hash(hash_ptr, mapping, lblock); 637 jfs_err("grab_cache_page failed!");
638 return NULL;
639 }
640 SetPageUptodate(page);
641 } else {
642 page = read_cache_page(mapping, page_index,
643 (filler_t *)mapping->a_ops->readpage, NULL);
644 if (IS_ERR(page)) {
645 jfs_err("read_cache_page failed!");
646 return NULL;
647 }
648 lock_page(page);
649 }
650
651 mp = page_to_mp(page, page_offset);
244 if (mp) { 652 if (mp) {
245 page_found: 653 if (mp->logical_size != size) {
246 if (test_bit(META_stale, &mp->flag)) { 654 jfs_error(inode->i_sb,
247 spin_unlock(&meta_lock); 655 "__get_metapage: mp->logical_size != size");
248 msleep(1); 656 jfs_err("logical_size = %d, size = %d",
249 goto again; 657 mp->logical_size, size);
658 dump_stack();
659 goto unlock;
250 } 660 }
251 mp->count++; 661 mp->count++;
252 lock_metapage(mp); 662 lock_metapage(mp);
253 spin_unlock(&meta_lock);
254 if (test_bit(META_discard, &mp->flag)) { 663 if (test_bit(META_discard, &mp->flag)) {
255 if (!new) { 664 if (!new) {
256 jfs_error(inode->i_sb, 665 jfs_error(inode->i_sb,
257 "__get_metapage: using a " 666 "__get_metapage: using a "
258 "discarded metapage"); 667 "discarded metapage");
259 release_metapage(mp); 668 discard_metapage(mp);
260 return NULL; 669 goto unlock;
261 } 670 }
262 clear_bit(META_discard, &mp->flag); 671 clear_bit(META_discard, &mp->flag);
263 } 672 }
264 jfs_info("__get_metapage: found 0x%p, in hash", mp);
265 if (mp->logical_size != size) {
266 jfs_error(inode->i_sb,
267 "__get_metapage: mp->logical_size != size");
268 release_metapage(mp);
269 return NULL;
270 }
271 } else { 673 } else {
272 l2bsize = inode->i_blkbits; 674 INCREMENT(mpStat.pagealloc);
273 l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; 675 mp = alloc_metapage(GFP_NOFS);
274 page_index = lblock >> l2BlocksPerPage; 676 mp->page = page;
275 page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
276 l2bsize;
277 if ((page_offset + size) > PAGE_CACHE_SIZE) {
278 spin_unlock(&meta_lock);
279 jfs_err("MetaData crosses page boundary!!");
280 return NULL;
281 }
282
283 /*
284 * Locks held on aggregate inode pages are usually
285 * not held long, and they are taken in critical code
286 * paths (committing dirty inodes, txCommit thread)
287 *
288 * Attempt to get metapage without blocking, tapping into
289 * reserves if necessary.
290 */
291 mp = NULL;
292 if (JFS_IP(inode)->fileset == AGGREGATE_I) {
293 mp = alloc_metapage(GFP_ATOMIC);
294 if (!mp) {
295 /*
296 * mempool is supposed to protect us from
297 * failing here. We will try a blocking
298 * call, but a deadlock is possible here
299 */
300 printk(KERN_WARNING
301 "__get_metapage: atomic call to mempool_alloc failed.\n");
302 printk(KERN_WARNING
303 "Will attempt blocking call\n");
304 }
305 }
306 if (!mp) {
307 struct metapage *mp2;
308
309 spin_unlock(&meta_lock);
310 mp = alloc_metapage(GFP_NOFS);
311 spin_lock(&meta_lock);
312
313 /* we dropped the meta_lock, we need to search the
314 * hash again.
315 */
316 mp2 = search_hash(hash_ptr, mapping, lblock);
317 if (mp2) {
318 free_metapage(mp);
319 mp = mp2;
320 goto page_found;
321 }
322 }
323 mp->flag = 0; 677 mp->flag = 0;
324 lock_metapage(mp);
325 if (absolute)
326 set_bit(META_absolute, &mp->flag);
327 mp->xflag = COMMIT_PAGE; 678 mp->xflag = COMMIT_PAGE;
328 mp->count = 1; 679 mp->count = 1;
329 atomic_set(&mp->nohomeok,0); 680 mp->nohomeok = 0;
330 mp->mapping = mapping;
331 mp->index = lblock;
332 mp->page = NULL;
333 mp->logical_size = size; 681 mp->logical_size = size;
334 add_to_hash(mp, hash_ptr); 682 mp->data = page_address(page) + page_offset;
335 spin_unlock(&meta_lock); 683 mp->index = lblock;
336 684 if (unlikely(insert_metapage(page, mp))) {
337 if (new) { 685 free_metapage(mp);
338 jfs_info("__get_metapage: Calling grab_cache_page"); 686 goto unlock;
339 mp->page = grab_cache_page(mapping, page_index);
340 if (!mp->page) {
341 jfs_err("grab_cache_page failed!");
342 goto freeit;
343 } else {
344 INCREMENT(mpStat.pagealloc);
345 unlock_page(mp->page);
346 }
347 } else {
348 jfs_info("__get_metapage: Calling read_cache_page");
349 mp->page = read_cache_page(mapping, lblock,
350 (filler_t *)mapping->a_ops->readpage, NULL);
351 if (IS_ERR(mp->page)) {
352 jfs_err("read_cache_page failed!");
353 goto freeit;
354 } else
355 INCREMENT(mpStat.pagealloc);
356 } 687 }
357 mp->data = kmap(mp->page) + page_offset; 688 lock_metapage(mp);
358 } 689 }
359 690
360 if (new) 691 if (new) {
692 jfs_info("zeroing mp = 0x%p", mp);
361 memset(mp->data, 0, PSIZE); 693 memset(mp->data, 0, PSIZE);
694 }
362 695
363 jfs_info("__get_metapage: returning = 0x%p", mp); 696 unlock_page(page);
697 jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
364 return mp; 698 return mp;
365 699
366freeit: 700unlock:
367 spin_lock(&meta_lock); 701 unlock_page(page);
368 remove_from_hash(mp, hash_ptr);
369 free_metapage(mp);
370 spin_unlock(&meta_lock);
371 return NULL; 702 return NULL;
372} 703}
373 704
374void hold_metapage(struct metapage * mp, int force) 705void grab_metapage(struct metapage * mp)
375{ 706{
376 spin_lock(&meta_lock); 707 jfs_info("grab_metapage: mp = 0x%p", mp);
377 708 page_cache_get(mp->page);
709 lock_page(mp->page);
378 mp->count++; 710 mp->count++;
379 711 lock_metapage(mp);
380 if (force) { 712 unlock_page(mp->page);
381 ASSERT (!(test_bit(META_forced, &mp->flag)));
382 if (trylock_metapage(mp))
383 set_bit(META_forced, &mp->flag);
384 } else
385 lock_metapage(mp);
386
387 spin_unlock(&meta_lock);
388} 713}
389 714
390static void __write_metapage(struct metapage * mp) 715void force_metapage(struct metapage *mp)
391{ 716{
392 int l2bsize = mp->mapping->host->i_blkbits; 717 struct page *page = mp->page;
393 int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; 718 jfs_info("force_metapage: mp = 0x%p", mp);
394 unsigned long page_index; 719 set_bit(META_forcewrite, &mp->flag);
395 unsigned long page_offset; 720 clear_bit(META_sync, &mp->flag);
396 int rc; 721 page_cache_get(page);
397 722 lock_page(page);
398 jfs_info("__write_metapage: mp = 0x%p", mp); 723 set_page_dirty(page);
399 724 write_one_page(page, 1);
400 page_index = mp->page->index; 725 clear_bit(META_forcewrite, &mp->flag);
401 page_offset = 726 page_cache_release(page);
402 (mp->index - (page_index << l2BlocksPerPage)) << l2bsize; 727}
403 728
729extern void hold_metapage(struct metapage *mp)
730{
404 lock_page(mp->page); 731 lock_page(mp->page);
405 rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset, 732}
406 page_offset + 733
407 mp->logical_size); 734extern void put_metapage(struct metapage *mp)
408 if (rc) { 735{
409 jfs_err("prepare_write return %d!", rc); 736 if (mp->count || mp->nohomeok) {
410 ClearPageUptodate(mp->page); 737 /* Someone else will release this */
411 unlock_page(mp->page); 738 unlock_page(mp->page);
412 clear_bit(META_dirty, &mp->flag);
413 return; 739 return;
414 } 740 }
415 rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset, 741 page_cache_get(mp->page);
416 page_offset + 742 mp->count++;
417 mp->logical_size); 743 lock_metapage(mp);
418 if (rc) {
419 jfs_err("commit_write returned %d", rc);
420 }
421
422 unlock_page(mp->page); 744 unlock_page(mp->page);
423 clear_bit(META_dirty, &mp->flag); 745 release_metapage(mp);
424
425 jfs_info("__write_metapage done");
426}
427
428static inline void sync_metapage(struct metapage *mp)
429{
430 struct page *page = mp->page;
431
432 page_cache_get(page);
433 lock_page(page);
434
435 /* we're done with this page - no need to check for errors */
436 if (page_has_buffers(page))
437 write_one_page(page, 1);
438 else
439 unlock_page(page);
440 page_cache_release(page);
441} 746}
442 747
443void release_metapage(struct metapage * mp) 748void release_metapage(struct metapage * mp)
444{ 749{
445 struct jfs_log *log; 750 struct page *page = mp->page;
446
447 jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag); 751 jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);
448 752
449 spin_lock(&meta_lock); 753 BUG_ON(!page);
450 if (test_bit(META_forced, &mp->flag)) { 754
451 clear_bit(META_forced, &mp->flag); 755 lock_page(page);
452 mp->count--; 756 unlock_metapage(mp);
453 spin_unlock(&meta_lock);
454 return;
455 }
456 757
457 assert(mp->count); 758 assert(mp->count);
458 if (--mp->count || atomic_read(&mp->nohomeok)) { 759 if (--mp->count || mp->nohomeok) {
459 unlock_metapage(mp); 760 unlock_page(page);
460 spin_unlock(&meta_lock); 761 page_cache_release(page);
461 return; 762 return;
462 } 763 }
463 764
464 if (mp->page) { 765 if (test_bit(META_dirty, &mp->flag)) {
465 set_bit(META_stale, &mp->flag); 766 set_page_dirty(page);
466 spin_unlock(&meta_lock);
467 kunmap(mp->page);
468 mp->data = NULL;
469 if (test_bit(META_dirty, &mp->flag))
470 __write_metapage(mp);
471 if (test_bit(META_sync, &mp->flag)) { 767 if (test_bit(META_sync, &mp->flag)) {
472 sync_metapage(mp);
473 clear_bit(META_sync, &mp->flag); 768 clear_bit(META_sync, &mp->flag);
769 write_one_page(page, 1);
770 lock_page(page); /* write_one_page unlocks the page */
474 } 771 }
772 } else if (mp->lsn) /* discard_metapage doesn't remove it */
773 remove_from_logsync(mp);
475 774
476 if (test_bit(META_discard, &mp->flag)) { 775#if MPS_PER_PAGE == 1
477 lock_page(mp->page); 776 /*
478 block_invalidatepage(mp->page, 0); 777 * If we know this is the only thing in the page, we can throw
479 unlock_page(mp->page); 778 * the page out of the page cache. If pages are larger, we
480 } 779 * don't want to do this.
481 780 */
482 page_cache_release(mp->page);
483 mp->page = NULL;
484 INCREMENT(mpStat.pagefree);
485 spin_lock(&meta_lock);
486 }
487 781
488 if (mp->lsn) { 782 /* Retest mp->count since we may have released page lock */
489 /* 783 if (test_bit(META_discard, &mp->flag) && !mp->count) {
490 * Remove metapage from logsynclist. 784 clear_page_dirty(page);
491 */ 785 ClearPageUptodate(page);
492 log = mp->log; 786#ifdef _NOT_YET
493 LOGSYNC_LOCK(log); 787 if (page->mapping) {
494 mp->log = NULL; 788 /* Remove from page cache and page cache reference */
495 mp->lsn = 0; 789 remove_from_page_cache(page);
496 mp->clsn = 0; 790 page_cache_release(page);
497 log->count--; 791 metapage_releasepage(page, 0);
498 list_del(&mp->synclist); 792 }
499 LOGSYNC_UNLOCK(log); 793#endif
500 } 794 }
501 remove_from_hash(mp, meta_hash(mp->mapping, mp->index)); 795#else
502 spin_unlock(&meta_lock); 796 /* Try to keep metapages from using up too much memory */
503 797 drop_metapage(page, mp);
504 free_metapage(mp); 798#endif
799 unlock_page(page);
800 page_cache_release(page);
505} 801}
506 802
507void __invalidate_metapages(struct inode *ip, s64 addr, int len) 803void __invalidate_metapages(struct inode *ip, s64 addr, int len)
508{ 804{
509 struct metapage **hash_ptr; 805 sector_t lblock;
510 unsigned long lblock;
511 int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits; 806 int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
807 int BlocksPerPage = 1 << l2BlocksPerPage;
512 /* All callers are interested in block device's mapping */ 808 /* All callers are interested in block device's mapping */
513 struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping; 809 struct address_space *mapping =
810 JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
514 struct metapage *mp; 811 struct metapage *mp;
515 struct page *page; 812 struct page *page;
813 unsigned int offset;
516 814
517 /* 815 /*
518 * First, mark metapages to discard. They will eventually be 816 * Mark metapages to discard. They will eventually be
519 * released, but should not be written. 817 * released, but should not be written.
520 */ 818 */
521 for (lblock = addr; lblock < addr + len; 819 for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
522 lblock += 1 << l2BlocksPerPage) { 820 lblock += BlocksPerPage) {
523 hash_ptr = meta_hash(mapping, lblock); 821 page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
524again: 822 if (!page)
525 spin_lock(&meta_lock); 823 continue;
526 mp = search_hash(hash_ptr, mapping, lblock); 824 for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
527 if (mp) { 825 mp = page_to_mp(page, offset);
528 if (test_bit(META_stale, &mp->flag)) { 826 if (!mp)
529 spin_unlock(&meta_lock); 827 continue;
530 msleep(1); 828 if (mp->index < addr)
531 goto again; 829 continue;
532 } 830 if (mp->index >= addr + len)
831 break;
533 832
534 clear_bit(META_dirty, &mp->flag); 833 clear_bit(META_dirty, &mp->flag);
535 set_bit(META_discard, &mp->flag); 834 set_bit(META_discard, &mp->flag);
536 spin_unlock(&meta_lock); 835 if (mp->lsn)
537 } else { 836 remove_from_logsync(mp);
538 spin_unlock(&meta_lock);
539 page = find_lock_page(mapping, lblock>>l2BlocksPerPage);
540 if (page) {
541 block_invalidatepage(page, 0);
542 unlock_page(page);
543 page_cache_release(page);
544 }
545 } 837 }
838 unlock_page(page);
839 page_cache_release(page);
546 } 840 }
547} 841}
548 842
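Although the private hash table is gone, the consumer-side API keeps its shape: read_metapage still wraps __get_metapage with new == FALSE, and write_metapage still marks the metapage dirty and releases it. A hedged usage sketch (update_payload is illustrative, not a real JFS helper):

	/* sketch: typical metapage consumer */
	struct metapage *mp = read_metapage(ip, lblock, PSIZE, 0);

	if (mp) {
		update_payload(mp->data);	/* illustrative: modify the PSIZE buffer */
		write_metapage(mp);		/* sets META_dirty, then releases */
	}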
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index 0e58aba58c3..991e9fb84c7 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -33,38 +33,27 @@ struct metapage {
33 unsigned long flag; /* See Below */ 33 unsigned long flag; /* See Below */
34 unsigned long count; /* Reference count */ 34 unsigned long count; /* Reference count */
35 void *data; /* Data pointer */ 35 void *data; /* Data pointer */
36 36 sector_t index; /* block address of page */
37 /* list management stuff */
38 struct metapage *hash_prev;
39 struct metapage *hash_next; /* Also used for free list */
40
41 /*
42 * mapping & index become redundant, but we need these here to
43 * add the metapage to the hash before we have the real page
44 */
45 struct address_space *mapping;
46 unsigned long index;
47 wait_queue_head_t wait; 37 wait_queue_head_t wait;
48 38
49 /* implementation */ 39 /* implementation */
50 struct page *page; 40 struct page *page;
51 unsigned long logical_size; 41 unsigned int logical_size;
52 42
53 /* Journal management */ 43 /* Journal management */
54 int clsn; 44 int clsn;
55 atomic_t nohomeok; 45 int nohomeok;
56 struct jfs_log *log; 46 struct jfs_log *log;
57}; 47};
58 48
59/* metapage flag */ 49/* metapage flag */
60#define META_locked 0 50#define META_locked 0
61#define META_absolute 1 51#define META_free 1
62#define META_free 2 52#define META_dirty 2
63#define META_dirty 3 53#define META_sync 3
64#define META_sync 4 54#define META_discard 4
65#define META_discard 5 55#define META_forcewrite 5
66#define META_forced 6 56#define META_io 6
67#define META_stale 7
68 57
69#define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag) 58#define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag)
70 59
@@ -80,7 +69,16 @@ extern struct metapage *__get_metapage(struct inode *inode,
80 __get_metapage(inode, lblock, size, absolute, TRUE) 69 __get_metapage(inode, lblock, size, absolute, TRUE)
81 70
82extern void release_metapage(struct metapage *); 71extern void release_metapage(struct metapage *);
83extern void hold_metapage(struct metapage *, int); 72extern void grab_metapage(struct metapage *);
73extern void force_metapage(struct metapage *);
74
75/*
76 * hold_metapage and put_metapage are used in conjunction. The page lock
77 * is not dropped between the two, so no other threads can get or release
78 * the metapage
79 */
80extern void hold_metapage(struct metapage *);
81extern void put_metapage(struct metapage *);
84 82
85static inline void write_metapage(struct metapage *mp) 83static inline void write_metapage(struct metapage *mp)
86{ 84{
@@ -101,6 +99,46 @@ static inline void discard_metapage(struct metapage *mp)
101 release_metapage(mp); 99 release_metapage(mp);
102} 100}
103 101
102static inline void metapage_nohomeok(struct metapage *mp)
103{
104 struct page *page = mp->page;
105 lock_page(page);
106 if (!mp->nohomeok++) {
107 mark_metapage_dirty(mp);
108 page_cache_get(page);
109 wait_on_page_writeback(page);
110 }
111 unlock_page(page);
112}
113
114/*
115 * This serializes access to mp->lsn when metapages are added to logsynclist
116 * without setting nohomeok. i.e. updating imap & dmap
117 */
118static inline void metapage_wait_for_io(struct metapage *mp)
119{
120 if (test_bit(META_io, &mp->flag))
121 wait_on_page_writeback(mp->page);
122}
123
124/*
125 * This is called when already holding the metapage
126 */
127static inline void _metapage_homeok(struct metapage *mp)
128{
129 if (!--mp->nohomeok)
130 page_cache_release(mp->page);
131}
132
133static inline void metapage_homeok(struct metapage *mp)
134{
135 hold_metapage(mp);
136 _metapage_homeok(mp);
137 put_metapage(mp);
138}
139
140extern struct address_space_operations jfs_metapage_aops;
141
104/* 142/*
105 * These routines invalidate all pages for an extent. 143 * These routines invalidate all pages for an extent.
106 */ 144 */
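nohomeok is now a plain int guarded by the page lock instead of an atomic_t, and the pinning protocol goes through the inlines above. A minimal sketch of the pairing, mirroring txLock and txUnlock in the jfs_txnmgr.c hunks below:

	/* sketch: pin a metapage while its log record is outstanding */
	metapage_nohomeok(mp);	/* marks dirty, takes an extra page ref */
	/* ... transaction commits; log record reaches disk ... */
	hold_metapage(mp);	/* just locks the page */
	_metapage_homeok(mp);	/* unpin; drops the page ref at zero */
	put_metapage(mp);	/* releases, or merely unlocks if still held */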
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index c535ffd638e..032d111bc33 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -285,11 +285,6 @@ int jfs_mount_rw(struct super_block *sb, int remount)
285 */ 285 */
286 logMOUNT(sb); 286 logMOUNT(sb);
287 287
288 /*
289 * Set page cache allocation policy
290 */
291 mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS);
292
293 return rc; 288 return rc;
294} 289}
295 290
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f40301d93f7..e93d01aa12c 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -227,6 +227,7 @@ static lid_t txLockAlloc(void)
227 227
228static void txLockFree(lid_t lid) 228static void txLockFree(lid_t lid)
229{ 229{
230 TxLock[lid].tid = 0;
230 TxLock[lid].next = TxAnchor.freelock; 231 TxLock[lid].next = TxAnchor.freelock;
231 TxAnchor.freelock = lid; 232 TxAnchor.freelock = lid;
232 TxAnchor.tlocksInUse--; 233 TxAnchor.tlocksInUse--;
@@ -566,9 +567,6 @@ void txEnd(tid_t tid)
566 * synchronize with logsync barrier 567 * synchronize with logsync barrier
567 */ 568 */
568 if (test_bit(log_SYNCBARRIER, &log->flag)) { 569 if (test_bit(log_SYNCBARRIER, &log->flag)) {
569 /* forward log syncpt */
570 /* lmSync(log); */
571
572 jfs_info("log barrier off: 0x%x", log->lsn); 570 jfs_info("log barrier off: 0x%x", log->lsn);
573 571
574 /* enable new transactions start */ 572 /* enable new transactions start */
@@ -576,15 +574,22 @@ void txEnd(tid_t tid)
576 574
577 /* wakeup all waiters for logsync barrier */ 575 /* wakeup all waiters for logsync barrier */
578 TXN_WAKEUP(&log->syncwait); 576 TXN_WAKEUP(&log->syncwait);
577
578 TXN_UNLOCK();
579
580 /* forward log syncpt */
581 jfs_syncpt(log);
582
583 goto wakeup;
579 } 584 }
580 } 585 }
581 586
587 TXN_UNLOCK();
588wakeup:
582 /* 589 /*
583 * wakeup all waiters for a free tblock 590 * wakeup all waiters for a free tblock
584 */ 591 */
585 TXN_WAKEUP(&TxAnchor.freewait); 592 TXN_WAKEUP(&TxAnchor.freewait);
586
587 TXN_UNLOCK();
588} 593}
589 594
590 595
@@ -633,8 +638,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
633 638
634 /* is page locked by the requester transaction ? */ 639 /* is page locked by the requester transaction ? */
635 tlck = lid_to_tlock(lid); 640 tlck = lid_to_tlock(lid);
636 if ((xtid = tlck->tid) == tid) 641 if ((xtid = tlck->tid) == tid) {
642 TXN_UNLOCK();
637 goto grantLock; 643 goto grantLock;
644 }
638 645
639 /* 646 /*
640 * is page locked by anonymous transaction/lock ? 647 * is page locked by anonymous transaction/lock ?
@@ -649,6 +656,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
649 */ 656 */
650 if (xtid == 0) { 657 if (xtid == 0) {
651 tlck->tid = tid; 658 tlck->tid = tid;
659 TXN_UNLOCK();
652 tblk = tid_to_tblock(tid); 660 tblk = tid_to_tblock(tid);
653 /* 661 /*
654 * The order of the tlocks in the transaction is important 662 * The order of the tlocks in the transaction is important
@@ -706,17 +714,18 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
706 */ 714 */
707 tlck->tid = tid; 715 tlck->tid = tid;
708 716
717 TXN_UNLOCK();
718
709 /* mark tlock for meta-data page */ 719 /* mark tlock for meta-data page */
710 if (mp->xflag & COMMIT_PAGE) { 720 if (mp->xflag & COMMIT_PAGE) {
711 721
712 tlck->flag = tlckPAGELOCK; 722 tlck->flag = tlckPAGELOCK;
713 723
714 /* mark the page dirty and nohomeok */ 724 /* mark the page dirty and nohomeok */
715 mark_metapage_dirty(mp); 725 metapage_nohomeok(mp);
716 atomic_inc(&mp->nohomeok);
717 726
718 jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", 727 jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
719 mp, atomic_read(&mp->nohomeok), tid, tlck); 728 mp, mp->nohomeok, tid, tlck);
720 729
721 /* if anonymous transaction, and buffer is on the group 730 /* if anonymous transaction, and buffer is on the group
722 * commit synclist, mark inode to show this. This will 731 * commit synclist, mark inode to show this. This will
@@ -762,8 +771,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
762 if (tlck->next == 0) { 771 if (tlck->next == 0) {
763 /* This inode's first anonymous transaction */ 772 /* This inode's first anonymous transaction */
764 jfs_ip->atltail = lid; 773 jfs_ip->atltail = lid;
774 TXN_LOCK();
765 list_add_tail(&jfs_ip->anon_inode_list, 775 list_add_tail(&jfs_ip->anon_inode_list,
766 &TxAnchor.anon_list); 776 &TxAnchor.anon_list);
777 TXN_UNLOCK();
767 } 778 }
768 } 779 }
769 780
@@ -821,8 +832,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
821 grantLock: 832 grantLock:
822 tlck->type |= type; 833 tlck->type |= type;
823 834
824 TXN_UNLOCK();
825
826 return tlck; 835 return tlck;
827 836
828 /* 837 /*
@@ -841,11 +850,19 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
841 BUG(); 850 BUG();
842 } 851 }
843 INCREMENT(stattx.waitlock); /* statistics */ 852 INCREMENT(stattx.waitlock); /* statistics */
853 TXN_UNLOCK();
844 release_metapage(mp); 854 release_metapage(mp);
855 TXN_LOCK();
856 xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */
845 857
846 jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", 858 jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
847 tid, xtid, lid); 859 tid, xtid, lid);
848 TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); 860
861 /* Recheck everything since dropping TXN_LOCK */
862 if (xtid && (tlck->mp == mp) && (mp->lid == lid))
863 TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
864 else
865 TXN_UNLOCK();
849 jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); 866 jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid);
850 867
851 return NULL; 868 return NULL;
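This recheck is what the TxLock[lid].tid = 0 added to txLockFree in the first jfs_txnmgr.c hunk pays for: once TXN_LOCK has been dropped and re-taken, a zero tid, a changed tlck->mp, or a changed mp->lid means the tlock was freed or recycled in the meantime, so sleeping on the old tblock would be wrong. A condensed sketch of the pattern:

	/* sketch: drop the lock, then sleep only if the tlock is still ours */
	TXN_UNLOCK();
	release_metapage(mp);	/* may sleep */
	TXN_LOCK();
	xtid = tlck->tid;	/* reread under the lock */
	if (xtid && tlck->mp == mp && mp->lid == lid)
		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
	else
		TXN_UNLOCK();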
@@ -906,6 +923,7 @@ static void txUnlock(struct tblock * tblk)
906 struct metapage *mp; 923 struct metapage *mp;
907 struct jfs_log *log; 924 struct jfs_log *log;
908 int difft, diffp; 925 int difft, diffp;
926 unsigned long flags;
909 927
910 jfs_info("txUnlock: tblk = 0x%p", tblk); 928 jfs_info("txUnlock: tblk = 0x%p", tblk);
911 log = JFS_SBI(tblk->sb)->log; 929 log = JFS_SBI(tblk->sb)->log;
@@ -925,19 +943,14 @@ static void txUnlock(struct tblock * tblk)
925 assert(mp->xflag & COMMIT_PAGE); 943 assert(mp->xflag & COMMIT_PAGE);
926 944
927 /* hold buffer 945 /* hold buffer
928 *
929 * It's possible that someone else has the metapage.
930 * The only things were changing are nohomeok, which
931 * is handled atomically, and clsn which is protected
932 * by the LOGSYNC_LOCK.
933 */ 946 */
934 hold_metapage(mp, 1); 947 hold_metapage(mp);
935 948
936 assert(atomic_read(&mp->nohomeok) > 0); 949 assert(mp->nohomeok > 0);
937 atomic_dec(&mp->nohomeok); 950 _metapage_homeok(mp);
938 951
939 /* inherit younger/larger clsn */ 952 /* inherit younger/larger clsn */
940 LOGSYNC_LOCK(log); 953 LOGSYNC_LOCK(log, flags);
941 if (mp->clsn) { 954 if (mp->clsn) {
942 logdiff(difft, tblk->clsn, log); 955 logdiff(difft, tblk->clsn, log);
943 logdiff(diffp, mp->clsn, log); 956 logdiff(diffp, mp->clsn, log);
@@ -945,16 +958,11 @@ static void txUnlock(struct tblock * tblk)
945 mp->clsn = tblk->clsn; 958 mp->clsn = tblk->clsn;
946 } else 959 } else
947 mp->clsn = tblk->clsn; 960 mp->clsn = tblk->clsn;
948 LOGSYNC_UNLOCK(log); 961 LOGSYNC_UNLOCK(log, flags);
949 962
950 assert(!(tlck->flag & tlckFREEPAGE)); 963 assert(!(tlck->flag & tlckFREEPAGE));
951 964
952 if (tlck->flag & tlckWRITEPAGE) { 965 put_metapage(mp);
953 write_metapage(mp);
954 } else {
955 /* release page which has been forced */
956 release_metapage(mp);
957 }
958 } 966 }
959 967
960 /* insert tlock, and linelock(s) of the tlock if any, 968 /* insert tlock, and linelock(s) of the tlock if any,
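The second argument added to LOGSYNC_LOCK/LOGSYNC_UNLOCK, together with the new unsigned long flags local, is the usual signature of an irq-safe spinlock wrapper. Assuming the macros now expand to spin_lock_irqsave/spin_unlock_irqrestore (the macro bodies are not in this diff), the idiom is:

    /* Hedged sketch of the two-argument locking form; the lock field
     * name is hypothetical. */
    static void logsync_update_sketch(struct jfs_log *log)
    {
        unsigned long flags;

        spin_lock_irqsave(&log->synclock, flags);
        /* ... update clsn / synclist state also reachable with
         *     interrupts disabled ... */
        spin_unlock_irqrestore(&log->synclock, flags);
    }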
@@ -981,10 +989,10 @@ static void txUnlock(struct tblock * tblk)
981 * has been inserted in logsync list at txUpdateMap()) 989 * has been inserted in logsync list at txUpdateMap())
982 */ 990 */
983 if (tblk->lsn) { 991 if (tblk->lsn) {
984 LOGSYNC_LOCK(log); 992 LOGSYNC_LOCK(log, flags);
985 log->count--; 993 log->count--;
986 list_del(&tblk->synclist); 994 list_del(&tblk->synclist);
987 LOGSYNC_UNLOCK(log); 995 LOGSYNC_UNLOCK(log, flags);
988 } 996 }
989} 997}
990 998
@@ -1573,8 +1581,8 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1573 * the last entry, so don't bother logging this 1581 * the last entry, so don't bother logging this
1574 */ 1582 */
1575 mp->lid = 0; 1583 mp->lid = 0;
1576 hold_metapage(mp, 0); 1584 grab_metapage(mp);
1577 atomic_dec(&mp->nohomeok); 1585 metapage_homeok(mp);
1578 discard_metapage(mp); 1586 discard_metapage(mp);
1579 tlck->mp = NULL; 1587 tlck->mp = NULL;
1580 return 0; 1588 return 0;
@@ -1712,7 +1720,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1712 struct maplock *maplock; 1720 struct maplock *maplock;
1713 struct xdlistlock *xadlock; 1721 struct xdlistlock *xadlock;
1714 struct pxd_lock *pxdlock; 1722 struct pxd_lock *pxdlock;
1715 pxd_t *pxd; 1723 pxd_t *page_pxd;
1716 int next, lwm, hwm; 1724 int next, lwm, hwm;
1717 1725
1718 ip = tlck->ip; 1726 ip = tlck->ip;
@@ -1722,7 +1730,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1722 lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); 1730 lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
1723 lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); 1731 lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
1724 1732
1725 pxd = &lrd->log.redopage.pxd; 1733 page_pxd = &lrd->log.redopage.pxd;
1726 1734
1727 if (tlck->type & tlckBTROOT) { 1735 if (tlck->type & tlckBTROOT) {
1728 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); 1736 lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
@@ -1752,9 +1760,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1752 * applying the after-image to the meta-data page. 1760 * applying the after-image to the meta-data page.
1753 */ 1761 */
1754 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1762 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1755// *pxd = mp->cm_pxd; 1763// *page_pxd = mp->cm_pxd;
1756 PXDaddress(pxd, mp->index); 1764 PXDaddress(page_pxd, mp->index);
1757 PXDlength(pxd, 1765 PXDlength(page_pxd,
1758 mp->logical_size >> tblk->sb->s_blocksize_bits); 1766 mp->logical_size >> tblk->sb->s_blocksize_bits);
1759 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1767 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1760 1768
@@ -1776,25 +1784,31 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1776 tlck->flag |= tlckUPDATEMAP; 1784 tlck->flag |= tlckUPDATEMAP;
1777 xadlock->flag = mlckALLOCXADLIST; 1785 xadlock->flag = mlckALLOCXADLIST;
1778 xadlock->count = next - lwm; 1786 xadlock->count = next - lwm;
1779 if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { 1787 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1780 int i; 1788 int i;
1789 pxd_t *pxd;
1781 /* 1790 /*
1782 * Lazy commit may allow xtree to be modified before 1791 * Lazy commit may allow xtree to be modified before
1783 * txUpdateMap runs. Copy xad into linelock to 1792 * txUpdateMap runs. Copy xad into linelock to
1784 * preserve correct data. 1793 * preserve correct data.
1794 *
1795 	 * We can fit twice as many pxd's as xads in the lock
1785 */ 1796 */
1786 xadlock->xdlist = &xtlck->pxdlock; 1797 xadlock->flag = mlckALLOCPXDLIST;
1787 memcpy(xadlock->xdlist, &p->xad[lwm], 1798 pxd = xadlock->xdlist = &xtlck->pxdlock;
1788 sizeof(xad_t) * xadlock->count); 1799 for (i = 0; i < xadlock->count; i++) {
1789 1800 PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
1790 for (i = 0; i < xadlock->count; i++) 1801 PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
1791 p->xad[lwm + i].flag &= 1802 p->xad[lwm + i].flag &=
1792 ~(XAD_NEW | XAD_EXTENDED); 1803 ~(XAD_NEW | XAD_EXTENDED);
1804 pxd++;
1805 }
1793 } else { 1806 } else {
1794 /* 1807 /*
1795 	 * xdlist will point into inode's xtree, ensure 1808
1796 * that transaction is not committed lazily. 1809 * that transaction is not committed lazily.
1797 */ 1810 */
1811 xadlock->flag = mlckALLOCXADLIST;
1798 xadlock->xdlist = &p->xad[lwm]; 1812 xadlock->xdlist = &p->xad[lwm];
1799 tblk->xflag &= ~COMMIT_LAZY; 1813 tblk->xflag &= ~COMMIT_LAZY;
1800 } 1814 }
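The "twice as many" remark is size arithmetic: an on-disk xad_t carries flags, a 40-bit offset, a 24-bit length and a 40-bit address in 16 bytes, while a pxd_t packs only the length and address into 8 bytes, which is why the lazy-commit copy threshold grows from 2 xads to 4 pxds in the same linelock area. A stand-in illustration (stub types with the assumed on-disk sizes; the real types carry packed bitfields, not raw words):

    struct pxd_stub { unsigned long long len_addr; };   /*  8 bytes */
    struct xad_stub { unsigned long long w0, w1; };     /* 16 bytes */
    /* sizeof(struct xad_stub) / sizeof(struct pxd_stub) == 2, so a
     * linelock area that held 2 xads holds 4 pxds. */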
@@ -1836,8 +1850,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1836 if (tblk->xflag & COMMIT_TRUNCATE) { 1850 if (tblk->xflag & COMMIT_TRUNCATE) {
1837 /* write NOREDOPAGE for the page */ 1851 /* write NOREDOPAGE for the page */
1838 lrd->type = cpu_to_le16(LOG_NOREDOPAGE); 1852 lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
1839 PXDaddress(pxd, mp->index); 1853 PXDaddress(page_pxd, mp->index);
1840 PXDlength(pxd, 1854 PXDlength(page_pxd,
1841 mp->logical_size >> tblk->sb-> 1855 mp->logical_size >> tblk->sb->
1842 s_blocksize_bits); 1856 s_blocksize_bits);
1843 lrd->backchain = 1857 lrd->backchain =
@@ -1872,22 +1886,32 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1872 * deleted page itself; 1886 * deleted page itself;
1873 */ 1887 */
1874 tlck->flag |= tlckUPDATEMAP; 1888 tlck->flag |= tlckUPDATEMAP;
1875 xadlock->flag = mlckFREEXADLIST;
1876 xadlock->count = hwm - XTENTRYSTART + 1; 1889 xadlock->count = hwm - XTENTRYSTART + 1;
1877 if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { 1890 if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
1891 int i;
1892 pxd_t *pxd;
1878 /* 1893 /*
1879 * Lazy commit may allow xtree to be modified before 1894 * Lazy commit may allow xtree to be modified before
1880 * txUpdateMap runs. Copy xad into linelock to 1895 * txUpdateMap runs. Copy xad into linelock to
1881 * preserve correct data. 1896 * preserve correct data.
1897 *
1898 	 * We can fit twice as many pxd's as xads in the lock
1882 */ 1899 */
1883 xadlock->xdlist = &xtlck->pxdlock; 1900 xadlock->flag = mlckFREEPXDLIST;
1884 memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART], 1901 pxd = xadlock->xdlist = &xtlck->pxdlock;
1885 sizeof(xad_t) * xadlock->count); 1902 for (i = 0; i < xadlock->count; i++) {
1903 PXDaddress(pxd,
1904 addressXAD(&p->xad[XTENTRYSTART + i]));
1905 PXDlength(pxd,
1906 lengthXAD(&p->xad[XTENTRYSTART + i]));
1907 pxd++;
1908 }
1886 } else { 1909 } else {
1887 /* 1910 /*
1888 	 * xdlist will point into inode's xtree, ensure 1911
1889 * that transaction is not committed lazily. 1912 * that transaction is not committed lazily.
1890 */ 1913 */
1914 xadlock->flag = mlckFREEXADLIST;
1891 xadlock->xdlist = &p->xad[XTENTRYSTART]; 1915 xadlock->xdlist = &p->xad[XTENTRYSTART];
1892 tblk->xflag &= ~COMMIT_LAZY; 1916 tblk->xflag &= ~COMMIT_LAZY;
1893 } 1917 }
@@ -1918,7 +1942,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1918 * header ? 1942 * header ?
1919 */ 1943 */
1920 if (tlck->type & tlckTRUNCATE) { 1944 if (tlck->type & tlckTRUNCATE) {
1921 pxd_t tpxd; /* truncated extent of xad */ 1945 pxd_t pxd; /* truncated extent of xad */
1922 int twm; 1946 int twm;
1923 1947
1924 /* 1948 /*
@@ -1947,8 +1971,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1947 * applying the after-image to the meta-data page. 1971 * applying the after-image to the meta-data page.
1948 */ 1972 */
1949 lrd->type = cpu_to_le16(LOG_REDOPAGE); 1973 lrd->type = cpu_to_le16(LOG_REDOPAGE);
1950 PXDaddress(pxd, mp->index); 1974 PXDaddress(page_pxd, mp->index);
1951 PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); 1975 PXDlength(page_pxd,
1976 mp->logical_size >> tblk->sb->s_blocksize_bits);
1952 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); 1977 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
1953 1978
1954 /* 1979 /*
@@ -1966,7 +1991,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
1966 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); 1991 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
1967 lrd->log.updatemap.nxd = cpu_to_le16(1); 1992 lrd->log.updatemap.nxd = cpu_to_le16(1);
1968 lrd->log.updatemap.pxd = pxdlock->pxd; 1993 lrd->log.updatemap.pxd = pxdlock->pxd;
1969 tpxd = pxdlock->pxd; /* save to format maplock */ 1994 pxd = pxdlock->pxd; /* save to format maplock */
1970 lrd->backchain = 1995 lrd->backchain =
1971 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 1996 cpu_to_le32(lmLog(log, tblk, lrd, NULL));
1972 } 1997 }
@@ -2035,7 +2060,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
2035 pxdlock = (struct pxd_lock *) xadlock; 2060 pxdlock = (struct pxd_lock *) xadlock;
2036 pxdlock->flag = mlckFREEPXD; 2061 pxdlock->flag = mlckFREEPXD;
2037 pxdlock->count = 1; 2062 pxdlock->count = 1;
2038 pxdlock->pxd = tpxd; 2063 pxdlock->pxd = pxd;
2039 2064
2040 jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d " 2065 jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
2041 "hwm:%d", ip, mp, pxdlock->count, hwm); 2066 "hwm:%d", ip, mp, pxdlock->count, hwm);
@@ -2253,7 +2278,8 @@ void txForce(struct tblock * tblk)
2253 tlck->flag &= ~tlckWRITEPAGE; 2278 tlck->flag &= ~tlckWRITEPAGE;
2254 2279
2255 /* do not release page to freelist */ 2280 /* do not release page to freelist */
2256 2281 force_metapage(mp);
2282#if 0
2257 /* 2283 /*
2258 * The "right" thing to do here is to 2284 * The "right" thing to do here is to
2259 * synchronously write the metadata. 2285 * synchronously write the metadata.
@@ -2265,9 +2291,10 @@ void txForce(struct tblock * tblk)
2265 * we can get by with synchronously writing 2291 * we can get by with synchronously writing
2266 * the pages when they are released. 2292 * the pages when they are released.
2267 */ 2293 */
2268 assert(atomic_read(&mp->nohomeok)); 2294 assert(mp->nohomeok);
2269 set_bit(META_dirty, &mp->flag); 2295 set_bit(META_dirty, &mp->flag);
2270 set_bit(META_sync, &mp->flag); 2296 set_bit(META_sync, &mp->flag);
2297#endif
2271 } 2298 }
2272 } 2299 }
2273 } 2300 }
@@ -2327,7 +2354,7 @@ static void txUpdateMap(struct tblock * tblk)
2327 */ 2354 */
2328 mp = tlck->mp; 2355 mp = tlck->mp;
2329 ASSERT(mp->xflag & COMMIT_PAGE); 2356 ASSERT(mp->xflag & COMMIT_PAGE);
2330 hold_metapage(mp, 0); 2357 grab_metapage(mp);
2331 } 2358 }
2332 2359
2333 /* 2360 /*
@@ -2377,8 +2404,8 @@ static void txUpdateMap(struct tblock * tblk)
2377 ASSERT(mp->lid == lid); 2404 ASSERT(mp->lid == lid);
2378 tlck->mp->lid = 0; 2405 tlck->mp->lid = 0;
2379 } 2406 }
2380 assert(atomic_read(&mp->nohomeok) == 1); 2407 assert(mp->nohomeok == 1);
2381 atomic_dec(&mp->nohomeok); 2408 metapage_homeok(mp);
2382 discard_metapage(mp); 2409 discard_metapage(mp);
2383 tlck->mp = NULL; 2410 tlck->mp = NULL;
2384 } 2411 }
@@ -2844,24 +2871,9 @@ static void LogSyncRelease(struct metapage * mp)
2844{ 2871{
2845 struct jfs_log *log = mp->log; 2872 struct jfs_log *log = mp->log;
2846 2873
2847 assert(atomic_read(&mp->nohomeok)); 2874 assert(mp->nohomeok);
2848 assert(log); 2875 assert(log);
2849 atomic_dec(&mp->nohomeok); 2876 metapage_homeok(mp);
2850
2851 if (atomic_read(&mp->nohomeok))
2852 return;
2853
2854 hold_metapage(mp, 0);
2855
2856 LOGSYNC_LOCK(log);
2857 mp->log = NULL;
2858 mp->lsn = 0;
2859 mp->clsn = 0;
2860 log->count--;
2861 list_del_init(&mp->synclist);
2862 LOGSYNC_UNLOCK(log);
2863
2864 release_metapage(mp);
2865} 2877}
2866 2878
2867/* 2879/*
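The deleted body above is effectively the specification for the new helper: judging only from the removed lines, metapage_homeok() must drop one nohomeok reference and, when the last reference goes away, detach the page from the log's sync list under the logsync lock. A hedged reconstruction, not the real implementation (field and macro names are the ones visible in this diff):

    static void metapage_homeok_sketch(struct metapage *mp)
    {
        unsigned long flags;

        if (--mp->nohomeok)
            return;                 /* other transactions still pin it */
        LOGSYNC_LOCK(mp->log, flags);
        mp->lsn = 0;
        mp->clsn = 0;
        mp->log->count--;
        list_del_init(&mp->synclist);
        LOGSYNC_UNLOCK(mp->log, flags);
        mp->log = NULL;
    }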
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index f31a9e3f3fe..5cf91785b54 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -49,7 +49,6 @@
49 */ 49 */
50int jfs_umount(struct super_block *sb) 50int jfs_umount(struct super_block *sb)
51{ 51{
52 struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping;
53 struct jfs_sb_info *sbi = JFS_SBI(sb); 52 struct jfs_sb_info *sbi = JFS_SBI(sb);
54 struct inode *ipbmap = sbi->ipbmap; 53 struct inode *ipbmap = sbi->ipbmap;
55 struct inode *ipimap = sbi->ipimap; 54 struct inode *ipimap = sbi->ipimap;
@@ -109,8 +108,8 @@ int jfs_umount(struct super_block *sb)
109 * Make sure all metadata makes it to disk before we mark 108 * Make sure all metadata makes it to disk before we mark
110 * the superblock as clean 109 * the superblock as clean
111 */ 110 */
112 filemap_fdatawrite(bdev_mapping); 111 filemap_fdatawrite(sbi->direct_inode->i_mapping);
113 filemap_fdatawait(bdev_mapping); 112 filemap_fdatawait(sbi->direct_inode->i_mapping);
114 113
115 /* 114 /*
116 * ensure all file system file pages are propagated to their 115 * ensure all file system file pages are propagated to their
@@ -123,9 +122,6 @@ int jfs_umount(struct super_block *sb)
123 if (log) { /* log = NULL if read-only mount */ 122 if (log) { /* log = NULL if read-only mount */
124 updateSuper(sb, FM_CLEAN); 123 updateSuper(sb, FM_CLEAN);
125 124
126 /* Restore default gfp_mask for bdev */
127 mapping_set_gfp_mask(bdev_mapping, GFP_USER);
128
129 /* 125 /*
130 * close log: 126 * close log:
131 * 127 *
@@ -140,7 +136,6 @@ int jfs_umount(struct super_block *sb)
140 136
141int jfs_umount_rw(struct super_block *sb) 137int jfs_umount_rw(struct super_block *sb)
142{ 138{
143 struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping;
144 struct jfs_sb_info *sbi = JFS_SBI(sb); 139 struct jfs_sb_info *sbi = JFS_SBI(sb);
145 struct jfs_log *log = sbi->log; 140 struct jfs_log *log = sbi->log;
146 141
@@ -166,13 +161,10 @@ int jfs_umount_rw(struct super_block *sb)
166 * mark the superblock clean before everything is flushed to 161 * mark the superblock clean before everything is flushed to
167 * disk. 162 * disk.
168 */ 163 */
169 filemap_fdatawrite(bdev_mapping); 164 filemap_fdatawrite(sbi->direct_inode->i_mapping);
170 filemap_fdatawait(bdev_mapping); 165 filemap_fdatawait(sbi->direct_inode->i_mapping);
171 166
172 updateSuper(sb, FM_CLEAN); 167 updateSuper(sb, FM_CLEAN);
173 168
174 /* Restore default gfp_mask for bdev */
175 mapping_set_gfp_mask(bdev_mapping, GFP_USER);
176
177 return lmLogClose(sb); 169 return lmLogClose(sb);
178} 170}
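Both unmount paths now flush JFS metadata through sbi->direct_inode->i_mapping, a private address_space, instead of writing back the whole block-device mapping; that is also why the GFP_USER restore for the bdev mapping disappears. The flush itself is the stock two-step write-then-wait idiom:

    filemap_fdatawrite(sbi->direct_inode->i_mapping);  /* queue dirty pages */
    filemap_fdatawait(sbi->direct_inode->i_mapping);   /* wait for the I/O  */

Presumably the point of the private mapping is that these calls no longer sweep unrelated pages cached against the block device.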
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 11c58c54b81..2c1f311914a 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -111,8 +111,8 @@ static struct {
111/* 111/*
112 * forward references 112 * forward references
113 */ 113 */
114static int xtSearch(struct inode *ip, 114static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
115 s64 xoff, int *cmpp, struct btstack * btstack, int flag); 115 struct btstack * btstack, int flag);
116 116
117static int xtSplitUp(tid_t tid, 117static int xtSplitUp(tid_t tid,
118 struct inode *ip, 118 struct inode *ip,
@@ -159,11 +159,12 @@ int xtLookup(struct inode *ip, s64 lstart,
159 xtpage_t *p; 159 xtpage_t *p;
160 int index; 160 int index;
161 xad_t *xad; 161 xad_t *xad;
162 s64 size, xoff, xend; 162 s64 next, size, xoff, xend;
163 int xlen; 163 int xlen;
164 s64 xaddr; 164 s64 xaddr;
165 165
166 *plen = 0; 166 *paddr = 0;
167 *plen = llen;
167 168
168 if (!no_check) { 169 if (!no_check) {
169 /* is lookup offset beyond eof ? */ 170 /* is lookup offset beyond eof ? */
@@ -180,7 +181,7 @@ int xtLookup(struct inode *ip, s64 lstart,
180 * search for the xad entry covering the logical extent 181 * search for the xad entry covering the logical extent
181 */ 182 */
182//search: 183//search:
183 if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) { 184 if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
184 jfs_err("xtLookup: xtSearch returned %d", rc); 185 jfs_err("xtLookup: xtSearch returned %d", rc);
185 return rc; 186 return rc;
186 } 187 }
@@ -198,8 +199,11 @@ int xtLookup(struct inode *ip, s64 lstart,
198 * lstart is a page start address, 199 * lstart is a page start address,
199 * i.e., lstart cannot start in a hole; 200 * i.e., lstart cannot start in a hole;
200 */ 201 */
201 if (cmp) 202 if (cmp) {
203 if (next)
204 *plen = min(next - lstart, llen);
202 goto out; 205 goto out;
206 }
203 207
204 /* 208 /*
205 * lxd covered by xad 209 * lxd covered by xad
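With the new nextp out-parameter, a search miss no longer just says "hole": it also reports where the next allocated extent begins, so xtLookup can answer "how much of this request is hole" in one pass. The miss-path arithmetic, restated as a small self-contained check:

    /* If the next extent starts at block 150 and the caller asked for
     * 64 blocks at lstart = 120, the reported length is the smaller of
     * the hole (30 blocks) and the request (64 blocks). */
    static long long miss_len(long long lstart, long long llen,
                              long long next)
    {
        long long hole = next - lstart;
        return llen < hole ? llen : hole;   /* min(next - lstart, llen) */
    }
    /* miss_len(120, 64, 150) == 30 */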
@@ -284,7 +288,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
284 if (lstart >= size) 288 if (lstart >= size)
285 return 0; 289 return 0;
286 290
287 if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) 291 if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
288 return rc; 292 return rc;
289 293
290 /* 294 /*
@@ -488,6 +492,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
488 * parameters: 492 * parameters:
489 * ip - file object; 493 * ip - file object;
490 * xoff - extent offset; 494 * xoff - extent offset;
495 * nextp - address of next extent (if any) for search miss
491 * cmpp - comparison result: 496 * cmpp - comparison result:
492 * btstack - traverse stack; 497 * btstack - traverse stack;
493 * flag - search process flag (XT_INSERT); 498 * flag - search process flag (XT_INSERT);
@@ -497,7 +502,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
497 * *cmpp is set to result of comparison with the entry returned. 502 * *cmpp is set to result of comparison with the entry returned.
498 * the page containing the entry is pinned at exit. 503 * the page containing the entry is pinned at exit.
499 */ 504 */
500static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ 505static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
501 int *cmpp, struct btstack * btstack, int flag) 506 int *cmpp, struct btstack * btstack, int flag)
502{ 507{
503 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 508 struct jfs_inode_info *jfs_ip = JFS_IP(ip);
@@ -511,6 +516,7 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
511 struct btframe *btsp; 516 struct btframe *btsp;
512 int nsplit = 0; /* number of pages to split */ 517 int nsplit = 0; /* number of pages to split */
513 s64 t64; 518 s64 t64;
519 s64 next = 0;
514 520
515 INCREMENT(xtStat.search); 521 INCREMENT(xtStat.search);
516 522
@@ -579,6 +585,7 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
579 * previous and this entry 585 * previous and this entry
580 */ 586 */
581 *cmpp = 1; 587 *cmpp = 1;
588 next = t64;
582 goto out; 589 goto out;
583 } 590 }
584 591
@@ -623,6 +630,9 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
623 /* update sequential access heuristics */ 630 /* update sequential access heuristics */
624 jfs_ip->btindex = index; 631 jfs_ip->btindex = index;
625 632
633 if (nextp)
634 *nextp = next;
635
626 INCREMENT(xtStat.fastSearch); 636 INCREMENT(xtStat.fastSearch);
627 return 0; 637 return 0;
628 } 638 }
@@ -675,10 +685,11 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
675 685
676 return 0; 686 return 0;
677 } 687 }
678
679 /* search hit - internal page: 688 /* search hit - internal page:
680 * descend/search its child page 689 * descend/search its child page
681 */ 690 */
691 if (index < p->header.nextindex - 1)
692 next = offsetXAD(&p->xad[index + 1]);
682 goto next; 693 goto next;
683 } 694 }
684 695
@@ -694,6 +705,8 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
694 * base is the smallest index with key (Kj) greater than 705 * base is the smallest index with key (Kj) greater than
695 * search key (K) and may be zero or maxentry index. 706 * search key (K) and may be zero or maxentry index.
696 */ 707 */
708 if (base < p->header.nextindex)
709 next = offsetXAD(&p->xad[base]);
697 /* 710 /*
698 * search miss - leaf page: 711 * search miss - leaf page:
699 * 712 *
@@ -727,6 +740,9 @@ static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
727 jfs_ip->btorder = BT_RANDOM; 740 jfs_ip->btorder = BT_RANDOM;
728 jfs_ip->btindex = base; 741 jfs_ip->btindex = base;
729 742
743 if (nextp)
744 *nextp = next;
745
730 return 0; 746 return 0;
731 } 747 }
732 748
@@ -793,6 +809,7 @@ int xtInsert(tid_t tid, /* transaction id */
793 struct xtsplit split; /* split information */ 809 struct xtsplit split; /* split information */
794 xad_t *xad; 810 xad_t *xad;
795 int cmp; 811 int cmp;
812 s64 next;
796 struct tlock *tlck; 813 struct tlock *tlck;
797 struct xtlock *xtlck; 814 struct xtlock *xtlck;
798 815
@@ -806,7 +823,7 @@ int xtInsert(tid_t tid, /* transaction id */
806 * n.b. xtSearch() may return index of maxentry of 823 * n.b. xtSearch() may return index of maxentry of
807 * the full page. 824 * the full page.
808 */ 825 */
809 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) 826 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
810 return rc; 827 return rc;
811 828
812 /* retrieve search result */ 829 /* retrieve search result */
@@ -814,7 +831,7 @@ int xtInsert(tid_t tid, /* transaction id */
814 831
815 /* This test must follow XT_GETSEARCH since mp must be valid if 832 /* This test must follow XT_GETSEARCH since mp must be valid if
816 * we branch to out: */ 833 * we branch to out: */
817 if (cmp == 0) { 834 if ((cmp == 0) || (next && (xlen > next - xoff))) {
818 rc = -EEXIST; 835 rc = -EEXIST;
819 goto out; 836 goto out;
820 } 837 }
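xtInsert uses the same hint defensively: on a miss, next is the start of the following extent, so an insert that would spill into it must fail just as a direct hit does (xtAppend, further down in this diff, clamps the length instead of failing). A standalone restatement of the rule:

    /* An insert of xlen blocks into a hole at xoff fits only if it ends
     * at or before the next allocated extent; next == 0 means no
     * successor was found. */
    static int fits_in_hole(long long xoff, int xlen, long long next)
    {
        return next == 0 || xoff + xlen <= next;
    }
    /* fits_in_hole(100, 4, 104) == 1, fits_in_hole(100, 8, 104) == 0 */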
@@ -1626,7 +1643,7 @@ int xtExtend(tid_t tid, /* transaction id */
1626 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 1643 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
1627 1644
1628 /* there must exist extent to be extended */ 1645 /* there must exist extent to be extended */
1629 if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT))) 1646 if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
1630 return rc; 1647 return rc;
1631 1648
1632 /* retrieve search result */ 1649 /* retrieve search result */
@@ -1794,7 +1811,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
1794*/ 1811*/
1795 1812
1796 /* there must exist extent to be tailgated */ 1813 /* there must exist extent to be tailgated */
1797 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) 1814 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
1798 return rc; 1815 return rc;
1799 1816
1800 /* retrieve search result */ 1817 /* retrieve search result */
@@ -1977,7 +1994,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
1977 nxlen = lengthXAD(nxad); 1994 nxlen = lengthXAD(nxad);
1978 nxaddr = addressXAD(nxad); 1995 nxaddr = addressXAD(nxad);
1979 1996
1980 if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) 1997 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
1981 return rc; 1998 return rc;
1982 1999
1983 /* retrieve search result */ 2000 /* retrieve search result */
@@ -2291,7 +2308,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
2291 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2308 if (nextindex == le16_to_cpu(p->header.maxentry)) {
2292 XT_PUTPAGE(mp); 2309 XT_PUTPAGE(mp);
2293 2310
2294 if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) 2311 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
2295 return rc; 2312 return rc;
2296 2313
2297 /* retrieve search result */ 2314 /* retrieve search result */
@@ -2438,6 +2455,7 @@ int xtAppend(tid_t tid, /* transaction id */
2438 int nsplit, nblocks, xlen; 2455 int nsplit, nblocks, xlen;
2439 struct pxdlist pxdlist; 2456 struct pxdlist pxdlist;
2440 pxd_t *pxd; 2457 pxd_t *pxd;
2458 s64 next;
2441 2459
2442 xaddr = *xaddrp; 2460 xaddr = *xaddrp;
2443 xlen = *xlenp; 2461 xlen = *xlenp;
@@ -2452,7 +2470,7 @@ int xtAppend(tid_t tid, /* transaction id */
2452 * n.b. xtSearch() may return index of maxentry of 2470 * n.b. xtSearch() may return index of maxentry of
2453 * the full page. 2471 * the full page.
2454 */ 2472 */
2455 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) 2473 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
2456 return rc; 2474 return rc;
2457 2475
2458 /* retrieve search result */ 2476 /* retrieve search result */
@@ -2462,6 +2480,9 @@ int xtAppend(tid_t tid, /* transaction id */
2462 rc = -EEXIST; 2480 rc = -EEXIST;
2463 goto out; 2481 goto out;
2464 } 2482 }
2483
2484 if (next)
2485 xlen = min(xlen, (int)(next - xoff));
2465//insert: 2486//insert:
2466 /* 2487 /*
2467 * insert entry for new extent 2488 * insert entry for new extent
@@ -2600,7 +2621,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
2600 /* 2621 /*
2601 * find the matching entry; xtSearch() pins the page 2622 * find the matching entry; xtSearch() pins the page
2602 */ 2623 */
2603 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) 2624 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2604 return rc; 2625 return rc;
2605 2626
2606 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2627 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
@@ -2852,7 +2873,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2852 */ 2873 */
2853 if (xtype == DATAEXT) { 2874 if (xtype == DATAEXT) {
2854 /* search in leaf entry */ 2875 /* search in leaf entry */
2855 rc = xtSearch(ip, xoff, &cmp, &btstack, 0); 2876 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
2856 if (rc) 2877 if (rc)
2857 return rc; 2878 return rc;
2858 2879
@@ -2958,7 +2979,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
2958 } 2979 }
2959 2980
2960 /* get back parent page */ 2981 /* get back parent page */
2961 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) 2982 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
2962 return rc; 2983 return rc;
2963 2984
2964 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2985 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
@@ -3991,7 +4012,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
3991 4012
3992 if (committed_size) { 4013 if (committed_size) {
3993 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 4014 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
3994 rc = xtSearch(ip, xoff, &cmp, &btstack, 0); 4015 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
3995 if (rc) 4016 if (rc)
3996 return rc; 4017 return rc;
3997 4018
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 2eb6869b6e7..c6dc254d325 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -209,6 +209,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
209 */ 209 */
210 txQuiesce(sb); 210 txQuiesce(sb);
211 211
212 /* Reset size of direct inode */
213 sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size;
214
212 if (sbi->mntflag & JFS_INLINELOG) { 215 if (sbi->mntflag & JFS_INLINELOG) {
213 /* 216 /*
214 * deactivate old inline log 217 * deactivate old inline log
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5856866e24f..5e774ed7fb6 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -210,6 +210,10 @@ static void jfs_put_super(struct super_block *sb)
210 unload_nls(sbi->nls_tab); 210 unload_nls(sbi->nls_tab);
211 sbi->nls_tab = NULL; 211 sbi->nls_tab = NULL;
212 212
213 truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
214 iput(sbi->direct_inode);
215 sbi->direct_inode = NULL;
216
213 kfree(sbi); 217 kfree(sbi);
214} 218}
215 219
@@ -358,6 +362,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
358 } 362 }
359 363
360 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { 364 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
365 /*
366 * Invalidate any previously read metadata. fsck may have
367 * changed the on-disk data since we mounted r/o
368 */
369 truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
370
361 JFS_SBI(sb)->flag = flag; 371 JFS_SBI(sb)->flag = flag;
362 return jfs_mount_rw(sb, 1); 372 return jfs_mount_rw(sb, 1);
363 } 373 }
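Going from read-only to read-write is the one point where on-disk metadata may have changed behind the mount (fsck, as the new comment says), so every cached page of the direct mapping is dropped before jfs_mount_rw() re-reads it:

    /* Drop every page cached for the mapping, from offset 0 onward, so
     * subsequent reads go back to disk. */
    truncate_inode_pages(mapping, 0);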
@@ -428,12 +438,26 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
428 sb->s_op = &jfs_super_operations; 438 sb->s_op = &jfs_super_operations;
429 sb->s_export_op = &jfs_export_operations; 439 sb->s_export_op = &jfs_export_operations;
430 440
441 /*
442 * Initialize direct-mapping inode/address-space
443 */
444 inode = new_inode(sb);
445 if (inode == NULL)
446 goto out_kfree;
447 inode->i_ino = 0;
448 inode->i_nlink = 1;
449 inode->i_size = sb->s_bdev->bd_inode->i_size;
450 inode->i_mapping->a_ops = &jfs_metapage_aops;
451 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
452
453 sbi->direct_inode = inode;
454
431 rc = jfs_mount(sb); 455 rc = jfs_mount(sb);
432 if (rc) { 456 if (rc) {
433 if (!silent) { 457 if (!silent) {
434 jfs_err("jfs_mount failed w/return code = %d", rc); 458 jfs_err("jfs_mount failed w/return code = %d", rc);
435 } 459 }
436 goto out_kfree; 460 goto out_mount_failed;
437 } 461 }
438 if (sb->s_flags & MS_RDONLY) 462 if (sb->s_flags & MS_RDONLY)
439 sbi->log = NULL; 463 sbi->log = NULL;
@@ -482,6 +506,13 @@ out_no_rw:
482 if (rc) { 506 if (rc) {
483 jfs_err("jfs_umount failed with return code %d", rc); 507 jfs_err("jfs_umount failed with return code %d", rc);
484 } 508 }
509out_mount_failed:
510 filemap_fdatawrite(sbi->direct_inode->i_mapping);
511 filemap_fdatawait(sbi->direct_inode->i_mapping);
512 truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
513 make_bad_inode(sbi->direct_inode);
514 iput(sbi->direct_inode);
515 sbi->direct_inode = NULL;
485out_kfree: 516out_kfree:
486 if (sbi->nls_tab) 517 if (sbi->nls_tab)
487 unload_nls(sbi->nls_tab); 518 unload_nls(sbi->nls_tab);
@@ -527,8 +558,10 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
527 struct jfs_log *log = JFS_SBI(sb)->log; 558 struct jfs_log *log = JFS_SBI(sb)->log;
528 559
529 /* log == NULL indicates read-only mount */ 560 /* log == NULL indicates read-only mount */
530 if (log) 561 if (log) {
531 jfs_flush_journal(log, wait); 562 jfs_flush_journal(log, wait);
563 jfs_syncpt(log);
564 }
532 565
533 return 0; 566 return 0;
534} 567}
diff --git a/fs/mpage.c b/fs/mpage.c
index e7d8d1a7760..32c7c8fcfce 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -160,52 +160,6 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
160 } while (page_bh != head); 160 } while (page_bh != head);
161} 161}
162 162
163/**
164 * mpage_readpages - populate an address space with some pages, and
165 * start reads against them.
166 *
167 * @mapping: the address_space
168 * @pages: The address of a list_head which contains the target pages. These
169 * pages have their ->index populated and are otherwise uninitialised.
170 *
171 * The page at @pages->prev has the lowest file offset, and reads should be
172 * issued in @pages->prev to @pages->next order.
173 *
174 * @nr_pages: The number of pages at *@pages
175 * @get_block: The filesystem's block mapper function.
176 *
177 * This function walks the pages and the blocks within each page, building and
178 * emitting large BIOs.
179 *
180 * If anything unusual happens, such as:
181 *
182 * - encountering a page which has buffers
183 * - encountering a page which has a non-hole after a hole
184 * - encountering a page with non-contiguous blocks
185 *
186 * then this code just gives up and calls the buffer_head-based read function.
187 * It does handle a page which has holes at the end - that is a common case:
188 * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
189 *
190 * BH_Boundary explanation:
191 *
192 * There is a problem. The mpage read code assembles several pages, gets all
193 * their disk mappings, and then submits them all. That's fine, but obtaining
194 * the disk mappings may require I/O. Reads of indirect blocks, for example.
195 *
196 * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
197 * submitted in the following order:
198 * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
199 * because the indirect block has to be read to get the mappings of blocks
200 * 13,14,15,16. Obviously, this impacts performance.
201 *
202 * So what we do is to allow the filesystem's get_block() function to set
203 * BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block
204 * after this one will require I/O against a block which is probably close to
205 * this one. So you should push what I/O you have currently accumulated.
206 *
207 * This all causes the disk requests to be issued in the correct order.
208 */
209static struct bio * 163static struct bio *
210do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, 164do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
211 sector_t *last_block_in_bio, get_block_t get_block) 165 sector_t *last_block_in_bio, get_block_t get_block)
@@ -320,6 +274,52 @@ confused:
320 goto out; 274 goto out;
321} 275}
322 276
277/**
278 * mpage_readpages - populate an address space with some pages, and
279 * start reads against them.
280 *
281 * @mapping: the address_space
282 * @pages: The address of a list_head which contains the target pages. These
283 * pages have their ->index populated and are otherwise uninitialised.
284 *
285 * The page at @pages->prev has the lowest file offset, and reads should be
286 * issued in @pages->prev to @pages->next order.
287 *
288 * @nr_pages: The number of pages at *@pages
289 * @get_block: The filesystem's block mapper function.
290 *
291 * This function walks the pages and the blocks within each page, building and
292 * emitting large BIOs.
293 *
294 * If anything unusual happens, such as:
295 *
296 * - encountering a page which has buffers
297 * - encountering a page which has a non-hole after a hole
298 * - encountering a page with non-contiguous blocks
299 *
300 * then this code just gives up and calls the buffer_head-based read function.
301 * It does handle a page which has holes at the end - that is a common case:
302 * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
303 *
304 * BH_Boundary explanation:
305 *
306 * There is a problem. The mpage read code assembles several pages, gets all
307 * their disk mappings, and then submits them all. That's fine, but obtaining
308 * the disk mappings may require I/O. Reads of indirect blocks, for example.
309 *
310 * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
311 * submitted in the following order:
312 * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
313 * because the indirect block has to be read to get the mappings of blocks
314 * 13,14,15,16. Obviously, this impacts performance.
315 *
316 * So what we do is to allow the filesystem's get_block() function to set
317 * BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block
318 * after this one will require I/O against a block which is probably close to
319 * this one. So you should push what I/O you have currently accumulated.
320 *
321 * This all causes the disk requests to be issued in the correct order.
322 */
323int 323int
324mpage_readpages(struct address_space *mapping, struct list_head *pages, 324mpage_readpages(struct address_space *mapping, struct list_head *pages,
325 unsigned nr_pages, get_block_t get_block) 325 unsigned nr_pages, get_block_t get_block)
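The restored comment is the contract a get_block implementation must honor for the boundary optimization to work. A hedged sketch of such an implementation (the two lookup helpers are hypothetical placeholders for filesystem-specific logic):

    /* Map iblock and flag the last block reachable without another
     * metadata read, so mpage submits its accumulated BIO before the
     * nearby indirect-block read is issued. */
    static int example_get_block(struct inode *inode, sector_t iblock,
                                 struct buffer_head *bh, int create)
    {
        sector_t phys = lookup_block(inode, iblock);        /* hypothetical */

        map_bh(bh, inode->i_sb, phys);
        if (next_block_needs_metadata_read(inode, iblock))  /* hypothetical */
            set_buffer_boundary(bh);                /* sets BH_Boundary */
        return 0;
    }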
@@ -727,6 +727,8 @@ retry:
727 &last_block_in_bio, &ret, wbc, 727 &last_block_in_bio, &ret, wbc,
728 writepage_fn); 728 writepage_fn);
729 } 729 }
730 if (unlikely(ret == WRITEPAGE_ACTIVATE))
731 unlock_page(page);
730 if (ret || (--(wbc->nr_to_write) <= 0)) 732 if (ret || (--(wbc->nr_to_write) <= 0))
731 done = 1; 733 done = 1;
732 if (wbc->nonblocking && bdi_write_congested(bdi)) { 734 if (wbc->nonblocking && bdi_write_congested(bdi)) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 57554bfbed7..e31903aadd9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1419,6 +1419,8 @@ static struct file_operations proc_tgid_attr_operations;
1419static struct inode_operations proc_tgid_attr_inode_operations; 1419static struct inode_operations proc_tgid_attr_inode_operations;
1420#endif 1420#endif
1421 1421
1422static int get_tid_list(int index, unsigned int *tids, struct inode *dir);
1423
1422/* SMP-safe */ 1424/* SMP-safe */
1423static struct dentry *proc_pident_lookup(struct inode *dir, 1425static struct dentry *proc_pident_lookup(struct inode *dir,
1424 struct dentry *dentry, 1426 struct dentry *dentry,
@@ -1458,7 +1460,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
1458 */ 1460 */
1459 switch(p->type) { 1461 switch(p->type) {
1460 case PROC_TGID_TASK: 1462 case PROC_TGID_TASK:
1461 inode->i_nlink = 3; 1463 inode->i_nlink = 2 + get_tid_list(2, NULL, dir);
1462 inode->i_op = &proc_task_inode_operations; 1464 inode->i_op = &proc_task_inode_operations;
1463 inode->i_fop = &proc_task_operations; 1465 inode->i_fop = &proc_task_operations;
1464 break; 1466 break;
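The nlink fix encodes the usual directory link-count rule: a directory is linked from its own "." and from its name in the parent, plus one ".." per child directory; for /proc/<pid>/task the children are the per-thread directories, hence 2 + get_tid_list(2, NULL, dir). In numbers:

    /* A tgid with three live threads: ".", the entry in the parent,
     * and three "<tid>/.." back-links. */
    unsigned nlink = 2 + 3;    /* == 5 */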
@@ -1701,13 +1703,13 @@ static struct inode_operations proc_self_inode_operations = {
1701}; 1703};
1702 1704
1703/** 1705/**
1704 * proc_pid_unhash - Unhash /proc/<pid> entry from the dcache. 1706 * proc_pid_unhash - Unhash /proc/@pid entry from the dcache.
1705 * @p: task that should be flushed. 1707 * @p: task that should be flushed.
1706 * 1708 *
1707 * Drops the /proc/<pid> dcache entry from the hash chains. 1709 * Drops the /proc/@pid dcache entry from the hash chains.
1708 * 1710 *
1709 * Dropping /proc/<pid> entries and detach_pid must be synchronous, 1711 * Dropping /proc/@pid entries and detach_pid must be synchronous,
1710 * otherwise e.g. /proc/<pid>/exe might point to the wrong executable, 1712 * otherwise e.g. /proc/@pid/exe might point to the wrong executable,
1711 * if the pid value is immediately reused. This is enforced by 1713 * if the pid value is immediately reused. This is enforced by
1712 * - caller must acquire spin_lock(p->proc_lock) 1714 * - caller must acquire spin_lock(p->proc_lock)
1713 * - must be called before detach_pid() 1715 * - must be called before detach_pid()
@@ -1739,8 +1741,8 @@ struct dentry *proc_pid_unhash(struct task_struct *p)
1739} 1741}
1740 1742
1741/** 1743/**
1742 * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries 1744 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries
1743 * @proc_entry: directory to prune. 1745 * @proc_dentry: directory to prune.
1744 * 1746 *
1745 * Shrink the /proc directory that was used by the just killed thread. 1747 * Shrink the /proc directory that was used by the just killed thread.
1746 */ 1748 */
@@ -1800,8 +1802,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
1800 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 1802 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1801 inode->i_op = &proc_tgid_base_inode_operations; 1803 inode->i_op = &proc_tgid_base_inode_operations;
1802 inode->i_fop = &proc_tgid_base_operations; 1804 inode->i_fop = &proc_tgid_base_operations;
1803 inode->i_nlink = 3;
1804 inode->i_flags|=S_IMMUTABLE; 1805 inode->i_flags|=S_IMMUTABLE;
1806#ifdef CONFIG_SECURITY
1807 inode->i_nlink = 5;
1808#else
1809 inode->i_nlink = 4;
1810#endif
1805 1811
1806 dentry->d_op = &pid_base_dentry_operations; 1812 dentry->d_op = &pid_base_dentry_operations;
1807 1813
@@ -1855,8 +1861,12 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
1855 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 1861 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1856 inode->i_op = &proc_tid_base_inode_operations; 1862 inode->i_op = &proc_tid_base_inode_operations;
1857 inode->i_fop = &proc_tid_base_operations; 1863 inode->i_fop = &proc_tid_base_operations;
1858 inode->i_nlink = 3;
1859 inode->i_flags|=S_IMMUTABLE; 1864 inode->i_flags|=S_IMMUTABLE;
1865#ifdef CONFIG_SECURITY
1866 inode->i_nlink = 4;
1867#else
1868 inode->i_nlink = 3;
1869#endif
1860 1870
1861 dentry->d_op = &pid_base_dentry_operations; 1871 dentry->d_op = &pid_base_dentry_operations;
1862 1872
@@ -1935,7 +1945,8 @@ static int get_tid_list(int index, unsigned int *tids, struct inode *dir)
1935 1945
1936 if (--index >= 0) 1946 if (--index >= 0)
1937 continue; 1947 continue;
1938 tids[nr_tids] = tid; 1948 if (tids != NULL)
1949 tids[nr_tids] = tid;
1939 nr_tids++; 1950 nr_tids++;
1940 if (nr_tids >= PROC_MAXPIDS) 1951 if (nr_tids >= PROC_MAXPIDS)
1941 break; 1952 break;
@@ -2035,6 +2046,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
2035 } 2046 }
2036 2047
2037 nr_tids = get_tid_list(pos, tid_array, inode); 2048 nr_tids = get_tid_list(pos, tid_array, inode);
2049 inode->i_nlink = pos + nr_tids;
2038 2050
2039 for (i = 0; i < nr_tids; i++) { 2051 for (i = 0; i < nr_tids; i++) {
2040 unsigned long j = PROC_NUMBUF; 2052 unsigned long j = PROC_NUMBUF;
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index a4e2ed544bb..49c479c9454 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -260,8 +260,9 @@ static inline int block_group_used(struct super_block *s, u32 id) {
260/* 260/*
261 * the packing is returned in disk byte order 261 * the packing is returned in disk byte order
262 */ 262 */
263u32 reiserfs_choose_packing(struct inode *dir) { 263__le32 reiserfs_choose_packing(struct inode *dir)
264 u32 packing; 264{
265 __le32 packing;
265 if (TEST_OPTION(packing_groups, dir->i_sb)) { 266 if (TEST_OPTION(packing_groups, dir->i_sb)) {
266 u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id); 267 u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
267 /* 268 /*
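The reiserfs hunks in this merge are largely endianness-annotation work: on-disk little-endian fields move from plain __u32 to the sparse-checkable __le32, so mixing them with native integers without an explicit conversion becomes a warning. The conversion idiom:

    /* __le32 values are byte-order-tagged; crossing the boundary always
     * goes through an explicit helper. */
    __le32 disk = cpu_to_le32(1234);      /* native  -> on-disk */
    __u32  host = le32_to_cpu(disk);      /* on-disk -> native  */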
@@ -655,7 +656,7 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t *hint)
655 struct buffer_head * bh; 656 struct buffer_head * bh;
656 struct item_head * ih; 657 struct item_head * ih;
657 int pos_in_item; 658 int pos_in_item;
658 __u32 * item; 659 __le32 * item;
659 int ret = 0; 660 int ret = 0;
660 661
661 if (!hint->path) /* reiserfs code can call this function w/o pointer to path 662 if (!hint->path) /* reiserfs code can call this function w/o pointer to path
@@ -736,7 +737,7 @@ static inline int this_blocknr_allocation_would_make_it_a_large_file(reiserfs_bl
736#ifdef DISPLACE_NEW_PACKING_LOCALITIES 737#ifdef DISPLACE_NEW_PACKING_LOCALITIES
737static inline void displace_new_packing_locality (reiserfs_blocknr_hint_t *hint) 738static inline void displace_new_packing_locality (reiserfs_blocknr_hint_t *hint)
738{ 739{
739 struct reiserfs_key * key = &hint->key; 740 struct in_core_key * key = &hint->key;
740 741
741 hint->th->displace_new_blocks = 0; 742 hint->th->displace_new_blocks = 0;
742 hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg); 743 hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg);
@@ -777,7 +778,7 @@ static inline int old_way (reiserfs_blocknr_hint_t * hint)
777 778
778static inline void hundredth_slices (reiserfs_blocknr_hint_t * hint) 779static inline void hundredth_slices (reiserfs_blocknr_hint_t * hint)
779{ 780{
780 struct reiserfs_key * key = &hint->key; 781 struct in_core_key * key = &hint->key;
781 b_blocknr_t slice_start; 782 b_blocknr_t slice_start;
782 783
783 slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100); 784 slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d1514a9b051..fbde4b01a32 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -209,8 +209,8 @@ static int reiserfs_readdir (struct file * filp, void * dirent, filldir_t filldi
209/* compose directory item containing "." and ".." entries (entries are 209/* compose directory item containing "." and ".." entries (entries are
210 not aligned to 4 byte boundary) */ 210 not aligned to 4 byte boundary) */
211/* the last four params are LE */ 211/* the last four params are LE */
212void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid, 212void make_empty_dir_item_v1 (char * body, __le32 dirid, __le32 objid,
213 __u32 par_dirid, __u32 par_objid) 213 __le32 par_dirid, __le32 par_objid)
214{ 214{
215 struct reiserfs_de_head * deh; 215 struct reiserfs_de_head * deh;
216 216
@@ -242,8 +242,8 @@ void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid,
242} 242}
243 243
244/* compose directory item containing "." and ".." entries */ 244/* compose directory item containing "." and ".." entries */
245void make_empty_dir_item (char * body, __u32 dirid, __u32 objid, 245void make_empty_dir_item (char * body, __le32 dirid, __le32 objid,
246 __u32 par_dirid, __u32 par_objid) 246 __le32 par_dirid, __le32 par_objid)
247{ 247{
248 struct reiserfs_de_head * deh; 248 struct reiserfs_de_head * deh;
249 249
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 26950113af8..2230afff187 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -166,7 +166,7 @@ static int reiserfs_allocate_blocks_for_region(
166 struct cpu_key key; // cpu key of item that we are going to deal with 166 struct cpu_key key; // cpu key of item that we are going to deal with
167 struct item_head *ih; // pointer to item head that we are going to deal with 167 struct item_head *ih; // pointer to item head that we are going to deal with
168 struct buffer_head *bh; // Buffer head that contains items that we are going to deal with 168 struct buffer_head *bh; // Buffer head that contains items that we are going to deal with
169 __u32 * item; // pointer to item we are going to deal with 169 __le32 * item; // pointer to item we are going to deal with
170 INITIALIZE_PATH(path); // path to item, that we are going to deal with. 170 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
171 b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored. 171 b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored.
172 reiserfs_blocknr_hint_t hint; // hint structure for block allocator. 172 reiserfs_blocknr_hint_t hint; // hint structure for block allocator.
@@ -891,7 +891,7 @@ static int reiserfs_prepare_file_region_for_write(
891 struct item_head *ih = NULL; // pointer to item head that we are going to deal with 891 struct item_head *ih = NULL; // pointer to item head that we are going to deal with
892 struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with 892 struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with
893 INITIALIZE_PATH(path); // path to item, that we are going to deal with. 893 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
894 __u32 * item=NULL; // pointer to item we are going to deal with 894 __le32 * item=NULL; // pointer to item we are going to deal with
895 int item_pos=-1; /* Position in indirect item */ 895 int item_pos=-1; /* Position in indirect item */
896 896
897 897
@@ -1284,10 +1284,11 @@ static ssize_t reiserfs_file_write( struct file *file, /* the file we are going
1284 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits)); 1284 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits));
1285 reiserfs_write_unlock(inode->i_sb); 1285 reiserfs_write_unlock(inode->i_sb);
1286 1286
1287 if ( !num_pages ) { /* If we do not have enough space even for */ 1287 if ( !num_pages ) { /* If we do not have enough space even for a single page... */
1288 res = -ENOSPC; /* single page, return -ENOSPC */ 1288 if ( pos > inode->i_size+inode->i_sb->s_blocksize-(pos & (inode->i_sb->s_blocksize-1))) {
1289 if ( pos > (inode->i_size & (inode->i_sb->s_blocksize-1))) 1289 res = -ENOSPC;
1290 break; // In case we are writing past the file end, break. 1290 break; // In case we are writing past the end of the last file block, break.
1291 }
1291 // Otherwise we are possibly overwriting the file, so 1292 // Otherwise we are possibly overwriting the file, so
1292 // let's set write size to be equal or less than blocksize. 1293 // let's set write size to be equal or less than blocksize.
1293 // This way we get it correctly for file holes. 1294 // This way we get it correctly for file holes.
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 7543031396f..2711dff1b7b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -173,7 +173,7 @@ static inline void fix_tail_page_for_writing(struct page *page) {
173 done already or non-hole position has been found in the indirect item */ 173 done already or non-hole position has been found in the indirect item */
174static inline int allocation_needed (int retval, b_blocknr_t allocated, 174static inline int allocation_needed (int retval, b_blocknr_t allocated,
175 struct item_head * ih, 175 struct item_head * ih,
176 __u32 * item, int pos_in_item) 176 __le32 * item, int pos_in_item)
177{ 177{
178 if (allocated) 178 if (allocated)
179 return 0; 179 return 0;
@@ -278,7 +278,7 @@ research:
278 bh = get_last_bh (&path); 278 bh = get_last_bh (&path);
279 ih = get_ih (&path); 279 ih = get_ih (&path);
280 if (is_indirect_le_ih (ih)) { 280 if (is_indirect_le_ih (ih)) {
281 __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih); 281 __le32 * ind_item = (__le32 *)B_I_PITEM (bh, ih);
282 282
283 /* FIXME: here we could cache indirect item or part of it in 283 /* FIXME: here we could cache indirect item or part of it in
284 the inode to avoid search_by_key in case of subsequent 284 the inode to avoid search_by_key in case of subsequent
@@ -581,7 +581,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
581 struct cpu_key key; 581 struct cpu_key key;
582 struct buffer_head * bh, * unbh = NULL; 582 struct buffer_head * bh, * unbh = NULL;
583 struct item_head * ih, tmp_ih; 583 struct item_head * ih, tmp_ih;
584 __u32 * item; 584 __le32 * item;
585 int done; 585 int done;
586 int fs_gen; 586 int fs_gen;
587 struct reiserfs_transaction_handle *th = NULL; 587 struct reiserfs_transaction_handle *th = NULL;
@@ -746,7 +746,7 @@ start_trans:
746 done = 0; 746 done = 0;
747 do { 747 do {
748 if (is_statdata_le_ih (ih)) { 748 if (is_statdata_le_ih (ih)) {
749 __u32 unp = 0; 749 __le32 unp = 0;
750 struct cpu_key tmp_key; 750 struct cpu_key tmp_key;
751 751
752 /* indirect item has to be inserted */ 752 /* indirect item has to be inserted */
@@ -1341,8 +1341,8 @@ void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args
1341 key.version = KEY_FORMAT_3_5; 1341 key.version = KEY_FORMAT_3_5;
1342 key.on_disk_key.k_dir_id = dirino; 1342 key.on_disk_key.k_dir_id = dirino;
1343 key.on_disk_key.k_objectid = inode->i_ino; 1343 key.on_disk_key.k_objectid = inode->i_ino;
1344 key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET; 1344 key.on_disk_key.k_offset = 0;
1345 key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS; 1345 key.on_disk_key.k_type = 0;
1346 1346
1347 /* look for the object's stat data */ 1347 /* look for the object's stat data */
1348 retval = search_item (inode->i_sb, &key, &path_to_sd); 1348 retval = search_item (inode->i_sb, &key, &path_to_sd);
@@ -2067,7 +2067,7 @@ static int map_block_for_writepage(struct inode *inode,
2067 struct item_head tmp_ih ; 2067 struct item_head tmp_ih ;
2068 struct item_head *ih ; 2068 struct item_head *ih ;
2069 struct buffer_head *bh ; 2069 struct buffer_head *bh ;
2070 __u32 *item ; 2070 __le32 *item ;
2071 struct cpu_key key ; 2071 struct cpu_key key ;
2072 INITIALIZE_PATH(path) ; 2072 INITIALIZE_PATH(path) ;
2073 int pos_in_item ; 2073 int pos_in_item ;
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 9cf7c13b120..0ce33db1acd 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -296,10 +296,11 @@ static void print_sequence (__u32 start, int len)
296static void indirect_print_item (struct item_head * ih, char * item) 296static void indirect_print_item (struct item_head * ih, char * item)
297{ 297{
298 int j; 298 int j;
299 __u32 * unp, prev = INT_MAX; 299 __le32 * unp;
300 __u32 prev = INT_MAX;
300 int num; 301 int num;
301 302
302 unp = (__u32 *)item; 303 unp = (__le32 *)item;
303 304
304 if (ih_item_len(ih) % UNFM_P_SIZE) 305 if (ih_item_len(ih) % UNFM_P_SIZE)
305 reiserfs_warning (NULL, "indirect_print_item: invalid item len"); 306 reiserfs_warning (NULL, "indirect_print_item: invalid item len");
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c9ad3a7849f..3072cfdee95 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2306,13 +2306,16 @@ static int journal_init_dev( struct super_block *super,
2306 if( !IS_ERR( journal -> j_dev_file ) ) { 2306 if( !IS_ERR( journal -> j_dev_file ) ) {
2307 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; 2307 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
2308 if( !S_ISBLK( jdev_inode -> i_mode ) ) { 2308 if( !S_ISBLK( jdev_inode -> i_mode ) ) {
2309 reiserfs_warning (super, "journal_init_dev: '%s' is " 2309 reiserfs_warning(super, "journal_init_dev: '%s' is "
2310 "not a block device", jdev_name ); 2310 "not a block device", jdev_name );
2311 result = -ENOTBLK; 2311 result = -ENOTBLK;
2312 release_journal_dev( super, journal );
2312 } else { 2313 } else {
2313 /* ok */ 2314 /* ok */
2314 journal->j_dev_bd = I_BDEV(jdev_inode); 2315 journal->j_dev_bd = I_BDEV(jdev_inode);
2315 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2316 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2317 reiserfs_info(super, "journal_init_dev: journal device: %s\n",
2318 bdevname(journal->j_dev_bd, b));
2316 } 2319 }
2317 } else { 2320 } else {
2318 result = PTR_ERR( journal -> j_dev_file ); 2321 result = PTR_ERR( journal -> j_dev_file );
@@ -2321,11 +2324,6 @@ static int journal_init_dev( struct super_block *super,
2321 "journal_init_dev: Cannot open '%s': %i", 2324 "journal_init_dev: Cannot open '%s': %i",
2322 jdev_name, result ); 2325 jdev_name, result );
2323 } 2326 }
2324 if( result != 0 ) {
2325 release_journal_dev( super, journal );
2326 }
2327 reiserfs_info(super, "journal_init_dev: journal device: %s\n",
2328 bdevname(journal->j_dev_bd, b));
2329 return result; 2327 return result;
2330} 2328}
2331 2329
@@ -2393,7 +2391,7 @@ int journal_init(struct super_block *p_s_sb, const char * j_dev_name, int old_fo
2393 jh = (struct reiserfs_journal_header *)(bhjh->b_data); 2391 jh = (struct reiserfs_journal_header *)(bhjh->b_data);
2394 2392
2395 /* make sure that journal matches to the super block */ 2393 /* make sure that journal matches to the super block */
2396 if (is_reiserfs_jr(rs) && (jh->jh_journal.jp_journal_magic != sb_jp_journal_magic(rs))) { 2394 if (is_reiserfs_jr(rs) && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != sb_jp_journal_magic(rs))) {
2397 reiserfs_warning (p_s_sb, "sh-460: journal header magic %x " 2395 reiserfs_warning (p_s_sb, "sh-460: journal header magic %x "
2398 "(device %s) does not match to magic found in super " 2396 "(device %s) does not match to magic found in super "
2399 "block %x", 2397 "block %x",
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index 0785c43a748..bfe8e25ef29 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -11,13 +11,13 @@
11 11
12// find where objectid map starts 12// find where objectid map starts
13#define objectid_map(s,rs) (old_format_only (s) ? \ 13#define objectid_map(s,rs) (old_format_only (s) ? \
14 (__u32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ 14 (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
15 (__u32 *)((rs) + 1)) 15 (__le32 *)((rs) + 1))
16 16
17 17
18#ifdef CONFIG_REISERFS_CHECK 18#ifdef CONFIG_REISERFS_CHECK
19 19
20static void check_objectid_map (struct super_block * s, __u32 * map) 20static void check_objectid_map (struct super_block * s, __le32 * map)
21{ 21{
22 if (le32_to_cpu (map[0]) != 1) 22 if (le32_to_cpu (map[0]) != 1)
23 reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted: %lx", 23 reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted: %lx",
@@ -27,7 +27,7 @@ static void check_objectid_map (struct super_block * s, __u32 * map)
27} 27}
28 28
29#else 29#else
30static void check_objectid_map (struct super_block * s, __u32 * map) 30static void check_objectid_map (struct super_block * s, __le32 * map)
31{;} 31{;}
32#endif 32#endif
33 33
@@ -52,7 +52,7 @@ __u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th)
52{ 52{
53 struct super_block * s = th->t_super; 53 struct super_block * s = th->t_super;
54 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); 54 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s);
55 __u32 * map = objectid_map (s, rs); 55 __le32 * map = objectid_map (s, rs);
56 __u32 unused_objectid; 56 __u32 unused_objectid;
57 57
58 BUG_ON (!th->t_trans_id); 58 BUG_ON (!th->t_trans_id);
@@ -97,7 +97,7 @@ void reiserfs_release_objectid (struct reiserfs_transaction_handle *th,
97{ 97{
98 struct super_block * s = th->t_super; 98 struct super_block * s = th->t_super;
99 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); 99 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s);
100 __u32 * map = objectid_map (s, rs); 100 __le32 * map = objectid_map (s, rs);
101 int i = 0; 101 int i = 0;
102 102
103 BUG_ON (!th->t_trans_id); 103 BUG_ON (!th->t_trans_id);
@@ -172,12 +172,12 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s) {
172 int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2 ; 172 int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2 ;
173 int old_max = sb_oid_maxsize(disk_sb); 173 int old_max = sb_oid_maxsize(disk_sb);
174 struct reiserfs_super_block_v1 *disk_sb_v1 ; 174 struct reiserfs_super_block_v1 *disk_sb_v1 ;
175 __u32 *objectid_map, *new_objectid_map ; 175 __le32 *objectid_map, *new_objectid_map ;
176 int i ; 176 int i ;
177 177
178 disk_sb_v1=(struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data); 178 disk_sb_v1=(struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data);
179 objectid_map = (__u32 *)(disk_sb_v1 + 1) ; 179 objectid_map = (__le32 *)(disk_sb_v1 + 1) ;
180 new_objectid_map = (__u32 *)(disk_sb + 1) ; 180 new_objectid_map = (__le32 *)(disk_sb + 1) ;
181 181
182 if (cur_size > new_size) { 182 if (cur_size > new_size) {
183 /* mark everyone used that was listed as free at the end of the objectid 183 /* mark everyone used that was listed as free at the end of the objectid
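
[Editor's note] The objectid-map change is purely a type annotation: the map words are little-endian on disk, and typing them __le32 lets sparse (make C=1) flag any access that skips the conversion helpers. A hedged illustration of the mechanism, with local _sk stand-ins for what <linux/types.h> really provides:

#ifdef __CHECKER__                      /* defined only when sparse runs */
#define __bitwise_sk __attribute__((bitwise))
#define __force_sk   __attribute__((force))
#else
#define __bitwise_sk
#define __force_sk
#endif

typedef unsigned int __bitwise_sk le32_sk;

static inline unsigned int le32_to_cpu_sk(le32_sk v)
{
	/* a real implementation byte-swaps here on big-endian CPUs */
	return (__force_sk unsigned int)v;
}

unsigned int map_first_free_sk(const le32_sk *map)
{
	return le32_to_cpu_sk(map[1]);  /* explicit conversion: no warning */
	/* "return map[1];" would draw a sparse warning about mixing a
	 * restricted (bitwise) type with a plain integer */
}
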
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index f4ea81ae0e0..e242ebc7f6f 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -73,8 +73,8 @@ int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset,
73#define DFL( x ) D4C( rs -> s_v1.x ) 73#define DFL( x ) D4C( rs -> s_v1.x )
74 74
75#define objectid_map( s, rs ) (old_format_only (s) ? \ 75#define objectid_map( s, rs ) (old_format_only (s) ? \
76 (__u32 *)((struct reiserfs_super_block_v1 *)rs + 1) : \ 76 (__le32 *)((struct reiserfs_super_block_v1 *)rs + 1) : \
77 (__u32 *)(rs + 1)) 77 (__le32 *)(rs + 1))
78#define MAP( i ) D4C( objectid_map( sb, rs )[ i ] ) 78#define MAP( i ) D4C( objectid_map( sb, rs )[ i ] )
79 79
80#define DJF( x ) le32_to_cpu( rs -> x ) 80#define DJF( x ) le32_to_cpu( rs -> x )
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 73ec5212178..da23ba75f3d 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -87,22 +87,20 @@ inline void copy_item_head(struct item_head * p_v_to,
87inline int comp_short_keys (const struct reiserfs_key * le_key, 87inline int comp_short_keys (const struct reiserfs_key * le_key,
88 const struct cpu_key * cpu_key) 88 const struct cpu_key * cpu_key)
89{ 89{
90 __u32 * p_s_le_u32, * p_s_cpu_u32; 90 __u32 n;
91 int n_key_length = REISERFS_SHORT_KEY_LEN; 91 n = le32_to_cpu(le_key->k_dir_id);
92 92 if (n < cpu_key->on_disk_key.k_dir_id)
93 p_s_le_u32 = (__u32 *)le_key;
94 p_s_cpu_u32 = (__u32 *)&cpu_key->on_disk_key;
95 for( ; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32 ) {
96 if ( le32_to_cpu (*p_s_le_u32) < *p_s_cpu_u32 )
97 return -1; 93 return -1;
98 if ( le32_to_cpu (*p_s_le_u32) > *p_s_cpu_u32 ) 94 if (n > cpu_key->on_disk_key.k_dir_id)
95 return 1;
96 n = le32_to_cpu(le_key->k_objectid);
97 if (n < cpu_key->on_disk_key.k_objectid)
98 return -1;
99 if (n > cpu_key->on_disk_key.k_objectid)
99 return 1; 100 return 1;
100 }
101
102 return 0; 101 return 0;
103} 102}
104 103
105
106/* k1 is pointer to on-disk structure which is stored in little-endian 104/* k1 is pointer to on-disk structure which is stored in little-endian
107 form. k2 is pointer to cpu variable. 105 form. k2 is pointer to cpu variable.
108 Compare keys using all 4 key fields. 106 Compare keys using all 4 key fields.
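
[Editor's note] The comp_short_keys() rewrite drops the word-by-word loop, which walked both keys as raw __u32 arrays and so silently assumed the in-core cpu_key mirrored the on-disk layout. Comparing named fields keeps working when the two layouts diverge, as they do elsewhere in this patch where the in-core key gains a 64-bit offset. A self-contained sketch of the pattern, with _sk types as illustrative stand-ins:

#include <stdint.h>

struct le_short_key_sk  { uint32_t dir_id, objectid; }; /* LE on disk */
struct cpu_short_key_sk { uint32_t dir_id, objectid; }; /* CPU order  */

static uint32_t le32_to_cpu_sk(uint32_t v)
{
	const unsigned char *p = (const unsigned char *)&v;
	return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
	       (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
}

/* three-way comparison: -1, 0, or 1, field by field */
int comp_short_keys_sk(const struct le_short_key_sk *le,
		       const struct cpu_short_key_sk *cpu)
{
	uint32_t n = le32_to_cpu_sk(le->dir_id);

	if (n != cpu->dir_id)
		return n < cpu->dir_id ? -1 : 1;
	n = le32_to_cpu_sk(le->objectid);
	if (n != cpu->objectid)
		return n < cpu->objectid ? -1 : 1;
	return 0;
}
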
@@ -152,18 +150,15 @@ inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct re
152 150
153inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from) 151inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from)
154{ 152{
153 int version;
155 to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id); 154 to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id);
156 to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid); 155 to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid);
157 156
158 // find out version of the key 157 // find out version of the key
159 to->version = le_key_version (from); 158 version = le_key_version (from);
160 if (to->version == KEY_FORMAT_3_5) { 159 to->version = version;
161 to->on_disk_key.u.k_offset_v1.k_offset = le32_to_cpu (from->u.k_offset_v1.k_offset); 160 to->on_disk_key.k_offset = le_key_k_offset(version, from);
162 to->on_disk_key.u.k_offset_v1.k_uniqueness = le32_to_cpu (from->u.k_offset_v1.k_uniqueness); 161 to->on_disk_key.k_type = le_key_k_type(version, from);
163 } else {
164 to->on_disk_key.u.k_offset_v2.k_offset = offset_v2_k_offset(&from->u.k_offset_v2);
165 to->on_disk_key.u.k_offset_v2.k_type = offset_v2_k_type(&from->u.k_offset_v2);
166 }
167} 162}
168 163
169 164
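
[Editor's note] le_key2cpu_key() now reads the key version once and funnels both on-disk layouts through le_key_k_offset()/le_key_k_type() instead of open-coding the v3.5/v3.6 union branches; the in-core key correspondingly keeps plain k_offset/k_type fields. A hedged sketch of such a version-dispatching accessor (the 3.6 format packs a 60-bit offset below a 4-bit type in one little-endian 64-bit word; the _sk names are stand-ins, not the kernel's):

#include <stdint.h>

#define KEY_FORMAT_3_5_SK 0
#define KEY_FORMAT_3_6_SK 1

struct le_key_u_sk {
	union {
		struct { uint32_t k_offset, k_uniqueness; } v1; /* 3.5 */
		uint64_t v2;  /* 3.6: type in the top 4 bits, offset below */
	} u;
};

static uint64_t le_key_k_offset_sk(int version, const struct le_key_u_sk *k)
{
	return version == KEY_FORMAT_3_5_SK
		? k->u.v1.k_offset              /* le32_to_cpu() in reality */
		: k->u.v2 & ((1ULL << 60) - 1); /* le64_to_cpu(), low 60 bits */
}
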
@@ -228,8 +223,14 @@ extern struct tree_balance * cur_tb;
228const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}}; 223const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}};
229 224
230/* Maximal possible key. It is never in the tree. */ 225/* Maximal possible key. It is never in the tree. */
231const struct reiserfs_key MAX_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}}; 226const struct reiserfs_key MAX_KEY = {
227 __constant_cpu_to_le32(0xffffffff),
228 __constant_cpu_to_le32(0xffffffff),
229 {{__constant_cpu_to_le32(0xffffffff),
230 __constant_cpu_to_le32(0xffffffff)},}
231};
232 232
233const struct in_core_key MAX_IN_CORE_KEY = {~0U, ~0U, ~0ULL>>4, 15};
233 234
234/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom 235/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
235 of the path, and going upwards. We must check the path's validity at each step. If the key is not in 236 of the path, and going upwards. We must check the path's validity at each step. If the key is not in
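
[Editor's note] MAX_KEY's initializer is wrapped in __constant_cpu_to_le32() because struct reiserfs_key now holds on-disk __le32 fields and a static initializer needs a compile-time constant; plain cpu_to_le32() may expand to a runtime byte swap on big-endian builds. (0xffffffff is byte-order invariant anyway, but the annotation keeps sparse quiet and the intent explicit.) MAX_IN_CORE_KEY is the new CPU-order counterpart: maximal dir_id and objectid, the largest 60-bit offset (~0ULL>>4), and type 15. A sketch of the compile-time conversion idea, using the GCC-style byte-order macros:

#include <stdint.h>

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define CONST_CPU_TO_LE32(x) ((uint32_t)( \
	(((uint32_t)(x) & 0x000000ffu) << 24) | \
	(((uint32_t)(x) & 0x0000ff00u) << 8)  | \
	(((uint32_t)(x) & 0x00ff0000u) >> 8)  | \
	(((uint32_t)(x) & 0xff000000u) >> 24)))
#else
#define CONST_CPU_TO_LE32(x) ((uint32_t)(x))
#endif

struct le_key_sk { uint32_t dir_id, objectid; }; /* on-disk, little-endian */

/* folds entirely at compile time, so it is a valid static initializer */
static const struct le_key_sk MAX_KEY_SK = {
	CONST_CPU_TO_LE32(0xffffffff),
	CONST_CPU_TO_LE32(0xffffffff),
};
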
@@ -997,7 +998,7 @@ static char prepare_for_delete_or_cut(
997 int n_unfm_number, /* Number of the item unformatted nodes. */ 998 int n_unfm_number, /* Number of the item unformatted nodes. */
998 n_counter, 999 n_counter,
999 n_blk_size; 1000 n_blk_size;
1000 __u32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */ 1001 __le32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */
1001 __u32 tmp; 1002 __u32 tmp;
1002 struct item_head s_ih; /* Item header. */ 1003 struct item_head s_ih; /* Item header. */
1003 char c_mode; /* Returned mode of the balance. */ 1004 char c_mode; /* Returned mode of the balance. */
@@ -1059,7 +1060,7 @@ static char prepare_for_delete_or_cut(
1059 /* pointers to be cut */ 1060 /* pointers to be cut */
1060 n_unfm_number -= pos_in_item (p_s_path); 1061 n_unfm_number -= pos_in_item (p_s_path);
1061 /* Set pointer to the last unformatted node pointer that is to be cut. */ 1062 /* Set pointer to the last unformatted node pointer that is to be cut. */
1062 p_n_unfm_pointer = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed; 1063 p_n_unfm_pointer = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed;
1063 1064
1064 1065
1065 /* We go through the unformatted nodes pointers of the indirect 1066 /* We go through the unformatted nodes pointers of the indirect
@@ -1081,8 +1082,8 @@ static char prepare_for_delete_or_cut(
1081 need_research = 1 ; 1082 need_research = 1 ;
1082 break; 1083 break;
1083 } 1084 }
1084 RFALSE( p_n_unfm_pointer < (__u32 *)B_I_PITEM(p_s_bh, &s_ih) || 1085 RFALSE( p_n_unfm_pointer < (__le32 *)B_I_PITEM(p_s_bh, &s_ih) ||
1085 p_n_unfm_pointer > (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1, 1086 p_n_unfm_pointer > (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1,
1086 "vs-5265: pointer out of range"); 1087 "vs-5265: pointer out of range");
1087 1088
1088 /* Hole, nothing to remove. */ 1089 /* Hole, nothing to remove. */
@@ -1431,7 +1432,7 @@ int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode
1431#if defined( USE_INODE_GENERATION_COUNTER ) 1432#if defined( USE_INODE_GENERATION_COUNTER )
1432 if( !old_format_only ( th -> t_super ) ) 1433 if( !old_format_only ( th -> t_super ) )
1433 { 1434 {
1434 __u32 *inode_generation; 1435 __le32 *inode_generation;
1435 1436
1436 inode_generation = 1437 inode_generation =
1437 &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation; 1438 &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index bcdf2438d15..31e75125f48 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -110,7 +110,7 @@ static void reiserfs_unlockfs(struct super_block *s) {
110 reiserfs_allow_writes(s) ; 110 reiserfs_allow_writes(s) ;
111} 111}
112 112
113extern const struct reiserfs_key MAX_KEY; 113extern const struct in_core_key MAX_IN_CORE_KEY;
114 114
115 115
116/* this is used to delete "save link" when there are no items of a 116/* this is used to delete "save link" when there are no items of a
@@ -164,7 +164,7 @@ static int finish_unfinished (struct super_block * s)
164 164
165 /* compose key to look for "save" links */ 165 /* compose key to look for "save" links */
166 max_cpu_key.version = KEY_FORMAT_3_5; 166 max_cpu_key.version = KEY_FORMAT_3_5;
167 max_cpu_key.on_disk_key = MAX_KEY; 167 max_cpu_key.on_disk_key = MAX_IN_CORE_KEY;
168 max_cpu_key.key_length = 3; 168 max_cpu_key.key_length = 3;
169 169
170#ifdef CONFIG_QUOTA 170#ifdef CONFIG_QUOTA
@@ -216,10 +216,10 @@ static int finish_unfinished (struct super_block * s)
216 216
217 /* reiserfs_iget needs k_dirid and k_objectid only */ 217 /* reiserfs_iget needs k_dirid and k_objectid only */
218 item = B_I_PITEM (bh, ih); 218 item = B_I_PITEM (bh, ih);
219 obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__u32 *)item); 219 obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__le32 *)item);
220 obj_key.on_disk_key.k_objectid = le32_to_cpu (ih->ih_key.k_objectid); 220 obj_key.on_disk_key.k_objectid = le32_to_cpu (ih->ih_key.k_objectid);
221 obj_key.on_disk_key.u.k_offset_v1.k_offset = 0; 221 obj_key.on_disk_key.k_offset = 0;
222 obj_key.on_disk_key.u.k_offset_v1.k_uniqueness = 0; 222 obj_key.on_disk_key.k_type = 0;
223 223
224 pathrelse (&path); 224 pathrelse (&path);
225 225
@@ -304,7 +304,7 @@ void add_save_link (struct reiserfs_transaction_handle * th,
304 int retval; 304 int retval;
305 struct cpu_key key; 305 struct cpu_key key;
306 struct item_head ih; 306 struct item_head ih;
307 __u32 link; 307 __le32 link;
308 308
309 BUG_ON (!th->t_trans_id); 309 BUG_ON (!th->t_trans_id);
310 310
@@ -889,12 +889,18 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
889 char * p; 889 char * p;
890 890
891 p = NULL; 891 p = NULL;
892 /* "resize=NNN" */ 892 /* "resize=NNN" or "resize=auto" */
893 *blocks = simple_strtoul (arg, &p, 0); 893
894 if (*p != '\0') { 894 if (!strcmp(arg, "auto")) {
 895 /* NNN does not look like a number */ 895 /* From JFS code, to auto-get the size. */
896 reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg); 896 *blocks = s->s_bdev->bd_inode->i_size >> s->s_blocksize_bits;
897 return 0; 897 } else {
898 *blocks = simple_strtoul (arg, &p, 0);
899 if (*p != '\0') {
900 /* NNN does not look like a number */
901 reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg);
902 return 0;
903 }
898 } 904 }
899 } 905 }
900 906
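
[Editor's note] The new "resize=auto" branch derives the target block count from the backing device's size, as JFS does: the device length in bytes shifted right by the block-size exponent. A worked sketch of the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned long long dev_bytes = 4ULL << 30; /* bd_inode->i_size: 4 GiB   */
	unsigned int blocksize_bits = 12;          /* s_blocksize_bits: 4096 B  */
	unsigned long blocks = (unsigned long)(dev_bytes >> blocksize_bits);

	printf("%lu blocks\n", blocks);            /* prints 1048576 */
	return 0;
}
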
@@ -903,7 +909,8 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
903 unsigned long val = simple_strtoul (arg, &p, 0); 909 unsigned long val = simple_strtoul (arg, &p, 0);
904 /* commit=NNN (time in seconds) */ 910 /* commit=NNN (time in seconds) */
905 if ( *p != '\0' || val >= (unsigned int)-1) { 911 if ( *p != '\0' || val >= (unsigned int)-1) {
906 reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg); return 0; 912 reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg);
913 return 0;
907 } 914 }
908 *commit_max_age = (unsigned int)val; 915 *commit_max_age = (unsigned int)val;
909 } 916 }
@@ -1329,7 +1336,7 @@ static int read_super_block (struct super_block * s, int offset)
1329 return 1; 1336 return 1;
1330 } 1337 }
1331 1338
1332 if ( rs->s_v1.s_root_block == -1 ) { 1339 if ( rs->s_v1.s_root_block == cpu_to_le32(-1) ) {
1333 brelse(bh) ; 1340 brelse(bh) ;
1334 reiserfs_warning (s, "Unfinished reiserfsck --rebuild-tree run detected. Please run\n" 1341 reiserfs_warning (s, "Unfinished reiserfsck --rebuild-tree run detected. Please run\n"
1335 "reiserfsck --rebuild-tree and wait for a completion. If that fails\n" 1342 "reiserfsck --rebuild-tree and wait for a completion. If that fails\n"
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 650c43ba86c..38ef913767f 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -51,7 +51,10 @@ EXPORT_SYMBOL(seq_open);
51 51
52/** 52/**
53 * seq_read - ->read() method for sequential files. 53 * seq_read - ->read() method for sequential files.
54 * @file, @buf, @size, @ppos: see file_operations method 54 * @file: the file to read from
55 * @buf: the buffer to read to
56 * @size: the maximum number of bytes to read
57 * @ppos: the current position in the file
55 * 58 *
56 * Ready-made ->f_op->read() 59 * Ready-made ->f_op->read()
57 */ 60 */
@@ -219,7 +222,9 @@ Eoverflow:
219 222
220/** 223/**
221 * seq_lseek - ->llseek() method for sequential files. 224 * seq_lseek - ->llseek() method for sequential files.
222 * @file, @offset, @origin: see file_operations method 225 * @file: the file in question
226 * @offset: new position
227 * @origin: 0 for absolute, 1 for relative position
223 * 228 *
224 * Ready-made ->f_op->llseek() 229 * Ready-made ->f_op->llseek()
225 */ 230 */
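
[Editor's note] Both seq_file fixes (and the sysfs ones below) repair kernel-doc comments: scripts/kernel-doc does not parse the "@file, @buf, @size, @ppos: see file_operations method" shorthand, and each @name must match a real parameter. The expected shape, shown on a hypothetical function:

#include <stddef.h>

struct seq_file;

/**
 * seq_example_sk - hypothetical function used to show the format
 * @m:   the seq_file being written; one "@name: text" line per
 *       parameter, names matching the prototype exactly
 * @buf: data to emit
 * @len: number of bytes in @buf
 *
 * The blank "*" line separates the parameters from the long
 * description that follows.
 */
int seq_example_sk(struct seq_file *m, const char *buf, size_t len);
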
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da25aeb0e06..364208071e1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -96,7 +96,7 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
96/** 96/**
97 * flush_read_buffer - push buffer to userspace. 97 * flush_read_buffer - push buffer to userspace.
98 * @buffer: data buffer for file. 98 * @buffer: data buffer for file.
99 * @userbuf: user-passed buffer. 99 * @buf: user-passed buffer.
100 * @count: number of bytes requested. 100 * @count: number of bytes requested.
101 * @ppos: file position. 101 * @ppos: file position.
102 * 102 *
@@ -164,7 +164,7 @@ out:
164/** 164/**
165 * fill_write_buffer - copy buffer from userspace. 165 * fill_write_buffer - copy buffer from userspace.
166 * @buffer: data buffer for file. 166 * @buffer: data buffer for file.
167 * @userbuf: data from user. 167 * @buf: data from user.
168 * @count: number of bytes in @userbuf. 168 * @count: number of bytes in @userbuf.
169 * 169 *
170 * Allocate @buffer->page if it hasn't been already, then 170 * Allocate @buffer->page if it hasn't been already, then
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2faa4172b9f..bb40d63f328 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -49,8 +49,7 @@ static int udf_adinicb_readpage(struct file *file, struct page * page)
49 struct inode *inode = page->mapping->host; 49 struct inode *inode = page->mapping->host;
50 char *kaddr; 50 char *kaddr;
51 51
52 if (!PageLocked(page)) 52 BUG_ON(!PageLocked(page));
53 PAGE_BUG(page);
54 53
55 kaddr = kmap(page); 54 kaddr = kmap(page);
56 memset(kaddr, 0, PAGE_CACHE_SIZE); 55 memset(kaddr, 0, PAGE_CACHE_SIZE);
@@ -67,8 +66,7 @@ static int udf_adinicb_writepage(struct page *page, struct writeback_control *wb
67 struct inode *inode = page->mapping->host; 66 struct inode *inode = page->mapping->host;
68 char *kaddr; 67 char *kaddr;
69 68
70 if (!PageLocked(page)) 69 BUG_ON(!PageLocked(page));
71 PAGE_BUG(page);
72 70
73 kaddr = kmap(page); 71 kaddr = kmap(page);
74 memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), kaddr, inode->i_size); 72 memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), kaddr, inode->i_size);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 0506e117378..3d68de39fad 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -167,8 +167,8 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err)
167 } 167 }
168 168
169 page = grab_cache_page(inode->i_mapping, 0); 169 page = grab_cache_page(inode->i_mapping, 0);
170 if (!PageLocked(page)) 170 BUG_ON(!PageLocked(page));
171 PAGE_BUG(page); 171
172 if (!PageUptodate(page)) 172 if (!PageUptodate(page))
173 { 173 {
174 kaddr = kmap(page); 174 kaddr = kmap(page);
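
[Editor's note] The udf hunks replace the open-coded "if (!PageLocked(page)) PAGE_BUG(page);" with BUG_ON(!PageLocked(page)): PAGE_BUG() was removed from the kernel, and BUG_ON() states the invariant in one line. A rough userspace stand-in showing the shape of the idiom (it aborts where the kernel would panic):

#include <stdio.h>
#include <stdlib.h>

#define BUG_ON_SK(cond) do {						\
	if (cond) {							\
		fprintf(stderr, "BUG at %s:%d\n", __FILE__, __LINE__);	\
		abort();						\
	}								\
} while (0)

int main(void)
{
	int page_locked = 1;	/* stand-in for PageLocked(page) */

	/* was: if (!PageLocked(page)) PAGE_BUG(page); */
	BUG_ON_SK(!page_locked);
	puts("invariant held");
	return 0;
}
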