aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/conv.c28
-rw-r--r--fs/9p/mux.c15
-rw-r--r--fs/9p/vfs_inode.c6
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/buffer.c65
-rw-r--r--fs/cifs/connect.c11
-rw-r--r--fs/cifs/file.c8
-rw-r--r--fs/cifs/transport.c1
-rw-r--r--fs/compat.c14
-rw-r--r--fs/configfs/configfs_internal.h11
-rw-r--r--fs/configfs/dir.c36
-rw-r--r--fs/configfs/file.c19
-rw-r--r--fs/configfs/inode.c120
-rw-r--r--fs/configfs/mount.c28
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/direct-io.c9
-rw-r--r--fs/ext2/acl.c2
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/super.c5
-rw-r--r--fs/ext3/acl.c2
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/fat/file.c50
-rw-r--r--fs/fat/misc.c14
-rw-r--r--fs/fcntl.c7
-rw-r--r--fs/file.c3
-rw-r--r--fs/fuse/dev.c40
-rw-r--r--fs/fuse/file.c9
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/fuse/inode.c14
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c2
-rw-r--r--fs/jbd/transaction.c10
-rw-r--r--fs/jffs/intrep.c2
-rw-r--r--fs/libfs.c1
-rw-r--r--fs/lockd/clntproc.c11
-rw-r--r--fs/namei.c39
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfsd/nfssvc.c76
-rw-r--r--fs/ocfs2/buffer_head_io.c10
-rw-r--r--fs/ocfs2/cluster/heartbeat.c5
-rw-r--r--fs/ocfs2/cluster/tcp.c16
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h1
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c18
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c24
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c250
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c13
-rw-r--r--fs/ocfs2/dlm/userdlm.c2
-rw-r--r--fs/ocfs2/extent_map.c12
-rw-r--r--fs/ocfs2/file.c10
-rw-r--r--fs/ocfs2/inode.c6
-rw-r--r--fs/ocfs2/inode.h4
-rw-r--r--fs/ocfs2/journal.c32
-rw-r--r--fs/ocfs2/ocfs2.h3
-rw-r--r--fs/ocfs2/super.c11
-rw-r--r--fs/ocfs2/sysfile.c6
-rw-r--r--fs/ocfs2/uptodate.c12
-rw-r--r--fs/ocfs2/uptodate.h2
-rw-r--r--fs/proc/proc_misc.c2
-rw-r--r--fs/quota_v2.c2
-rw-r--r--fs/reiserfs/dir.c16
-rw-r--r--fs/reiserfs/file.c19
-rw-r--r--fs/reiserfs/fix_node.c50
-rw-r--r--fs/reiserfs/inode.c11
-rw-r--r--fs/reiserfs/journal.c140
-rw-r--r--fs/reiserfs/namei.c16
-rw-r--r--fs/reiserfs/procfs.c3
-rw-r--r--fs/reiserfs/super.c8
-rw-r--r--fs/reiserfs/xattr.c128
-rw-r--r--fs/smbfs/dir.c5
-rw-r--r--fs/udf/balloc.c7
-rw-r--r--fs/udf/namei.c4
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/ufs/super.c10
-rw-r--r--fs/ufs/truncate.c72
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c50
79 files changed, 1043 insertions, 631 deletions
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 32a9f99154e2..bf1f10067960 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -116,13 +116,19 @@ static void buf_put_int64(struct cbuf *buf, u64 val)
116 } 116 }
117} 117}
118 118
119static void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) 119static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
120{ 120{
121 char *ret;
122
123 ret = NULL;
121 if (buf_check_size(buf, slen + 2)) { 124 if (buf_check_size(buf, slen + 2)) {
122 buf_put_int16(buf, slen); 125 buf_put_int16(buf, slen);
126 ret = buf->p;
123 memcpy(buf->p, s, slen); 127 memcpy(buf->p, s, slen);
124 buf->p += slen; 128 buf->p += slen;
125 } 129 }
130
131 return ret;
126} 132}
127 133
128static inline void buf_put_string(struct cbuf *buf, const char *s) 134static inline void buf_put_string(struct cbuf *buf, const char *s)
@@ -430,15 +436,19 @@ static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
430static void 436static void
431v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str) 437v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
432{ 438{
433 if (data) { 439 int len;
434 str->len = strlen(data); 440 char *s;
435 str->str = bufp->p; 441
436 } else { 442 if (data)
437 str->len = 0; 443 len = strlen(data);
438 str->str = NULL; 444 else
439 } 445 len = 0;
440 446
441 buf_put_stringn(bufp, data, str->len); 447 s = buf_put_stringn(bufp, data, len);
448 if (str) {
449 str->len = len;
450 str->str = s;
451 }
442} 452}
443 453
444static int 454static int
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 945cb368d451..ea1134eb47c8 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -471,10 +471,13 @@ static void v9fs_write_work(void *a)
471 } 471 }
472 472
473 spin_lock(&m->lock); 473 spin_lock(&m->lock);
474 req = 474again:
475 list_entry(m->unsent_req_list.next, struct v9fs_req, 475 req = list_entry(m->unsent_req_list.next, struct v9fs_req,
476 req_list); 476 req_list);
477 list_move_tail(&req->req_list, &m->req_list); 477 list_move_tail(&req->req_list, &m->req_list);
478 if (req->err == ERREQFLUSH)
479 goto again;
480
478 m->wbuf = req->tcall->sdata; 481 m->wbuf = req->tcall->sdata;
479 m->wsize = req->tcall->size; 482 m->wsize = req->tcall->size;
480 m->wpos = 0; 483 m->wpos = 0;
@@ -525,7 +528,7 @@ static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
525 struct v9fs_str *ename; 528 struct v9fs_str *ename;
526 529
527 tag = req->tag; 530 tag = req->tag;
528 if (req->rcall->id == RERROR && !req->err) { 531 if (!req->err && req->rcall->id == RERROR) {
529 ecode = req->rcall->params.rerror.errno; 532 ecode = req->rcall->params.rerror.errno;
530 ename = &req->rcall->params.rerror.error; 533 ename = &req->rcall->params.rerror.error;
531 534
@@ -551,7 +554,10 @@ static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
551 req->err = -EIO; 554 req->err = -EIO;
552 } 555 }
553 556
554 if (req->cb && req->err != ERREQFLUSH) { 557 if (req->err == ERREQFLUSH)
558 return;
559
560 if (req->cb) {
555 dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n", 561 dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n",
556 req->tcall, req->rcall); 562 req->tcall, req->rcall);
557 563
@@ -812,6 +818,7 @@ v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err)
812 struct v9fs_mux_rpc *r; 818 struct v9fs_mux_rpc *r;
813 819
814 if (err == ERREQFLUSH) { 820 if (err == ERREQFLUSH) {
821 kfree(rc);
815 dprintk(DEBUG_MUX, "err req flush\n"); 822 dprintk(DEBUG_MUX, "err req flush\n");
816 return; 823 return;
817 } 824 }
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 91f552454c76..63e5b0398e8b 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -886,8 +886,8 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
886 } 886 }
887 887
888 /* copy extension buffer into buffer */ 888 /* copy extension buffer into buffer */
889 if (fcall->params.rstat.stat.extension.len < buflen) 889 if (fcall->params.rstat.stat.extension.len+1 < buflen)
890 buflen = fcall->params.rstat.stat.extension.len; 890 buflen = fcall->params.rstat.stat.extension.len + 1;
891 891
892 memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1); 892 memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
893 buffer[buflen-1] = 0; 893 buffer[buflen-1] = 0;
@@ -951,7 +951,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
951 if (!link) 951 if (!link)
952 link = ERR_PTR(-ENOMEM); 952 link = ERR_PTR(-ENOMEM);
953 else { 953 else {
954 len = v9fs_readlink(dentry, link, strlen(link)); 954 len = v9fs_readlink(dentry, link, PATH_MAX);
955 955
956 if (len < 0) { 956 if (len < 0) {
957 __putname(link); 957 __putname(link);
diff --git a/fs/Kconfig b/fs/Kconfig
index ef78e3a42d32..e9749b0eecd8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -883,8 +883,6 @@ config CONFIGFS_FS
883 Both sysfs and configfs can and should exist together on the 883 Both sysfs and configfs can and should exist together on the
884 same system. One is not a replacement for the other. 884 same system. One is not a replacement for the other.
885 885
886 If unsure, say N.
887
888endmenu 886endmenu
889 887
890menu "Miscellaneous filesystems" 888menu "Miscellaneous filesystems"
@@ -1327,7 +1325,7 @@ config UFS_FS
1327 1325
1328config UFS_FS_WRITE 1326config UFS_FS_WRITE
1329 bool "UFS file system write support (DANGEROUS)" 1327 bool "UFS file system write support (DANGEROUS)"
1330 depends on UFS_FS && EXPERIMENTAL 1328 depends on UFS_FS && EXPERIMENTAL && BROKEN
1331 help 1329 help
1332 Say Y here if you want to try writing to UFS partitions. This is 1330 Say Y here if you want to try writing to UFS partitions. This is
1333 experimental, so you should back up your UFS partitions beforehand. 1331 experimental, so you should back up your UFS partitions beforehand.
diff --git a/fs/buffer.c b/fs/buffer.c
index 3dc712f29d2d..62cfd17dc5fe 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1022,6 +1022,7 @@ try_again:
1022 1022
1023 bh->b_state = 0; 1023 bh->b_state = 0;
1024 atomic_set(&bh->b_count, 0); 1024 atomic_set(&bh->b_count, 0);
1025 bh->b_private = NULL;
1025 bh->b_size = size; 1026 bh->b_size = size;
1026 1027
1027 /* Link the buffer to its page */ 1028 /* Link the buffer to its page */
@@ -2866,22 +2867,22 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2866 else if (test_set_buffer_locked(bh)) 2867 else if (test_set_buffer_locked(bh))
2867 continue; 2868 continue;
2868 2869
2869 get_bh(bh);
2870 if (rw == WRITE || rw == SWRITE) { 2870 if (rw == WRITE || rw == SWRITE) {
2871 if (test_clear_buffer_dirty(bh)) { 2871 if (test_clear_buffer_dirty(bh)) {
2872 bh->b_end_io = end_buffer_write_sync; 2872 bh->b_end_io = end_buffer_write_sync;
2873 get_bh(bh);
2873 submit_bh(WRITE, bh); 2874 submit_bh(WRITE, bh);
2874 continue; 2875 continue;
2875 } 2876 }
2876 } else { 2877 } else {
2877 if (!buffer_uptodate(bh)) { 2878 if (!buffer_uptodate(bh)) {
2878 bh->b_end_io = end_buffer_read_sync; 2879 bh->b_end_io = end_buffer_read_sync;
2880 get_bh(bh);
2879 submit_bh(rw, bh); 2881 submit_bh(rw, bh);
2880 continue; 2882 continue;
2881 } 2883 }
2882 } 2884 }
2883 unlock_buffer(bh); 2885 unlock_buffer(bh);
2884 put_bh(bh);
2885 } 2886 }
2886} 2887}
2887 2888
@@ -3050,6 +3051,66 @@ asmlinkage long sys_bdflush(int func, long data)
3050} 3051}
3051 3052
3052/* 3053/*
3054 * Migration function for pages with buffers. This function can only be used
3055 * if the underlying filesystem guarantees that no other references to "page"
3056 * exist.
3057 */
3058#ifdef CONFIG_MIGRATION
3059int buffer_migrate_page(struct page *newpage, struct page *page)
3060{
3061 struct address_space *mapping = page->mapping;
3062 struct buffer_head *bh, *head;
3063
3064 if (!mapping)
3065 return -EAGAIN;
3066
3067 if (!page_has_buffers(page))
3068 return migrate_page(newpage, page);
3069
3070 head = page_buffers(page);
3071
3072 if (migrate_page_remove_references(newpage, page, 3))
3073 return -EAGAIN;
3074
3075 bh = head;
3076 do {
3077 get_bh(bh);
3078 lock_buffer(bh);
3079 bh = bh->b_this_page;
3080
3081 } while (bh != head);
3082
3083 ClearPagePrivate(page);
3084 set_page_private(newpage, page_private(page));
3085 set_page_private(page, 0);
3086 put_page(page);
3087 get_page(newpage);
3088
3089 bh = head;
3090 do {
3091 set_bh_page(bh, newpage, bh_offset(bh));
3092 bh = bh->b_this_page;
3093
3094 } while (bh != head);
3095
3096 SetPagePrivate(newpage);
3097
3098 migrate_page_copy(newpage, page);
3099
3100 bh = head;
3101 do {
3102 unlock_buffer(bh);
3103 put_bh(bh);
3104 bh = bh->b_this_page;
3105
3106 } while (bh != head);
3107
3108 return 0;
3109}
3110EXPORT_SYMBOL(buffer_migrate_page);
3111#endif
3112
3113/*
3053 * Buffer-head allocation 3114 * Buffer-head allocation
3054 */ 3115 */
3055static kmem_cache_t *bh_cachep; 3116static kmem_cache_t *bh_cachep;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 88f60aa52058..e488603fb1e7 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1785,7 +1785,16 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1785 } else if(volume_info.wsize) 1785 } else if(volume_info.wsize)
1786 cifs_sb->wsize = volume_info.wsize; 1786 cifs_sb->wsize = volume_info.wsize;
1787 else 1787 else
1788 cifs_sb->wsize = CIFSMaxBufSize; /* default */ 1788 cifs_sb->wsize =
1789 min_t(const int, PAGEVEC_SIZE * PAGE_CACHE_SIZE,
1790 127*1024);
1791 /* old default of CIFSMaxBufSize was too small now
1792 that SMB Write2 can send multiple pages in kvec.
1793 RFC1001 does not describe what happens when frame
1794 bigger than 128K is sent so use that as max in
1795 conjunction with 52K kvec constraint on arch with 4K
1796 page size */
1797
1789 if(cifs_sb->rsize < PAGE_CACHE_SIZE) { 1798 if(cifs_sb->rsize < PAGE_CACHE_SIZE) {
1790 cifs_sb->rsize = PAGE_CACHE_SIZE; 1799 cifs_sb->rsize = PAGE_CACHE_SIZE;
1791 /* Windows ME does this */ 1800 /* Windows ME does this */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 77c990f0cb98..d17c97d07c80 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1190,7 +1190,6 @@ retry:
1190 /* BB what if continued retry is 1190 /* BB what if continued retry is
1191 requested via mount flags? */ 1191 requested via mount flags? */
1192 set_bit(AS_EIO, &mapping->flags); 1192 set_bit(AS_EIO, &mapping->flags);
1193 SetPageError(page);
1194 } else { 1193 } else {
1195 cifs_stats_bytes_written(cifs_sb->tcon, 1194 cifs_stats_bytes_written(cifs_sb->tcon,
1196 bytes_written); 1195 bytes_written);
@@ -1198,6 +1197,13 @@ retry:
1198 } 1197 }
1199 for (i = 0; i < n_iov; i++) { 1198 for (i = 0; i < n_iov; i++) {
1200 page = pvec.pages[first + i]; 1199 page = pvec.pages[first + i];
1200 /* Should we also set page error on
1201 success rc but too little data written? */
1202 /* BB investigate retry logic on temporary
1203 server crash cases and how recovery works
1204 when page marked as error */
1205 if(rc)
1206 SetPageError(page);
1201 kunmap(page); 1207 kunmap(page);
1202 unlock_page(page); 1208 unlock_page(page);
1203 page_cache_release(page); 1209 page_cache_release(page);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 7b98792150ea..b12cb8a7da7c 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -498,7 +498,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
498 else 498 else
499 *pRespBufType = CIFS_SMALL_BUFFER; 499 *pRespBufType = CIFS_SMALL_BUFFER;
500 iov[0].iov_len = receive_len + 4; 500 iov[0].iov_len = receive_len + 4;
501 iov[1].iov_len = 0;
502 501
503 dump_smb(midQ->resp_buf, 80); 502 dump_smb(midQ->resp_buf, 80);
504 /* convert the length into a more usable form */ 503 /* convert the length into a more usable form */
diff --git a/fs/compat.c b/fs/compat.c
index ff0bafcff720..70c5af4cc270 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -73,17 +73,17 @@ asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __
73 return do_utimes(AT_FDCWD, filename, t ? tv : NULL); 73 return do_utimes(AT_FDCWD, filename, t ? tv : NULL);
74} 74}
75 75
76asmlinkage long compat_sys_futimesat(int dfd, char __user *filename, struct compat_timeval __user *t) 76asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, struct compat_timeval __user *t)
77{ 77{
78 struct timeval tv[2]; 78 struct timeval tv[2];
79 79
80 if (t) { 80 if (t) {
81 if (get_user(tv[0].tv_sec, &t[0].tv_sec) || 81 if (get_user(tv[0].tv_sec, &t[0].tv_sec) ||
82 get_user(tv[0].tv_usec, &t[0].tv_usec) || 82 get_user(tv[0].tv_usec, &t[0].tv_usec) ||
83 get_user(tv[1].tv_sec, &t[1].tv_sec) || 83 get_user(tv[1].tv_sec, &t[1].tv_sec) ||
84 get_user(tv[1].tv_usec, &t[1].tv_usec)) 84 get_user(tv[1].tv_usec, &t[1].tv_usec))
85 return -EFAULT; 85 return -EFAULT;
86 } 86 }
87 return do_utimes(dfd, filename, t ? tv : NULL); 87 return do_utimes(dfd, filename, t ? tv : NULL);
88} 88}
89 89
@@ -114,7 +114,7 @@ asmlinkage long compat_sys_newlstat(char __user * filename,
114 return error; 114 return error;
115} 115}
116 116
117asmlinkage long compat_sys_newfstatat(int dfd, char __user *filename, 117asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
118 struct compat_stat __user *statbuf, int flag) 118 struct compat_stat __user *statbuf, int flag)
119{ 119{
120 struct kstat stat; 120 struct kstat stat;
@@ -1326,7 +1326,7 @@ compat_sys_open(const char __user *filename, int flags, int mode)
1326 * O_LARGEFILE flag. 1326 * O_LARGEFILE flag.
1327 */ 1327 */
1328asmlinkage long 1328asmlinkage long
1329compat_sys_openat(int dfd, const char __user *filename, int flags, int mode) 1329compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode)
1330{ 1330{
1331 return do_sys_open(dfd, filename, flags, mode); 1331 return do_sys_open(dfd, filename, flags, mode);
1332} 1332}
@@ -1781,7 +1781,7 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1781{ 1781{
1782 compat_sigset_t ss32; 1782 compat_sigset_t ss32;
1783 sigset_t ksigmask, sigsaved; 1783 sigset_t ksigmask, sigsaved;
1784 long timeout = MAX_SCHEDULE_TIMEOUT; 1784 s64 timeout = MAX_SCHEDULE_TIMEOUT;
1785 struct compat_timespec ts; 1785 struct compat_timespec ts;
1786 int ret; 1786 int ret;
1787 1787
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 8899d9c5f6bf..f70e46951b37 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -36,6 +36,7 @@ struct configfs_dirent {
36 int s_type; 36 int s_type;
37 umode_t s_mode; 37 umode_t s_mode;
38 struct dentry * s_dentry; 38 struct dentry * s_dentry;
39 struct iattr * s_iattr;
39}; 40};
40 41
41#define CONFIGFS_ROOT 0x0001 42#define CONFIGFS_ROOT 0x0001
@@ -48,10 +49,11 @@ struct configfs_dirent {
48#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) 49#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
49 50
50extern struct vfsmount * configfs_mount; 51extern struct vfsmount * configfs_mount;
52extern kmem_cache_t *configfs_dir_cachep;
51 53
52extern int configfs_is_root(struct config_item *item); 54extern int configfs_is_root(struct config_item *item);
53 55
54extern struct inode * configfs_new_inode(mode_t mode); 56extern struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent *);
55extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *)); 57extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *));
56 58
57extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); 59extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
@@ -63,6 +65,7 @@ extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
63 65
64extern const unsigned char * configfs_get_name(struct configfs_dirent *sd); 66extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
65extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent); 67extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
68extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
66 69
67extern int configfs_pin_fs(void); 70extern int configfs_pin_fs(void);
68extern void configfs_release_fs(void); 71extern void configfs_release_fs(void);
@@ -120,8 +123,10 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
120 123
121static inline void release_configfs_dirent(struct configfs_dirent * sd) 124static inline void release_configfs_dirent(struct configfs_dirent * sd)
122{ 125{
123 if (!(sd->s_type & CONFIGFS_ROOT)) 126 if (!(sd->s_type & CONFIGFS_ROOT)) {
124 kfree(sd); 127 kfree(sd->s_iattr);
128 kmem_cache_free(configfs_dir_cachep, sd);
129 }
125} 130}
126 131
127static inline struct configfs_dirent * configfs_get(struct configfs_dirent * sd) 132static inline struct configfs_dirent * configfs_get(struct configfs_dirent * sd)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index b668ec61527e..ca60e3abef45 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -72,7 +72,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
72{ 72{
73 struct configfs_dirent * sd; 73 struct configfs_dirent * sd;
74 74
75 sd = kmalloc(sizeof(*sd), GFP_KERNEL); 75 sd = kmem_cache_alloc(configfs_dir_cachep, GFP_KERNEL);
76 if (!sd) 76 if (!sd)
77 return NULL; 77 return NULL;
78 78
@@ -136,13 +136,19 @@ static int create_dir(struct config_item * k, struct dentry * p,
136 int error; 136 int error;
137 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 137 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
138 138
139 error = configfs_create(d, mode, init_dir); 139 error = configfs_make_dirent(p->d_fsdata, d, k, mode,
140 CONFIGFS_DIR);
140 if (!error) { 141 if (!error) {
141 error = configfs_make_dirent(p->d_fsdata, d, k, mode, 142 error = configfs_create(d, mode, init_dir);
142 CONFIGFS_DIR);
143 if (!error) { 143 if (!error) {
144 p->d_inode->i_nlink++; 144 p->d_inode->i_nlink++;
145 (d)->d_op = &configfs_dentry_ops; 145 (d)->d_op = &configfs_dentry_ops;
146 } else {
147 struct configfs_dirent *sd = d->d_fsdata;
148 if (sd) {
149 list_del_init(&sd->s_sibling);
150 configfs_put(sd);
151 }
146 } 152 }
147 } 153 }
148 return error; 154 return error;
@@ -182,12 +188,19 @@ int configfs_create_link(struct configfs_symlink *sl,
182 int err = 0; 188 int err = 0;
183 umode_t mode = S_IFLNK | S_IRWXUGO; 189 umode_t mode = S_IFLNK | S_IRWXUGO;
184 190
185 err = configfs_create(dentry, mode, init_symlink); 191 err = configfs_make_dirent(parent->d_fsdata, dentry, sl, mode,
192 CONFIGFS_ITEM_LINK);
186 if (!err) { 193 if (!err) {
187 err = configfs_make_dirent(parent->d_fsdata, dentry, sl, 194 err = configfs_create(dentry, mode, init_symlink);
188 mode, CONFIGFS_ITEM_LINK);
189 if (!err) 195 if (!err)
190 dentry->d_op = &configfs_dentry_ops; 196 dentry->d_op = &configfs_dentry_ops;
197 else {
198 struct configfs_dirent *sd = dentry->d_fsdata;
199 if (sd) {
200 list_del_init(&sd->s_sibling);
201 configfs_put(sd);
202 }
203 }
191 } 204 }
192 return err; 205 return err;
193} 206}
@@ -241,13 +254,15 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
241 struct configfs_attribute * attr = sd->s_element; 254 struct configfs_attribute * attr = sd->s_element;
242 int error; 255 int error;
243 256
257 dentry->d_fsdata = configfs_get(sd);
258 sd->s_dentry = dentry;
244 error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, init_file); 259 error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, init_file);
245 if (error) 260 if (error) {
261 configfs_put(sd);
246 return error; 262 return error;
263 }
247 264
248 dentry->d_op = &configfs_dentry_ops; 265 dentry->d_op = &configfs_dentry_ops;
249 dentry->d_fsdata = configfs_get(sd);
250 sd->s_dentry = dentry;
251 d_rehash(dentry); 266 d_rehash(dentry);
252 267
253 return 0; 268 return 0;
@@ -839,6 +854,7 @@ struct inode_operations configfs_dir_inode_operations = {
839 .symlink = configfs_symlink, 854 .symlink = configfs_symlink,
840 .unlink = configfs_unlink, 855 .unlink = configfs_unlink,
841 .lookup = configfs_lookup, 856 .lookup = configfs_lookup,
857 .setattr = configfs_setattr,
842}; 858};
843 859
844#if 0 860#if 0
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index c26cd61f13af..3921920d8716 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -26,7 +26,6 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/module.h> 28#include <linux/module.h>
29#include <linux/dnotify.h>
30#include <linux/slab.h> 29#include <linux/slab.h>
31#include <asm/uaccess.h> 30#include <asm/uaccess.h>
32#include <asm/semaphore.h> 31#include <asm/semaphore.h>
@@ -150,7 +149,7 @@ out:
150/** 149/**
151 * fill_write_buffer - copy buffer from userspace. 150 * fill_write_buffer - copy buffer from userspace.
152 * @buffer: data buffer for file. 151 * @buffer: data buffer for file.
153 * @userbuf: data from user. 152 * @buf: data from user.
154 * @count: number of bytes in @userbuf. 153 * @count: number of bytes in @userbuf.
155 * 154 *
156 * Allocate @buffer->page if it hasn't been already, then 155 * Allocate @buffer->page if it hasn't been already, then
@@ -177,8 +176,9 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size
177 176
178/** 177/**
179 * flush_write_buffer - push buffer to config_item. 178 * flush_write_buffer - push buffer to config_item.
180 * @file: file pointer. 179 * @dentry: dentry to the attribute
181 * @buffer: data buffer for file. 180 * @buffer: data buffer for file.
181 * @count: number of bytes
182 * 182 *
183 * Get the correct pointers for the config_item and the attribute we're 183 * Get the correct pointers for the config_item and the attribute we're
184 * dealing with, then call the store() method for the attribute, 184 * dealing with, then call the store() method for the attribute,
@@ -217,15 +217,16 @@ static ssize_t
217configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) 217configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
218{ 218{
219 struct configfs_buffer * buffer = file->private_data; 219 struct configfs_buffer * buffer = file->private_data;
220 ssize_t len;
220 221
221 down(&buffer->sem); 222 down(&buffer->sem);
222 count = fill_write_buffer(buffer,buf,count); 223 len = fill_write_buffer(buffer, buf, count);
223 if (count > 0) 224 if (len > 0)
224 count = flush_write_buffer(file->f_dentry,buffer,count); 225 len = flush_write_buffer(file->f_dentry, buffer, count);
225 if (count > 0) 226 if (len > 0)
226 *ppos += count; 227 *ppos += len;
227 up(&buffer->sem); 228 up(&buffer->sem);
228 return count; 229 return len;
229} 230}
230 231
231static int check_perm(struct inode * inode, struct file * file) 232static int check_perm(struct inode * inode, struct file * file)
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 6577c588de9d..c153bd9534cb 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -31,6 +31,7 @@
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/namei.h> 32#include <linux/namei.h>
33#include <linux/backing-dev.h> 33#include <linux/backing-dev.h>
34#include <linux/capability.h>
34 35
35#include <linux/configfs.h> 36#include <linux/configfs.h>
36#include "configfs_internal.h" 37#include "configfs_internal.h"
@@ -48,18 +49,107 @@ static struct backing_dev_info configfs_backing_dev_info = {
48 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 49 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
49}; 50};
50 51
51struct inode * configfs_new_inode(mode_t mode) 52static struct inode_operations configfs_inode_operations ={
53 .setattr = configfs_setattr,
54};
55
56int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
57{
58 struct inode * inode = dentry->d_inode;
59 struct configfs_dirent * sd = dentry->d_fsdata;
60 struct iattr * sd_iattr;
61 unsigned int ia_valid = iattr->ia_valid;
62 int error;
63
64 if (!sd)
65 return -EINVAL;
66
67 sd_iattr = sd->s_iattr;
68
69 error = inode_change_ok(inode, iattr);
70 if (error)
71 return error;
72
73 error = inode_setattr(inode, iattr);
74 if (error)
75 return error;
76
77 if (!sd_iattr) {
78 /* setting attributes for the first time, allocate now */
79 sd_iattr = kmalloc(sizeof(struct iattr), GFP_KERNEL);
80 if (!sd_iattr)
81 return -ENOMEM;
82 /* assign default attributes */
83 memset(sd_iattr, 0, sizeof(struct iattr));
84 sd_iattr->ia_mode = sd->s_mode;
85 sd_iattr->ia_uid = 0;
86 sd_iattr->ia_gid = 0;
87 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
88 sd->s_iattr = sd_iattr;
89 }
90
91 /* attributes were changed atleast once in past */
92
93 if (ia_valid & ATTR_UID)
94 sd_iattr->ia_uid = iattr->ia_uid;
95 if (ia_valid & ATTR_GID)
96 sd_iattr->ia_gid = iattr->ia_gid;
97 if (ia_valid & ATTR_ATIME)
98 sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
99 inode->i_sb->s_time_gran);
100 if (ia_valid & ATTR_MTIME)
101 sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
102 inode->i_sb->s_time_gran);
103 if (ia_valid & ATTR_CTIME)
104 sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
105 inode->i_sb->s_time_gran);
106 if (ia_valid & ATTR_MODE) {
107 umode_t mode = iattr->ia_mode;
108
109 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
110 mode &= ~S_ISGID;
111 sd_iattr->ia_mode = sd->s_mode = mode;
112 }
113
114 return error;
115}
116
117static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
118{
119 inode->i_mode = mode;
120 inode->i_uid = 0;
121 inode->i_gid = 0;
122 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
123}
124
125static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
126{
127 inode->i_mode = iattr->ia_mode;
128 inode->i_uid = iattr->ia_uid;
129 inode->i_gid = iattr->ia_gid;
130 inode->i_atime = iattr->ia_atime;
131 inode->i_mtime = iattr->ia_mtime;
132 inode->i_ctime = iattr->ia_ctime;
133}
134
135struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
52{ 136{
53 struct inode * inode = new_inode(configfs_sb); 137 struct inode * inode = new_inode(configfs_sb);
54 if (inode) { 138 if (inode) {
55 inode->i_mode = mode;
56 inode->i_uid = 0;
57 inode->i_gid = 0;
58 inode->i_blksize = PAGE_CACHE_SIZE; 139 inode->i_blksize = PAGE_CACHE_SIZE;
59 inode->i_blocks = 0; 140 inode->i_blocks = 0;
60 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
61 inode->i_mapping->a_ops = &configfs_aops; 141 inode->i_mapping->a_ops = &configfs_aops;
62 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; 142 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
143 inode->i_op = &configfs_inode_operations;
144
145 if (sd->s_iattr) {
146 /* sysfs_dirent has non-default attributes
147 * get them for the new inode from persistent copy
148 * in sysfs_dirent
149 */
150 set_inode_attr(inode, sd->s_iattr);
151 } else
152 set_default_inode_attr(inode, mode);
63 } 153 }
64 return inode; 154 return inode;
65} 155}
@@ -70,7 +160,8 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *
70 struct inode * inode = NULL; 160 struct inode * inode = NULL;
71 if (dentry) { 161 if (dentry) {
72 if (!dentry->d_inode) { 162 if (!dentry->d_inode) {
73 if ((inode = configfs_new_inode(mode))) { 163 struct configfs_dirent *sd = dentry->d_fsdata;
164 if ((inode = configfs_new_inode(mode, sd))) {
74 if (dentry->d_parent && dentry->d_parent->d_inode) { 165 if (dentry->d_parent && dentry->d_parent->d_inode) {
75 struct inode *p_inode = dentry->d_parent->d_inode; 166 struct inode *p_inode = dentry->d_parent->d_inode;
76 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; 167 p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
@@ -103,10 +194,9 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *
103 */ 194 */
104const unsigned char * configfs_get_name(struct configfs_dirent *sd) 195const unsigned char * configfs_get_name(struct configfs_dirent *sd)
105{ 196{
106 struct attribute * attr; 197 struct configfs_attribute *attr;
107 198
108 if (!sd || !sd->s_element) 199 BUG_ON(!sd || !sd->s_element);
109 BUG();
110 200
111 /* These always have a dentry, so use that */ 201 /* These always have a dentry, so use that */
112 if (sd->s_type & (CONFIGFS_DIR | CONFIGFS_ITEM_LINK)) 202 if (sd->s_type & (CONFIGFS_DIR | CONFIGFS_ITEM_LINK))
@@ -114,7 +204,7 @@ const unsigned char * configfs_get_name(struct configfs_dirent *sd)
114 204
115 if (sd->s_type & CONFIGFS_ITEM_ATTR) { 205 if (sd->s_type & CONFIGFS_ITEM_ATTR) {
116 attr = sd->s_element; 206 attr = sd->s_element;
117 return attr->name; 207 return attr->ca_name;
118 } 208 }
119 return NULL; 209 return NULL;
120} 210}
@@ -130,13 +220,17 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
130 220
131 if (dentry) { 221 if (dentry) {
132 spin_lock(&dcache_lock); 222 spin_lock(&dcache_lock);
223 spin_lock(&dentry->d_lock);
133 if (!(d_unhashed(dentry) && dentry->d_inode)) { 224 if (!(d_unhashed(dentry) && dentry->d_inode)) {
134 dget_locked(dentry); 225 dget_locked(dentry);
135 __d_drop(dentry); 226 __d_drop(dentry);
227 spin_unlock(&dentry->d_lock);
136 spin_unlock(&dcache_lock); 228 spin_unlock(&dcache_lock);
137 simple_unlink(parent->d_inode, dentry); 229 simple_unlink(parent->d_inode, dentry);
138 } else 230 } else {
231 spin_unlock(&dentry->d_lock);
139 spin_unlock(&dcache_lock); 232 spin_unlock(&dcache_lock);
233 }
140 } 234 }
141} 235}
142 236
@@ -145,6 +239,10 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
145 struct configfs_dirent * sd; 239 struct configfs_dirent * sd;
146 struct configfs_dirent * parent_sd = dir->d_fsdata; 240 struct configfs_dirent * parent_sd = dir->d_fsdata;
147 241
242 if (dir->d_inode == NULL)
243 /* no inode means this hasn't been made visible yet */
244 return;
245
148 mutex_lock(&dir->d_inode->i_mutex); 246 mutex_lock(&dir->d_inode->i_mutex);
149 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 247 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
150 if (!sd->s_element) 248 if (!sd->s_element)
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 1a2f6f6a4d91..f920d30478e5 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -38,6 +38,7 @@
38 38
39struct vfsmount * configfs_mount = NULL; 39struct vfsmount * configfs_mount = NULL;
40struct super_block * configfs_sb = NULL; 40struct super_block * configfs_sb = NULL;
41kmem_cache_t *configfs_dir_cachep;
41static int configfs_mnt_count = 0; 42static int configfs_mnt_count = 0;
42 43
43static struct super_operations configfs_ops = { 44static struct super_operations configfs_ops = {
@@ -62,6 +63,7 @@ static struct configfs_dirent configfs_root = {
62 .s_children = LIST_HEAD_INIT(configfs_root.s_children), 63 .s_children = LIST_HEAD_INIT(configfs_root.s_children),
63 .s_element = &configfs_root_group.cg_item, 64 .s_element = &configfs_root_group.cg_item,
64 .s_type = CONFIGFS_ROOT, 65 .s_type = CONFIGFS_ROOT,
66 .s_iattr = NULL,
65}; 67};
66 68
67static int configfs_fill_super(struct super_block *sb, void *data, int silent) 69static int configfs_fill_super(struct super_block *sb, void *data, int silent)
@@ -73,9 +75,11 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
73 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 75 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
74 sb->s_magic = CONFIGFS_MAGIC; 76 sb->s_magic = CONFIGFS_MAGIC;
75 sb->s_op = &configfs_ops; 77 sb->s_op = &configfs_ops;
78 sb->s_time_gran = 1;
76 configfs_sb = sb; 79 configfs_sb = sb;
77 80
78 inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); 81 inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
82 &configfs_root);
79 if (inode) { 83 if (inode) {
80 inode->i_op = &configfs_dir_inode_operations; 84 inode->i_op = &configfs_dir_inode_operations;
81 inode->i_fop = &configfs_dir_operations; 85 inode->i_fop = &configfs_dir_operations;
@@ -128,19 +132,31 @@ static decl_subsys(config, NULL, NULL);
128 132
129static int __init configfs_init(void) 133static int __init configfs_init(void)
130{ 134{
131 int err; 135 int err = -ENOMEM;
136
137 configfs_dir_cachep = kmem_cache_create("configfs_dir_cache",
138 sizeof(struct configfs_dirent),
139 0, 0, NULL, NULL);
140 if (!configfs_dir_cachep)
141 goto out;
132 142
133 kset_set_kset_s(&config_subsys, kernel_subsys); 143 kset_set_kset_s(&config_subsys, kernel_subsys);
134 err = subsystem_register(&config_subsys); 144 err = subsystem_register(&config_subsys);
135 if (err) 145 if (err) {
136 return err; 146 kmem_cache_destroy(configfs_dir_cachep);
147 configfs_dir_cachep = NULL;
148 goto out;
149 }
137 150
138 err = register_filesystem(&configfs_fs_type); 151 err = register_filesystem(&configfs_fs_type);
139 if (err) { 152 if (err) {
140 printk(KERN_ERR "configfs: Unable to register filesystem!\n"); 153 printk(KERN_ERR "configfs: Unable to register filesystem!\n");
141 subsystem_unregister(&config_subsys); 154 subsystem_unregister(&config_subsys);
155 kmem_cache_destroy(configfs_dir_cachep);
156 configfs_dir_cachep = NULL;
142 } 157 }
143 158
159out:
144 return err; 160 return err;
145} 161}
146 162
@@ -148,11 +164,13 @@ static void __exit configfs_exit(void)
148{ 164{
149 unregister_filesystem(&configfs_fs_type); 165 unregister_filesystem(&configfs_fs_type);
150 subsystem_unregister(&config_subsys); 166 subsystem_unregister(&config_subsys);
167 kmem_cache_destroy(configfs_dir_cachep);
168 configfs_dir_cachep = NULL;
151} 169}
152 170
153MODULE_AUTHOR("Oracle"); 171MODULE_AUTHOR("Oracle");
154MODULE_LICENSE("GPL"); 172MODULE_LICENSE("GPL");
155MODULE_VERSION("0.0.1"); 173MODULE_VERSION("0.0.2");
156MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration."); 174MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration.");
157 175
158module_init(configfs_init); 176module_init(configfs_init);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 50f5840521a9..e5512e295cf2 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -162,8 +162,7 @@ int configfs_unlink(struct inode *dir, struct dentry *dentry)
162 if (!(sd->s_type & CONFIGFS_ITEM_LINK)) 162 if (!(sd->s_type & CONFIGFS_ITEM_LINK))
163 goto out; 163 goto out;
164 164
165 if (dentry->d_parent == configfs_sb->s_root) 165 BUG_ON(dentry->d_parent == configfs_sb->s_root);
166 BUG();
167 166
168 sl = sd->s_element; 167 sl = sd->s_element;
169 168
@@ -277,5 +276,6 @@ struct inode_operations configfs_symlink_inode_operations = {
277 .follow_link = configfs_follow_link, 276 .follow_link = configfs_follow_link,
278 .readlink = generic_readlink, 277 .readlink = generic_readlink,
279 .put_link = configfs_put_link, 278 .put_link = configfs_put_link,
279 .setattr = configfs_setattr,
280}; 280};
281 281
diff --git a/fs/dcache.c b/fs/dcache.c
index 86bdb93789c6..a173bba32666 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -743,7 +743,9 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
743 dentry->d_op = NULL; 743 dentry->d_op = NULL;
744 dentry->d_fsdata = NULL; 744 dentry->d_fsdata = NULL;
745 dentry->d_mounted = 0; 745 dentry->d_mounted = 0;
746#ifdef CONFIG_PROFILING
746 dentry->d_cookie = NULL; 747 dentry->d_cookie = NULL;
748#endif
747 INIT_HLIST_NODE(&dentry->d_hash); 749 INIT_HLIST_NODE(&dentry->d_hash);
748 INIT_LIST_HEAD(&dentry->d_lru); 750 INIT_LIST_HEAD(&dentry->d_lru);
749 INIT_LIST_HEAD(&dentry->d_subdirs); 751 INIT_LIST_HEAD(&dentry->d_subdirs);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 30dbbd1df511..848044af7e16 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -857,6 +857,7 @@ do_holes:
857 /* Handle holes */ 857 /* Handle holes */
858 if (!buffer_mapped(map_bh)) { 858 if (!buffer_mapped(map_bh)) {
859 char *kaddr; 859 char *kaddr;
860 loff_t i_size_aligned;
860 861
861 /* AKPM: eargh, -ENOTBLK is a hack */ 862 /* AKPM: eargh, -ENOTBLK is a hack */
862 if (dio->rw == WRITE) { 863 if (dio->rw == WRITE) {
@@ -864,8 +865,14 @@ do_holes:
864 return -ENOTBLK; 865 return -ENOTBLK;
865 } 866 }
866 867
868 /*
869 * Be sure to account for a partial block as the
870 * last block in the file
871 */
872 i_size_aligned = ALIGN(i_size_read(dio->inode),
873 1 << blkbits);
867 if (dio->block_in_file >= 874 if (dio->block_in_file >=
868 i_size_read(dio->inode)>>blkbits) { 875 i_size_aligned >> blkbits) {
869 /* We hit eof */ 876 /* We hit eof */
870 page_cache_release(page); 877 page_cache_release(page);
871 goto out; 878 goto out;
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 35acc43b897f..da52b4a5db64 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -220,7 +220,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
220 struct ext2_inode_info *ei = EXT2_I(inode); 220 struct ext2_inode_info *ei = EXT2_I(inode);
221 int name_index; 221 int name_index;
222 void *value = NULL; 222 void *value = NULL;
223 size_t size; 223 size_t size = 0;
224 int error; 224 int error;
225 225
226 if (S_ISLNK(inode->i_mode)) 226 if (S_ISLNK(inode->i_mode))
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 74714af4ae69..e52765219e16 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -605,7 +605,7 @@ got:
605 insert_inode_hash(inode); 605 insert_inode_hash(inode);
606 606
607 if (DQUOT_ALLOC_INODE(inode)) { 607 if (DQUOT_ALLOC_INODE(inode)) {
608 err = -ENOSPC; 608 err = -EDQUOT;
609 goto fail_drop; 609 goto fail_drop;
610 } 610 }
611 611
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e7d3f0522d01..a717837f272e 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -706,6 +706,7 @@ struct address_space_operations ext2_aops = {
706 .bmap = ext2_bmap, 706 .bmap = ext2_bmap,
707 .direct_IO = ext2_direct_IO, 707 .direct_IO = ext2_direct_IO,
708 .writepages = ext2_writepages, 708 .writepages = ext2_writepages,
709 .migratepage = buffer_migrate_page,
709}; 710};
710 711
711struct address_space_operations ext2_aops_xip = { 712struct address_space_operations ext2_aops_xip = {
@@ -723,6 +724,7 @@ struct address_space_operations ext2_nobh_aops = {
723 .bmap = ext2_bmap, 724 .bmap = ext2_bmap,
724 .direct_IO = ext2_direct_IO, 725 .direct_IO = ext2_direct_IO,
725 .writepages = ext2_writepages, 726 .writepages = ext2_writepages,
727 .migratepage = buffer_migrate_page,
726}; 728};
727 729
728/* 730/*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 8d6819846fc9..cb6f9bd658de 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -221,6 +221,11 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
221 seq_puts(seq, ",grpquota"); 221 seq_puts(seq, ",grpquota");
222#endif 222#endif
223 223
224#if defined(CONFIG_EXT2_FS_XIP)
225 if (sbi->s_mount_opt & EXT2_MOUNT_XIP)
226 seq_puts(seq, ",xip");
227#endif
228
224 return 0; 229 return 0;
225} 230}
226 231
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 47a9da2dfb4f..0d21d558b87a 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -226,7 +226,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
226 struct ext3_inode_info *ei = EXT3_I(inode); 226 struct ext3_inode_info *ei = EXT3_I(inode);
227 int name_index; 227 int name_index;
228 void *value = NULL; 228 void *value = NULL;
229 size_t size; 229 size_t size = 0;
230 int error; 230 int error;
231 231
232 if (S_ISLNK(inode->i_mode)) 232 if (S_ISLNK(inode->i_mode))
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 8824e84f8a56..3fc4238e9703 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1559,6 +1559,7 @@ static struct address_space_operations ext3_ordered_aops = {
1559 .invalidatepage = ext3_invalidatepage, 1559 .invalidatepage = ext3_invalidatepage,
1560 .releasepage = ext3_releasepage, 1560 .releasepage = ext3_releasepage,
1561 .direct_IO = ext3_direct_IO, 1561 .direct_IO = ext3_direct_IO,
1562 .migratepage = buffer_migrate_page,
1562}; 1563};
1563 1564
1564static struct address_space_operations ext3_writeback_aops = { 1565static struct address_space_operations ext3_writeback_aops = {
@@ -1572,6 +1573,7 @@ static struct address_space_operations ext3_writeback_aops = {
1572 .invalidatepage = ext3_invalidatepage, 1573 .invalidatepage = ext3_invalidatepage,
1573 .releasepage = ext3_releasepage, 1574 .releasepage = ext3_releasepage,
1574 .direct_IO = ext3_direct_IO, 1575 .direct_IO = ext3_direct_IO,
1576 .migratepage = buffer_migrate_page,
1575}; 1577};
1576 1578
1577static struct address_space_operations ext3_journalled_aops = { 1579static struct address_space_operations ext3_journalled_aops = {
diff --git a/fs/fat/file.c b/fs/fat/file.c
index e99c5a73b39e..88aa1ae13f9f 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -210,10 +210,30 @@ static int fat_free(struct inode *inode, int skip)
210 if (MSDOS_I(inode)->i_start == 0) 210 if (MSDOS_I(inode)->i_start == 0)
211 return 0; 211 return 0;
212 212
213 /* 213 fat_cache_inval_inode(inode);
214 * Write a new EOF, and get the remaining cluster chain for freeing. 214
215 */
216 wait = IS_DIRSYNC(inode); 215 wait = IS_DIRSYNC(inode);
216 i_start = free_start = MSDOS_I(inode)->i_start;
217 i_logstart = MSDOS_I(inode)->i_logstart;
218
219 /* First, we write the new file size. */
220 if (!skip) {
221 MSDOS_I(inode)->i_start = 0;
222 MSDOS_I(inode)->i_logstart = 0;
223 }
224 MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
225 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
226 if (wait) {
227 err = fat_sync_inode(inode);
228 if (err) {
229 MSDOS_I(inode)->i_start = i_start;
230 MSDOS_I(inode)->i_logstart = i_logstart;
231 return err;
232 }
233 } else
234 mark_inode_dirty(inode);
235
236 /* Write a new EOF, and get the remaining cluster chain for freeing. */
217 if (skip) { 237 if (skip) {
218 struct fat_entry fatent; 238 struct fat_entry fatent;
219 int ret, fclus, dclus; 239 int ret, fclus, dclus;
@@ -244,35 +264,11 @@ static int fat_free(struct inode *inode, int skip)
244 return ret; 264 return ret;
245 265
246 free_start = ret; 266 free_start = ret;
247 i_start = i_logstart = 0;
248 fat_cache_inval_inode(inode);
249 } else {
250 fat_cache_inval_inode(inode);
251
252 i_start = free_start = MSDOS_I(inode)->i_start;
253 i_logstart = MSDOS_I(inode)->i_logstart;
254 MSDOS_I(inode)->i_start = 0;
255 MSDOS_I(inode)->i_logstart = 0;
256 } 267 }
257 MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
258 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
259 if (wait) {
260 err = fat_sync_inode(inode);
261 if (err)
262 goto error;
263 } else
264 mark_inode_dirty(inode);
265 inode->i_blocks = skip << (MSDOS_SB(sb)->cluster_bits - 9); 268 inode->i_blocks = skip << (MSDOS_SB(sb)->cluster_bits - 9);
266 269
267 /* Freeing the remained cluster chain */ 270 /* Freeing the remained cluster chain */
268 return fat_free_clusters(inode, free_start); 271 return fat_free_clusters(inode, free_start);
269
270error:
271 if (i_start) {
272 MSDOS_I(inode)->i_start = i_start;
273 MSDOS_I(inode)->i_logstart = i_logstart;
274 }
275 return err;
276} 272}
277 273
278void fat_truncate(struct inode *inode) 274void fat_truncate(struct inode *inode)
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 32fb0a3f1da4..944652e9dde1 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -196,19 +196,9 @@ EXPORT_SYMBOL_GPL(fat_date_unix2dos);
196 196
197int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) 197int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
198{ 198{
199 int i, e, err = 0; 199 int i, err = 0;
200 200
201 for (i = 0; i < nr_bhs; i++) { 201 ll_rw_block(SWRITE, nr_bhs, bhs);
202 lock_buffer(bhs[i]);
203 if (test_clear_buffer_dirty(bhs[i])) {
204 get_bh(bhs[i]);
205 bhs[i]->b_end_io = end_buffer_write_sync;
206 e = submit_bh(WRITE, bhs[i]);
207 if (!err && e)
208 err = e;
209 } else
210 unlock_buffer(bhs[i]);
211 }
212 for (i = 0; i < nr_bhs; i++) { 202 for (i = 0; i < nr_bhs; i++) {
213 wait_on_buffer(bhs[i]); 203 wait_on_buffer(bhs[i]);
214 if (buffer_eopnotsupp(bhs[i])) { 204 if (buffer_eopnotsupp(bhs[i])) {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 5f96786d1c73..dc4a7007f4e7 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -208,8 +208,11 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
208 struct inode * inode = filp->f_dentry->d_inode; 208 struct inode * inode = filp->f_dentry->d_inode;
209 int error = 0; 209 int error = 0;
210 210
211 /* O_APPEND cannot be cleared if the file is marked as append-only */ 211 /*
212 if (!(arg & O_APPEND) && IS_APPEND(inode)) 212 * O_APPEND cannot be cleared if the file is marked as append-only
213 * and the file is open for write.
214 */
215 if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
213 return -EPERM; 216 return -EPERM;
214 217
215 /* O_NOATIME can only be set by the owner or superuser */ 218 /* O_NOATIME can only be set by the owner or superuser */
diff --git a/fs/file.c b/fs/file.c
index fd066b261c75..cea7cbea11d0 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -379,7 +379,6 @@ static void __devinit fdtable_defer_list_init(int cpu)
379void __init files_defer_init(void) 379void __init files_defer_init(void)
380{ 380{
381 int i; 381 int i;
382 /* Really early - can't use for_each_cpu */ 382 for_each_cpu(i)
383 for (i = 0; i < NR_CPUS; i++)
384 fdtable_defer_list_init(i); 383 fdtable_defer_list_init(i);
385} 384}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 4526da8907c6..f556a0d5c0d3 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -120,9 +120,9 @@ struct fuse_req *fuse_get_request(struct fuse_conn *fc)
120 return do_get_request(fc); 120 return do_get_request(fc);
121} 121}
122 122
123/* Must be called with fuse_lock held */
123static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) 124static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
124{ 125{
125 spin_lock(&fuse_lock);
126 if (req->preallocated) { 126 if (req->preallocated) {
127 atomic_dec(&fc->num_waiting); 127 atomic_dec(&fc->num_waiting);
128 list_add(&req->list, &fc->unused_list); 128 list_add(&req->list, &fc->unused_list);
@@ -134,11 +134,19 @@ static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
134 fc->outstanding_debt--; 134 fc->outstanding_debt--;
135 else 135 else
136 up(&fc->outstanding_sem); 136 up(&fc->outstanding_sem);
137 spin_unlock(&fuse_lock);
138} 137}
139 138
140void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) 139void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
141{ 140{
141 if (atomic_dec_and_test(&req->count)) {
142 spin_lock(&fuse_lock);
143 fuse_putback_request(fc, req);
144 spin_unlock(&fuse_lock);
145 }
146}
147
148static void fuse_put_request_locked(struct fuse_conn *fc, struct fuse_req *req)
149{
142 if (atomic_dec_and_test(&req->count)) 150 if (atomic_dec_and_test(&req->count))
143 fuse_putback_request(fc, req); 151 fuse_putback_request(fc, req);
144} 152}
@@ -163,26 +171,36 @@ void fuse_release_background(struct fuse_req *req)
163 * still waiting), the 'end' callback is called if given, else the 171 * still waiting), the 'end' callback is called if given, else the
164 * reference to the request is released 172 * reference to the request is released
165 * 173 *
174 * Releasing extra reference for foreground requests must be done
175 * within the same locked region as setting state to finished. This
176 * is because fuse_reset_request() may be called after request is
177 * finished and it must be the sole possessor. If request is
178 * interrupted and put in the background, it will return with an error
179 * and hence never be reset and reused.
180 *
166 * Called with fuse_lock, unlocks it 181 * Called with fuse_lock, unlocks it
167 */ 182 */
168static void request_end(struct fuse_conn *fc, struct fuse_req *req) 183static void request_end(struct fuse_conn *fc, struct fuse_req *req)
169{ 184{
170 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
171 req->end = NULL;
172 list_del(&req->list); 185 list_del(&req->list);
173 req->state = FUSE_REQ_FINISHED; 186 req->state = FUSE_REQ_FINISHED;
174 spin_unlock(&fuse_lock); 187 if (!req->background) {
175 if (req->background) { 188 wake_up(&req->waitq);
189 fuse_put_request_locked(fc, req);
190 spin_unlock(&fuse_lock);
191 } else {
192 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
193 req->end = NULL;
194 spin_unlock(&fuse_lock);
176 down_read(&fc->sbput_sem); 195 down_read(&fc->sbput_sem);
177 if (fc->mounted) 196 if (fc->mounted)
178 fuse_release_background(req); 197 fuse_release_background(req);
179 up_read(&fc->sbput_sem); 198 up_read(&fc->sbput_sem);
199 if (end)
200 end(fc, req);
201 else
202 fuse_put_request(fc, req);
180 } 203 }
181 wake_up(&req->waitq);
182 if (end)
183 end(fc, req);
184 else
185 fuse_put_request(fc, req);
186} 204}
187 205
188/* 206/*
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a7ef5e716f3c..296351615b00 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -335,9 +335,14 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
335 loff_t pos = page_offset(req->pages[0]); 335 loff_t pos = page_offset(req->pages[0]);
336 size_t count = req->num_pages << PAGE_CACHE_SHIFT; 336 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
337 req->out.page_zeroing = 1; 337 req->out.page_zeroing = 1;
338 req->end = fuse_readpages_end;
339 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 338 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
340 request_send_background(fc, req); 339 if (fc->async_read) {
340 req->end = fuse_readpages_end;
341 request_send_background(fc, req);
342 } else {
343 request_send(fc, req);
344 fuse_readpages_end(fc, req);
345 }
341} 346}
342 347
343struct fuse_readpages_data { 348struct fuse_readpages_data {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 46cf933aa3bf..4a83adfec968 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,6 +272,9 @@ struct fuse_conn {
272 reply, before any other request, and never cleared */ 272 reply, before any other request, and never cleared */
273 unsigned conn_error : 1; 273 unsigned conn_error : 1;
274 274
275 /** Do readpages asynchronously? Only set in INIT */
276 unsigned async_read : 1;
277
275 /* 278 /*
276 * The following bitfields are only for optimization purposes 279 * The following bitfields are only for optimization purposes
277 * and hence races in setting them will not cause malfunction 280 * and hence races in setting them will not cause malfunction
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index c755a0440a66..879e6fba9480 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -473,6 +473,16 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
473 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION) 473 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
474 fc->conn_error = 1; 474 fc->conn_error = 1;
475 else { 475 else {
476 unsigned long ra_pages;
477
478 if (arg->minor >= 6) {
479 ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
480 if (arg->flags & FUSE_ASYNC_READ)
481 fc->async_read = 1;
482 } else
483 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
484
485 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
476 fc->minor = arg->minor; 486 fc->minor = arg->minor;
477 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 487 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
478 } 488 }
@@ -496,6 +506,8 @@ static void fuse_send_init(struct fuse_conn *fc)
496 506
497 arg->major = FUSE_KERNEL_VERSION; 507 arg->major = FUSE_KERNEL_VERSION;
498 arg->minor = FUSE_KERNEL_MINOR_VERSION; 508 arg->minor = FUSE_KERNEL_MINOR_VERSION;
509 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
510 arg->flags |= FUSE_ASYNC_READ;
499 req->in.h.opcode = FUSE_INIT; 511 req->in.h.opcode = FUSE_INIT;
500 req->in.numargs = 1; 512 req->in.numargs = 1;
501 req->in.args[0].size = sizeof(*arg); 513 req->in.args[0].size = sizeof(*arg);
@@ -552,8 +564,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
552 fc->user_id = d.user_id; 564 fc->user_id = d.user_id;
553 fc->group_id = d.group_id; 565 fc->group_id = d.group_id;
554 fc->max_read = d.max_read; 566 fc->max_read = d.max_read;
555 if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
556 fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
557 567
558 /* Used by get_root_inode() */ 568 /* Used by get_root_inode() */
559 sb->s_fs_info = fc; 569 sb->s_fs_info = fc;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index f568102da1e8..b35195289945 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -72,8 +72,8 @@ huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma)
72 unsigned long start = vma->vm_start; 72 unsigned long start = vma->vm_start;
73 unsigned long end = vma->vm_end; 73 unsigned long end = vma->vm_end;
74 unsigned long hugepages = (end - start) >> HPAGE_SHIFT; 74 unsigned long hugepages = (end - start) >> HPAGE_SHIFT;
75 pgoff_t next = vma->vm_pgoff; 75 pgoff_t next = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
76 pgoff_t endpg = next + ((end - start) >> PAGE_SHIFT); 76 pgoff_t endpg = next + hugepages;
77 77
78 pagevec_init(&pvec, 0); 78 pagevec_init(&pvec, 0);
79 while (next < endpg) { 79 while (next < endpg) {
diff --git a/fs/inode.c b/fs/inode.c
index 108138d4e909..d0be6159eb7f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1179,7 +1179,7 @@ EXPORT_SYMBOL(bmap);
1179/** 1179/**
1180 * touch_atime - update the access time 1180 * touch_atime - update the access time
1181 * @mnt: mount the inode is accessed on 1181 * @mnt: mount the inode is accessed on
1182 * @inode: inode accessed 1182 * @dentry: dentry accessed
1183 * 1183 *
1184 * Update the accessed time on an inode and mark it for writeback. 1184 * Update the accessed time on an inode and mark it for writeback.
1185 * This function automatically handles read only file systems and media, 1185 * This function automatically handles read only file systems and media,
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 429f4b263cf1..ca917973c2c0 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1308,6 +1308,7 @@ int journal_stop(handle_t *handle)
1308 transaction_t *transaction = handle->h_transaction; 1308 transaction_t *transaction = handle->h_transaction;
1309 journal_t *journal = transaction->t_journal; 1309 journal_t *journal = transaction->t_journal;
1310 int old_handle_count, err; 1310 int old_handle_count, err;
1311 pid_t pid;
1311 1312
1312 J_ASSERT(transaction->t_updates > 0); 1313 J_ASSERT(transaction->t_updates > 0);
1313 J_ASSERT(journal_current_handle() == handle); 1314 J_ASSERT(journal_current_handle() == handle);
@@ -1333,8 +1334,15 @@ int journal_stop(handle_t *handle)
1333 * It doesn't cost much - we're about to run a commit and sleep 1334 * It doesn't cost much - we're about to run a commit and sleep
1334 * on IO anyway. Speeds up many-threaded, many-dir operations 1335 * on IO anyway. Speeds up many-threaded, many-dir operations
1335 * by 30x or more... 1336 * by 30x or more...
1337 *
1338 * But don't do this if this process was the most recent one to
1339 * perform a synchronous write. We do this to detect the case where a
1340 * single process is doing a stream of sync writes. No point in waiting
1341 * for joiners in that case.
1336 */ 1342 */
1337 if (handle->h_sync) { 1343 pid = current->pid;
1344 if (handle->h_sync && journal->j_last_sync_writer != pid) {
1345 journal->j_last_sync_writer = pid;
1338 do { 1346 do {
1339 old_handle_count = transaction->t_handle_count; 1347 old_handle_count = transaction->t_handle_count;
1340 schedule_timeout_uninterruptible(1); 1348 schedule_timeout_uninterruptible(1);
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index b2e95421d932..ce7b54b0b2b7 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -1965,7 +1965,7 @@ retry:
1965 iovec_cnt++; 1965 iovec_cnt++;
1966 1966
1967 if (JFFS_GET_PAD_BYTES(raw_inode->nsize)) { 1967 if (JFFS_GET_PAD_BYTES(raw_inode->nsize)) {
1968 static char allff[3]={255,255,255}; 1968 static unsigned char allff[3]={255,255,255};
1969 /* Add some extra padding if necessary */ 1969 /* Add some extra padding if necessary */
1970 node_iovec[iovec_cnt].iov_base = allff; 1970 node_iovec[iovec_cnt].iov_base = allff;
1971 node_iovec[iovec_cnt].iov_len = 1971 node_iovec[iovec_cnt].iov_len =
diff --git a/fs/libfs.c b/fs/libfs.c
index 63c020e6589e..71fd08fa4103 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -388,6 +388,7 @@ int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files
388 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 388 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
389 inode->i_op = &simple_dir_inode_operations; 389 inode->i_op = &simple_dir_inode_operations;
390 inode->i_fop = &simple_dir_operations; 390 inode->i_fop = &simple_dir_operations;
391 inode->i_nlink = 2;
391 root = d_alloc_root(inode); 392 root = d_alloc_root(inode);
392 if (!root) { 393 if (!root) {
393 iput(inode); 394 iput(inode);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 145524039577..220058d8616d 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -22,12 +22,14 @@
22#define NLMDBG_FACILITY NLMDBG_CLIENT 22#define NLMDBG_FACILITY NLMDBG_CLIENT
23#define NLMCLNT_GRACE_WAIT (5*HZ) 23#define NLMCLNT_GRACE_WAIT (5*HZ)
24#define NLMCLNT_POLL_TIMEOUT (30*HZ) 24#define NLMCLNT_POLL_TIMEOUT (30*HZ)
25#define NLMCLNT_MAX_RETRIES 3
25 26
26static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); 27static int nlmclnt_test(struct nlm_rqst *, struct file_lock *);
27static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); 28static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
28static int nlmclnt_unlock(struct nlm_rqst *, struct file_lock *); 29static int nlmclnt_unlock(struct nlm_rqst *, struct file_lock *);
29static int nlm_stat_to_errno(u32 stat); 30static int nlm_stat_to_errno(u32 stat);
30static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host); 31static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host);
32static int nlmclnt_cancel(struct nlm_host *, int , struct file_lock *);
31 33
32static const struct rpc_call_ops nlmclnt_unlock_ops; 34static const struct rpc_call_ops nlmclnt_unlock_ops;
33static const struct rpc_call_ops nlmclnt_cancel_ops; 35static const struct rpc_call_ops nlmclnt_cancel_ops;
@@ -598,7 +600,7 @@ out_unblock:
598 nlmclnt_finish_block(req); 600 nlmclnt_finish_block(req);
599 /* Cancel the blocked request if it is still pending */ 601 /* Cancel the blocked request if it is still pending */
600 if (resp->status == NLM_LCK_BLOCKED) 602 if (resp->status == NLM_LCK_BLOCKED)
601 nlmclnt_cancel(host, fl); 603 nlmclnt_cancel(host, req->a_args.block, fl);
602out: 604out:
603 nlmclnt_release_lockargs(req); 605 nlmclnt_release_lockargs(req);
604 return status; 606 return status;
@@ -728,8 +730,7 @@ static const struct rpc_call_ops nlmclnt_unlock_ops = {
728 * We always use an async RPC call for this in order not to hang a 730 * We always use an async RPC call for this in order not to hang a
729 * process that has been Ctrl-C'ed. 731 * process that has been Ctrl-C'ed.
730 */ 732 */
731int 733static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl)
732nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl)
733{ 734{
734 struct nlm_rqst *req; 735 struct nlm_rqst *req;
735 unsigned long flags; 736 unsigned long flags;
@@ -750,6 +751,7 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl)
750 req->a_flags = RPC_TASK_ASYNC; 751 req->a_flags = RPC_TASK_ASYNC;
751 752
752 nlmclnt_setlockargs(req, fl); 753 nlmclnt_setlockargs(req, fl);
754 req->a_args.block = block;
753 755
754 status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops); 756 status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
755 if (status < 0) { 757 if (status < 0) {
@@ -801,6 +803,9 @@ die:
801 return; 803 return;
802 804
803retry_cancel: 805retry_cancel:
806 /* Don't ever retry more than 3 times */
807 if (req->a_retries++ >= NLMCLNT_MAX_RETRIES)
808 goto die;
804 nlm_rebind_host(req->a_host); 809 nlm_rebind_host(req->a_host);
805 rpc_restart_call(task); 810 rpc_restart_call(task);
806 rpc_delay(task, 30 * HZ); 811 rpc_delay(task, 30 * HZ);
diff --git a/fs/namei.c b/fs/namei.c
index 4acdac043b6b..faf61c35308c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -790,7 +790,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
790 790
791 inode = nd->dentry->d_inode; 791 inode = nd->dentry->d_inode;
792 if (nd->depth) 792 if (nd->depth)
793 lookup_flags = LOOKUP_FOLLOW; 793 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
794 794
795 /* At this point we know we have a real path component. */ 795 /* At this point we know we have a real path component. */
796 for(;;) { 796 for(;;) {
@@ -885,7 +885,8 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
885last_with_slashes: 885last_with_slashes:
886 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 886 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
887last_component: 887last_component:
888 nd->flags &= ~LOOKUP_CONTINUE; 888 /* Clear LOOKUP_CONTINUE iff it was previously unset */
889 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
889 if (lookup_flags & LOOKUP_PARENT) 890 if (lookup_flags & LOOKUP_PARENT)
890 goto lookup_parent; 891 goto lookup_parent;
891 if (this.name[0] == '.') switch (this.len) { 892 if (this.name[0] == '.') switch (this.len) {
@@ -1069,6 +1070,8 @@ static int fastcall do_path_lookup(int dfd, const char *name,
1069 unsigned int flags, struct nameidata *nd) 1070 unsigned int flags, struct nameidata *nd)
1070{ 1071{
1071 int retval = 0; 1072 int retval = 0;
1073 int fput_needed;
1074 struct file *file;
1072 1075
1073 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1076 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1074 nd->flags = flags; 1077 nd->flags = flags;
@@ -1090,29 +1093,22 @@ static int fastcall do_path_lookup(int dfd, const char *name,
1090 nd->mnt = mntget(current->fs->pwdmnt); 1093 nd->mnt = mntget(current->fs->pwdmnt);
1091 nd->dentry = dget(current->fs->pwd); 1094 nd->dentry = dget(current->fs->pwd);
1092 } else { 1095 } else {
1093 struct file *file;
1094 int fput_needed;
1095 struct dentry *dentry; 1096 struct dentry *dentry;
1096 1097
1097 file = fget_light(dfd, &fput_needed); 1098 file = fget_light(dfd, &fput_needed);
1098 if (!file) { 1099 retval = -EBADF;
1099 retval = -EBADF; 1100 if (!file)
1100 goto out_fail; 1101 goto unlock_fail;
1101 }
1102 1102
1103 dentry = file->f_dentry; 1103 dentry = file->f_dentry;
1104 1104
1105 if (!S_ISDIR(dentry->d_inode->i_mode)) { 1105 retval = -ENOTDIR;
1106 retval = -ENOTDIR; 1106 if (!S_ISDIR(dentry->d_inode->i_mode))
1107 fput_light(file, fput_needed); 1107 goto fput_unlock_fail;
1108 goto out_fail;
1109 }
1110 1108
1111 retval = file_permission(file, MAY_EXEC); 1109 retval = file_permission(file, MAY_EXEC);
1112 if (retval) { 1110 if (retval)
1113 fput_light(file, fput_needed); 1111 goto fput_unlock_fail;
1114 goto out_fail;
1115 }
1116 1112
1117 nd->mnt = mntget(file->f_vfsmnt); 1113 nd->mnt = mntget(file->f_vfsmnt);
1118 nd->dentry = dget(dentry); 1114 nd->dentry = dget(dentry);
@@ -1126,7 +1122,12 @@ out:
1126 if (unlikely(current->audit_context 1122 if (unlikely(current->audit_context
1127 && nd && nd->dentry && nd->dentry->d_inode)) 1123 && nd && nd->dentry && nd->dentry->d_inode))
1128 audit_inode(name, nd->dentry->d_inode, flags); 1124 audit_inode(name, nd->dentry->d_inode, flags);
1129out_fail: 1125 return retval;
1126
1127fput_unlock_fail:
1128 fput_light(file, fput_needed);
1129unlock_fail:
1130 read_unlock(&current->fs->lock);
1130 return retval; 1131 return retval;
1131} 1132}
1132 1133
@@ -1161,6 +1162,7 @@ static int __path_lookup_intent_open(int dfd, const char *name,
1161 1162
1162/** 1163/**
1163 * path_lookup_open - lookup a file path with open intent 1164 * path_lookup_open - lookup a file path with open intent
1165 * @dfd: the directory to use as base, or AT_FDCWD
1164 * @name: pointer to file name 1166 * @name: pointer to file name
1165 * @lookup_flags: lookup intent flags 1167 * @lookup_flags: lookup intent flags
1166 * @nd: pointer to nameidata 1168 * @nd: pointer to nameidata
@@ -1175,6 +1177,7 @@ int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
1175 1177
1176/** 1178/**
1177 * path_lookup_create - lookup a file path with open + create intent 1179 * path_lookup_create - lookup a file path with open + create intent
1180 * @dfd: the directory to use as base, or AT_FDCWD
1178 * @name: pointer to file name 1181 * @name: pointer to file name
1179 * @lookup_flags: lookup intent flags 1182 * @lookup_flags: lookup intent flags
1180 * @nd: pointer to nameidata 1183 * @nd: pointer to nameidata
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 10ae377e68ff..04ab2fc360e7 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -481,7 +481,7 @@ retry:
481 if (wdata->verf.committed != NFS_FILE_SYNC) { 481 if (wdata->verf.committed != NFS_FILE_SYNC) {
482 need_commit = 1; 482 need_commit = 1;
483 if (memcmp(&first_verf.verifier, &wdata->verf.verifier, 483 if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
484 sizeof(first_verf.verifier))); 484 sizeof(first_verf.verifier)))
485 goto sync_retry; 485 goto sync_retry;
486 } 486 }
487 487
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 89ed04696865..1d163b616915 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -64,6 +64,32 @@ struct nfsd_list {
64}; 64};
65static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list); 65static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list);
66 66
67#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
68static struct svc_stat nfsd_acl_svcstats;
69static struct svc_version * nfsd_acl_version[] = {
70 [2] = &nfsd_acl_version2,
71 [3] = &nfsd_acl_version3,
72};
73
74#define NFSD_ACL_MINVERS 2
75#define NFSD_ACL_NRVERS (sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0]))
76static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
77
78static struct svc_program nfsd_acl_program = {
79 .pg_prog = NFS_ACL_PROGRAM,
80 .pg_nvers = NFSD_ACL_NRVERS,
81 .pg_vers = nfsd_acl_versions,
82 .pg_name = "nfsd",
83 .pg_class = "nfsd",
84 .pg_stats = &nfsd_acl_svcstats,
85 .pg_authenticate = &svc_set_client,
86};
87
88static struct svc_stat nfsd_acl_svcstats = {
89 .program = &nfsd_acl_program,
90};
91#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
92
67static struct svc_version * nfsd_version[] = { 93static struct svc_version * nfsd_version[] = {
68 [2] = &nfsd_version2, 94 [2] = &nfsd_version2,
69#if defined(CONFIG_NFSD_V3) 95#if defined(CONFIG_NFSD_V3)
@@ -79,6 +105,9 @@ static struct svc_version * nfsd_version[] = {
79static struct svc_version *nfsd_versions[NFSD_NRVERS]; 105static struct svc_version *nfsd_versions[NFSD_NRVERS];
80 106
81struct svc_program nfsd_program = { 107struct svc_program nfsd_program = {
108#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
109 .pg_next = &nfsd_acl_program,
110#endif
82 .pg_prog = NFS_PROGRAM, /* program number */ 111 .pg_prog = NFS_PROGRAM, /* program number */
83 .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ 112 .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */
84 .pg_vers = nfsd_versions, /* version table */ 113 .pg_vers = nfsd_versions, /* version table */
@@ -147,6 +176,26 @@ nfsd_svc(unsigned short port, int nrservs)
147 nfsd_program.pg_vers[i] = nfsd_version[i]; 176 nfsd_program.pg_vers[i] = nfsd_version[i];
148 } 177 }
149 178
179
180#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
181 found_one = 0;
182
183 for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) {
184 if (NFSCTL_VERISSET(nfsd_versbits, i)) {
185 nfsd_acl_program.pg_vers[i] =
186 nfsd_acl_version[i];
187 found_one = 1;
188 } else
189 nfsd_acl_program.pg_vers[i] = NULL;
190 }
191
192 if (!found_one) {
193 for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
194 nfsd_acl_program.pg_vers[i] =
195 nfsd_acl_version[i];
196 }
197#endif
198
150 atomic_set(&nfsd_busy, 0); 199 atomic_set(&nfsd_busy, 0);
151 error = -ENOMEM; 200 error = -ENOMEM;
152 nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE); 201 nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE);
@@ -411,30 +460,3 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
411 nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); 460 nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
412 return 1; 461 return 1;
413} 462}
414
415#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
416static struct svc_stat nfsd_acl_svcstats;
417static struct svc_version * nfsd_acl_version[] = {
418 [2] = &nfsd_acl_version2,
419 [3] = &nfsd_acl_version3,
420};
421
422#define NFSD_ACL_NRVERS (sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0]))
423static struct svc_program nfsd_acl_program = {
424 .pg_prog = NFS_ACL_PROGRAM,
425 .pg_nvers = NFSD_ACL_NRVERS,
426 .pg_vers = nfsd_acl_version,
427 .pg_name = "nfsd",
428 .pg_class = "nfsd",
429 .pg_stats = &nfsd_acl_svcstats,
430 .pg_authenticate = &svc_set_client,
431};
432
433static struct svc_stat nfsd_acl_svcstats = {
434 .program = &nfsd_acl_program,
435};
436
437#define nfsd_acl_program_p &nfsd_acl_program
438#else
439#define nfsd_acl_program_p NULL
440#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index d424041b38e9..bae3d7548bea 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -58,7 +58,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
58 goto out; 58 goto out;
59 } 59 }
60 60
61 down(&OCFS2_I(inode)->ip_io_sem); 61 mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
62 62
63 lock_buffer(bh); 63 lock_buffer(bh);
64 set_buffer_uptodate(bh); 64 set_buffer_uptodate(bh);
@@ -82,7 +82,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
82 brelse(bh); 82 brelse(bh);
83 } 83 }
84 84
85 up(&OCFS2_I(inode)->ip_io_sem); 85 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
86out: 86out:
87 mlog_exit(ret); 87 mlog_exit(ret);
88 return ret; 88 return ret;
@@ -125,13 +125,13 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
125 flags &= ~OCFS2_BH_CACHED; 125 flags &= ~OCFS2_BH_CACHED;
126 126
127 if (inode) 127 if (inode)
128 down(&OCFS2_I(inode)->ip_io_sem); 128 mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
129 for (i = 0 ; i < nr ; i++) { 129 for (i = 0 ; i < nr ; i++) {
130 if (bhs[i] == NULL) { 130 if (bhs[i] == NULL) {
131 bhs[i] = sb_getblk(sb, block++); 131 bhs[i] = sb_getblk(sb, block++);
132 if (bhs[i] == NULL) { 132 if (bhs[i] == NULL) {
133 if (inode) 133 if (inode)
134 up(&OCFS2_I(inode)->ip_io_sem); 134 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
135 status = -EIO; 135 status = -EIO;
136 mlog_errno(status); 136 mlog_errno(status);
137 goto bail; 137 goto bail;
@@ -220,7 +220,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
220 ocfs2_set_buffer_uptodate(inode, bh); 220 ocfs2_set_buffer_uptodate(inode, bh);
221 } 221 }
222 if (inode) 222 if (inode)
223 up(&OCFS2_I(inode)->ip_io_sem); 223 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
224 224
225 mlog(ML_BH_IO, "block=(%"MLFu64"), nr=(%d), cached=%s\n", block, nr, 225 mlog(ML_BH_IO, "block=(%"MLFu64"), nr=(%d), cached=%s\n", block, nr,
226 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes"); 226 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 7307ba528913..d08971d29b63 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -917,8 +917,9 @@ static int o2hb_thread(void *data)
917 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 917 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
918 918
919 mlog(0, "start = %lu.%lu, end = %lu.%lu, msec = %u\n", 919 mlog(0, "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
920 before_hb.tv_sec, before_hb.tv_usec, 920 before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
921 after_hb.tv_sec, after_hb.tv_usec, elapsed_msec); 921 after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
922 elapsed_msec);
922 923
923 if (elapsed_msec < reg->hr_timeout_ms) { 924 if (elapsed_msec < reg->hr_timeout_ms) {
924 /* the kthread api has blocked signals for us so no 925 /* the kthread api has blocked signals for us so no
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 35d92c01a972..d22d4cf08db1 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1285,14 +1285,16 @@ static void o2net_idle_timer(unsigned long data)
1285 mlog(ML_NOTICE, "here are some times that might help debug the " 1285 mlog(ML_NOTICE, "here are some times that might help debug the "
1286 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " 1286 "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
1287 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", 1287 "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
1288 sc->sc_tv_timer.tv_sec, sc->sc_tv_timer.tv_usec, 1288 sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
1289 now.tv_sec, now.tv_usec, 1289 now.tv_sec, (long) now.tv_usec,
1290 sc->sc_tv_data_ready.tv_sec, sc->sc_tv_data_ready.tv_usec, 1290 sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
1291 sc->sc_tv_advance_start.tv_sec, sc->sc_tv_advance_start.tv_usec, 1291 sc->sc_tv_advance_start.tv_sec,
1292 sc->sc_tv_advance_stop.tv_sec, sc->sc_tv_advance_stop.tv_usec, 1292 (long) sc->sc_tv_advance_start.tv_usec,
1293 sc->sc_tv_advance_stop.tv_sec,
1294 (long) sc->sc_tv_advance_stop.tv_usec,
1293 sc->sc_msg_key, sc->sc_msg_type, 1295 sc->sc_msg_key, sc->sc_msg_type,
1294 sc->sc_tv_func_start.tv_sec, sc->sc_tv_func_start.tv_usec, 1296 sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec,
1295 sc->sc_tv_func_stop.tv_sec, sc->sc_tv_func_stop.tv_usec); 1297 sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec);
1296 1298
1297 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 1299 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
1298} 1300}
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 3fecba0a6023..42eb53b5293b 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -657,6 +657,7 @@ void dlm_complete_thread(struct dlm_ctxt *dlm);
657int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); 657int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
658void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); 658void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
659void dlm_wait_for_recovery(struct dlm_ctxt *dlm); 659void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
660int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
660 661
661void dlm_put(struct dlm_ctxt *dlm); 662void dlm_put(struct dlm_ctxt *dlm);
662struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); 663struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index da3c22045f89..6ee30837389c 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -573,8 +573,11 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
573 spin_lock(&dlm_domain_lock); 573 spin_lock(&dlm_domain_lock);
574 dlm = __dlm_lookup_domain_full(query->domain, query->name_len); 574 dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
575 /* Once the dlm ctxt is marked as leaving then we don't want 575 /* Once the dlm ctxt is marked as leaving then we don't want
576 * to be put in someone's domain map. */ 576 * to be put in someone's domain map.
577 * Also, explicitly disallow joining at certain troublesome
578 * times (ie. during recovery). */
577 if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { 579 if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
580 int bit = query->node_idx;
578 spin_lock(&dlm->spinlock); 581 spin_lock(&dlm->spinlock);
579 582
580 if (dlm->dlm_state == DLM_CTXT_NEW && 583 if (dlm->dlm_state == DLM_CTXT_NEW &&
@@ -586,6 +589,19 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
586 } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) { 589 } else if (dlm->joining_node != DLM_LOCK_RES_OWNER_UNKNOWN) {
587 /* Disallow parallel joins. */ 590 /* Disallow parallel joins. */
588 response = JOIN_DISALLOW; 591 response = JOIN_DISALLOW;
592 } else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
593 mlog(ML_NOTICE, "node %u trying to join, but recovery "
594 "is ongoing.\n", bit);
595 response = JOIN_DISALLOW;
596 } else if (test_bit(bit, dlm->recovery_map)) {
597 mlog(ML_NOTICE, "node %u trying to join, but it "
598 "still needs recovery.\n", bit);
599 response = JOIN_DISALLOW;
600 } else if (test_bit(bit, dlm->domain_map)) {
601 mlog(ML_NOTICE, "node %u trying to join, but it "
602 "is still in the domain! needs recovery?\n",
603 bit);
604 response = JOIN_DISALLOW;
589 } else { 605 } else {
590 /* Alright we're fully a part of this domain 606 /* Alright we're fully a part of this domain
591 * so we keep some state as to who's joining 607 * so we keep some state as to who's joining
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 27e984f7e4cd..a3194fe173d9 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1050,17 +1050,10 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
1050 node = dlm_bitmap_diff_iter_next(&bdi, &sc); 1050 node = dlm_bitmap_diff_iter_next(&bdi, &sc);
1051 while (node >= 0) { 1051 while (node >= 0) {
1052 if (sc == NODE_UP) { 1052 if (sc == NODE_UP) {
1053 /* a node came up. easy. might not even need 1053 /* a node came up. clear any old vote from
1054 * to talk to it if its node number is higher 1054 * the response map and set it in the vote map
1055 * or if we are already blocked. */ 1055 * then restart the mastery. */
1056 mlog(0, "node up! %d\n", node); 1056 mlog(ML_NOTICE, "node %d up while restarting\n", node);
1057 if (blocked)
1058 goto next;
1059
1060 if (node > dlm->node_num) {
1061 mlog(0, "node > this node. skipping.\n");
1062 goto next;
1063 }
1064 1057
1065 /* redo the master request, but only for the new node */ 1058 /* redo the master request, but only for the new node */
1066 mlog(0, "sending request to new node\n"); 1059 mlog(0, "sending request to new node\n");
@@ -2005,6 +1998,15 @@ fail:
2005 break; 1998 break;
2006 1999
2007 mlog(0, "timed out during migration\n"); 2000 mlog(0, "timed out during migration\n");
2001 /* avoid hang during shutdown when migrating lockres
2002 * to a node which also goes down */
2003 if (dlm_is_node_dead(dlm, target)) {
2004 mlog(0, "%s:%.*s: expected migration target %u "
2005 "is no longer up. restarting.\n",
2006 dlm->name, res->lockname.len,
2007 res->lockname.name, target);
2008 ret = -ERESTARTSYS;
2009 }
2008 } 2010 }
2009 if (ret == -ERESTARTSYS) { 2011 if (ret == -ERESTARTSYS) {
2010 /* migration failed, detach and clean up mle */ 2012 /* migration failed, detach and clean up mle */
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 0c8eb1093f00..186e9a76aa58 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -39,6 +39,7 @@
39#include <linux/inet.h> 39#include <linux/inet.h>
40#include <linux/timer.h> 40#include <linux/timer.h>
41#include <linux/kthread.h> 41#include <linux/kthread.h>
42#include <linux/delay.h>
42 43
43 44
44#include "cluster/heartbeat.h" 45#include "cluster/heartbeat.h"
@@ -256,6 +257,27 @@ static int dlm_recovery_thread(void *data)
256 return 0; 257 return 0;
257} 258}
258 259
260/* returns true when the recovery master has contacted us */
261static int dlm_reco_master_ready(struct dlm_ctxt *dlm)
262{
263 int ready;
264 spin_lock(&dlm->spinlock);
265 ready = (dlm->reco.new_master != O2NM_INVALID_NODE_NUM);
266 spin_unlock(&dlm->spinlock);
267 return ready;
268}
269
270/* returns true if node is no longer in the domain
271 * could be dead or just not joined */
272int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
273{
274 int dead;
275 spin_lock(&dlm->spinlock);
276 dead = test_bit(node, dlm->domain_map);
277 spin_unlock(&dlm->spinlock);
278 return dead;
279}
280
259/* callers of the top-level api calls (dlmlock/dlmunlock) should 281/* callers of the top-level api calls (dlmlock/dlmunlock) should
260 * block on the dlm->reco.event when recovery is in progress. 282 * block on the dlm->reco.event when recovery is in progress.
261 * the dlm recovery thread will set this state when it begins 283 * the dlm recovery thread will set this state when it begins
@@ -297,6 +319,7 @@ static void dlm_end_recovery(struct dlm_ctxt *dlm)
297static int dlm_do_recovery(struct dlm_ctxt *dlm) 319static int dlm_do_recovery(struct dlm_ctxt *dlm)
298{ 320{
299 int status = 0; 321 int status = 0;
322 int ret;
300 323
301 spin_lock(&dlm->spinlock); 324 spin_lock(&dlm->spinlock);
302 325
@@ -343,10 +366,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
343 goto master_here; 366 goto master_here;
344 367
345 if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) { 368 if (dlm->reco.new_master == O2NM_INVALID_NODE_NUM) {
346 /* choose a new master */ 369 /* choose a new master, returns 0 if this node
347 if (!dlm_pick_recovery_master(dlm)) { 370 * is the master, -EEXIST if it's another node.
371 * this does not return until a new master is chosen
372 * or recovery completes entirely. */
373 ret = dlm_pick_recovery_master(dlm);
374 if (!ret) {
348 /* already notified everyone. go. */ 375 /* already notified everyone. go. */
349 dlm->reco.new_master = dlm->node_num;
350 goto master_here; 376 goto master_here;
351 } 377 }
352 mlog(0, "another node will master this recovery session.\n"); 378 mlog(0, "another node will master this recovery session.\n");
@@ -371,8 +397,13 @@ master_here:
371 if (status < 0) { 397 if (status < 0) {
372 mlog(ML_ERROR, "error %d remastering locks for node %u, " 398 mlog(ML_ERROR, "error %d remastering locks for node %u, "
373 "retrying.\n", status, dlm->reco.dead_node); 399 "retrying.\n", status, dlm->reco.dead_node);
400 /* yield a bit to allow any final network messages
401 * to get handled on remaining nodes */
402 msleep(100);
374 } else { 403 } else {
375 /* success! see if any other nodes need recovery */ 404 /* success! see if any other nodes need recovery */
405 mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
406 dlm->name, dlm->reco.dead_node, dlm->node_num);
376 dlm_reset_recovery(dlm); 407 dlm_reset_recovery(dlm);
377 } 408 }
378 dlm_end_recovery(dlm); 409 dlm_end_recovery(dlm);
@@ -477,7 +508,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
477 BUG(); 508 BUG();
478 break; 509 break;
479 case DLM_RECO_NODE_DATA_DEAD: 510 case DLM_RECO_NODE_DATA_DEAD:
480 mlog(0, "node %u died after " 511 mlog(ML_NOTICE, "node %u died after "
481 "requesting recovery info for " 512 "requesting recovery info for "
482 "node %u\n", ndata->node_num, 513 "node %u\n", ndata->node_num,
483 dead_node); 514 dead_node);
@@ -485,6 +516,19 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
485 // start all over 516 // start all over
486 destroy = 1; 517 destroy = 1;
487 status = -EAGAIN; 518 status = -EAGAIN;
519 /* instead of spinning like crazy here,
520 * wait for the domain map to catch up
521 * with the network state. otherwise this
522 * can be hit hundreds of times before
523 * the node is really seen as dead. */
524 wait_event_timeout(dlm->dlm_reco_thread_wq,
525 dlm_is_node_dead(dlm,
526 ndata->node_num),
527 msecs_to_jiffies(1000));
528 mlog(0, "waited 1 sec for %u, "
529 "dead? %s\n", ndata->node_num,
530 dlm_is_node_dead(dlm, ndata->node_num) ?
531 "yes" : "no");
488 goto leave; 532 goto leave;
489 case DLM_RECO_NODE_DATA_RECEIVING: 533 case DLM_RECO_NODE_DATA_RECEIVING:
490 case DLM_RECO_NODE_DATA_REQUESTED: 534 case DLM_RECO_NODE_DATA_REQUESTED:
@@ -678,11 +722,27 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
678 dlm = item->dlm; 722 dlm = item->dlm;
679 dead_node = item->u.ral.dead_node; 723 dead_node = item->u.ral.dead_node;
680 reco_master = item->u.ral.reco_master; 724 reco_master = item->u.ral.reco_master;
725 mres = (struct dlm_migratable_lockres *)data;
726
727 if (dead_node != dlm->reco.dead_node ||
728 reco_master != dlm->reco.new_master) {
729 /* show extra debug info if the recovery state is messed */
730 mlog(ML_ERROR, "%s: bad reco state: reco(dead=%u, master=%u), "
731 "request(dead=%u, master=%u)\n",
732 dlm->name, dlm->reco.dead_node, dlm->reco.new_master,
733 dead_node, reco_master);
734 mlog(ML_ERROR, "%s: name=%.*s master=%u locks=%u/%u flags=%u "
735 "entry[0]={c=%"MLFu64",l=%u,f=%u,t=%d,ct=%d,hb=%d,n=%u}\n",
736 dlm->name, mres->lockname_len, mres->lockname, mres->master,
737 mres->num_locks, mres->total_locks, mres->flags,
738 mres->ml[0].cookie, mres->ml[0].list, mres->ml[0].flags,
739 mres->ml[0].type, mres->ml[0].convert_type,
740 mres->ml[0].highest_blocked, mres->ml[0].node);
741 BUG();
742 }
681 BUG_ON(dead_node != dlm->reco.dead_node); 743 BUG_ON(dead_node != dlm->reco.dead_node);
682 BUG_ON(reco_master != dlm->reco.new_master); 744 BUG_ON(reco_master != dlm->reco.new_master);
683 745
684 mres = (struct dlm_migratable_lockres *)data;
685
686 /* lock resources should have already been moved to the 746 /* lock resources should have already been moved to the
687 * dlm->reco.resources list. now move items from that list 747 * dlm->reco.resources list. now move items from that list
688 * to a temp list if the dead owner matches. note that the 748 * to a temp list if the dead owner matches. note that the
@@ -757,15 +817,18 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data)
757 continue; 817 continue;
758 818
759 switch (ndata->state) { 819 switch (ndata->state) {
820 /* should have moved beyond INIT but not to FINALIZE yet */
760 case DLM_RECO_NODE_DATA_INIT: 821 case DLM_RECO_NODE_DATA_INIT:
761 case DLM_RECO_NODE_DATA_DEAD: 822 case DLM_RECO_NODE_DATA_DEAD:
762 case DLM_RECO_NODE_DATA_DONE:
763 case DLM_RECO_NODE_DATA_FINALIZE_SENT: 823 case DLM_RECO_NODE_DATA_FINALIZE_SENT:
764 mlog(ML_ERROR, "bad ndata state for node %u:" 824 mlog(ML_ERROR, "bad ndata state for node %u:"
765 " state=%d\n", ndata->node_num, 825 " state=%d\n", ndata->node_num,
766 ndata->state); 826 ndata->state);
767 BUG(); 827 BUG();
768 break; 828 break;
829 /* these states are possible at this point, anywhere along
830 * the line of recovery */
831 case DLM_RECO_NODE_DATA_DONE:
769 case DLM_RECO_NODE_DATA_RECEIVING: 832 case DLM_RECO_NODE_DATA_RECEIVING:
770 case DLM_RECO_NODE_DATA_REQUESTED: 833 case DLM_RECO_NODE_DATA_REQUESTED:
771 case DLM_RECO_NODE_DATA_REQUESTING: 834 case DLM_RECO_NODE_DATA_REQUESTING:
@@ -799,13 +862,31 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
799{ 862{
800 struct dlm_lock_resource *res; 863 struct dlm_lock_resource *res;
801 struct list_head *iter, *iter2; 864 struct list_head *iter, *iter2;
865 struct dlm_lock *lock;
802 866
803 spin_lock(&dlm->spinlock); 867 spin_lock(&dlm->spinlock);
804 list_for_each_safe(iter, iter2, &dlm->reco.resources) { 868 list_for_each_safe(iter, iter2, &dlm->reco.resources) {
805 res = list_entry (iter, struct dlm_lock_resource, recovering); 869 res = list_entry (iter, struct dlm_lock_resource, recovering);
870 /* always prune any $RECOVERY entries for dead nodes,
871 * otherwise hangs can occur during later recovery */
806 if (dlm_is_recovery_lock(res->lockname.name, 872 if (dlm_is_recovery_lock(res->lockname.name,
807 res->lockname.len)) 873 res->lockname.len)) {
874 spin_lock(&res->spinlock);
875 list_for_each_entry(lock, &res->granted, list) {
876 if (lock->ml.node == dead_node) {
877 mlog(0, "AHA! there was "
878 "a $RECOVERY lock for dead "
879 "node %u (%s)!\n",
880 dead_node, dlm->name);
881 list_del_init(&lock->list);
882 dlm_lock_put(lock);
883 break;
884 }
885 }
886 spin_unlock(&res->spinlock);
808 continue; 887 continue;
888 }
889
809 if (res->owner == dead_node) { 890 if (res->owner == dead_node) {
810 mlog(0, "found lockres owned by dead node while " 891 mlog(0, "found lockres owned by dead node while "
811 "doing recovery for node %u. sending it.\n", 892 "doing recovery for node %u. sending it.\n",
@@ -1179,7 +1260,7 @@ static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data)
1179again: 1260again:
1180 ret = dlm_lockres_master_requery(dlm, res, &real_master); 1261 ret = dlm_lockres_master_requery(dlm, res, &real_master);
1181 if (ret < 0) { 1262 if (ret < 0) {
1182 mlog(0, "dlm_lockres_master_requery failure: %d\n", 1263 mlog(0, "dlm_lockres_master_requery ret=%d\n",
1183 ret); 1264 ret);
1184 goto again; 1265 goto again;
1185 } 1266 }
@@ -1757,6 +1838,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
1757 struct dlm_lock_resource *res; 1838 struct dlm_lock_resource *res;
1758 int i; 1839 int i;
1759 struct list_head *bucket; 1840 struct list_head *bucket;
1841 struct dlm_lock *lock;
1760 1842
1761 1843
1762 /* purge any stale mles */ 1844 /* purge any stale mles */
@@ -1780,10 +1862,25 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
1780 bucket = &(dlm->resources[i]); 1862 bucket = &(dlm->resources[i]);
1781 list_for_each(iter, bucket) { 1863 list_for_each(iter, bucket) {
1782 res = list_entry (iter, struct dlm_lock_resource, list); 1864 res = list_entry (iter, struct dlm_lock_resource, list);
1865 /* always prune any $RECOVERY entries for dead nodes,
1866 * otherwise hangs can occur during later recovery */
1783 if (dlm_is_recovery_lock(res->lockname.name, 1867 if (dlm_is_recovery_lock(res->lockname.name,
1784 res->lockname.len)) 1868 res->lockname.len)) {
1869 spin_lock(&res->spinlock);
1870 list_for_each_entry(lock, &res->granted, list) {
1871 if (lock->ml.node == dead_node) {
1872 mlog(0, "AHA! there was "
1873 "a $RECOVERY lock for dead "
1874 "node %u (%s)!\n",
1875 dead_node, dlm->name);
1876 list_del_init(&lock->list);
1877 dlm_lock_put(lock);
1878 break;
1879 }
1880 }
1881 spin_unlock(&res->spinlock);
1785 continue; 1882 continue;
1786 1883 }
1787 spin_lock(&res->spinlock); 1884 spin_lock(&res->spinlock);
1788 /* zero the lvb if necessary */ 1885 /* zero the lvb if necessary */
1789 dlm_revalidate_lvb(dlm, res, dead_node); 1886 dlm_revalidate_lvb(dlm, res, dead_node);
@@ -1869,12 +1966,9 @@ void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data)
1869 return; 1966 return;
1870 1967
1871 spin_lock(&dlm->spinlock); 1968 spin_lock(&dlm->spinlock);
1872
1873 set_bit(idx, dlm->live_nodes_map); 1969 set_bit(idx, dlm->live_nodes_map);
1874 1970 /* do NOT notify mle attached to the heartbeat events.
1875 /* notify any mles attached to the heartbeat events */ 1971 * new nodes are not interesting in mastery until joined. */
1876 dlm_hb_event_notify_attached(dlm, idx, 1);
1877
1878 spin_unlock(&dlm->spinlock); 1972 spin_unlock(&dlm->spinlock);
1879 1973
1880 dlm_put(dlm); 1974 dlm_put(dlm);
@@ -1897,7 +1991,18 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st)
1897 mlog(0, "unlockast for recovery lock fired!\n"); 1991 mlog(0, "unlockast for recovery lock fired!\n");
1898} 1992}
1899 1993
1900 1994/*
1995 * dlm_pick_recovery_master will continually attempt to use
1996 * dlmlock() on the special "$RECOVERY" lockres with the
1997 * LKM_NOQUEUE flag to get an EX. every thread that enters
1998 * this function on each node racing to become the recovery
1999 * master will not stop attempting this until either:
2000 * a) this node gets the EX (and becomes the recovery master),
2001 * or b) dlm->reco.new_master gets set to some nodenum
2002 * != O2NM_INVALID_NODE_NUM (another node will do the reco).
2003 * so each time a recovery master is needed, the entire cluster
2004 * will sync at this point. if the new master dies, that will
2005 * be detected in dlm_do_recovery */
1901static int dlm_pick_recovery_master(struct dlm_ctxt *dlm) 2006static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
1902{ 2007{
1903 enum dlm_status ret; 2008 enum dlm_status ret;
@@ -1906,23 +2011,45 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
1906 2011
1907 mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n", 2012 mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n",
1908 dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num); 2013 dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num);
1909retry: 2014again:
1910 memset(&lksb, 0, sizeof(lksb)); 2015 memset(&lksb, 0, sizeof(lksb));
1911 2016
1912 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, 2017 ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
1913 DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast); 2018 DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast);
1914 2019
2020 mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
2021 dlm->name, ret, lksb.status);
2022
1915 if (ret == DLM_NORMAL) { 2023 if (ret == DLM_NORMAL) {
1916 mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n", 2024 mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n",
1917 dlm->name, dlm->node_num); 2025 dlm->name, dlm->node_num);
1918 /* I am master, send message to all nodes saying 2026
1919 * that I am beginning a recovery session */ 2027 /* got the EX lock. check to see if another node
1920 status = dlm_send_begin_reco_message(dlm, 2028 * just became the reco master */
1921 dlm->reco.dead_node); 2029 if (dlm_reco_master_ready(dlm)) {
2030 mlog(0, "%s: got reco EX lock, but %u will "
2031 "do the recovery\n", dlm->name,
2032 dlm->reco.new_master);
2033 status = -EEXIST;
2034 } else {
2035 status = dlm_send_begin_reco_message(dlm,
2036 dlm->reco.dead_node);
2037 /* this always succeeds */
2038 BUG_ON(status);
2039
2040 /* set the new_master to this node */
2041 spin_lock(&dlm->spinlock);
2042 dlm->reco.new_master = dlm->node_num;
2043 spin_unlock(&dlm->spinlock);
2044 }
1922 2045
1923 /* recovery lock is a special case. ast will not get fired, 2046 /* recovery lock is a special case. ast will not get fired,
1924 * so just go ahead and unlock it. */ 2047 * so just go ahead and unlock it. */
1925 ret = dlmunlock(dlm, &lksb, 0, dlm_reco_unlock_ast, dlm); 2048 ret = dlmunlock(dlm, &lksb, 0, dlm_reco_unlock_ast, dlm);
2049 if (ret == DLM_DENIED) {
2050 mlog(0, "got DLM_DENIED, trying LKM_CANCEL\n");
2051 ret = dlmunlock(dlm, &lksb, LKM_CANCEL, dlm_reco_unlock_ast, dlm);
2052 }
1926 if (ret != DLM_NORMAL) { 2053 if (ret != DLM_NORMAL) {
1927 /* this would really suck. this could only happen 2054 /* this would really suck. this could only happen
1928 * if there was a network error during the unlock 2055 * if there was a network error during the unlock
@@ -1930,20 +2057,42 @@ retry:
1930 * is actually "done" and the lock structure is 2057 * is actually "done" and the lock structure is
1931 * even freed. we can continue, but only 2058 * even freed. we can continue, but only
1932 * because this specific lock name is special. */ 2059 * because this specific lock name is special. */
1933 mlog(0, "dlmunlock returned %d\n", ret); 2060 mlog(ML_ERROR, "dlmunlock returned %d\n", ret);
1934 }
1935
1936 if (status < 0) {
1937 mlog(0, "failed to send recovery message. "
1938 "must retry with new node map.\n");
1939 goto retry;
1940 } 2061 }
1941 } else if (ret == DLM_NOTQUEUED) { 2062 } else if (ret == DLM_NOTQUEUED) {
1942 mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n", 2063 mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n",
1943 dlm->name, dlm->node_num); 2064 dlm->name, dlm->node_num);
1944 /* another node is master. wait on 2065 /* another node is master. wait on
1945 * reco.new_master != O2NM_INVALID_NODE_NUM */ 2066 * reco.new_master != O2NM_INVALID_NODE_NUM
2067 * for at most one second */
2068 wait_event_timeout(dlm->dlm_reco_thread_wq,
2069 dlm_reco_master_ready(dlm),
2070 msecs_to_jiffies(1000));
2071 if (!dlm_reco_master_ready(dlm)) {
2072 mlog(0, "%s: reco master taking awhile\n",
2073 dlm->name);
2074 goto again;
2075 }
2076 /* another node has informed this one that it is reco master */
2077 mlog(0, "%s: reco master %u is ready to recover %u\n",
2078 dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
1946 status = -EEXIST; 2079 status = -EEXIST;
2080 } else {
2081 struct dlm_lock_resource *res;
2082
2083 /* dlmlock returned something other than NOTQUEUED or NORMAL */
2084 mlog(ML_ERROR, "%s: got %s from dlmlock($RECOVERY), "
2085 "lksb.status=%s\n", dlm->name, dlm_errname(ret),
2086 dlm_errname(lksb.status));
2087 res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
2088 DLM_RECOVERY_LOCK_NAME_LEN);
2089 if (res) {
2090 dlm_print_one_lock_resource(res);
2091 dlm_lockres_put(res);
2092 } else {
2093 mlog(ML_ERROR, "recovery lock not found\n");
2094 }
2095 BUG();
1947 } 2096 }
1948 2097
1949 return status; 2098 return status;
@@ -1982,7 +2131,7 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
1982 mlog(0, "not sending begin reco to self\n"); 2131 mlog(0, "not sending begin reco to self\n");
1983 continue; 2132 continue;
1984 } 2133 }
1985 2134retry:
1986 ret = -EINVAL; 2135 ret = -EINVAL;
1987 mlog(0, "attempting to send begin reco msg to %d\n", 2136 mlog(0, "attempting to send begin reco msg to %d\n",
1988 nodenum); 2137 nodenum);
@@ -1991,8 +2140,17 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
1991 /* negative status is handled ok by caller here */ 2140 /* negative status is handled ok by caller here */
1992 if (ret >= 0) 2141 if (ret >= 0)
1993 ret = status; 2142 ret = status;
2143 if (dlm_is_host_down(ret)) {
2144 /* node is down. not involved in recovery
2145 * so just keep going */
2146 mlog(0, "%s: node %u was down when sending "
2147 "begin reco msg (%d)\n", dlm->name, nodenum, ret);
2148 ret = 0;
2149 }
1994 if (ret < 0) { 2150 if (ret < 0) {
1995 struct dlm_lock_resource *res; 2151 struct dlm_lock_resource *res;
2152 /* this is now a serious problem, possibly ENOMEM
2153 * in the network stack. must retry */
1996 mlog_errno(ret); 2154 mlog_errno(ret);
1997 mlog(ML_ERROR, "begin reco of dlm %s to node %u " 2155 mlog(ML_ERROR, "begin reco of dlm %s to node %u "
1998 " returned %d\n", dlm->name, nodenum, ret); 2156 " returned %d\n", dlm->name, nodenum, ret);
@@ -2004,7 +2162,10 @@ static int dlm_send_begin_reco_message(struct dlm_ctxt *dlm, u8 dead_node)
2004 } else { 2162 } else {
2005 mlog(ML_ERROR, "recovery lock not found\n"); 2163 mlog(ML_ERROR, "recovery lock not found\n");
2006 } 2164 }
2007 break; 2165 /* sleep for a bit in hopes that we can avoid
2166 * another ENOMEM */
2167 msleep(100);
2168 goto retry;
2008 } 2169 }
2009 } 2170 }
2010 2171
@@ -2027,19 +2188,34 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
2027 2188
2028 spin_lock(&dlm->spinlock); 2189 spin_lock(&dlm->spinlock);
2029 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { 2190 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
2030 mlog(0, "new_master already set to %u!\n", 2191 if (test_bit(dlm->reco.new_master, dlm->recovery_map)) {
2031 dlm->reco.new_master); 2192 mlog(0, "%s: new_master %u died, changing "
2193 "to %u\n", dlm->name, dlm->reco.new_master,
2194 br->node_idx);
2195 } else {
2196 mlog(0, "%s: new_master %u NOT DEAD, changing "
2197 "to %u\n", dlm->name, dlm->reco.new_master,
2198 br->node_idx);
2199 /* may not have seen the new master as dead yet */
2200 }
2032 } 2201 }
2033 if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) { 2202 if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) {
2034 mlog(0, "dead_node already set to %u!\n", 2203 mlog(ML_NOTICE, "%s: dead_node previously set to %u, "
2035 dlm->reco.dead_node); 2204 "node %u changing it to %u\n", dlm->name,
2205 dlm->reco.dead_node, br->node_idx, br->dead_node);
2036 } 2206 }
2037 dlm->reco.new_master = br->node_idx; 2207 dlm->reco.new_master = br->node_idx;
2038 dlm->reco.dead_node = br->dead_node; 2208 dlm->reco.dead_node = br->dead_node;
2039 if (!test_bit(br->dead_node, dlm->recovery_map)) { 2209 if (!test_bit(br->dead_node, dlm->recovery_map)) {
2040 mlog(ML_ERROR, "recovery master %u sees %u as dead, but this " 2210 mlog(0, "recovery master %u sees %u as dead, but this "
2041 "node has not yet. marking %u as dead\n", 2211 "node has not yet. marking %u as dead\n",
2042 br->node_idx, br->dead_node, br->dead_node); 2212 br->node_idx, br->dead_node, br->dead_node);
2213 if (!test_bit(br->dead_node, dlm->domain_map) ||
2214 !test_bit(br->dead_node, dlm->live_nodes_map))
2215 mlog(0, "%u not in domain/live_nodes map "
2216 "so setting it in reco map manually\n",
2217 br->dead_node);
2218 set_bit(br->dead_node, dlm->recovery_map);
2043 __dlm_hb_node_down(dlm, br->dead_node); 2219 __dlm_hb_node_down(dlm, br->dead_node);
2044 } 2220 }
2045 spin_unlock(&dlm->spinlock); 2221 spin_unlock(&dlm->spinlock);
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index cec2ce1cd318..c95f08d2e925 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -188,6 +188,19 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
188 actions &= ~(DLM_UNLOCK_REMOVE_LOCK| 188 actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
189 DLM_UNLOCK_REGRANT_LOCK| 189 DLM_UNLOCK_REGRANT_LOCK|
190 DLM_UNLOCK_CLEAR_CONVERT_TYPE); 190 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
191 } else if (status == DLM_RECOVERING ||
192 status == DLM_MIGRATING ||
193 status == DLM_FORWARD) {
194 /* must clear the actions because this unlock
195 * is about to be retried. cannot free or do
196 * any list manipulation. */
197 mlog(0, "%s:%.*s: clearing actions, %s\n",
198 dlm->name, res->lockname.len,
199 res->lockname.name,
200 status==DLM_RECOVERING?"recovering":
201 (status==DLM_MIGRATING?"migrating":
202 "forward"));
203 actions = 0;
191 } 204 }
192 if (flags & LKM_CANCEL) 205 if (flags & LKM_CANCEL)
193 lock->cancel_pending = 0; 206 lock->cancel_pending = 0;
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index e1fdd288796e..c3764f4744ee 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -27,7 +27,7 @@
27 * Boston, MA 021110-1307, USA. 27 * Boston, MA 021110-1307, USA.
28 */ 28 */
29 29
30#include <asm/signal.h> 30#include <linux/signal.h>
31 31
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/fs.h> 33#include <linux/fs.h>
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f2fb40cd296a..b6ba292e9544 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -262,8 +262,7 @@ static int ocfs2_extent_map_find_leaf(struct inode *inode,
262 el = &eb->h_list; 262 el = &eb->h_list;
263 } 263 }
264 264
265 if (el->l_tree_depth) 265 BUG_ON(el->l_tree_depth);
266 BUG();
267 266
268 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 267 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
269 rec = &el->l_recs[i]; 268 rec = &el->l_recs[i];
@@ -364,8 +363,8 @@ static int ocfs2_extent_map_lookup_read(struct inode *inode,
364 return ret; 363 return ret;
365 } 364 }
366 365
367 if (ent->e_tree_depth) 366 /* FIXME: Make sure this isn't a corruption */
368 BUG(); /* FIXME: Make sure this isn't a corruption */ 367 BUG_ON(ent->e_tree_depth);
369 368
370 *ret_ent = ent; 369 *ret_ent = ent;
371 370
@@ -423,8 +422,7 @@ static int ocfs2_extent_map_try_insert(struct inode *inode,
423 le32_to_cpu(rec->e_clusters), NULL, 422 le32_to_cpu(rec->e_clusters), NULL,
424 NULL); 423 NULL);
425 424
426 if (!old_ent) 425 BUG_ON(!old_ent);
427 BUG();
428 426
429 ret = -EEXIST; 427 ret = -EEXIST;
430 if (old_ent->e_tree_depth < tree_depth) 428 if (old_ent->e_tree_depth < tree_depth)
@@ -988,7 +986,7 @@ int __init init_ocfs2_extent_maps(void)
988 return 0; 986 return 0;
989} 987}
990 988
991void __exit exit_ocfs2_extent_maps(void) 989void exit_ocfs2_extent_maps(void)
992{ 990{
993 kmem_cache_destroy(ocfs2_em_ent_cachep); 991 kmem_cache_destroy(ocfs2_em_ent_cachep);
994} 992}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index eaf33caa0a1f..1715bc90e705 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1022,8 +1022,9 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1022 } 1022 }
1023 newsize = count + saved_pos; 1023 newsize = count + saved_pos;
1024 1024
1025 mlog(0, "pos=%lld newsize=%"MLFu64" cursize=%lld\n", 1025 mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
1026 saved_pos, newsize, i_size_read(inode)); 1026 (long long) saved_pos, (long long) newsize,
1027 (long long) i_size_read(inode));
1027 1028
1028 /* No need for a higher level metadata lock if we're 1029 /* No need for a higher level metadata lock if we're
1029 * never going past i_size. */ 1030 * never going past i_size. */
@@ -1042,8 +1043,9 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1042 spin_unlock(&OCFS2_I(inode)->ip_lock); 1043 spin_unlock(&OCFS2_I(inode)->ip_lock);
1043 1044
1044 mlog(0, "Writing at EOF, may need more allocation: " 1045 mlog(0, "Writing at EOF, may need more allocation: "
1045 "i_size = %lld, newsize = %"MLFu64", need %u clusters\n", 1046 "i_size = %lld, newsize = %lld, need %u clusters\n",
1046 i_size_read(inode), newsize, clusters); 1047 (long long) i_size_read(inode), (long long) newsize,
1048 clusters);
1047 1049
1048 /* We only want to continue the rest of this loop if 1050 /* We only want to continue the rest of this loop if
1049 * our extend will actually require more 1051 * our extend will actually require more
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index d4ecc0627716..8122489c5762 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -903,10 +903,10 @@ void ocfs2_clear_inode(struct inode *inode)
903 "Clear inode of %"MLFu64", inode is locked\n", 903 "Clear inode of %"MLFu64", inode is locked\n",
904 oi->ip_blkno); 904 oi->ip_blkno);
905 905
906 mlog_bug_on_msg(down_trylock(&oi->ip_io_sem), 906 mlog_bug_on_msg(!mutex_trylock(&oi->ip_io_mutex),
907 "Clear inode of %"MLFu64", io_sem is locked\n", 907 "Clear inode of %"MLFu64", io_mutex is locked\n",
908 oi->ip_blkno); 908 oi->ip_blkno);
909 up(&oi->ip_io_sem); 909 mutex_unlock(&oi->ip_io_mutex);
910 910
911 /* 911 /*
912 * down_trylock() returns 0, down_write_trylock() returns 1 912 * down_trylock() returns 0, down_write_trylock() returns 1
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 9b0177433653..84c507961287 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -46,10 +46,10 @@ struct ocfs2_inode_info
46 struct list_head ip_io_markers; 46 struct list_head ip_io_markers;
47 int ip_orphaned_slot; 47 int ip_orphaned_slot;
48 48
49 struct semaphore ip_io_sem; 49 struct mutex ip_io_mutex;
50 50
51 /* Used by the journalling code to attach an inode to a 51 /* Used by the journalling code to attach an inode to a
52 * handle. These are protected by ip_io_sem in order to lock 52 * handle. These are protected by ip_io_mutex in order to lock
53 * out other I/O to the inode until we either commit or 53 * out other I/O to the inode until we either commit or
54 * abort. */ 54 * abort. */
55 struct list_head ip_handle_list; 55 struct list_head ip_handle_list;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 303c8d96457f..fa0bcac5ceae 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -147,8 +147,7 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb,
147 147
148 mlog_entry("(max_buffs = %d)\n", max_buffs); 148 mlog_entry("(max_buffs = %d)\n", max_buffs);
149 149
150 if (!osb || !osb->journal->j_journal) 150 BUG_ON(!osb || !osb->journal->j_journal);
151 BUG();
152 151
153 if (ocfs2_is_hard_readonly(osb)) { 152 if (ocfs2_is_hard_readonly(osb)) {
154 ret = -EROFS; 153 ret = -EROFS;
@@ -401,7 +400,7 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle,
401 * j_trans_barrier for us. */ 400 * j_trans_barrier for us. */
402 ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); 401 ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode);
403 402
404 down(&OCFS2_I(inode)->ip_io_sem); 403 mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
405 switch (type) { 404 switch (type) {
406 case OCFS2_JOURNAL_ACCESS_CREATE: 405 case OCFS2_JOURNAL_ACCESS_CREATE:
407 case OCFS2_JOURNAL_ACCESS_WRITE: 406 case OCFS2_JOURNAL_ACCESS_WRITE:
@@ -416,7 +415,7 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle,
416 status = -EINVAL; 415 status = -EINVAL;
417 mlog(ML_ERROR, "Uknown access type!\n"); 416 mlog(ML_ERROR, "Uknown access type!\n");
418 } 417 }
419 up(&OCFS2_I(inode)->ip_io_sem); 418 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
420 419
421 if (status < 0) 420 if (status < 0)
422 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", 421 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
@@ -561,7 +560,11 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
561 SET_INODE_JOURNAL(inode); 560 SET_INODE_JOURNAL(inode);
562 OCFS2_I(inode)->ip_open_count++; 561 OCFS2_I(inode)->ip_open_count++;
563 562
564 status = ocfs2_meta_lock(inode, NULL, &bh, 1); 563 /* Skip recovery waits here - journal inode metadata never
564 * changes in a live cluster so it can be considered an
565 * exception to the rule. */
566 status = ocfs2_meta_lock_full(inode, NULL, &bh, 1,
567 OCFS2_META_LOCK_RECOVERY);
565 if (status < 0) { 568 if (status < 0) {
566 if (status != -ERESTARTSYS) 569 if (status != -ERESTARTSYS)
567 mlog(ML_ERROR, "Could not get lock on journal!\n"); 570 mlog(ML_ERROR, "Could not get lock on journal!\n");
@@ -672,8 +675,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
672 675
673 mlog_entry_void(); 676 mlog_entry_void();
674 677
675 if (!osb) 678 BUG_ON(!osb);
676 BUG();
677 679
678 journal = osb->journal; 680 journal = osb->journal;
679 if (!journal) 681 if (!journal)
@@ -805,8 +807,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
805 807
806 mlog_entry_void(); 808 mlog_entry_void();
807 809
808 if (!journal) 810 BUG_ON(!journal);
809 BUG();
810 811
811 status = journal_wipe(journal->j_journal, full); 812 status = journal_wipe(journal->j_journal, full);
812 if (status < 0) { 813 if (status < 0) {
@@ -1072,10 +1073,10 @@ restart:
1072 NULL); 1073 NULL);
1073 1074
1074bail: 1075bail:
1075 down(&osb->recovery_lock); 1076 mutex_lock(&osb->recovery_lock);
1076 if (!status && 1077 if (!status &&
1077 !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { 1078 !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) {
1078 up(&osb->recovery_lock); 1079 mutex_unlock(&osb->recovery_lock);
1079 goto restart; 1080 goto restart;
1080 } 1081 }
1081 1082
@@ -1083,7 +1084,7 @@ bail:
1083 mb(); /* sync with ocfs2_recovery_thread_running */ 1084 mb(); /* sync with ocfs2_recovery_thread_running */
1084 wake_up(&osb->recovery_event); 1085 wake_up(&osb->recovery_event);
1085 1086
1086 up(&osb->recovery_lock); 1087 mutex_unlock(&osb->recovery_lock);
1087 1088
1088 mlog_exit(status); 1089 mlog_exit(status);
1089 /* no one is callint kthread_stop() for us so the kthread() api 1090 /* no one is callint kthread_stop() for us so the kthread() api
@@ -1098,7 +1099,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1098 mlog_entry("(node_num=%d, osb->node_num = %d)\n", 1099 mlog_entry("(node_num=%d, osb->node_num = %d)\n",
1099 node_num, osb->node_num); 1100 node_num, osb->node_num);
1100 1101
1101 down(&osb->recovery_lock); 1102 mutex_lock(&osb->recovery_lock);
1102 if (osb->disable_recovery) 1103 if (osb->disable_recovery)
1103 goto out; 1104 goto out;
1104 1105
@@ -1120,7 +1121,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
1120 } 1121 }
1121 1122
1122out: 1123out:
1123 up(&osb->recovery_lock); 1124 mutex_unlock(&osb->recovery_lock);
1124 wake_up(&osb->recovery_event); 1125 wake_up(&osb->recovery_event);
1125 1126
1126 mlog_exit_void(); 1127 mlog_exit_void();
@@ -1271,8 +1272,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1271 1272
1272 /* Should not ever be called to recover ourselves -- in that 1273 /* Should not ever be called to recover ourselves -- in that
1273 * case we should've called ocfs2_journal_load instead. */ 1274 * case we should've called ocfs2_journal_load instead. */
1274 if (osb->node_num == node_num) 1275 BUG_ON(osb->node_num == node_num);
1275 BUG();
1276 1276
1277 slot_num = ocfs2_node_num_to_slot(si, node_num); 1277 slot_num = ocfs2_node_num_to_slot(si, node_num);
1278 if (slot_num == OCFS2_INVALID_SLOT) { 1278 if (slot_num == OCFS2_INVALID_SLOT) {
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index f468c600cf92..8d8e4779df92 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -33,6 +33,7 @@
33#include <linux/rbtree.h> 33#include <linux/rbtree.h>
34#include <linux/workqueue.h> 34#include <linux/workqueue.h>
35#include <linux/kref.h> 35#include <linux/kref.h>
36#include <linux/mutex.h>
36 37
37#include "cluster/nodemanager.h" 38#include "cluster/nodemanager.h"
38#include "cluster/heartbeat.h" 39#include "cluster/heartbeat.h"
@@ -233,7 +234,7 @@ struct ocfs2_super
233 struct proc_dir_entry *proc_sub_dir; /* points to /proc/fs/ocfs2/<maj_min> */ 234 struct proc_dir_entry *proc_sub_dir; /* points to /proc/fs/ocfs2/<maj_min> */
234 235
235 atomic_t vol_state; 236 atomic_t vol_state;
236 struct semaphore recovery_lock; 237 struct mutex recovery_lock;
237 struct task_struct *recovery_thread_task; 238 struct task_struct *recovery_thread_task;
238 int disable_recovery; 239 int disable_recovery;
239 wait_queue_head_t checkpoint_event; 240 wait_queue_head_t checkpoint_event;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 364d64bd5f10..046824b6b625 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -932,7 +932,7 @@ static void ocfs2_inode_init_once(void *data,
932 oi->ip_dir_start_lookup = 0; 932 oi->ip_dir_start_lookup = 0;
933 933
934 init_rwsem(&oi->ip_alloc_sem); 934 init_rwsem(&oi->ip_alloc_sem);
935 init_MUTEX(&(oi->ip_io_sem)); 935 mutex_init(&oi->ip_io_mutex);
936 936
937 oi->ip_blkno = 0ULL; 937 oi->ip_blkno = 0ULL;
938 oi->ip_clusters = 0; 938 oi->ip_clusters = 0;
@@ -1137,9 +1137,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1137 1137
1138 /* disable any new recovery threads and wait for any currently 1138 /* disable any new recovery threads and wait for any currently
1139 * running ones to exit. Do this before setting the vol_state. */ 1139 * running ones to exit. Do this before setting the vol_state. */
1140 down(&osb->recovery_lock); 1140 mutex_lock(&osb->recovery_lock);
1141 osb->disable_recovery = 1; 1141 osb->disable_recovery = 1;
1142 up(&osb->recovery_lock); 1142 mutex_unlock(&osb->recovery_lock);
1143 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); 1143 wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
1144 1144
1145 /* At this point, we know that no more recovery threads can be 1145 /* At this point, we know that no more recovery threads can be
@@ -1254,8 +1254,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1254 osb->sb = sb; 1254 osb->sb = sb;
1255 /* Save off for ocfs2_rw_direct */ 1255 /* Save off for ocfs2_rw_direct */
1256 osb->s_sectsize_bits = blksize_bits(sector_size); 1256 osb->s_sectsize_bits = blksize_bits(sector_size);
1257 if (!osb->s_sectsize_bits) 1257 BUG_ON(!osb->s_sectsize_bits);
1258 BUG();
1259 1258
1260 osb->net_response_ids = 0; 1259 osb->net_response_ids = 0;
1261 spin_lock_init(&osb->net_response_lock); 1260 spin_lock_init(&osb->net_response_lock);
@@ -1283,7 +1282,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1283 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 1282 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
1284 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 1283 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1285 1284
1286 init_MUTEX(&osb->recovery_lock); 1285 mutex_init(&osb->recovery_lock);
1287 1286
1288 osb->disable_recovery = 0; 1287 osb->disable_recovery = 0;
1289 osb->recovery_thread_task = NULL; 1288 osb->recovery_thread_task = NULL;
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 600a8bc5b541..fc29cb7a437d 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -77,8 +77,7 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
77 if (arr && ((inode = *arr) != NULL)) { 77 if (arr && ((inode = *arr) != NULL)) {
78 /* get a ref in addition to the array ref */ 78 /* get a ref in addition to the array ref */
79 inode = igrab(inode); 79 inode = igrab(inode);
80 if (!inode) 80 BUG_ON(!inode);
81 BUG();
82 81
83 return inode; 82 return inode;
84 } 83 }
@@ -89,8 +88,7 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
89 /* add one more if putting into array for first time */ 88 /* add one more if putting into array for first time */
90 if (arr && inode) { 89 if (arr && inode) {
91 *arr = igrab(inode); 90 *arr = igrab(inode);
92 if (!*arr) 91 BUG_ON(!*arr);
93 BUG();
94 } 92 }
95 return inode; 93 return inode;
96} 94}
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 3a0458fd3e1b..300b5bedfb21 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -388,7 +388,7 @@ out_free:
388 } 388 }
389} 389}
390 390
391/* Item insertion is guarded by ip_io_sem, so the insertion path takes 391/* Item insertion is guarded by ip_io_mutex, so the insertion path takes
392 * advantage of this by not rechecking for a duplicate insert during 392 * advantage of this by not rechecking for a duplicate insert during
393 * the slow case. Additionally, if the cache needs to be bumped up to 393 * the slow case. Additionally, if the cache needs to be bumped up to
394 * a tree, the code will not recheck after acquiring the lock -- 394 * a tree, the code will not recheck after acquiring the lock --
@@ -418,7 +418,7 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
418 (unsigned long long) bh->b_blocknr); 418 (unsigned long long) bh->b_blocknr);
419 419
420 /* No need to recheck under spinlock - insertion is guarded by 420 /* No need to recheck under spinlock - insertion is guarded by
421 * ip_io_sem */ 421 * ip_io_mutex */
422 spin_lock(&oi->ip_lock); 422 spin_lock(&oi->ip_lock);
423 if (ocfs2_insert_can_use_array(oi, ci)) { 423 if (ocfs2_insert_can_use_array(oi, ci)) {
424 /* Fast case - it's an array and there's a free 424 /* Fast case - it's an array and there's a free
@@ -440,7 +440,7 @@ void ocfs2_set_buffer_uptodate(struct inode *inode,
440 440
441/* Called against a newly allocated buffer. Most likely nobody should 441/* Called against a newly allocated buffer. Most likely nobody should
442 * be able to read this sort of metadata while it's still being 442 * be able to read this sort of metadata while it's still being
443 * allocated, but this is careful to take ip_io_sem anyway. */ 443 * allocated, but this is careful to take ip_io_mutex anyway. */
444void ocfs2_set_new_buffer_uptodate(struct inode *inode, 444void ocfs2_set_new_buffer_uptodate(struct inode *inode,
445 struct buffer_head *bh) 445 struct buffer_head *bh)
446{ 446{
@@ -451,9 +451,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
451 451
452 set_buffer_uptodate(bh); 452 set_buffer_uptodate(bh);
453 453
454 down(&oi->ip_io_sem); 454 mutex_lock(&oi->ip_io_mutex);
455 ocfs2_set_buffer_uptodate(inode, bh); 455 ocfs2_set_buffer_uptodate(inode, bh);
456 up(&oi->ip_io_sem); 456 mutex_unlock(&oi->ip_io_mutex);
457} 457}
458 458
459/* Requires ip_lock. */ 459/* Requires ip_lock. */
@@ -537,7 +537,7 @@ int __init init_ocfs2_uptodate_cache(void)
537 return 0; 537 return 0;
538} 538}
539 539
540void __exit exit_ocfs2_uptodate_cache(void) 540void exit_ocfs2_uptodate_cache(void)
541{ 541{
542 if (ocfs2_uptodate_cachep) 542 if (ocfs2_uptodate_cachep)
543 kmem_cache_destroy(ocfs2_uptodate_cachep); 543 kmem_cache_destroy(ocfs2_uptodate_cachep);
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index e5aacdf4eabf..01cd32d26b06 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -27,7 +27,7 @@
27#define OCFS2_UPTODATE_H 27#define OCFS2_UPTODATE_H
28 28
29int __init init_ocfs2_uptodate_cache(void); 29int __init init_ocfs2_uptodate_cache(void);
30void __exit exit_ocfs2_uptodate_cache(void); 30void exit_ocfs2_uptodate_cache(void);
31 31
32void ocfs2_metadata_cache_init(struct inode *inode); 32void ocfs2_metadata_cache_init(struct inode *inode);
33void ocfs2_metadata_cache_purge(struct inode *inode); 33void ocfs2_metadata_cache_purge(struct inode *inode);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 8f8014285a34..1d24fead51a6 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -548,7 +548,7 @@ static int show_stat(struct seq_file *p, void *v)
548 } 548 }
549 seq_printf(p, "intr %llu", (unsigned long long)sum); 549 seq_printf(p, "intr %llu", (unsigned long long)sum);
550 550
551#if !defined(CONFIG_PPC64) && !defined(CONFIG_ALPHA) 551#if !defined(CONFIG_PPC64) && !defined(CONFIG_ALPHA) && !defined(CONFIG_IA64)
552 for (i = 0; i < NR_IRQS; i++) 552 for (i = 0; i < NR_IRQS; i++)
553 seq_printf(p, " %u", kstat_irqs(i)); 553 seq_printf(p, " %u", kstat_irqs(i));
554#endif 554#endif
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index a4ef91bb4f3b..b4199ec3ece4 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -35,7 +35,7 @@ static int v2_check_quota_file(struct super_block *sb, int type)
35 35
36 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0); 36 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
37 if (size != sizeof(struct v2_disk_dqheader)) { 37 if (size != sizeof(struct v2_disk_dqheader)) {
38 printk("quota_v2: failed read expected=%d got=%d\n", 38 printk("quota_v2: failed read expected=%zd got=%zd\n",
39 sizeof(struct v2_disk_dqheader), size); 39 sizeof(struct v2_disk_dqheader), size);
40 return 0; 40 return 0;
41 } 41 }
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 9dd71e807034..d71ac6579289 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -150,18 +150,15 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
150 if (d_reclen <= 32) { 150 if (d_reclen <= 32) {
151 local_buf = small_buf; 151 local_buf = small_buf;
152 } else { 152 } else {
153 local_buf = 153 local_buf = kmalloc(d_reclen,
154 reiserfs_kmalloc(d_reclen, GFP_NOFS, 154 GFP_NOFS);
155 inode->i_sb);
156 if (!local_buf) { 155 if (!local_buf) {
157 pathrelse(&path_to_entry); 156 pathrelse(&path_to_entry);
158 ret = -ENOMEM; 157 ret = -ENOMEM;
159 goto out; 158 goto out;
160 } 159 }
161 if (item_moved(&tmp_ih, &path_to_entry)) { 160 if (item_moved(&tmp_ih, &path_to_entry)) {
162 reiserfs_kfree(local_buf, 161 kfree(local_buf);
163 d_reclen,
164 inode->i_sb);
165 goto research; 162 goto research;
166 } 163 }
167 } 164 }
@@ -174,15 +171,12 @@ static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
174 (dirent, local_buf, d_reclen, d_off, d_ino, 171 (dirent, local_buf, d_reclen, d_off, d_ino,
175 DT_UNKNOWN) < 0) { 172 DT_UNKNOWN) < 0) {
176 if (local_buf != small_buf) { 173 if (local_buf != small_buf) {
177 reiserfs_kfree(local_buf, 174 kfree(local_buf);
178 d_reclen,
179 inode->i_sb);
180 } 175 }
181 goto end; 176 goto end;
182 } 177 }
183 if (local_buf != small_buf) { 178 if (local_buf != small_buf) {
184 reiserfs_kfree(local_buf, d_reclen, 179 kfree(local_buf);
185 inode->i_sb);
186 } 180 }
187 // next entry should be looked for with such offset 181 // next entry should be looked for with such offset
188 next_pos = deh_offset(deh) + 1; 182 next_pos = deh_offset(deh) + 1;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ad6fa964b0e7..f3473176c83a 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -192,6 +192,8 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl
192 192
193 allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) * 193 allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
194 sizeof(b_blocknr_t), GFP_NOFS); 194 sizeof(b_blocknr_t), GFP_NOFS);
195 if (!allocated_blocks)
196 return -ENOMEM;
195 197
196 /* First we compose a key to point at the writing position, we want to do 198 /* First we compose a key to point at the writing position, we want to do
197 that outside of any locking region. */ 199 that outside of any locking region. */
@@ -1285,6 +1287,23 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
1285 struct reiserfs_transaction_handle th; 1287 struct reiserfs_transaction_handle th;
1286 th.t_trans_id = 0; 1288 th.t_trans_id = 0;
1287 1289
1290 /* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items
1291 * lying around (most of the disk, in fact). Despite the filesystem
1292 * now being a v3.6 format, the old items still can't support large
1293 * file sizes. Catch this case here, as the rest of the VFS layer is
1294 * oblivious to the different limitations between old and new items.
1295 * reiserfs_setattr catches this for truncates. This chunk is lifted
1296 * from generic_write_checks. */
1297 if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 &&
1298 *ppos + count > MAX_NON_LFS) {
1299 if (*ppos >= MAX_NON_LFS) {
1300 send_sig(SIGXFSZ, current, 0);
1301 return -EFBIG;
1302 }
1303 if (count > MAX_NON_LFS - (unsigned long)*ppos)
1304 count = MAX_NON_LFS - (unsigned long)*ppos;
1305 }
1306
1288 if (file->f_flags & O_DIRECT) { // Direct IO needs treatment 1307 if (file->f_flags & O_DIRECT) { // Direct IO needs treatment
1289 ssize_t result, after_file_end = 0; 1308 ssize_t result, after_file_end = 0;
1290 if ((*ppos + count >= inode->i_size) 1309 if ((*ppos + count >= inode->i_size)
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 45829889dcdc..aa22588019ec 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -2021,38 +2021,6 @@ static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
2021 return CARRY_ON; 2021 return CARRY_ON;
2022} 2022}
2023 2023
2024#ifdef CONFIG_REISERFS_CHECK
2025void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s)
2026{
2027 void *vp;
2028 static size_t malloced;
2029
2030 vp = kmalloc(size, flags);
2031 if (vp) {
2032 REISERFS_SB(s)->s_kmallocs += size;
2033 if (REISERFS_SB(s)->s_kmallocs > malloced + 200000) {
2034 reiserfs_warning(s,
2035 "vs-8301: reiserfs_kmalloc: allocated memory %d",
2036 REISERFS_SB(s)->s_kmallocs);
2037 malloced = REISERFS_SB(s)->s_kmallocs;
2038 }
2039 }
2040 return vp;
2041}
2042
2043void reiserfs_kfree(const void *vp, size_t size, struct super_block *s)
2044{
2045 kfree(vp);
2046
2047 REISERFS_SB(s)->s_kmallocs -= size;
2048 if (REISERFS_SB(s)->s_kmallocs < 0)
2049 reiserfs_warning(s,
2050 "vs-8302: reiserfs_kfree: allocated memory %d",
2051 REISERFS_SB(s)->s_kmallocs);
2052
2053}
2054#endif
2055
2056static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh) 2024static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2057{ 2025{
2058 int max_num_of_items; 2026 int max_num_of_items;
@@ -2086,7 +2054,7 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
2086 /* we have to allocate more memory for virtual node */ 2054 /* we have to allocate more memory for virtual node */
2087 if (tb->vn_buf) { 2055 if (tb->vn_buf) {
2088 /* free memory allocated before */ 2056 /* free memory allocated before */
2089 reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb); 2057 kfree(tb->vn_buf);
2090 /* this is not needed if kfree is atomic */ 2058 /* this is not needed if kfree is atomic */
2091 check_fs = 1; 2059 check_fs = 1;
2092 } 2060 }
@@ -2095,24 +2063,15 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
2095 tb->vn_buf_size = size; 2063 tb->vn_buf_size = size;
2096 2064
2097 /* get memory for virtual item */ 2065 /* get memory for virtual item */
2098 buf = 2066 buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
2099 reiserfs_kmalloc(size, GFP_ATOMIC | __GFP_NOWARN,
2100 tb->tb_sb);
2101 if (!buf) { 2067 if (!buf) {
2102 /* getting memory with GFP_KERNEL priority may involve 2068 /* getting memory with GFP_KERNEL priority may involve
2103 balancing now (due to indirect_to_direct conversion on 2069 balancing now (due to indirect_to_direct conversion on
2104 dcache shrinking). So, release path and collected 2070 dcache shrinking). So, release path and collected
2105 resources here */ 2071 resources here */
2106 free_buffers_in_tb(tb); 2072 free_buffers_in_tb(tb);
2107 buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb); 2073 buf = kmalloc(size, GFP_NOFS);
2108 if (!buf) { 2074 if (!buf) {
2109#ifdef CONFIG_REISERFS_CHECK
2110 reiserfs_warning(tb->tb_sb,
2111 "vs-8345: get_mem_for_virtual_node: "
2112 "kmalloc failed. reiserfs kmalloced %d bytes",
2113 REISERFS_SB(tb->tb_sb)->
2114 s_kmallocs);
2115#endif
2116 tb->vn_buf_size = 0; 2075 tb->vn_buf_size = 0;
2117 } 2076 }
2118 tb->vn_buf = buf; 2077 tb->vn_buf = buf;
@@ -2619,7 +2578,6 @@ void unfix_nodes(struct tree_balance *tb)
2619 } 2578 }
2620 } 2579 }
2621 2580
2622 if (tb->vn_buf) 2581 kfree(tb->vn_buf);
2623 reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
2624 2582
2625} 2583}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ffa34b861bdb..b33d67bba2fd 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2363,6 +2363,13 @@ static int reiserfs_write_full_page(struct page *page,
2363 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; 2363 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
2364 th.t_trans_id = 0; 2364 th.t_trans_id = 0;
2365 2365
2366 /* no logging allowed when nonblocking or from PF_MEMALLOC */
2367 if (checked && (current->flags & PF_MEMALLOC)) {
2368 redirty_page_for_writepage(wbc, page);
2369 unlock_page(page);
2370 return 0;
2371 }
2372
2366 /* The page dirty bit is cleared before writepage is called, which 2373 /* The page dirty bit is cleared before writepage is called, which
2367 * means we have to tell create_empty_buffers to make dirty buffers 2374 * means we have to tell create_empty_buffers to make dirty buffers
2368 * The page really should be up to date at this point, so tossing 2375 * The page really should be up to date at this point, so tossing
@@ -2743,6 +2750,7 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2743 int ret = 1; 2750 int ret = 1;
2744 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); 2751 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2745 2752
2753 lock_buffer(bh);
2746 spin_lock(&j->j_dirty_buffers_lock); 2754 spin_lock(&j->j_dirty_buffers_lock);
2747 if (!buffer_mapped(bh)) { 2755 if (!buffer_mapped(bh)) {
2748 goto free_jh; 2756 goto free_jh;
@@ -2758,7 +2766,7 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2758 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { 2766 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2759 ret = 0; 2767 ret = 0;
2760 } 2768 }
2761 } else if (buffer_dirty(bh) || buffer_locked(bh)) { 2769 } else if (buffer_dirty(bh)) {
2762 struct reiserfs_journal_list *jl; 2770 struct reiserfs_journal_list *jl;
2763 struct reiserfs_jh *jh = bh->b_private; 2771 struct reiserfs_jh *jh = bh->b_private;
2764 2772
@@ -2784,6 +2792,7 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2784 reiserfs_free_jh(bh); 2792 reiserfs_free_jh(bh);
2785 } 2793 }
2786 spin_unlock(&j->j_dirty_buffers_lock); 2794 spin_unlock(&j->j_dirty_buffers_lock);
2795 unlock_buffer(bh);
2787 return ret; 2796 return ret;
2788} 2797}
2789 2798
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 4491fcf2a0e6..b7a179560ab4 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -152,18 +152,16 @@ static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
152 struct reiserfs_bitmap_node *bn; 152 struct reiserfs_bitmap_node *bn;
153 static int id; 153 static int id;
154 154
155 bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS, 155 bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
156 p_s_sb);
157 if (!bn) { 156 if (!bn) {
158 return NULL; 157 return NULL;
159 } 158 }
160 bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb); 159 bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS);
161 if (!bn->data) { 160 if (!bn->data) {
162 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb); 161 kfree(bn);
163 return NULL; 162 return NULL;
164 } 163 }
165 bn->id = id++; 164 bn->id = id++;
166 memset(bn->data, 0, p_s_sb->s_blocksize);
167 INIT_LIST_HEAD(&bn->list); 165 INIT_LIST_HEAD(&bn->list);
168 return bn; 166 return bn;
169} 167}
@@ -197,8 +195,8 @@ static inline void free_bitmap_node(struct super_block *p_s_sb,
197 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 195 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
198 journal->j_used_bitmap_nodes--; 196 journal->j_used_bitmap_nodes--;
199 if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { 197 if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
200 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb); 198 kfree(bn->data);
201 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb); 199 kfree(bn);
202 } else { 200 } else {
203 list_add(&bn->list, &journal->j_bitmap_nodes); 201 list_add(&bn->list, &journal->j_bitmap_nodes);
204 journal->j_free_bitmap_nodes++; 202 journal->j_free_bitmap_nodes++;
@@ -276,8 +274,8 @@ static int free_bitmap_nodes(struct super_block *p_s_sb)
276 while (next != &journal->j_bitmap_nodes) { 274 while (next != &journal->j_bitmap_nodes) {
277 bn = list_entry(next, struct reiserfs_bitmap_node, list); 275 bn = list_entry(next, struct reiserfs_bitmap_node, list);
278 list_del(next); 276 list_del(next);
279 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb); 277 kfree(bn->data);
280 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb); 278 kfree(bn);
281 next = journal->j_bitmap_nodes.next; 279 next = journal->j_bitmap_nodes.next;
282 journal->j_free_bitmap_nodes--; 280 journal->j_free_bitmap_nodes--;
283 } 281 }
@@ -581,7 +579,7 @@ static inline void put_journal_list(struct super_block *s,
581 jl->j_trans_id, jl->j_refcount); 579 jl->j_trans_id, jl->j_refcount);
582 } 580 }
583 if (--jl->j_refcount == 0) 581 if (--jl->j_refcount == 0)
584 reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s); 582 kfree(jl);
585} 583}
586 584
587/* 585/*
@@ -848,6 +846,14 @@ static int write_ordered_buffers(spinlock_t * lock,
848 spin_lock(lock); 846 spin_lock(lock);
849 goto loop_next; 847 goto loop_next;
850 } 848 }
849 /* in theory, dirty non-uptodate buffers should never get here,
850 * but the upper layer io error paths still have a few quirks.
851 * Handle them here as gracefully as we can
852 */
853 if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
854 clear_buffer_dirty(bh);
855 ret = -EIO;
856 }
851 if (buffer_dirty(bh)) { 857 if (buffer_dirty(bh)) {
852 list_del_init(&jh->list); 858 list_del_init(&jh->list);
853 list_add(&jh->list, &tmp); 859 list_add(&jh->list, &tmp);
@@ -879,6 +885,19 @@ static int write_ordered_buffers(spinlock_t * lock,
879 if (!buffer_uptodate(bh)) { 885 if (!buffer_uptodate(bh)) {
880 ret = -EIO; 886 ret = -EIO;
881 } 887 }
888 /* ugly interaction with invalidatepage here.
889 * reiserfs_invalidate_page will pin any buffer that has a valid
890 * journal head from an older transaction. If someone else sets
891 * our buffer dirty after we write it in the first loop, and
892 * then someone truncates the page away, nobody will ever write
893 * the buffer. We're safe if we write the page one last time
894 * after freeing the journal header.
895 */
896 if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
897 spin_unlock(lock);
898 ll_rw_block(WRITE, 1, &bh);
899 spin_lock(lock);
900 }
882 put_bh(bh); 901 put_bh(bh);
883 cond_resched_lock(lock); 902 cond_resched_lock(lock);
884 } 903 }
@@ -977,6 +996,7 @@ static int flush_commit_list(struct super_block *s,
977 struct reiserfs_journal *journal = SB_JOURNAL(s); 996 struct reiserfs_journal *journal = SB_JOURNAL(s);
978 int barrier = 0; 997 int barrier = 0;
979 int retval = 0; 998 int retval = 0;
999 int write_len;
980 1000
981 reiserfs_check_lock_depth(s, "flush_commit_list"); 1001 reiserfs_check_lock_depth(s, "flush_commit_list");
982 1002
@@ -1018,24 +1038,35 @@ static int flush_commit_list(struct super_block *s,
1018 } 1038 }
1019 1039
1020 if (!list_empty(&jl->j_bh_list)) { 1040 if (!list_empty(&jl->j_bh_list)) {
1041 int ret;
1021 unlock_kernel(); 1042 unlock_kernel();
1022 write_ordered_buffers(&journal->j_dirty_buffers_lock, 1043 ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
1023 journal, jl, &jl->j_bh_list); 1044 journal, jl, &jl->j_bh_list);
1045 if (ret < 0 && retval == 0)
1046 retval = ret;
1024 lock_kernel(); 1047 lock_kernel();
1025 } 1048 }
1026 BUG_ON(!list_empty(&jl->j_bh_list)); 1049 BUG_ON(!list_empty(&jl->j_bh_list));
1027 /* 1050 /*
1028 * for the description block and all the log blocks, submit any buffers 1051 * for the description block and all the log blocks, submit any buffers
1029 * that haven't already reached the disk 1052 * that haven't already reached the disk. Try to write at least 256
1053 * log blocks. later on, we will only wait on blocks that correspond
1054 * to this transaction, but while we're unplugging we might as well
1055 * get a chunk of data on there.
1030 */ 1056 */
1031 atomic_inc(&journal->j_async_throttle); 1057 atomic_inc(&journal->j_async_throttle);
1032 for (i = 0; i < (jl->j_len + 1); i++) { 1058 write_len = jl->j_len + 1;
1059 if (write_len < 256)
1060 write_len = 256;
1061 for (i = 0 ; i < write_len ; i++) {
1033 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % 1062 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
1034 SB_ONDISK_JOURNAL_SIZE(s); 1063 SB_ONDISK_JOURNAL_SIZE(s);
1035 tbh = journal_find_get_block(s, bn); 1064 tbh = journal_find_get_block(s, bn);
1036 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ 1065 if (tbh) {
1037 ll_rw_block(SWRITE, 1, &tbh); 1066 if (buffer_dirty(tbh))
1038 put_bh(tbh); 1067 ll_rw_block(WRITE, 1, &tbh) ;
1068 put_bh(tbh) ;
1069 }
1039 } 1070 }
1040 atomic_dec(&journal->j_async_throttle); 1071 atomic_dec(&journal->j_async_throttle);
1041 1072
@@ -1818,8 +1849,7 @@ void remove_journal_hash(struct super_block *sb,
1818static void free_journal_ram(struct super_block *p_s_sb) 1849static void free_journal_ram(struct super_block *p_s_sb)
1819{ 1850{
1820 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1851 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1821 reiserfs_kfree(journal->j_current_jl, 1852 kfree(journal->j_current_jl);
1822 sizeof(struct reiserfs_journal_list), p_s_sb);
1823 journal->j_num_lists--; 1853 journal->j_num_lists--;
1824 1854
1825 vfree(journal->j_cnode_free_orig); 1855 vfree(journal->j_cnode_free_orig);
@@ -2093,21 +2123,15 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2093 } 2123 }
2094 trans_id = get_desc_trans_id(desc); 2124 trans_id = get_desc_trans_id(desc);
2095 /* now we know we've got a good transaction, and it was inside the valid time ranges */ 2125 /* now we know we've got a good transaction, and it was inside the valid time ranges */
2096 log_blocks = 2126 log_blocks = kmalloc(get_desc_trans_len(desc) *
2097 reiserfs_kmalloc(get_desc_trans_len(desc) * 2127 sizeof(struct buffer_head *), GFP_NOFS);
2098 sizeof(struct buffer_head *), GFP_NOFS, p_s_sb); 2128 real_blocks = kmalloc(get_desc_trans_len(desc) *
2099 real_blocks = 2129 sizeof(struct buffer_head *), GFP_NOFS);
2100 reiserfs_kmalloc(get_desc_trans_len(desc) *
2101 sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
2102 if (!log_blocks || !real_blocks) { 2130 if (!log_blocks || !real_blocks) {
2103 brelse(c_bh); 2131 brelse(c_bh);
2104 brelse(d_bh); 2132 brelse(d_bh);
2105 reiserfs_kfree(log_blocks, 2133 kfree(log_blocks);
2106 get_desc_trans_len(desc) * 2134 kfree(real_blocks);
2107 sizeof(struct buffer_head *), p_s_sb);
2108 reiserfs_kfree(real_blocks,
2109 get_desc_trans_len(desc) *
2110 sizeof(struct buffer_head *), p_s_sb);
2111 reiserfs_warning(p_s_sb, 2135 reiserfs_warning(p_s_sb,
2112 "journal-1169: kmalloc failed, unable to mount FS"); 2136 "journal-1169: kmalloc failed, unable to mount FS");
2113 return -1; 2137 return -1;
@@ -2145,12 +2169,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2145 brelse_array(real_blocks, i); 2169 brelse_array(real_blocks, i);
2146 brelse(c_bh); 2170 brelse(c_bh);
2147 brelse(d_bh); 2171 brelse(d_bh);
2148 reiserfs_kfree(log_blocks, 2172 kfree(log_blocks);
2149 get_desc_trans_len(desc) * 2173 kfree(real_blocks);
2150 sizeof(struct buffer_head *), p_s_sb);
2151 reiserfs_kfree(real_blocks,
2152 get_desc_trans_len(desc) *
2153 sizeof(struct buffer_head *), p_s_sb);
2154 return -1; 2174 return -1;
2155 } 2175 }
2156 } 2176 }
@@ -2166,12 +2186,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2166 brelse_array(real_blocks, get_desc_trans_len(desc)); 2186 brelse_array(real_blocks, get_desc_trans_len(desc));
2167 brelse(c_bh); 2187 brelse(c_bh);
2168 brelse(d_bh); 2188 brelse(d_bh);
2169 reiserfs_kfree(log_blocks, 2189 kfree(log_blocks);
2170 get_desc_trans_len(desc) * 2190 kfree(real_blocks);
2171 sizeof(struct buffer_head *), p_s_sb);
2172 reiserfs_kfree(real_blocks,
2173 get_desc_trans_len(desc) *
2174 sizeof(struct buffer_head *), p_s_sb);
2175 return -1; 2191 return -1;
2176 } 2192 }
2177 memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, 2193 memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
@@ -2193,12 +2209,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2193 get_desc_trans_len(desc) - i); 2209 get_desc_trans_len(desc) - i);
2194 brelse(c_bh); 2210 brelse(c_bh);
2195 brelse(d_bh); 2211 brelse(d_bh);
2196 reiserfs_kfree(log_blocks, 2212 kfree(log_blocks);
2197 get_desc_trans_len(desc) * 2213 kfree(real_blocks);
2198 sizeof(struct buffer_head *), p_s_sb);
2199 reiserfs_kfree(real_blocks,
2200 get_desc_trans_len(desc) *
2201 sizeof(struct buffer_head *), p_s_sb);
2202 return -1; 2214 return -1;
2203 } 2215 }
2204 brelse(real_blocks[i]); 2216 brelse(real_blocks[i]);
@@ -2217,12 +2229,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
2217 journal->j_trans_id = trans_id + 1; 2229 journal->j_trans_id = trans_id + 1;
2218 brelse(c_bh); 2230 brelse(c_bh);
2219 brelse(d_bh); 2231 brelse(d_bh);
2220 reiserfs_kfree(log_blocks, 2232 kfree(log_blocks);
2221 le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), 2233 kfree(real_blocks);
2222 p_s_sb);
2223 reiserfs_kfree(real_blocks,
2224 le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
2225 p_s_sb);
2226 return 0; 2234 return 0;
2227} 2235}
2228 2236
@@ -2471,14 +2479,8 @@ static int journal_read(struct super_block *p_s_sb)
2471static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) 2479static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2472{ 2480{
2473 struct reiserfs_journal_list *jl; 2481 struct reiserfs_journal_list *jl;
2474 retry: 2482 jl = kzalloc(sizeof(struct reiserfs_journal_list),
2475 jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS, 2483 GFP_NOFS | __GFP_NOFAIL);
2476 s);
2477 if (!jl) {
2478 yield();
2479 goto retry;
2480 }
2481 memset(jl, 0, sizeof(*jl));
2482 INIT_LIST_HEAD(&jl->j_list); 2484 INIT_LIST_HEAD(&jl->j_list);
2483 INIT_LIST_HEAD(&jl->j_working_list); 2485 INIT_LIST_HEAD(&jl->j_working_list);
2484 INIT_LIST_HEAD(&jl->j_tail_bh_list); 2486 INIT_LIST_HEAD(&jl->j_tail_bh_list);
@@ -2821,6 +2823,9 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2821 journal->j_cnode_free < (journal->j_trans_max * 3)) { 2823 journal->j_cnode_free < (journal->j_trans_max * 3)) {
2822 return 1; 2824 return 1;
2823 } 2825 }
2826 /* protected by the BKL here */
2827 journal->j_len_alloc += new_alloc;
2828 th->t_blocks_allocated += new_alloc ;
2824 return 0; 2829 return 0;
2825} 2830}
2826 2831
@@ -3042,14 +3047,12 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3042 } 3047 }
3043 return th; 3048 return th;
3044 } 3049 }
3045 th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle), 3050 th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
3046 GFP_NOFS, s);
3047 if (!th) 3051 if (!th)
3048 return NULL; 3052 return NULL;
3049 ret = journal_begin(th, s, nblocks); 3053 ret = journal_begin(th, s, nblocks);
3050 if (ret) { 3054 if (ret) {
3051 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), 3055 kfree(th);
3052 s);
3053 return NULL; 3056 return NULL;
3054 } 3057 }
3055 3058
@@ -3067,8 +3070,7 @@ int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3067 ret = -EIO; 3070 ret = -EIO;
3068 if (th->t_refcount == 0) { 3071 if (th->t_refcount == 0) {
3069 SB_JOURNAL(s)->j_persistent_trans--; 3072 SB_JOURNAL(s)->j_persistent_trans--;
3070 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), 3073 kfree(th);
3071 s);
3072 } 3074 }
3073 return ret; 3075 return ret;
3074} 3076}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 8f8d8d01107c..c8123308e060 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -456,7 +456,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
456 /* get memory for composing the entry */ 456 /* get memory for composing the entry */
457 buflen = DEH_SIZE + ROUND_UP(namelen); 457 buflen = DEH_SIZE + ROUND_UP(namelen);
458 if (buflen > sizeof(small_buf)) { 458 if (buflen > sizeof(small_buf)) {
459 buffer = reiserfs_kmalloc(buflen, GFP_NOFS, dir->i_sb); 459 buffer = kmalloc(buflen, GFP_NOFS);
460 if (buffer == 0) 460 if (buffer == 0)
461 return -ENOMEM; 461 return -ENOMEM;
462 } else 462 } else
@@ -490,7 +490,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
490 retval = reiserfs_find_entry(dir, name, namelen, &path, &de); 490 retval = reiserfs_find_entry(dir, name, namelen, &path, &de);
491 if (retval != NAME_NOT_FOUND) { 491 if (retval != NAME_NOT_FOUND) {
492 if (buffer != small_buf) 492 if (buffer != small_buf)
493 reiserfs_kfree(buffer, buflen, dir->i_sb); 493 kfree(buffer);
494 pathrelse(&path); 494 pathrelse(&path);
495 495
496 if (retval == IO_ERROR) { 496 if (retval == IO_ERROR) {
@@ -515,7 +515,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
515 reiserfs_warning(dir->i_sb, 515 reiserfs_warning(dir->i_sb,
516 "reiserfs_add_entry: Congratulations! we have got hash function screwed up"); 516 "reiserfs_add_entry: Congratulations! we have got hash function screwed up");
517 if (buffer != small_buf) 517 if (buffer != small_buf)
518 reiserfs_kfree(buffer, buflen, dir->i_sb); 518 kfree(buffer);
519 pathrelse(&path); 519 pathrelse(&path);
520 return -EBUSY; 520 return -EBUSY;
521 } 521 }
@@ -535,7 +535,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
535 &entry_key); 535 &entry_key);
536 536
537 if (buffer != small_buf) 537 if (buffer != small_buf)
538 reiserfs_kfree(buffer, buflen, dir->i_sb); 538 kfree(buffer);
539 pathrelse(&path); 539 pathrelse(&path);
540 return -EBUSY; 540 return -EBUSY;
541 } 541 }
@@ -546,7 +546,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
546 reiserfs_paste_into_item(th, &path, &entry_key, dir, buffer, 546 reiserfs_paste_into_item(th, &path, &entry_key, dir, buffer,
547 paste_size); 547 paste_size);
548 if (buffer != small_buf) 548 if (buffer != small_buf)
549 reiserfs_kfree(buffer, buflen, dir->i_sb); 549 kfree(buffer);
550 if (retval) { 550 if (retval) {
551 reiserfs_check_path(&path); 551 reiserfs_check_path(&path);
552 return retval; 552 return retval;
@@ -1065,7 +1065,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
1065 goto out_failed; 1065 goto out_failed;
1066 } 1066 }
1067 1067
1068 name = reiserfs_kmalloc(item_len, GFP_NOFS, parent_dir->i_sb); 1068 name = kmalloc(item_len, GFP_NOFS);
1069 if (!name) { 1069 if (!name) {
1070 drop_new_inode(inode); 1070 drop_new_inode(inode);
1071 retval = -ENOMEM; 1071 retval = -ENOMEM;
@@ -1079,14 +1079,14 @@ static int reiserfs_symlink(struct inode *parent_dir,
1079 retval = journal_begin(&th, parent_dir->i_sb, jbegin_count); 1079 retval = journal_begin(&th, parent_dir->i_sb, jbegin_count);
1080 if (retval) { 1080 if (retval) {
1081 drop_new_inode(inode); 1081 drop_new_inode(inode);
1082 reiserfs_kfree(name, item_len, parent_dir->i_sb); 1082 kfree(name);
1083 goto out_failed; 1083 goto out_failed;
1084 } 1084 }
1085 1085
1086 retval = 1086 retval =
1087 reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname), 1087 reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname),
1088 dentry, inode); 1088 dentry, inode);
1089 reiserfs_kfree(name, item_len, parent_dir->i_sb); 1089 kfree(name);
1090 if (retval) { /* reiserfs_new_inode iputs for us */ 1090 if (retval) { /* reiserfs_new_inode iputs for us */
1091 goto out_failed; 1091 goto out_failed;
1092 } 1092 }
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index fc2f43c75df4..ef6caed9336b 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -88,7 +88,6 @@ static int show_super(struct seq_file *m, struct super_block *sb)
88 seq_printf(m, "state: \t%s\n" 88 seq_printf(m, "state: \t%s\n"
89 "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n" 89 "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n"
90 "gen. counter: \t%i\n" 90 "gen. counter: \t%i\n"
91 "s_kmallocs: \t%i\n"
92 "s_disk_reads: \t%i\n" 91 "s_disk_reads: \t%i\n"
93 "s_disk_writes: \t%i\n" 92 "s_disk_writes: \t%i\n"
94 "s_fix_nodes: \t%i\n" 93 "s_fix_nodes: \t%i\n"
@@ -128,7 +127,7 @@ static int show_super(struct seq_file *m, struct super_block *sb)
128 "SMALL_TAILS " : "NO_TAILS ", 127 "SMALL_TAILS " : "NO_TAILS ",
129 replay_only(sb) ? "REPLAY_ONLY " : "", 128 replay_only(sb) ? "REPLAY_ONLY " : "",
130 convert_reiserfs(sb) ? "CONV " : "", 129 convert_reiserfs(sb) ? "CONV " : "",
131 atomic_read(&r->s_generation_counter), SF(s_kmallocs), 130 atomic_read(&r->s_generation_counter),
132 SF(s_disk_reads), SF(s_disk_writes), SF(s_fix_nodes), 131 SF(s_disk_reads), SF(s_disk_writes), SF(s_fix_nodes),
133 SF(s_do_balance), SF(s_unneeded_left_neighbor), 132 SF(s_do_balance), SF(s_unneeded_left_neighbor),
134 SF(s_good_search_by_key_reada), SF(s_bmaps), 133 SF(s_good_search_by_key_reada), SF(s_bmaps),
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 397d9590c8f2..ef5e5414e7a8 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -472,12 +472,6 @@ static void reiserfs_put_super(struct super_block *s)
472 472
473 print_statistics(s); 473 print_statistics(s);
474 474
475 if (REISERFS_SB(s)->s_kmallocs != 0) {
476 reiserfs_warning(s,
477 "vs-2004: reiserfs_put_super: allocated memory left %d",
478 REISERFS_SB(s)->s_kmallocs);
479 }
480
481 if (REISERFS_SB(s)->reserved_blocks != 0) { 475 if (REISERFS_SB(s)->reserved_blocks != 0) {
482 reiserfs_warning(s, 476 reiserfs_warning(s,
483 "green-2005: reiserfs_put_super: reserved blocks left %d", 477 "green-2005: reiserfs_put_super: reserved blocks left %d",
@@ -1131,7 +1125,7 @@ static void handle_attrs(struct super_block *s)
1131 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); 1125 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1132 } 1126 }
1133 } else if (le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared) { 1127 } else if (le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared) {
1134 REISERFS_SB(s)->s_mount_opt |= REISERFS_ATTRS; 1128 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ATTRS);
1135 } 1129 }
1136} 1130}
1137 1131
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index cc061bfd437b..ffb79c48c5bf 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -368,15 +368,13 @@ static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
368 if (d_reclen <= 32) { 368 if (d_reclen <= 32) {
369 local_buf = small_buf; 369 local_buf = small_buf;
370 } else { 370 } else {
371 local_buf = 371 local_buf = kmalloc(d_reclen, GFP_NOFS);
372 reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb);
373 if (!local_buf) { 372 if (!local_buf) {
374 pathrelse(&path_to_entry); 373 pathrelse(&path_to_entry);
375 return -ENOMEM; 374 return -ENOMEM;
376 } 375 }
377 if (item_moved(&tmp_ih, &path_to_entry)) { 376 if (item_moved(&tmp_ih, &path_to_entry)) {
378 reiserfs_kfree(local_buf, d_reclen, 377 kfree(local_buf);
379 inode->i_sb);
380 378
381 /* sigh, must retry. Do this same offset again */ 379 /* sigh, must retry. Do this same offset again */
382 next_pos = d_off; 380 next_pos = d_off;
@@ -399,13 +397,12 @@ static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
399 if (filldir(dirent, local_buf, d_reclen, d_off, d_ino, 397 if (filldir(dirent, local_buf, d_reclen, d_off, d_ino,
400 DT_UNKNOWN) < 0) { 398 DT_UNKNOWN) < 0) {
401 if (local_buf != small_buf) { 399 if (local_buf != small_buf) {
402 reiserfs_kfree(local_buf, d_reclen, 400 kfree(local_buf);
403 inode->i_sb);
404 } 401 }
405 goto end; 402 goto end;
406 } 403 }
407 if (local_buf != small_buf) { 404 if (local_buf != small_buf) {
408 reiserfs_kfree(local_buf, d_reclen, inode->i_sb); 405 kfree(local_buf);
409 } 406 }
410 } /* while */ 407 } /* while */
411 408
@@ -1322,109 +1319,44 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1322 return err; 1319 return err;
1323} 1320}
1324 1321
1325static int 1322static int reiserfs_check_acl(struct inode *inode, int mask)
1326__reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
1327 int need_lock)
1328{ 1323{
1329 umode_t mode = inode->i_mode; 1324 struct posix_acl *acl;
1330 1325 int error = -EAGAIN; /* do regular unix permission checks by default */
1331 if (mask & MAY_WRITE) {
1332 /*
1333 * Nobody gets write access to a read-only fs.
1334 */
1335 if (IS_RDONLY(inode) &&
1336 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
1337 return -EROFS;
1338 1326
1339 /* 1327 reiserfs_read_lock_xattr_i(inode);
1340 * Nobody gets write access to an immutable file. 1328 reiserfs_read_lock_xattrs(inode->i_sb);
1341 */
1342 if (IS_IMMUTABLE(inode))
1343 return -EACCES;
1344 }
1345 1329
1346 /* We don't do permission checks on the internal objects. 1330 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
1347 * Permissions are determined by the "owning" object. */
1348 if (is_reiserfs_priv_object(inode))
1349 return 0;
1350 1331
1351 if (current->fsuid == inode->i_uid) { 1332 reiserfs_read_unlock_xattrs(inode->i_sb);
1352 mode >>= 6; 1333 reiserfs_read_unlock_xattr_i(inode);
1353#ifdef CONFIG_REISERFS_FS_POSIX_ACL
1354 } else if (reiserfs_posixacl(inode->i_sb) &&
1355 get_inode_sd_version(inode) != STAT_DATA_V1) {
1356 struct posix_acl *acl;
1357
1358 /* ACL can't contain additional permissions if
1359 the ACL_MASK entry is 0 */
1360 if (!(mode & S_IRWXG))
1361 goto check_groups;
1362
1363 if (need_lock) {
1364 reiserfs_read_lock_xattr_i(inode);
1365 reiserfs_read_lock_xattrs(inode->i_sb);
1366 }
1367 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
1368 if (need_lock) {
1369 reiserfs_read_unlock_xattrs(inode->i_sb);
1370 reiserfs_read_unlock_xattr_i(inode);
1371 }
1372 if (IS_ERR(acl)) {
1373 if (PTR_ERR(acl) == -ENODATA)
1374 goto check_groups;
1375 return PTR_ERR(acl);
1376 }
1377 1334
1378 if (acl) { 1335 if (acl) {
1379 int err = posix_acl_permission(inode, acl, mask); 1336 if (!IS_ERR(acl)) {
1337 error = posix_acl_permission(inode, acl, mask);
1380 posix_acl_release(acl); 1338 posix_acl_release(acl);
1381 if (err == -EACCES) { 1339 } else if (PTR_ERR(acl) != -ENODATA)
1382 goto check_capabilities; 1340 error = PTR_ERR(acl);
1383 }
1384 return err;
1385 } else {
1386 goto check_groups;
1387 }
1388#endif
1389 } else {
1390 check_groups:
1391 if (in_group_p(inode->i_gid))
1392 mode >>= 3;
1393 } 1341 }
1394 1342
1395 /* 1343 return error;
1396 * If the DACs are ok we don't need any capability check. 1344}
1397 */
1398 if (((mode & mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == mask))
1399 return 0;
1400 1345
1401 check_capabilities: 1346int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
1347{
1402 /* 1348 /*
1403 * Read/write DACs are always overridable. 1349 * We don't do permission checks on the internal objects.
1404 * Executable DACs are overridable if at least one exec bit is set. 1350 * Permissions are determined by the "owning" object.
1405 */ 1351 */
1406 if (!(mask & MAY_EXEC) || 1352 if (is_reiserfs_priv_object(inode))
1407 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) 1353 return 0;
1408 if (capable(CAP_DAC_OVERRIDE))
1409 return 0;
1410 1354
1411 /* 1355 /*
1412 * Searching includes executable on directories, else just read. 1356 * Stat data v1 doesn't support ACLs.
1413 */ 1357 */
1414 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 1358 if (get_inode_sd_version(inode) == STAT_DATA_V1)
1415 if (capable(CAP_DAC_READ_SEARCH)) 1359 return generic_permission(inode, mask, NULL);
1416 return 0; 1360 else
1417 1361 return generic_permission(inode, mask, reiserfs_check_acl);
1418 return -EACCES;
1419}
1420
1421int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
1422{
1423 return __reiserfs_permission(inode, mask, nd, 1);
1424}
1425
1426int
1427reiserfs_permission_locked(struct inode *inode, int mask, struct nameidata *nd)
1428{
1429 return __reiserfs_permission(inode, mask, nd, 0);
1430} 1362}
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index c6c33e15143a..0424d06b147e 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -209,6 +209,8 @@ init_cache:
209 ctl.valid = 1; 209 ctl.valid = 1;
210read_really: 210read_really:
211 result = server->ops->readdir(filp, dirent, filldir, &ctl); 211 result = server->ops->readdir(filp, dirent, filldir, &ctl);
212 if (result == -ERESTARTSYS && page)
213 ClearPageUptodate(page);
212 if (ctl.idx == -1) 214 if (ctl.idx == -1)
213 goto invalid_cache; /* retry */ 215 goto invalid_cache; /* retry */
214 ctl.head.end = ctl.fpos - 1; 216 ctl.head.end = ctl.fpos - 1;
@@ -217,7 +219,8 @@ finished:
217 if (page) { 219 if (page) {
218 cache->head = ctl.head; 220 cache->head = ctl.head;
219 kunmap(page); 221 kunmap(page);
220 SetPageUptodate(page); 222 if (result != -ERESTARTSYS)
223 SetPageUptodate(page);
221 unlock_page(page); 224 unlock_page(page);
222 page_cache_release(page); 225 page_cache_release(page);
223 } 226 }
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 4fae57d9d115..201049ac8a96 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -579,10 +579,9 @@ static void udf_table_free_blocks(struct super_block * sb,
579 { 579 {
580 loffset = nextoffset; 580 loffset = nextoffset;
581 aed->lengthAllocDescs = cpu_to_le32(adsize); 581 aed->lengthAllocDescs = cpu_to_le32(adsize);
582 if (obh) 582 sptr = UDF_I_DATA(inode) + nextoffset -
583 sptr = UDF_I_DATA(inode) + nextoffset - udf_file_entry_alloc_offset(inode) + UDF_I_LENEATTR(inode) - adsize; 583 udf_file_entry_alloc_offset(inode) +
584 else 584 UDF_I_LENEATTR(inode) - adsize;
585 sptr = obh->b_data + nextoffset - adsize;
586 dptr = nbh->b_data + sizeof(struct allocExtDesc); 585 dptr = nbh->b_data + sizeof(struct allocExtDesc);
587 memcpy(dptr, sptr, adsize); 586 memcpy(dptr, sptr, adsize);
588 nextoffset = sizeof(struct allocExtDesc) + adsize; 587 nextoffset = sizeof(struct allocExtDesc) + adsize;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index ca732e79c48b..ab9a7629d23e 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -296,7 +296,7 @@ static struct dentry *
296udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 296udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
297{ 297{
298 struct inode *inode = NULL; 298 struct inode *inode = NULL;
299 struct fileIdentDesc cfi, *fi; 299 struct fileIdentDesc cfi;
300 struct udf_fileident_bh fibh; 300 struct udf_fileident_bh fibh;
301 301
302 if (dentry->d_name.len > UDF_NAME_LEN-2) 302 if (dentry->d_name.len > UDF_NAME_LEN-2)
@@ -318,7 +318,7 @@ udf_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
318 else 318 else
319#endif /* UDF_RECOVERY */ 319#endif /* UDF_RECOVERY */
320 320
321 if ((fi = udf_find_entry(dir, dentry, &fibh, &cfi))) 321 if (udf_find_entry(dir, dentry, &fibh, &cfi))
322 { 322 {
323 if (fibh.sbh != fibh.ebh) 323 if (fibh.sbh != fibh.ebh)
324 udf_release_data(fibh.ebh); 324 udf_release_data(fibh.ebh);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e0c04e36a051..3c3f62ce2ad9 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -376,7 +376,7 @@ out:
376 * This function gets the block which contains the fragment. 376 * This function gets the block which contains the fragment.
377 */ 377 */
378 378
379static int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) 379int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create)
380{ 380{
381 struct super_block * sb = inode->i_sb; 381 struct super_block * sb = inode->i_sb;
382 struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi; 382 struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index d4aacee593ff..e9055ef7f5ac 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -388,7 +388,8 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
388/* 388/*
389 * Read on-disk structures associated with cylinder groups 389 * Read on-disk structures associated with cylinder groups
390 */ 390 */
391static int ufs_read_cylinder_structures (struct super_block *sb) { 391static int ufs_read_cylinder_structures (struct super_block *sb)
392{
392 struct ufs_sb_info * sbi = UFS_SB(sb); 393 struct ufs_sb_info * sbi = UFS_SB(sb);
393 struct ufs_sb_private_info * uspi; 394 struct ufs_sb_private_info * uspi;
394 struct ufs_super_block *usb; 395 struct ufs_super_block *usb;
@@ -415,6 +416,7 @@ static int ufs_read_cylinder_structures (struct super_block *sb) {
415 base = space = kmalloc(size, GFP_KERNEL); 416 base = space = kmalloc(size, GFP_KERNEL);
416 if (!base) 417 if (!base)
417 goto failed; 418 goto failed;
419 sbi->s_csp = (struct ufs_csum *)space;
418 for (i = 0; i < blks; i += uspi->s_fpb) { 420 for (i = 0; i < blks; i += uspi->s_fpb) {
419 size = uspi->s_bsize; 421 size = uspi->s_bsize;
420 if (i + uspi->s_fpb > blks) 422 if (i + uspi->s_fpb > blks)
@@ -430,7 +432,6 @@ static int ufs_read_cylinder_structures (struct super_block *sb) {
430 goto failed; 432 goto failed;
431 433
432 ubh_ubhcpymem (space, ubh, size); 434 ubh_ubhcpymem (space, ubh, size);
433 sbi->s_csp[ufs_fragstoblks(i)]=(struct ufs_csum *)space;
434 435
435 space += size; 436 space += size;
436 ubh_brelse (ubh); 437 ubh_brelse (ubh);
@@ -486,7 +487,8 @@ failed:
486 * Put on-disk structures associated with cylinder groups and 487 * Put on-disk structures associated with cylinder groups and
487 * write them back to disk 488 * write them back to disk
488 */ 489 */
489static void ufs_put_cylinder_structures (struct super_block *sb) { 490static void ufs_put_cylinder_structures (struct super_block *sb)
491{
490 struct ufs_sb_info * sbi = UFS_SB(sb); 492 struct ufs_sb_info * sbi = UFS_SB(sb);
491 struct ufs_sb_private_info * uspi; 493 struct ufs_sb_private_info * uspi;
492 struct ufs_buffer_head * ubh; 494 struct ufs_buffer_head * ubh;
@@ -499,7 +501,7 @@ static void ufs_put_cylinder_structures (struct super_block *sb) {
499 501
500 size = uspi->s_cssize; 502 size = uspi->s_cssize;
501 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; 503 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
502 base = space = (char*) sbi->s_csp[0]; 504 base = space = (char*) sbi->s_csp;
503 for (i = 0; i < blks; i += uspi->s_fpb) { 505 for (i = 0; i < blks; i += uspi->s_fpb) {
504 size = uspi->s_bsize; 506 size = uspi->s_bsize;
505 if (i + uspi->s_fpb > blks) 507 if (i + uspi->s_fpb > blks)
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 61d2e35012a4..02e86291ef8a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -29,6 +29,11 @@
29 * Idea from Pierre del Perugia <delperug@gla.ecoledoc.ibp.fr> 29 * Idea from Pierre del Perugia <delperug@gla.ecoledoc.ibp.fr>
30 */ 30 */
31 31
32/*
33 * Modified to avoid infinite loop on 2006 by
34 * Evgeniy Dushistov <dushistov@mail.ru>
35 */
36
32#include <linux/errno.h> 37#include <linux/errno.h>
33#include <linux/fs.h> 38#include <linux/fs.h>
34#include <linux/ufs_fs.h> 39#include <linux/ufs_fs.h>
@@ -65,19 +70,16 @@
65#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift) 70#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift)
66#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift) 71#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
67 72
68#define DATA_BUFFER_USED(bh) \
69 (atomic_read(&bh->b_count)>1 || buffer_locked(bh))
70 73
71static int ufs_trunc_direct (struct inode * inode) 74static int ufs_trunc_direct (struct inode * inode)
72{ 75{
73 struct ufs_inode_info *ufsi = UFS_I(inode); 76 struct ufs_inode_info *ufsi = UFS_I(inode);
74 struct super_block * sb; 77 struct super_block * sb;
75 struct ufs_sb_private_info * uspi; 78 struct ufs_sb_private_info * uspi;
76 struct buffer_head * bh;
77 __fs32 * p; 79 __fs32 * p;
78 unsigned frag1, frag2, frag3, frag4, block1, block2; 80 unsigned frag1, frag2, frag3, frag4, block1, block2;
79 unsigned frag_to_free, free_count; 81 unsigned frag_to_free, free_count;
80 unsigned i, j, tmp; 82 unsigned i, tmp;
81 int retry; 83 int retry;
82 84
83 UFSD(("ENTER\n")) 85 UFSD(("ENTER\n"))
@@ -117,15 +119,7 @@ static int ufs_trunc_direct (struct inode * inode)
117 ufs_panic (sb, "ufs_trunc_direct", "internal error"); 119 ufs_panic (sb, "ufs_trunc_direct", "internal error");
118 frag1 = ufs_fragnum (frag1); 120 frag1 = ufs_fragnum (frag1);
119 frag2 = ufs_fragnum (frag2); 121 frag2 = ufs_fragnum (frag2);
120 for (j = frag1; j < frag2; j++) { 122
121 bh = sb_find_get_block (sb, tmp + j);
122 if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) {
123 retry = 1;
124 brelse (bh);
125 goto next1;
126 }
127 bforget (bh);
128 }
129 inode->i_blocks -= (frag2-frag1) << uspi->s_nspfshift; 123 inode->i_blocks -= (frag2-frag1) << uspi->s_nspfshift;
130 mark_inode_dirty(inode); 124 mark_inode_dirty(inode);
131 ufs_free_fragments (inode, tmp + frag1, frag2 - frag1); 125 ufs_free_fragments (inode, tmp + frag1, frag2 - frag1);
@@ -140,15 +134,7 @@ next1:
140 tmp = fs32_to_cpu(sb, *p); 134 tmp = fs32_to_cpu(sb, *p);
141 if (!tmp) 135 if (!tmp)
142 continue; 136 continue;
143 for (j = 0; j < uspi->s_fpb; j++) { 137
144 bh = sb_find_get_block(sb, tmp + j);
145 if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) {
146 retry = 1;
147 brelse (bh);
148 goto next2;
149 }
150 bforget (bh);
151 }
152 *p = 0; 138 *p = 0;
153 inode->i_blocks -= uspi->s_nspb; 139 inode->i_blocks -= uspi->s_nspb;
154 mark_inode_dirty(inode); 140 mark_inode_dirty(inode);
@@ -162,7 +148,6 @@ next1:
162 frag_to_free = tmp; 148 frag_to_free = tmp;
163 free_count = uspi->s_fpb; 149 free_count = uspi->s_fpb;
164 } 150 }
165next2:;
166 } 151 }
167 152
168 if (free_count > 0) 153 if (free_count > 0)
@@ -179,15 +164,7 @@ next2:;
179 if (!tmp ) 164 if (!tmp )
180 ufs_panic(sb, "ufs_truncate_direct", "internal error"); 165 ufs_panic(sb, "ufs_truncate_direct", "internal error");
181 frag4 = ufs_fragnum (frag4); 166 frag4 = ufs_fragnum (frag4);
182 for (j = 0; j < frag4; j++) { 167
183 bh = sb_find_get_block (sb, tmp + j);
184 if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) {
185 retry = 1;
186 brelse (bh);
187 goto next1;
188 }
189 bforget (bh);
190 }
191 *p = 0; 168 *p = 0;
192 inode->i_blocks -= frag4 << uspi->s_nspfshift; 169 inode->i_blocks -= frag4 << uspi->s_nspfshift;
193 mark_inode_dirty(inode); 170 mark_inode_dirty(inode);
@@ -204,9 +181,8 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
204 struct super_block * sb; 181 struct super_block * sb;
205 struct ufs_sb_private_info * uspi; 182 struct ufs_sb_private_info * uspi;
206 struct ufs_buffer_head * ind_ubh; 183 struct ufs_buffer_head * ind_ubh;
207 struct buffer_head * bh;
208 __fs32 * ind; 184 __fs32 * ind;
209 unsigned indirect_block, i, j, tmp; 185 unsigned indirect_block, i, tmp;
210 unsigned frag_to_free, free_count; 186 unsigned frag_to_free, free_count;
211 int retry; 187 int retry;
212 188
@@ -238,15 +214,7 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
238 tmp = fs32_to_cpu(sb, *ind); 214 tmp = fs32_to_cpu(sb, *ind);
239 if (!tmp) 215 if (!tmp)
240 continue; 216 continue;
241 for (j = 0; j < uspi->s_fpb; j++) { 217
242 bh = sb_find_get_block(sb, tmp + j);
243 if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *ind)) {
244 retry = 1;
245 brelse (bh);
246 goto next;
247 }
248 bforget (bh);
249 }
250 *ind = 0; 218 *ind = 0;
251 ubh_mark_buffer_dirty(ind_ubh); 219 ubh_mark_buffer_dirty(ind_ubh);
252 if (free_count == 0) { 220 if (free_count == 0) {
@@ -261,7 +229,6 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
261 } 229 }
262 inode->i_blocks -= uspi->s_nspb; 230 inode->i_blocks -= uspi->s_nspb;
263 mark_inode_dirty(inode); 231 mark_inode_dirty(inode);
264next:;
265 } 232 }
266 233
267 if (free_count > 0) { 234 if (free_count > 0) {
@@ -430,9 +397,7 @@ void ufs_truncate (struct inode * inode)
430 struct ufs_inode_info *ufsi = UFS_I(inode); 397 struct ufs_inode_info *ufsi = UFS_I(inode);
431 struct super_block * sb; 398 struct super_block * sb;
432 struct ufs_sb_private_info * uspi; 399 struct ufs_sb_private_info * uspi;
433 struct buffer_head * bh; 400 int retry;
434 unsigned offset;
435 int err, retry;
436 401
437 UFSD(("ENTER\n")) 402 UFSD(("ENTER\n"))
438 sb = inode->i_sb; 403 sb = inode->i_sb;
@@ -442,6 +407,9 @@ void ufs_truncate (struct inode * inode)
442 return; 407 return;
443 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 408 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
444 return; 409 return;
410
411 block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
412
445 lock_kernel(); 413 lock_kernel();
446 while (1) { 414 while (1) {
447 retry = ufs_trunc_direct(inode); 415 retry = ufs_trunc_direct(inode);
@@ -457,15 +425,7 @@ void ufs_truncate (struct inode * inode)
457 blk_run_address_space(inode->i_mapping); 425 blk_run_address_space(inode->i_mapping);
458 yield(); 426 yield();
459 } 427 }
460 offset = inode->i_size & uspi->s_fshift; 428
461 if (offset) {
462 bh = ufs_bread (inode, inode->i_size >> uspi->s_fshift, 0, &err);
463 if (bh) {
464 memset (bh->b_data + offset, 0, uspi->s_fsize - offset);
465 mark_buffer_dirty (bh);
466 brelse (bh);
467 }
468 }
469 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 429 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
470 ufsi->i_lastfrag = DIRECT_FRAGMENT; 430 ufsi->i_lastfrag = DIRECT_FRAGMENT;
471 unlock_kernel(); 431 unlock_kernel();
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 120626789406..9892268e3005 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1462,4 +1462,5 @@ struct address_space_operations linvfs_aops = {
1462 .commit_write = generic_commit_write, 1462 .commit_write = generic_commit_write,
1463 .bmap = linvfs_bmap, 1463 .bmap = linvfs_bmap,
1464 .direct_IO = linvfs_direct_IO, 1464 .direct_IO = linvfs_direct_IO,
1465 .migratepage = buffer_migrate_page,
1465}; 1466};
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index e44b7c1a3a36..bfb4f2917bb6 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -822,6 +822,13 @@ xfs_buf_rele(
822 822
823 XB_TRACE(bp, "rele", bp->b_relse); 823 XB_TRACE(bp, "rele", bp->b_relse);
824 824
825 if (unlikely(!hash)) {
826 ASSERT(!bp->b_relse);
827 if (atomic_dec_and_test(&bp->b_hold))
828 xfs_buf_free(bp);
829 return;
830 }
831
825 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { 832 if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
826 if (bp->b_relse) { 833 if (bp->b_relse) {
827 atomic_inc(&bp->b_hold); 834 atomic_inc(&bp->b_hold);
@@ -1514,6 +1521,7 @@ xfs_mapping_buftarg(
1514 struct address_space *mapping; 1521 struct address_space *mapping;
1515 static struct address_space_operations mapping_aops = { 1522 static struct address_space_operations mapping_aops = {
1516 .sync_page = block_sync_page, 1523 .sync_page = block_sync_page,
1524 .migratepage = fail_migrate_page,
1517 }; 1525 };
1518 1526
1519 inode = new_inode(bdev->bd_inode->i_sb); 1527 inode = new_inode(bdev->bd_inode->i_sb);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 76c6df34d0db..eda7919b70a1 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -262,6 +262,31 @@ has_fs_struct(struct task_struct *task)
262 return (task->fs != init_task.fs); 262 return (task->fs != init_task.fs);
263} 263}
264 264
265STATIC inline void
266cleanup_inode(
267 vnode_t *dvp,
268 vnode_t *vp,
269 struct dentry *dentry,
270 int mode)
271{
272 struct dentry teardown = {};
273 int err2;
274
275 /* Oh, the horror.
276 * If we can't add the ACL or we fail in
277 * linvfs_init_security we must back out.
278 * ENOSPC can hit here, among other things.
279 */
280 teardown.d_inode = LINVFS_GET_IP(vp);
281 teardown.d_name = dentry->d_name;
282
283 if (S_ISDIR(mode))
284 VOP_RMDIR(dvp, &teardown, NULL, err2);
285 else
286 VOP_REMOVE(dvp, &teardown, NULL, err2);
287 VN_RELE(vp);
288}
289
265STATIC int 290STATIC int
266linvfs_mknod( 291linvfs_mknod(
267 struct inode *dir, 292 struct inode *dir,
@@ -316,30 +341,19 @@ linvfs_mknod(
316 } 341 }
317 342
318 if (!error) 343 if (!error)
344 {
319 error = linvfs_init_security(vp, dir); 345 error = linvfs_init_security(vp, dir);
346 if (error)
347 cleanup_inode(dvp, vp, dentry, mode);
348 }
320 349
321 if (default_acl) { 350 if (default_acl) {
322 if (!error) { 351 if (!error) {
323 error = _ACL_INHERIT(vp, &va, default_acl); 352 error = _ACL_INHERIT(vp, &va, default_acl);
324 if (!error) { 353 if (!error)
325 VMODIFY(vp); 354 VMODIFY(vp);
326 } else { 355 else
327 struct dentry teardown = {}; 356 cleanup_inode(dvp, vp, dentry, mode);
328 int err2;
329
330 /* Oh, the horror.
331 * If we can't add the ACL we must back out.
332 * ENOSPC can hit here, among other things.
333 */
334 teardown.d_inode = ip = LINVFS_GET_IP(vp);
335 teardown.d_name = dentry->d_name;
336
337 if (S_ISDIR(mode))
338 VOP_RMDIR(dvp, &teardown, NULL, err2);
339 else
340 VOP_REMOVE(dvp, &teardown, NULL, err2);
341 VN_RELE(vp);
342 }
343 } 357 }
344 _ACL_FREE(default_acl); 358 _ACL_FREE(default_acl);
345 } 359 }