aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorKumar Gala <galak@kernel.crashing.org>2006-02-28 12:25:22 -0500
committerKumar Gala <galak@kernel.crashing.org>2006-02-28 12:25:22 -0500
commit9585da3729e7e27bf22818625c10ac6c64ebb609 (patch)
tree8c8d46f7f2cc933fdf75a6a75cf593a63f7b8717 /fs
parent8080d5497146d5d27d9e8e78229d1adc7fe280cf (diff)
parent6749c5507388f3fc3719f57a54b540ee83f6661a (diff)
Merge branch 'master' of git+ssh://galak@master.kernel.org/pub/scm/linux/kernel/git/galak/powerpc
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c2
-rw-r--r--fs/binfmt_elf.c5
-rw-r--r--fs/cifs/cifssmb.c7
-rw-r--r--fs/cifs/connect.c8
-rw-r--r--fs/cifs/file.c14
-rw-r--r--fs/compat.c37
-rw-r--r--fs/compat_ioctl.c13
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext2/xattr.c6
-rw-r--r--fs/fuse/dev.c6
-rw-r--r--fs/fuse/file.c11
-rw-r--r--fs/jbd/checkpoint.c418
-rw-r--r--fs/jbd/commit.c3
-rw-r--r--fs/lockd/clntlock.c27
-rw-r--r--fs/lockd/svc4proc.c2
-rw-r--r--fs/lockd/svcproc.c2
-rw-r--r--fs/namei.c8
-rw-r--r--fs/ntfs/ChangeLog36
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/aops.c18
-rw-r--r--fs/ntfs/file.c10
-rw-r--r--fs/ntfs/inode.c49
-rw-r--r--fs/ntfs/layout.h25
-rw-r--r--fs/ntfs/mft.c8
-rw-r--r--fs/ntfs/ntfs.h10
-rw-r--r--fs/ntfs/super.c197
-rw-r--r--fs/ntfs/upcase.c10
-rw-r--r--fs/ntfs/volume.h28
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c12
-rw-r--r--fs/ocfs2/dlm/dlmlock.c25
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c7
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c42
-rw-r--r--fs/ocfs2/journal.c7
-rw-r--r--fs/ocfs2/journal.h2
-rw-r--r--fs/proc/inode.c4
-rw-r--r--fs/proc/root.c17
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/reiserfs/xattr_acl.c3
-rw-r--r--fs/select.c32
-rw-r--r--fs/stat.c22
-rw-r--r--fs/super.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c2
-rw-r--r--fs/xfs/quota/xfs_qm.c11
-rw-r--r--fs/xfs/xfs_rtalloc.c29
46 files changed, 707 insertions, 495 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 5250c428fc..ef33865491 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -66,7 +66,7 @@ static match_table_t tokens = {
66 {Opt_afid, "afid=%u"}, 66 {Opt_afid, "afid=%u"},
67 {Opt_rfdno, "rfdno=%u"}, 67 {Opt_rfdno, "rfdno=%u"},
68 {Opt_wfdno, "wfdno=%u"}, 68 {Opt_wfdno, "wfdno=%u"},
69 {Opt_debug, "debug=%u"}, 69 {Opt_debug, "debug=%x"},
70 {Opt_name, "name=%s"}, 70 {Opt_name, "name=%s"},
71 {Opt_remotename, "aname=%s"}, 71 {Opt_remotename, "aname=%s"},
72 {Opt_unix, "proto=unix"}, 72 {Opt_unix, "proto=unix"},
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 1b117a4412..c2eac2a50b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -938,6 +938,11 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
938 kfree(elf_interpreter); 938 kfree(elf_interpreter);
939 } else { 939 } else {
940 elf_entry = loc->elf_ex.e_entry; 940 elf_entry = loc->elf_ex.e_entry;
941 if (BAD_ADDR(elf_entry)) {
942 send_sig(SIGSEGV, current, 0);
943 retval = -ENOEXEC; /* Nobody gets to see this, but.. */
944 goto out_free_dentry;
945 }
941 } 946 }
942 947
943 kfree(elf_phdata); 948 kfree(elf_phdata);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 217323b0c8..b41e8b3796 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1048,13 +1048,14 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
1048 cifs_small_buf_release(iov[0].iov_base); 1048 cifs_small_buf_release(iov[0].iov_base);
1049 else if(resp_buf_type == CIFS_LARGE_BUFFER) 1049 else if(resp_buf_type == CIFS_LARGE_BUFFER)
1050 cifs_buf_release(iov[0].iov_base); 1050 cifs_buf_release(iov[0].iov_base);
1051 } else /* return buffer to caller to free */ /* BB FIXME how do we tell caller if it is not a large buffer */ { 1051 } else if(resp_buf_type != CIFS_NO_BUFFER) {
1052 *buf = iov[0].iov_base; 1052 /* return buffer to caller to free */
1053 *buf = iov[0].iov_base;
1053 if(resp_buf_type == CIFS_SMALL_BUFFER) 1054 if(resp_buf_type == CIFS_SMALL_BUFFER)
1054 *pbuf_type = CIFS_SMALL_BUFFER; 1055 *pbuf_type = CIFS_SMALL_BUFFER;
1055 else if(resp_buf_type == CIFS_LARGE_BUFFER) 1056 else if(resp_buf_type == CIFS_LARGE_BUFFER)
1056 *pbuf_type = CIFS_LARGE_BUFFER; 1057 *pbuf_type = CIFS_LARGE_BUFFER;
1057 } 1058 } /* else no valid buffer on return - leave as null */
1058 1059
1059 /* Note: On -EAGAIN error only caller can retry on handle based calls 1060 /* Note: On -EAGAIN error only caller can retry on handle based calls
1060 since file handle passed in no longer valid */ 1061 since file handle passed in no longer valid */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e488603fb1..ef5ae6f93c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1795,10 +1795,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1795 conjunction with 52K kvec constraint on arch with 4K 1795 conjunction with 52K kvec constraint on arch with 4K
1796 page size */ 1796 page size */
1797 1797
1798 if(cifs_sb->rsize < PAGE_CACHE_SIZE) { 1798 if(cifs_sb->rsize < 2048) {
1799 cifs_sb->rsize = PAGE_CACHE_SIZE; 1799 cifs_sb->rsize = 2048;
1800 /* Windows ME does this */ 1800 /* Windows ME may prefer this */
1801 cFYI(1,("Attempt to set readsize for mount to less than one page (4096)")); 1801 cFYI(1,("readsize set to minimum 2048"));
1802 } 1802 }
1803 cifs_sb->mnt_uid = volume_info.linux_uid; 1803 cifs_sb->mnt_uid = volume_info.linux_uid;
1804 cifs_sb->mnt_gid = volume_info.linux_gid; 1804 cifs_sb->mnt_gid = volume_info.linux_gid;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index d17c97d07c..675bd25682 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1442,13 +1442,15 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1442 &bytes_read, &smb_read_data, 1442 &bytes_read, &smb_read_data,
1443 &buf_type); 1443 &buf_type);
1444 pSMBr = (struct smb_com_read_rsp *)smb_read_data; 1444 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1445 if (copy_to_user(current_offset,
1446 smb_read_data + 4 /* RFC1001 hdr */
1447 + le16_to_cpu(pSMBr->DataOffset),
1448 bytes_read)) {
1449 rc = -EFAULT;
1450 }
1451 if (smb_read_data) { 1445 if (smb_read_data) {
1446 if (copy_to_user(current_offset,
1447 smb_read_data +
1448 4 /* RFC1001 length field */ +
1449 le16_to_cpu(pSMBr->DataOffset),
1450 bytes_read)) {
1451 rc = -EFAULT;
1452 }
1453
1452 if(buf_type == CIFS_SMALL_BUFFER) 1454 if(buf_type == CIFS_SMALL_BUFFER)
1453 cifs_small_buf_release(smb_read_data); 1455 cifs_small_buf_release(smb_read_data);
1454 else if(buf_type == CIFS_LARGE_BUFFER) 1456 else if(buf_type == CIFS_LARGE_BUFFER)
diff --git a/fs/compat.c b/fs/compat.c
index 70c5af4cc2..5333c7d742 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1751,11 +1751,15 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1751 ret = compat_core_sys_select(n, inp, outp, exp, &timeout); 1751 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1752 1752
1753 if (tvp) { 1753 if (tvp) {
1754 struct compat_timeval rtv;
1755
1754 if (current->personality & STICKY_TIMEOUTS) 1756 if (current->personality & STICKY_TIMEOUTS)
1755 goto sticky; 1757 goto sticky;
1756 tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); 1758 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1757 tv.tv_sec = timeout; 1759 rtv.tv_sec = timeout;
1758 if (copy_to_user(tvp, &tv, sizeof(tv))) { 1760 if (compat_timeval_compare(&rtv, &tv) >= 0)
1761 rtv = tv;
1762 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
1759sticky: 1763sticky:
1760 /* 1764 /*
1761 * If an application puts its timeval in read-only 1765 * If an application puts its timeval in read-only
@@ -1822,13 +1826,17 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1822 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); 1826 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1823 1827
1824 if (tsp && !(current->personality & STICKY_TIMEOUTS)) { 1828 if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
1825 ts.tv_sec += timeout / HZ; 1829 struct compat_timespec rts;
1826 ts.tv_nsec += (timeout % HZ) * (1000000000/HZ); 1830
1827 if (ts.tv_nsec >= 1000000000) { 1831 rts.tv_sec = timeout / HZ;
1828 ts.tv_sec++; 1832 rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
1829 ts.tv_nsec -= 1000000000; 1833 if (rts.tv_nsec >= NSEC_PER_SEC) {
1834 rts.tv_sec++;
1835 rts.tv_nsec -= NSEC_PER_SEC;
1830 } 1836 }
1831 (void)copy_to_user(tsp, &ts, sizeof(ts)); 1837 if (compat_timespec_compare(&rts, &ts) >= 0)
1838 rts = ts;
1839 copy_to_user(tsp, &rts, sizeof(rts));
1832 } 1840 }
1833 1841
1834 if (ret == -ERESTARTNOHAND) { 1842 if (ret == -ERESTARTNOHAND) {
@@ -1918,12 +1926,17 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1918 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1926 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1919 1927
1920 if (tsp && timeout >= 0) { 1928 if (tsp && timeout >= 0) {
1929 struct compat_timespec rts;
1930
1921 if (current->personality & STICKY_TIMEOUTS) 1931 if (current->personality & STICKY_TIMEOUTS)
1922 goto sticky; 1932 goto sticky;
1923 /* Yes, we know it's actually an s64, but it's also positive. */ 1933 /* Yes, we know it's actually an s64, but it's also positive. */
1924 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; 1934 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
1925 ts.tv_sec = timeout; 1935 1000;
1926 if (copy_to_user(tsp, &ts, sizeof(ts))) { 1936 rts.tv_sec = timeout;
1937 if (compat_timespec_compare(&rts, &ts) >= 0)
1938 rts = ts;
1939 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1927sticky: 1940sticky:
1928 /* 1941 /*
1929 * If an application puts its timeval in read-only 1942 * If an application puts its timeval in read-only
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 057e60217f..537ac70edf 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2531,18 +2531,9 @@ static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
2531 val32 = kval; 2531 val32 = kval;
2532 return put_user(val32, (unsigned int __user *)arg); 2532 return put_user(val32, (unsigned int __user *)arg);
2533 case RTC_IRQP_SET32: 2533 case RTC_IRQP_SET32:
2534 return sys_ioctl(fd, RTC_IRQP_SET, arg);
2534 case RTC_EPOCH_SET32: 2535 case RTC_EPOCH_SET32:
2535 ret = get_user(val32, (unsigned int __user *)arg); 2536 return sys_ioctl(fd, RTC_EPOCH_SET, arg);
2536 if (ret)
2537 return ret;
2538 kval = val32;
2539
2540 set_fs(KERNEL_DS);
2541 ret = sys_ioctl(fd, (cmd == RTC_IRQP_SET32) ?
2542 RTC_IRQP_SET : RTC_EPOCH_SET,
2543 (unsigned long)&kval);
2544 set_fs(oldfs);
2545 return ret;
2546 default: 2537 default:
2547 /* unreached */ 2538 /* unreached */
2548 return -ENOIOCTLCMD; 2539 return -ENOIOCTLCMD;
diff --git a/fs/exec.c b/fs/exec.c
index 055378d251..0e1c95074d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1403,7 +1403,7 @@ static void zap_threads (struct mm_struct *mm)
1403 do_each_thread(g,p) { 1403 do_each_thread(g,p) {
1404 if (mm == p->mm && p != tsk && 1404 if (mm == p->mm && p != tsk &&
1405 p->ptrace && p->parent->mm == mm) { 1405 p->ptrace && p->parent->mm == mm) {
1406 __ptrace_unlink(p); 1406 __ptrace_detach(p, 0);
1407 } 1407 }
1408 } while_each_thread(g,p); 1408 } while_each_thread(g,p);
1409 write_unlock_irq(&tasklist_lock); 1409 write_unlock_irq(&tasklist_lock);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index a2ca3107d4..86ae8e93ad 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -792,18 +792,20 @@ ext2_xattr_delete_inode(struct inode *inode)
792 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1); 792 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
793 get_bh(bh); 793 get_bh(bh);
794 bforget(bh); 794 bforget(bh);
795 unlock_buffer(bh);
795 } else { 796 } else {
796 HDR(bh)->h_refcount = cpu_to_le32( 797 HDR(bh)->h_refcount = cpu_to_le32(
797 le32_to_cpu(HDR(bh)->h_refcount) - 1); 798 le32_to_cpu(HDR(bh)->h_refcount) - 1);
798 if (ce) 799 if (ce)
799 mb_cache_entry_release(ce); 800 mb_cache_entry_release(ce);
801 ea_bdebug(bh, "refcount now=%d",
802 le32_to_cpu(HDR(bh)->h_refcount));
803 unlock_buffer(bh);
800 mark_buffer_dirty(bh); 804 mark_buffer_dirty(bh);
801 if (IS_SYNC(inode)) 805 if (IS_SYNC(inode))
802 sync_dirty_buffer(bh); 806 sync_dirty_buffer(bh);
803 DQUOT_FREE_BLOCK(inode, 1); 807 DQUOT_FREE_BLOCK(inode, 1);
804 } 808 }
805 ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
806 unlock_buffer(bh);
807 EXT2_I(inode)->i_file_acl = 0; 809 EXT2_I(inode)->i_file_acl = 0;
808 810
809cleanup: 811cleanup:
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f556a0d5c0..0c9a2ee54c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -66,6 +66,12 @@ static void restore_sigs(sigset_t *oldset)
66 sigprocmask(SIG_SETMASK, oldset, NULL); 66 sigprocmask(SIG_SETMASK, oldset, NULL);
67} 67}
68 68
69/*
70 * Reset request, so that it can be reused
71 *
72 * The caller must be _very_ careful to make sure, that it is holding
73 * the only reference to req
74 */
69void fuse_reset_request(struct fuse_req *req) 75void fuse_reset_request(struct fuse_req *req)
70{ 76{
71 int preallocated = req->preallocated; 77 int preallocated = req->preallocated;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 296351615b..6f05379b0a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -116,9 +116,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
116/* Special case for failed iget in CREATE */ 116/* Special case for failed iget in CREATE */
117static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) 117static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
118{ 118{
119 u64 nodeid = req->in.h.nodeid; 119 /* If called from end_io_requests(), req has more than one
120 fuse_reset_request(req); 120 reference and fuse_reset_request() cannot work */
121 fuse_send_forget(fc, req, nodeid, 1); 121 if (fc->connected) {
122 u64 nodeid = req->in.h.nodeid;
123 fuse_reset_request(req);
124 fuse_send_forget(fc, req, nodeid, 1);
125 } else
126 fuse_put_request(fc, req);
122} 127}
123 128
124void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, 129void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e6265a0b56..543ed543d1 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,75 +24,29 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25 25
26/* 26/*
27 * Unlink a buffer from a transaction checkpoint list. 27 * Unlink a buffer from a transaction.
28 * 28 *
29 * Called with j_list_lock held. 29 * Called with j_list_lock held.
30 */ 30 */
31 31
32static void __buffer_unlink_first(struct journal_head *jh) 32static inline void __buffer_unlink(struct journal_head *jh)
33{ 33{
34 transaction_t *transaction; 34 transaction_t *transaction;
35 35
36 transaction = jh->b_cp_transaction; 36 transaction = jh->b_cp_transaction;
37 jh->b_cp_transaction = NULL;
37 38
38 jh->b_cpnext->b_cpprev = jh->b_cpprev; 39 jh->b_cpnext->b_cpprev = jh->b_cpprev;
39 jh->b_cpprev->b_cpnext = jh->b_cpnext; 40 jh->b_cpprev->b_cpnext = jh->b_cpnext;
40 if (transaction->t_checkpoint_list == jh) { 41 if (transaction->t_checkpoint_list == jh)
41 transaction->t_checkpoint_list = jh->b_cpnext; 42 transaction->t_checkpoint_list = jh->b_cpnext;
42 if (transaction->t_checkpoint_list == jh) 43 if (transaction->t_checkpoint_list == jh)
43 transaction->t_checkpoint_list = NULL; 44 transaction->t_checkpoint_list = NULL;
44 }
45}
46
47/*
48 * Unlink a buffer from a transaction checkpoint(io) list.
49 *
50 * Called with j_list_lock held.
51 */
52
53static inline void __buffer_unlink(struct journal_head *jh)
54{
55 transaction_t *transaction;
56
57 transaction = jh->b_cp_transaction;
58
59 __buffer_unlink_first(jh);
60 if (transaction->t_checkpoint_io_list == jh) {
61 transaction->t_checkpoint_io_list = jh->b_cpnext;
62 if (transaction->t_checkpoint_io_list == jh)
63 transaction->t_checkpoint_io_list = NULL;
64 }
65}
66
67/*
68 * Move a buffer from the checkpoint list to the checkpoint io list
69 *
70 * Called with j_list_lock held
71 */
72
73static inline void __buffer_relink_io(struct journal_head *jh)
74{
75 transaction_t *transaction;
76
77 transaction = jh->b_cp_transaction;
78 __buffer_unlink_first(jh);
79
80 if (!transaction->t_checkpoint_io_list) {
81 jh->b_cpnext = jh->b_cpprev = jh;
82 } else {
83 jh->b_cpnext = transaction->t_checkpoint_io_list;
84 jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
85 jh->b_cpprev->b_cpnext = jh;
86 jh->b_cpnext->b_cpprev = jh;
87 }
88 transaction->t_checkpoint_io_list = jh;
89} 45}
90 46
91/* 47/*
92 * Try to release a checkpointed buffer from its transaction. 48 * Try to release a checkpointed buffer from its transaction.
93 * Returns 1 if we released it and 2 if we also released the 49 * Returns 1 if we released it.
94 * whole transaction.
95 *
96 * Requires j_list_lock 50 * Requires j_list_lock
97 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 51 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
98 */ 52 */
@@ -103,11 +57,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
103 57
104 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 58 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
105 JBUFFER_TRACE(jh, "remove from checkpoint list"); 59 JBUFFER_TRACE(jh, "remove from checkpoint list");
106 ret = __journal_remove_checkpoint(jh) + 1; 60 __journal_remove_checkpoint(jh);
107 jbd_unlock_bh_state(bh); 61 jbd_unlock_bh_state(bh);
108 journal_remove_journal_head(bh); 62 journal_remove_journal_head(bh);
109 BUFFER_TRACE(bh, "release"); 63 BUFFER_TRACE(bh, "release");
110 __brelse(bh); 64 __brelse(bh);
65 ret = 1;
111 } else { 66 } else {
112 jbd_unlock_bh_state(bh); 67 jbd_unlock_bh_state(bh);
113 } 68 }
@@ -162,53 +117,83 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
162} 117}
163 118
164/* 119/*
165 * Clean up transaction's list of buffers submitted for io. 120 * Clean up a transaction's checkpoint list.
166 * We wait for any pending IO to complete and remove any clean 121 *
167 * buffers. Note that we take the buffers in the opposite ordering 122 * We wait for any pending IO to complete and make sure any clean
168 * from the one in which they were submitted for IO. 123 * buffers are removed from the transaction.
124 *
125 * Return 1 if we performed any actions which might have destroyed the
126 * checkpoint. (journal_remove_checkpoint() deletes the transaction when
127 * the last checkpoint buffer is cleansed)
169 * 128 *
170 * Called with j_list_lock held. 129 * Called with j_list_lock held.
171 */ 130 */
172 131static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
173static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
174{ 132{
175 struct journal_head *jh; 133 struct journal_head *jh, *next_jh, *last_jh;
176 struct buffer_head *bh; 134 struct buffer_head *bh;
177 tid_t this_tid; 135 int ret = 0;
178 int released = 0; 136
179 137 assert_spin_locked(&journal->j_list_lock);
180 this_tid = transaction->t_tid; 138 jh = transaction->t_checkpoint_list;
181restart: 139 if (!jh)
182 /* Didn't somebody clean up the transaction in the meanwhile */ 140 return 0;
183 if (journal->j_checkpoint_transactions != transaction || 141
184 transaction->t_tid != this_tid) 142 last_jh = jh->b_cpprev;
185 return; 143 next_jh = jh;
186 while (!released && transaction->t_checkpoint_io_list) { 144 do {
187 jh = transaction->t_checkpoint_io_list; 145 jh = next_jh;
188 bh = jh2bh(jh); 146 bh = jh2bh(jh);
189 if (!jbd_trylock_bh_state(bh)) {
190 jbd_sync_bh(journal, bh);
191 spin_lock(&journal->j_list_lock);
192 goto restart;
193 }
194 if (buffer_locked(bh)) { 147 if (buffer_locked(bh)) {
195 atomic_inc(&bh->b_count); 148 atomic_inc(&bh->b_count);
196 spin_unlock(&journal->j_list_lock); 149 spin_unlock(&journal->j_list_lock);
197 jbd_unlock_bh_state(bh);
198 wait_on_buffer(bh); 150 wait_on_buffer(bh);
199 /* the journal_head may have gone by now */ 151 /* the journal_head may have gone by now */
200 BUFFER_TRACE(bh, "brelse"); 152 BUFFER_TRACE(bh, "brelse");
201 __brelse(bh); 153 __brelse(bh);
202 spin_lock(&journal->j_list_lock); 154 goto out_return_1;
203 goto restart;
204 } 155 }
156
205 /* 157 /*
206 * Now in whatever state the buffer currently is, we know that 158 * This is foul
207 * it has been written out and so we can drop it from the list
208 */ 159 */
209 released = __journal_remove_checkpoint(jh); 160 if (!jbd_trylock_bh_state(bh)) {
210 jbd_unlock_bh_state(bh); 161 jbd_sync_bh(journal, bh);
211 } 162 goto out_return_1;
163 }
164
165 if (jh->b_transaction != NULL) {
166 transaction_t *t = jh->b_transaction;
167 tid_t tid = t->t_tid;
168
169 spin_unlock(&journal->j_list_lock);
170 jbd_unlock_bh_state(bh);
171 log_start_commit(journal, tid);
172 log_wait_commit(journal, tid);
173 goto out_return_1;
174 }
175
176 /*
177 * AKPM: I think the buffer_jbddirty test is redundant - it
178 * shouldn't have NULL b_transaction?
179 */
180 next_jh = jh->b_cpnext;
181 if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
182 BUFFER_TRACE(bh, "remove from checkpoint");
183 __journal_remove_checkpoint(jh);
184 jbd_unlock_bh_state(bh);
185 journal_remove_journal_head(bh);
186 __brelse(bh);
187 ret = 1;
188 } else {
189 jbd_unlock_bh_state(bh);
190 }
191 } while (jh != last_jh);
192
193 return ret;
194out_return_1:
195 spin_lock(&journal->j_list_lock);
196 return 1;
212} 197}
213 198
214#define NR_BATCH 64 199#define NR_BATCH 64
@@ -218,7 +203,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
218{ 203{
219 int i; 204 int i;
220 205
206 spin_unlock(&journal->j_list_lock);
221 ll_rw_block(SWRITE, *batch_count, bhs); 207 ll_rw_block(SWRITE, *batch_count, bhs);
208 spin_lock(&journal->j_list_lock);
222 for (i = 0; i < *batch_count; i++) { 209 for (i = 0; i < *batch_count; i++) {
223 struct buffer_head *bh = bhs[i]; 210 struct buffer_head *bh = bhs[i];
224 clear_buffer_jwrite(bh); 211 clear_buffer_jwrite(bh);
@@ -234,46 +221,19 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
234 * Return 1 if something happened which requires us to abort the current 221 * Return 1 if something happened which requires us to abort the current
235 * scan of the checkpoint list. 222 * scan of the checkpoint list.
236 * 223 *
237 * Called with j_list_lock held and drops it if 1 is returned 224 * Called with j_list_lock held.
238 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 225 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
239 */ 226 */
240static int __process_buffer(journal_t *journal, struct journal_head *jh, 227static int __flush_buffer(journal_t *journal, struct journal_head *jh,
241 struct buffer_head **bhs, int *batch_count) 228 struct buffer_head **bhs, int *batch_count,
229 int *drop_count)
242{ 230{
243 struct buffer_head *bh = jh2bh(jh); 231 struct buffer_head *bh = jh2bh(jh);
244 int ret = 0; 232 int ret = 0;
245 233
246 if (buffer_locked(bh)) { 234 if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
247 get_bh(bh); 235 J_ASSERT_JH(jh, jh->b_transaction == NULL);
248 spin_unlock(&journal->j_list_lock);
249 jbd_unlock_bh_state(bh);
250 wait_on_buffer(bh);
251 /* the journal_head may have gone by now */
252 BUFFER_TRACE(bh, "brelse");
253 put_bh(bh);
254 ret = 1;
255 }
256 else if (jh->b_transaction != NULL) {
257 transaction_t *t = jh->b_transaction;
258 tid_t tid = t->t_tid;
259 236
260 spin_unlock(&journal->j_list_lock);
261 jbd_unlock_bh_state(bh);
262 log_start_commit(journal, tid);
263 log_wait_commit(journal, tid);
264 ret = 1;
265 }
266 else if (!buffer_dirty(bh)) {
267 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
268 BUFFER_TRACE(bh, "remove from checkpoint");
269 __journal_remove_checkpoint(jh);
270 spin_unlock(&journal->j_list_lock);
271 jbd_unlock_bh_state(bh);
272 journal_remove_journal_head(bh);
273 put_bh(bh);
274 ret = 1;
275 }
276 else {
277 /* 237 /*
278 * Important: we are about to write the buffer, and 238 * Important: we are about to write the buffer, and
279 * possibly block, while still holding the journal lock. 239 * possibly block, while still holding the journal lock.
@@ -286,30 +246,45 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
286 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 246 J_ASSERT_BH(bh, !buffer_jwrite(bh));
287 set_buffer_jwrite(bh); 247 set_buffer_jwrite(bh);
288 bhs[*batch_count] = bh; 248 bhs[*batch_count] = bh;
289 __buffer_relink_io(jh);
290 jbd_unlock_bh_state(bh); 249 jbd_unlock_bh_state(bh);
291 (*batch_count)++; 250 (*batch_count)++;
292 if (*batch_count == NR_BATCH) { 251 if (*batch_count == NR_BATCH) {
293 spin_unlock(&journal->j_list_lock);
294 __flush_batch(journal, bhs, batch_count); 252 __flush_batch(journal, bhs, batch_count);
295 ret = 1; 253 ret = 1;
296 } 254 }
255 } else {
256 int last_buffer = 0;
257 if (jh->b_cpnext == jh) {
258 /* We may be about to drop the transaction. Tell the
259 * caller that the lists have changed.
260 */
261 last_buffer = 1;
262 }
263 if (__try_to_free_cp_buf(jh)) {
264 (*drop_count)++;
265 ret = last_buffer;
266 }
297 } 267 }
298 return ret; 268 return ret;
299} 269}
300 270
301/* 271/*
302 * Perform an actual checkpoint. We take the first transaction on the 272 * Perform an actual checkpoint. We don't write out only enough to
303 * list of transactions to be checkpointed and send all its buffers 273 * satisfy the current blocked requests: rather we submit a reasonably
304 * to disk. We submit larger chunks of data at once. 274 * sized chunk of the outstanding data to disk at once for
275 * efficiency. __log_wait_for_space() will retry if we didn't free enough.
305 * 276 *
277 * However, we _do_ take into account the amount requested so that once
278 * the IO has been queued, we can return as soon as enough of it has
279 * completed to disk.
280 *
306 * The journal should be locked before calling this function. 281 * The journal should be locked before calling this function.
307 */ 282 */
308int log_do_checkpoint(journal_t *journal) 283int log_do_checkpoint(journal_t *journal)
309{ 284{
310 transaction_t *transaction;
311 tid_t this_tid;
312 int result; 285 int result;
286 int batch_count = 0;
287 struct buffer_head *bhs[NR_BATCH];
313 288
314 jbd_debug(1, "Start checkpoint\n"); 289 jbd_debug(1, "Start checkpoint\n");
315 290
@@ -324,70 +299,79 @@ int log_do_checkpoint(journal_t *journal)
324 return result; 299 return result;
325 300
326 /* 301 /*
327 * OK, we need to start writing disk blocks. Take one transaction 302 * OK, we need to start writing disk blocks. Try to free up a
328 * and write it. 303 * quarter of the log in a single checkpoint if we can.
329 */ 304 */
330 spin_lock(&journal->j_list_lock);
331 if (!journal->j_checkpoint_transactions)
332 goto out;
333 transaction = journal->j_checkpoint_transactions;
334 this_tid = transaction->t_tid;
335restart:
336 /* 305 /*
337 * If someone cleaned up this transaction while we slept, we're 306 * AKPM: check this code. I had a feeling a while back that it
338 * done (maybe it's a new transaction, but it fell at the same 307 * degenerates into a busy loop at unmount time.
339 * address).
340 */ 308 */
341 if (journal->j_checkpoint_transactions == transaction && 309 spin_lock(&journal->j_list_lock);
342 transaction->t_tid == this_tid) { 310 while (journal->j_checkpoint_transactions) {
343 int batch_count = 0; 311 transaction_t *transaction;
344 struct buffer_head *bhs[NR_BATCH]; 312 struct journal_head *jh, *last_jh, *next_jh;
345 struct journal_head *jh; 313 int drop_count = 0;
346 int retry = 0; 314 int cleanup_ret, retry = 0;
347 315 tid_t this_tid;
348 while (!retry && transaction->t_checkpoint_list) { 316
317 transaction = journal->j_checkpoint_transactions;
318 this_tid = transaction->t_tid;
319 jh = transaction->t_checkpoint_list;
320 last_jh = jh->b_cpprev;
321 next_jh = jh;
322 do {
349 struct buffer_head *bh; 323 struct buffer_head *bh;
350 324
351 jh = transaction->t_checkpoint_list; 325 jh = next_jh;
326 next_jh = jh->b_cpnext;
352 bh = jh2bh(jh); 327 bh = jh2bh(jh);
353 if (!jbd_trylock_bh_state(bh)) { 328 if (!jbd_trylock_bh_state(bh)) {
354 jbd_sync_bh(journal, bh); 329 jbd_sync_bh(journal, bh);
330 spin_lock(&journal->j_list_lock);
355 retry = 1; 331 retry = 1;
356 break; 332 break;
357 } 333 }
358 retry = __process_buffer(journal, jh, bhs, 334 retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
359 &batch_count); 335 if (cond_resched_lock(&journal->j_list_lock)) {
360 if (!retry &&
361 lock_need_resched(&journal->j_list_lock)) {
362 spin_unlock(&journal->j_list_lock);
363 retry = 1; 336 retry = 1;
364 break; 337 break;
365 } 338 }
366 } 339 } while (jh != last_jh && !retry);
367 340
368 if (batch_count) { 341 if (batch_count) {
369 if (!retry) {
370 spin_unlock(&journal->j_list_lock);
371 retry = 1;
372 }
373 __flush_batch(journal, bhs, &batch_count); 342 __flush_batch(journal, bhs, &batch_count);
343 retry = 1;
374 } 344 }
375 345
376 if (retry) {
377 spin_lock(&journal->j_list_lock);
378 goto restart;
379 }
380 /* 346 /*
381 * Now we have cleaned up the first transaction's checkpoint 347 * If someone cleaned up this transaction while we slept, we're
382 * list. Let's clean up the second one. 348 * done
349 */
350 if (journal->j_checkpoint_transactions != transaction)
351 break;
352 if (retry)
353 continue;
354 /*
355 * Maybe it's a new transaction, but it fell at the same
356 * address
383 */ 357 */
384 __wait_cp_io(journal, transaction); 358 if (transaction->t_tid != this_tid)
359 continue;
360 /*
361 * We have walked the whole transaction list without
362 * finding anything to write to disk. We had better be
363 * able to make some progress or we are in trouble.
364 */
365 cleanup_ret = __cleanup_transaction(journal, transaction);
366 J_ASSERT(drop_count != 0 || cleanup_ret != 0);
367 if (journal->j_checkpoint_transactions != transaction)
368 break;
385 } 369 }
386out:
387 spin_unlock(&journal->j_list_lock); 370 spin_unlock(&journal->j_list_lock);
388 result = cleanup_journal_tail(journal); 371 result = cleanup_journal_tail(journal);
389 if (result < 0) 372 if (result < 0)
390 return result; 373 return result;
374
391 return 0; 375 return 0;
392} 376}
393 377
@@ -472,91 +456,52 @@ int cleanup_journal_tail(journal_t *journal)
472/* Checkpoint list management */ 456/* Checkpoint list management */
473 457
474/* 458/*
475 * journal_clean_one_cp_list
476 *
477 * Find all the written-back checkpoint buffers in the given list and release them.
478 *
479 * Called with the journal locked.
480 * Called with j_list_lock held.
481 * Returns number of bufers reaped (for debug)
482 */
483
484static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
485{
486 struct journal_head *last_jh;
487 struct journal_head *next_jh = jh;
488 int ret, freed = 0;
489
490 *released = 0;
491 if (!jh)
492 return 0;
493
494 last_jh = jh->b_cpprev;
495 do {
496 jh = next_jh;
497 next_jh = jh->b_cpnext;
498 /* Use trylock because of the ranking */
499 if (jbd_trylock_bh_state(jh2bh(jh))) {
500 ret = __try_to_free_cp_buf(jh);
501 if (ret) {
502 freed++;
503 if (ret == 2) {
504 *released = 1;
505 return freed;
506 }
507 }
508 }
509 /*
510 * This function only frees up some memory if possible so we
511 * dont have an obligation to finish processing. Bail out if
512 * preemption requested:
513 */
514 if (need_resched())
515 return freed;
516 } while (jh != last_jh);
517
518 return freed;
519}
520
521/*
522 * journal_clean_checkpoint_list 459 * journal_clean_checkpoint_list
523 * 460 *
524 * Find all the written-back checkpoint buffers in the journal and release them. 461 * Find all the written-back checkpoint buffers in the journal and release them.
525 * 462 *
526 * Called with the journal locked. 463 * Called with the journal locked.
527 * Called with j_list_lock held. 464 * Called with j_list_lock held.
528 * Returns number of buffers reaped (for debug) 465 * Returns number of bufers reaped (for debug)
529 */ 466 */
530 467
531int __journal_clean_checkpoint_list(journal_t *journal) 468int __journal_clean_checkpoint_list(journal_t *journal)
532{ 469{
533 transaction_t *transaction, *last_transaction, *next_transaction; 470 transaction_t *transaction, *last_transaction, *next_transaction;
534 int ret = 0, released; 471 int ret = 0;
535 472
536 transaction = journal->j_checkpoint_transactions; 473 transaction = journal->j_checkpoint_transactions;
537 if (!transaction) 474 if (transaction == 0)
538 goto out; 475 goto out;
539 476
540 last_transaction = transaction->t_cpprev; 477 last_transaction = transaction->t_cpprev;
541 next_transaction = transaction; 478 next_transaction = transaction;
542 do { 479 do {
480 struct journal_head *jh;
481
543 transaction = next_transaction; 482 transaction = next_transaction;
544 next_transaction = transaction->t_cpnext; 483 next_transaction = transaction->t_cpnext;
545 ret += journal_clean_one_cp_list(transaction-> 484 jh = transaction->t_checkpoint_list;
546 t_checkpoint_list, &released); 485 if (jh) {
547 if (need_resched()) 486 struct journal_head *last_jh = jh->b_cpprev;
548 goto out; 487 struct journal_head *next_jh = jh;
549 if (released) 488
550 continue; 489 do {
551 /* 490 jh = next_jh;
552 * It is essential that we are as careful as in the case of 491 next_jh = jh->b_cpnext;
553 * t_checkpoint_list with removing the buffer from the list as 492 /* Use trylock because of the ranknig */
554 * we can possibly see not yet submitted buffers on io_list 493 if (jbd_trylock_bh_state(jh2bh(jh)))
555 */ 494 ret += __try_to_free_cp_buf(jh);
556 ret += journal_clean_one_cp_list(transaction-> 495 /*
557 t_checkpoint_io_list, &released); 496 * This function only frees up some memory
558 if (need_resched()) 497 * if possible so we dont have an obligation
559 goto out; 498 * to finish processing. Bail out if preemption
499 * requested:
500 */
501 if (need_resched())
502 goto out;
503 } while (jh != last_jh);
504 }
560 } while (transaction != last_transaction); 505 } while (transaction != last_transaction);
561out: 506out:
562 return ret; 507 return ret;
@@ -571,22 +516,18 @@ out:
571 * buffer updates committed in that transaction have safely been stored 516 * buffer updates committed in that transaction have safely been stored
572 * elsewhere on disk. To achieve this, all of the buffers in a 517 * elsewhere on disk. To achieve this, all of the buffers in a
573 * transaction need to be maintained on the transaction's checkpoint 518 * transaction need to be maintained on the transaction's checkpoint
574 * lists until they have been rewritten, at which point this function is 519 * list until they have been rewritten, at which point this function is
575 * called to remove the buffer from the existing transaction's 520 * called to remove the buffer from the existing transaction's
576 * checkpoint lists. 521 * checkpoint list.
577 *
578 * The function returns 1 if it frees the transaction, 0 otherwise.
579 * 522 *
580 * This function is called with the journal locked. 523 * This function is called with the journal locked.
581 * This function is called with j_list_lock held. 524 * This function is called with j_list_lock held.
582 * This function is called with jbd_lock_bh_state(jh2bh(jh))
583 */ 525 */
584 526
585int __journal_remove_checkpoint(struct journal_head *jh) 527void __journal_remove_checkpoint(struct journal_head *jh)
586{ 528{
587 transaction_t *transaction; 529 transaction_t *transaction;
588 journal_t *journal; 530 journal_t *journal;
589 int ret = 0;
590 531
591 JBUFFER_TRACE(jh, "entry"); 532 JBUFFER_TRACE(jh, "entry");
592 533
@@ -597,10 +538,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
597 journal = transaction->t_journal; 538 journal = transaction->t_journal;
598 539
599 __buffer_unlink(jh); 540 __buffer_unlink(jh);
600 jh->b_cp_transaction = NULL;
601 541
602 if (transaction->t_checkpoint_list != NULL || 542 if (transaction->t_checkpoint_list != NULL)
603 transaction->t_checkpoint_io_list != NULL)
604 goto out; 543 goto out;
605 JBUFFER_TRACE(jh, "transaction has no more buffers"); 544 JBUFFER_TRACE(jh, "transaction has no more buffers");
606 545
@@ -626,10 +565,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
626 /* Just in case anybody was waiting for more transactions to be 565 /* Just in case anybody was waiting for more transactions to be
627 checkpointed... */ 566 checkpointed... */
628 wake_up(&journal->j_wait_logspace); 567 wake_up(&journal->j_wait_logspace);
629 ret = 1;
630out: 568out:
631 JBUFFER_TRACE(jh, "exit"); 569 JBUFFER_TRACE(jh, "exit");
632 return ret;
633} 570}
634 571
635/* 572/*
@@ -691,7 +628,6 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
691 J_ASSERT(transaction->t_shadow_list == NULL); 628 J_ASSERT(transaction->t_shadow_list == NULL);
692 J_ASSERT(transaction->t_log_list == NULL); 629 J_ASSERT(transaction->t_log_list == NULL);
693 J_ASSERT(transaction->t_checkpoint_list == NULL); 630 J_ASSERT(transaction->t_checkpoint_list == NULL);
694 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
695 J_ASSERT(transaction->t_updates == 0); 631 J_ASSERT(transaction->t_updates == 0);
696 J_ASSERT(journal->j_committing_transaction != transaction); 632 J_ASSERT(journal->j_committing_transaction != transaction);
697 J_ASSERT(journal->j_running_transaction != transaction); 633 J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 29e62d98ba..002ad2bbc7 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -829,8 +829,7 @@ restart_loop:
829 journal->j_committing_transaction = NULL; 829 journal->j_committing_transaction = NULL;
830 spin_unlock(&journal->j_state_lock); 830 spin_unlock(&journal->j_state_lock);
831 831
832 if (commit_transaction->t_checkpoint_list == NULL && 832 if (commit_transaction->t_checkpoint_list == NULL) {
833 commit_transaction->t_checkpoint_io_list == NULL) {
834 __journal_drop_transaction(journal, commit_transaction); 833 __journal_drop_transaction(journal, commit_transaction);
835 } else { 834 } else {
836 if (journal->j_checkpoint_transactions == NULL) { 835 if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 3eaf6e7010..da6354baa0 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -111,9 +111,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
111/* 111/*
112 * The server lockd has called us back to tell us the lock was granted 112 * The server lockd has called us back to tell us the lock was granted
113 */ 113 */
114u32 114u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
115nlmclnt_grant(struct nlm_lock *lock)
116{ 115{
116 const struct file_lock *fl = &lock->fl;
117 const struct nfs_fh *fh = &lock->fh;
117 struct nlm_wait *block; 118 struct nlm_wait *block;
118 u32 res = nlm_lck_denied; 119 u32 res = nlm_lck_denied;
119 120
@@ -122,14 +123,20 @@ nlmclnt_grant(struct nlm_lock *lock)
122 * Warning: must not use cookie to match it! 123 * Warning: must not use cookie to match it!
123 */ 124 */
124 list_for_each_entry(block, &nlm_blocked, b_list) { 125 list_for_each_entry(block, &nlm_blocked, b_list) {
125 if (nlm_compare_locks(block->b_lock, &lock->fl)) { 126 struct file_lock *fl_blocked = block->b_lock;
126 /* Alright, we found a lock. Set the return status 127
127 * and wake up the caller 128 if (!nlm_compare_locks(fl_blocked, fl))
128 */ 129 continue;
129 block->b_status = NLM_LCK_GRANTED; 130 if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
130 wake_up(&block->b_wait); 131 continue;
131 res = nlm_granted; 132 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0)
132 } 133 continue;
134 /* Alright, we found a lock. Set the return status
135 * and wake up the caller
136 */
137 block->b_status = NLM_LCK_GRANTED;
138 wake_up(&block->b_wait);
139 res = nlm_granted;
133 } 140 }
134 return res; 141 return res;
135} 142}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4063095d84..b10f913aa0 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -228,7 +228,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
228 resp->cookie = argp->cookie; 228 resp->cookie = argp->cookie;
229 229
230 dprintk("lockd: GRANTED called\n"); 230 dprintk("lockd: GRANTED called\n");
231 resp->status = nlmclnt_grant(&argp->lock); 231 resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
232 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 232 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
233 return rpc_success; 233 return rpc_success;
234} 234}
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3bc437e0cf..35681d9cf1 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -256,7 +256,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
256 resp->cookie = argp->cookie; 256 resp->cookie = argp->cookie;
257 257
258 dprintk("lockd: GRANTED called\n"); 258 dprintk("lockd: GRANTED called\n");
259 resp->status = nlmclnt_grant(&argp->lock); 259 resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
260 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); 260 dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
261 return rpc_success; 261 return rpc_success;
262} 262}
diff --git a/fs/namei.c b/fs/namei.c
index e28de846c5..557dcf395c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2224,13 +2224,17 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2224 * and other special files. --ADM 2224 * and other special files. --ADM
2225 */ 2225 */
2226asmlinkage long sys_linkat(int olddfd, const char __user *oldname, 2226asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2227 int newdfd, const char __user *newname) 2227 int newdfd, const char __user *newname,
2228 int flags)
2228{ 2229{
2229 struct dentry *new_dentry; 2230 struct dentry *new_dentry;
2230 struct nameidata nd, old_nd; 2231 struct nameidata nd, old_nd;
2231 int error; 2232 int error;
2232 char * to; 2233 char * to;
2233 2234
2235 if (flags != 0)
2236 return -EINVAL;
2237
2234 to = getname(newname); 2238 to = getname(newname);
2235 if (IS_ERR(to)) 2239 if (IS_ERR(to))
2236 return PTR_ERR(to); 2240 return PTR_ERR(to);
@@ -2263,7 +2267,7 @@ exit:
2263 2267
2264asmlinkage long sys_link(const char __user *oldname, const char __user *newname) 2268asmlinkage long sys_link(const char __user *oldname, const char __user *newname)
2265{ 2269{
2266 return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname); 2270 return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
2267} 2271}
2268 2272
2269/* 2273/*
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 02f44094bd..9d8ffa89e2 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -1,9 +1,9 @@
1ToDo/Notes: 1ToDo/Notes:
2 - Find and fix bugs. 2 - Find and fix bugs.
3 - The only places in the kernel where a file is resized are 3 - The only places in the kernel where a file is resized are
4 ntfs_file_write*() and ntfs_truncate() for both of which i_sem is 4 ntfs_file_write*() and ntfs_truncate() for both of which i_mutex is
5 held. Just have to be careful in read-/writepage and other helpers 5 held. Just have to be careful in read-/writepage and other helpers
6 not running under i_sem that we play nice... Also need to be careful 6 not running under i_mutex that we play nice. Also need to be careful
7 with initialized_size extension in ntfs_file_write*() and writepage. 7 with initialized_size extension in ntfs_file_write*() and writepage.
8 UPDATE: The only things that need to be checked are the compressed 8 UPDATE: The only things that need to be checked are the compressed
9 write and the other attribute resize/write cases like index 9 write and the other attribute resize/write cases like index
@@ -19,6 +19,24 @@ ToDo/Notes:
19 - Enable the code for setting the NT4 compatibility flag when we start 19 - Enable the code for setting the NT4 compatibility flag when we start
20 making NTFS 1.2 specific modifications. 20 making NTFS 1.2 specific modifications.
21 21
222.1.26 - Minor bug fixes and updates.
23
24 - Fix a potential overflow in file.c where a cast to s64 was missing in
25 a left shift of a page index.
26 - The struct inode has had its i_sem semaphore changed to a mutex named
27 i_mutex.
28 - We have struct kmem_cache now so use it instead of the typedef
29 kmem_cache_t. (Pekka Enberg)
30 - Implement support for sector sizes above 512 bytes (up to the maximum
31 supported by NTFS which is 4096 bytes).
32 - Do more detailed reporting of why we cannot mount read-write by
33 special casing the VOLUME_MODIFIED_BY_CHKDSK flag.
34 - Miscellaneous updates to layout.h.
35 - Cope with attribute list attribute having invalid flags. Windows
36 copes with this and even chkdsk does not detect or fix this so we
37 have to cope with it, too. Thanks to Pawel Kot for reporting the
38 problem.
39
222.1.25 - (Almost) fully implement write(2) and truncate(2). 402.1.25 - (Almost) fully implement write(2) and truncate(2).
23 41
24 - Change ntfs_map_runlist_nolock(), ntfs_attr_find_vcn_nolock() and 42 - Change ntfs_map_runlist_nolock(), ntfs_attr_find_vcn_nolock() and
@@ -373,7 +391,7 @@ ToDo/Notes:
373 single one of them had an mst error. (Thanks to Ken MacFerrin for 391 single one of them had an mst error. (Thanks to Ken MacFerrin for
374 the bug report.) 392 the bug report.)
375 - Fix error handling in fs/ntfs/quota.c::ntfs_mark_quotas_out_of_date() 393 - Fix error handling in fs/ntfs/quota.c::ntfs_mark_quotas_out_of_date()
376 where we failed to release i_sem on the $Quota/$Q attribute inode. 394 where we failed to release i_mutex on the $Quota/$Q attribute inode.
377 - Fix bug in handling of bad inodes in fs/ntfs/namei.c::ntfs_lookup(). 395 - Fix bug in handling of bad inodes in fs/ntfs/namei.c::ntfs_lookup().
378 - Add mapping of unmapped buffers to all remaining code paths, i.e. 396 - Add mapping of unmapped buffers to all remaining code paths, i.e.
379 fs/ntfs/aops.c::ntfs_write_mst_block(), mft.c::ntfs_sync_mft_mirror(), 397 fs/ntfs/aops.c::ntfs_write_mst_block(), mft.c::ntfs_sync_mft_mirror(),
@@ -874,7 +892,7 @@ ToDo/Notes:
874 clusters. (Philipp Thomas) 892 clusters. (Philipp Thomas)
875 - attrib.c::load_attribute_list(): Fix bug when initialized_size is a 893 - attrib.c::load_attribute_list(): Fix bug when initialized_size is a
876 multiple of the block_size but not the cluster size. (Szabolcs 894 multiple of the block_size but not the cluster size. (Szabolcs
877 Szakacsits <szaka@sienet.hu>) 895 Szakacsits)
878 896
8792.1.2 - Important bug fixes aleviating the hangs in statfs. 8972.1.2 - Important bug fixes aleviating the hangs in statfs.
880 898
@@ -884,7 +902,7 @@ ToDo/Notes:
884 902
885 - Add handling for initialized_size != data_size in compressed files. 903 - Add handling for initialized_size != data_size in compressed files.
886 - Reduce function local stack usage from 0x3d4 bytes to just noise in 904 - Reduce function local stack usage from 0x3d4 bytes to just noise in
887 fs/ntfs/upcase.c. (Randy Dunlap <rdunlap@xenotime.net>) 905 fs/ntfs/upcase.c. (Randy Dunlap)
888 - Remove compiler warnings for newer gcc. 906 - Remove compiler warnings for newer gcc.
889 - Pages are no longer kmapped by mm/filemap.c::generic_file_write() 907 - Pages are no longer kmapped by mm/filemap.c::generic_file_write()
890 around calls to ->{prepare,commit}_write. Adapt NTFS appropriately 908 around calls to ->{prepare,commit}_write. Adapt NTFS appropriately
@@ -1201,11 +1219,11 @@ ToDo/Notes:
1201 the kernel. We probably want a kernel generic init_address_space() 1219 the kernel. We probably want a kernel generic init_address_space()
1202 function... 1220 function...
1203 - Drop BKL from ntfs_readdir() after consultation with Al Viro. The 1221 - Drop BKL from ntfs_readdir() after consultation with Al Viro. The
1204 only caller of ->readdir() is vfs_readdir() which holds i_sem during 1222 only caller of ->readdir() is vfs_readdir() which holds i_mutex
1205 the call, and i_sem is sufficient protection against changes in the 1223 during the call, and i_mutex is sufficient protection against changes
1206 directory inode (including ->i_size). 1224 in the directory inode (including ->i_size).
1207 - Use generic_file_llseek() for directories (as opposed to 1225 - Use generic_file_llseek() for directories (as opposed to
1208 default_llseek()) as this downs i_sem instead of the BKL which is 1226 default_llseek()) as this downs i_mutex instead of the BKL which is
1209 what we now need for exclusion against ->f_pos changes considering we 1227 what we now need for exclusion against ->f_pos changes considering we
1210 no longer take the BKL in ntfs_readdir(). 1228 no longer take the BKL in ntfs_readdir().
1211 1229
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index d0d45d1c85..d95fac7fde 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.25\" 9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.26\"
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ifeq ($(CONFIG_NTFS_DEBUG),y)
12EXTRA_CFLAGS += -DDEBUG 12EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 1c0a431587..7e361da770 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -2,7 +2,7 @@
2 * aops.c - NTFS kernel address space operations and page cache handling. 2 * aops.c - NTFS kernel address space operations and page cache handling.
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2005 Anton Altaparmakov 5 * Copyright (c) 2001-2006 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -200,8 +200,8 @@ static int ntfs_read_block(struct page *page)
200 /* $MFT/$DATA must have its complete runlist in memory at all times. */ 200 /* $MFT/$DATA must have its complete runlist in memory at all times. */
201 BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni)); 201 BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
202 202
203 blocksize_bits = VFS_I(ni)->i_blkbits; 203 blocksize = vol->sb->s_blocksize;
204 blocksize = 1 << blocksize_bits; 204 blocksize_bits = vol->sb->s_blocksize_bits;
205 205
206 if (!page_has_buffers(page)) { 206 if (!page_has_buffers(page)) {
207 create_empty_buffers(page, blocksize, 0); 207 create_empty_buffers(page, blocksize, 0);
@@ -569,10 +569,8 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
569 569
570 BUG_ON(!NInoNonResident(ni)); 570 BUG_ON(!NInoNonResident(ni));
571 BUG_ON(NInoMstProtected(ni)); 571 BUG_ON(NInoMstProtected(ni));
572 572 blocksize = vol->sb->s_blocksize;
573 blocksize_bits = vi->i_blkbits; 573 blocksize_bits = vol->sb->s_blocksize_bits;
574 blocksize = 1 << blocksize_bits;
575
576 if (!page_has_buffers(page)) { 574 if (!page_has_buffers(page)) {
577 BUG_ON(!PageUptodate(page)); 575 BUG_ON(!PageUptodate(page));
578 create_empty_buffers(page, blocksize, 576 create_empty_buffers(page, blocksize,
@@ -949,8 +947,8 @@ static int ntfs_write_mst_block(struct page *page,
949 */ 947 */
950 BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) || 948 BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
951 (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION))); 949 (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
952 bh_size_bits = vi->i_blkbits; 950 bh_size = vol->sb->s_blocksize;
953 bh_size = 1 << bh_size_bits; 951 bh_size_bits = vol->sb->s_blocksize_bits;
954 max_bhs = PAGE_CACHE_SIZE / bh_size; 952 max_bhs = PAGE_CACHE_SIZE / bh_size;
955 BUG_ON(!max_bhs); 953 BUG_ON(!max_bhs);
956 BUG_ON(max_bhs > MAX_BUF_PER_PAGE); 954 BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
@@ -1596,7 +1594,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
1596 1594
1597 BUG_ON(!PageUptodate(page)); 1595 BUG_ON(!PageUptodate(page));
1598 end = ofs + ni->itype.index.block_size; 1596 end = ofs + ni->itype.index.block_size;
1599 bh_size = 1 << VFS_I(ni)->i_blkbits; 1597 bh_size = VFS_I(ni)->i_sb->s_blocksize;
1600 spin_lock(&mapping->private_lock); 1598 spin_lock(&mapping->private_lock);
1601 if (unlikely(!page_has_buffers(page))) { 1599 if (unlikely(!page_has_buffers(page))) {
1602 spin_unlock(&mapping->private_lock); 1600 spin_unlock(&mapping->private_lock);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index fb413d3d86..5027d3d1b3 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2005 Anton Altaparmakov 4 * Copyright (c) 2001-2006 Anton Altaparmakov
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -248,7 +248,7 @@ do_non_resident_extend:
248 * enough to make ntfs_writepage() work. 248 * enough to make ntfs_writepage() work.
249 */ 249 */
250 write_lock_irqsave(&ni->size_lock, flags); 250 write_lock_irqsave(&ni->size_lock, flags);
251 ni->initialized_size = (index + 1) << PAGE_CACHE_SHIFT; 251 ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT;
252 if (ni->initialized_size > new_init_size) 252 if (ni->initialized_size > new_init_size)
253 ni->initialized_size = new_init_size; 253 ni->initialized_size = new_init_size;
254 write_unlock_irqrestore(&ni->size_lock, flags); 254 write_unlock_irqrestore(&ni->size_lock, flags);
@@ -529,8 +529,8 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
529 "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.", 529 "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
530 vi->i_ino, ni->type, pages[0]->index, nr_pages, 530 vi->i_ino, ni->type, pages[0]->index, nr_pages,
531 (long long)pos, bytes); 531 (long long)pos, bytes);
532 blocksize_bits = vi->i_blkbits; 532 blocksize = vol->sb->s_blocksize;
533 blocksize = 1 << blocksize_bits; 533 blocksize_bits = vol->sb->s_blocksize_bits;
534 u = 0; 534 u = 0;
535 do { 535 do {
536 struct page *page = pages[u]; 536 struct page *page = pages[u];
@@ -1525,7 +1525,7 @@ static inline int ntfs_commit_pages_after_non_resident_write(
1525 1525
1526 vi = pages[0]->mapping->host; 1526 vi = pages[0]->mapping->host;
1527 ni = NTFS_I(vi); 1527 ni = NTFS_I(vi);
1528 blocksize = 1 << vi->i_blkbits; 1528 blocksize = vi->i_sb->s_blocksize;
1529 end = pos + bytes; 1529 end = pos + bytes;
1530 u = 0; 1530 u = 0;
1531 do { 1531 do {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index ea1bd3feea..55263b7de9 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -677,13 +677,28 @@ static int ntfs_read_locked_inode(struct inode *vi)
677 ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino); 677 ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
678 NInoSetAttrList(ni); 678 NInoSetAttrList(ni);
679 a = ctx->attr; 679 a = ctx->attr;
680 if (a->flags & ATTR_IS_ENCRYPTED || 680 if (a->flags & ATTR_COMPRESSION_MASK) {
681 a->flags & ATTR_COMPRESSION_MASK ||
682 a->flags & ATTR_IS_SPARSE) {
683 ntfs_error(vi->i_sb, "Attribute list attribute is " 681 ntfs_error(vi->i_sb, "Attribute list attribute is "
684 "compressed/encrypted/sparse."); 682 "compressed.");
685 goto unm_err_out; 683 goto unm_err_out;
686 } 684 }
685 if (a->flags & ATTR_IS_ENCRYPTED ||
686 a->flags & ATTR_IS_SPARSE) {
687 if (a->non_resident) {
688 ntfs_error(vi->i_sb, "Non-resident attribute "
689 "list attribute is encrypted/"
690 "sparse.");
691 goto unm_err_out;
692 }
693 ntfs_warning(vi->i_sb, "Resident attribute list "
694 "attribute in inode 0x%lx is marked "
695 "encrypted/sparse which is not true. "
696 "However, Windows allows this and "
697 "chkdsk does not detect or correct it "
698 "so we will just ignore the invalid "
699 "flags and pretend they are not set.",
700 vi->i_ino);
701 }
687 /* Now allocate memory for the attribute list. */ 702 /* Now allocate memory for the attribute list. */
688 ni->attr_list_size = (u32)ntfs_attr_size(a); 703 ni->attr_list_size = (u32)ntfs_attr_size(a);
689 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); 704 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
@@ -1809,19 +1824,33 @@ int ntfs_read_inode_mount(struct inode *vi)
1809 } else /* if (!err) */ { 1824 } else /* if (!err) */ {
1810 ATTR_LIST_ENTRY *al_entry, *next_al_entry; 1825 ATTR_LIST_ENTRY *al_entry, *next_al_entry;
1811 u8 *al_end; 1826 u8 *al_end;
1827 static const char *es = " Not allowed. $MFT is corrupt. "
1828 "You should run chkdsk.";
1812 1829
1813 ntfs_debug("Attribute list attribute found in $MFT."); 1830 ntfs_debug("Attribute list attribute found in $MFT.");
1814 NInoSetAttrList(ni); 1831 NInoSetAttrList(ni);
1815 a = ctx->attr; 1832 a = ctx->attr;
1816 if (a->flags & ATTR_IS_ENCRYPTED || 1833 if (a->flags & ATTR_COMPRESSION_MASK) {
1817 a->flags & ATTR_COMPRESSION_MASK ||
1818 a->flags & ATTR_IS_SPARSE) {
1819 ntfs_error(sb, "Attribute list attribute is " 1834 ntfs_error(sb, "Attribute list attribute is "
1820 "compressed/encrypted/sparse. Not " 1835 "compressed.%s", es);
1821 "allowed. $MFT is corrupt. You should "
1822 "run chkdsk.");
1823 goto put_err_out; 1836 goto put_err_out;
1824 } 1837 }
1838 if (a->flags & ATTR_IS_ENCRYPTED ||
1839 a->flags & ATTR_IS_SPARSE) {
1840 if (a->non_resident) {
1841 ntfs_error(sb, "Non-resident attribute list "
1842 "attribute is encrypted/"
1843 "sparse.%s", es);
1844 goto put_err_out;
1845 }
1846 ntfs_warning(sb, "Resident attribute list attribute "
1847 "in $MFT system file is marked "
1848 "encrypted/sparse which is not true. "
1849 "However, Windows allows this and "
1850 "chkdsk does not detect or correct it "
1851 "so we will just ignore the invalid "
1852 "flags and pretend they are not set.");
1853 }
1825 /* Now allocate memory for the attribute list. */ 1854 /* Now allocate memory for the attribute list. */
1826 ni->attr_list_size = (u32)ntfs_attr_size(a); 1855 ni->attr_list_size = (u32)ntfs_attr_size(a);
1827 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); 1856 ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index f5678d5d79..bb408d4dcb 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -838,15 +838,19 @@ enum {
838 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, 838 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
839 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask 839 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask
840 is used to to obtain all flags that are valid for setting. */ 840 is used to to obtain all flags that are valid for setting. */
841
842 /* 841 /*
843 * The following flags are only present in the FILE_NAME attribute (in 842 * The following flag is only present in the FILE_NAME attribute (in
844 * the field file_attributes). 843 * the field file_attributes).
845 */ 844 */
846 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), 845 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000),
847 /* Note, this is a copy of the corresponding bit from the mft record, 846 /* Note, this is a copy of the corresponding bit from the mft record,
848 telling us whether this is a directory or not, i.e. whether it has 847 telling us whether this is a directory or not, i.e. whether it has
849 an index root attribute or not. */ 848 an index root attribute or not. */
849 /*
850 * The following flag is present both in the STANDARD_INFORMATION
851 * attribute and in the FILE_NAME attribute (in the field
852 * file_attributes).
853 */
850 FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), 854 FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000),
851 /* Note, this is a copy of the corresponding bit from the mft record, 855 /* Note, this is a copy of the corresponding bit from the mft record,
852 telling us whether this file has a view index present (eg. object id 856 telling us whether this file has a view index present (eg. object id
@@ -1071,9 +1075,15 @@ typedef struct {
1071 modified. */ 1075 modified. */
1072/* 20*/ sle64 last_access_time; /* Time this mft record was last 1076/* 20*/ sle64 last_access_time; /* Time this mft record was last
1073 accessed. */ 1077 accessed. */
1074/* 28*/ sle64 allocated_size; /* Byte size of allocated space for the 1078/* 28*/ sle64 allocated_size; /* Byte size of on-disk allocated space
1075 data attribute. NOTE: Is a multiple 1079 for the data attribute. So for
1076 of the cluster size. */ 1080 normal $DATA, this is the
1081 allocated_size from the unnamed
1082 $DATA attribute and for compressed
1083 and/or sparse $DATA, this is the
1084 compressed_size from the unnamed
1085 $DATA attribute. NOTE: This is a
1086 multiple of the cluster size. */
1077/* 30*/ sle64 data_size; /* Byte size of actual data in data 1087/* 30*/ sle64 data_size; /* Byte size of actual data in data
1078 attribute. */ 1088 attribute. */
1079/* 38*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */ 1089/* 38*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */
@@ -1904,12 +1914,13 @@ enum {
1904 VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010), 1914 VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010),
1905 VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020), 1915 VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020),
1906 1916
1917 VOLUME_CHKDSK_UNDERWAY = const_cpu_to_le16(0x4000),
1907 VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000), 1918 VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000),
1908 1919
1909 VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f), 1920 VOLUME_FLAGS_MASK = const_cpu_to_le16(0xc03f),
1910 1921
1911 /* To make our life easier when checking if we must mount read-only. */ 1922 /* To make our life easier when checking if we must mount read-only. */
1912 VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0x8027), 1923 VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0xc027),
1913} __attribute__ ((__packed__)); 1924} __attribute__ ((__packed__));
1914 1925
1915typedef le16 VOLUME_FLAGS; 1926typedef le16 VOLUME_FLAGS;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 0c65cbb8c5..6499aafc22 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. 2 * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2005 Anton Altaparmakov 4 * Copyright (c) 2001-2006 Anton Altaparmakov
5 * Copyright (c) 2002 Richard Russon 5 * Copyright (c) 2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -473,7 +473,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
473 runlist_element *rl; 473 runlist_element *rl;
474 unsigned int block_start, block_end, m_start, m_end, page_ofs; 474 unsigned int block_start, block_end, m_start, m_end, page_ofs;
475 int i_bhs, nr_bhs, err = 0; 475 int i_bhs, nr_bhs, err = 0;
476 unsigned char blocksize_bits = vol->mftmirr_ino->i_blkbits; 476 unsigned char blocksize_bits = vol->sb->s_blocksize_bits;
477 477
478 ntfs_debug("Entering for inode 0x%lx.", mft_no); 478 ntfs_debug("Entering for inode 0x%lx.", mft_no);
479 BUG_ON(!max_bhs); 479 BUG_ON(!max_bhs);
@@ -672,8 +672,8 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
672{ 672{
673 ntfs_volume *vol = ni->vol; 673 ntfs_volume *vol = ni->vol;
674 struct page *page = ni->page; 674 struct page *page = ni->page;
675 unsigned char blocksize_bits = vol->mft_ino->i_blkbits; 675 unsigned int blocksize = vol->sb->s_blocksize;
676 unsigned int blocksize = 1 << blocksize_bits; 676 unsigned char blocksize_bits = vol->sb->s_blocksize_bits;
677 int max_bhs = vol->mft_record_size / blocksize; 677 int max_bhs = vol->mft_record_size / blocksize;
678 struct buffer_head *bhs[max_bhs]; 678 struct buffer_head *bhs[max_bhs];
679 struct buffer_head *bh, *head; 679 struct buffer_head *bh, *head;
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index 446b501411..653d2a5c48 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -50,11 +50,11 @@ typedef enum {
50/* Global variables. */ 50/* Global variables. */
51 51
52/* Slab caches (from super.c). */ 52/* Slab caches (from super.c). */
53extern kmem_cache_t *ntfs_name_cache; 53extern struct kmem_cache *ntfs_name_cache;
54extern kmem_cache_t *ntfs_inode_cache; 54extern struct kmem_cache *ntfs_inode_cache;
55extern kmem_cache_t *ntfs_big_inode_cache; 55extern struct kmem_cache *ntfs_big_inode_cache;
56extern kmem_cache_t *ntfs_attr_ctx_cache; 56extern struct kmem_cache *ntfs_attr_ctx_cache;
57extern kmem_cache_t *ntfs_index_ctx_cache; 57extern struct kmem_cache *ntfs_index_ctx_cache;
58 58
59/* The various operations structs defined throughout the driver files. */ 59/* The various operations structs defined throughout the driver files. */
60extern struct address_space_operations ntfs_aops; 60extern struct address_space_operations ntfs_aops;
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index c3a3f1a831..368a8ec106 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. 2 * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2005 Anton Altaparmakov 4 * Copyright (c) 2001-2006 Anton Altaparmakov
5 * Copyright (c) 2001,2002 Richard Russon 5 * Copyright (c) 2001,2002 Richard Russon
6 * 6 *
7 * This program/include file is free software; you can redistribute it and/or 7 * This program/include file is free software; you can redistribute it and/or
@@ -22,6 +22,7 @@
22 22
23#include <linux/stddef.h> 23#include <linux/stddef.h>
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/slab.h>
25#include <linux/string.h> 26#include <linux/string.h>
26#include <linux/spinlock.h> 27#include <linux/spinlock.h>
27#include <linux/blkdev.h> /* For bdev_hardsect_size(). */ 28#include <linux/blkdev.h> /* For bdev_hardsect_size(). */
@@ -471,9 +472,16 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
471 ntfs_error(sb, "Volume is dirty and read-only%s", es); 472 ntfs_error(sb, "Volume is dirty and read-only%s", es);
472 return -EROFS; 473 return -EROFS;
473 } 474 }
475 if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
476 ntfs_error(sb, "Volume has been modified by chkdsk "
477 "and is read-only%s", es);
478 return -EROFS;
479 }
474 if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { 480 if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
475 ntfs_error(sb, "Volume has unsupported flags set and " 481 ntfs_error(sb, "Volume has unsupported flags set "
476 "is read-only%s", es); 482 "(0x%x) and is read-only%s",
483 (unsigned)le16_to_cpu(vol->vol_flags),
484 es);
477 return -EROFS; 485 return -EROFS;
478 } 486 }
479 if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { 487 if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
@@ -641,7 +649,7 @@ static struct buffer_head *read_ntfs_boot_sector(struct super_block *sb,
641{ 649{
642 const char *read_err_str = "Unable to read %s boot sector."; 650 const char *read_err_str = "Unable to read %s boot sector.";
643 struct buffer_head *bh_primary, *bh_backup; 651 struct buffer_head *bh_primary, *bh_backup;
644 long nr_blocks = NTFS_SB(sb)->nr_blocks; 652 sector_t nr_blocks = NTFS_SB(sb)->nr_blocks;
645 653
646 /* Try to read primary boot sector. */ 654 /* Try to read primary boot sector. */
647 if ((bh_primary = sb_bread(sb, 0))) { 655 if ((bh_primary = sb_bread(sb, 0))) {
@@ -688,13 +696,18 @@ hotfix_primary_boot_sector:
688 /* 696 /*
689 * If we managed to read sector zero and the volume is not 697 * If we managed to read sector zero and the volume is not
690 * read-only, copy the found, valid backup boot sector to the 698 * read-only, copy the found, valid backup boot sector to the
691 * primary boot sector. 699 * primary boot sector. Note we only copy the actual boot
700 * sector structure, not the actual whole device sector as that
701 * may be bigger and would potentially damage the $Boot system
702 * file (FIXME: Would be nice to know if the backup boot sector
703 * on a large sector device contains the whole boot loader or
704 * just the first 512 bytes).
692 */ 705 */
693 if (!(sb->s_flags & MS_RDONLY)) { 706 if (!(sb->s_flags & MS_RDONLY)) {
694 ntfs_warning(sb, "Hot-fix: Recovering invalid primary " 707 ntfs_warning(sb, "Hot-fix: Recovering invalid primary "
695 "boot sector from backup copy."); 708 "boot sector from backup copy.");
696 memcpy(bh_primary->b_data, bh_backup->b_data, 709 memcpy(bh_primary->b_data, bh_backup->b_data,
697 sb->s_blocksize); 710 NTFS_BLOCK_SIZE);
698 mark_buffer_dirty(bh_primary); 711 mark_buffer_dirty(bh_primary);
699 sync_dirty_buffer(bh_primary); 712 sync_dirty_buffer(bh_primary);
700 if (buffer_uptodate(bh_primary)) { 713 if (buffer_uptodate(bh_primary)) {
@@ -733,9 +746,13 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
733 vol->sector_size); 746 vol->sector_size);
734 ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits, 747 ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits,
735 vol->sector_size_bits); 748 vol->sector_size_bits);
736 if (vol->sector_size != vol->sb->s_blocksize) 749 if (vol->sector_size < vol->sb->s_blocksize) {
737 ntfs_warning(vol->sb, "The boot sector indicates a sector size " 750 ntfs_error(vol->sb, "Sector size (%i) is smaller than the "
738 "different from the device sector size."); 751 "device block size (%lu). This is not "
752 "supported. Sorry.", vol->sector_size,
753 vol->sb->s_blocksize);
754 return FALSE;
755 }
739 ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster); 756 ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster);
740 sectors_per_cluster_bits = ffs(b->bpb.sectors_per_cluster) - 1; 757 sectors_per_cluster_bits = ffs(b->bpb.sectors_per_cluster) - 1;
741 ntfs_debug("sectors_per_cluster_bits = 0x%x", 758 ntfs_debug("sectors_per_cluster_bits = 0x%x",
@@ -748,16 +765,11 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
748 ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size, 765 ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size,
749 vol->cluster_size); 766 vol->cluster_size);
750 ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask); 767 ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask);
751 ntfs_debug("vol->cluster_size_bits = %i (0x%x)", 768 ntfs_debug("vol->cluster_size_bits = %i", vol->cluster_size_bits);
752 vol->cluster_size_bits, vol->cluster_size_bits); 769 if (vol->cluster_size < vol->sector_size) {
753 if (vol->sector_size > vol->cluster_size) { 770 ntfs_error(vol->sb, "Cluster size (%i) is smaller than the "
754 ntfs_error(vol->sb, "Sector sizes above the cluster size are " 771 "sector size (%i). This is not supported. "
755 "not supported. Sorry."); 772 "Sorry.", vol->cluster_size, vol->sector_size);
756 return FALSE;
757 }
758 if (vol->sb->s_blocksize > vol->cluster_size) {
759 ntfs_error(vol->sb, "Cluster sizes smaller than the device "
760 "sector size are not supported. Sorry.");
761 return FALSE; 773 return FALSE;
762 } 774 }
763 clusters_per_mft_record = b->clusters_per_mft_record; 775 clusters_per_mft_record = b->clusters_per_mft_record;
@@ -786,11 +798,18 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
786 * we store $MFT/$DATA, the table of mft records in the page cache. 798 * we store $MFT/$DATA, the table of mft records in the page cache.
787 */ 799 */
788 if (vol->mft_record_size > PAGE_CACHE_SIZE) { 800 if (vol->mft_record_size > PAGE_CACHE_SIZE) {
789 ntfs_error(vol->sb, "Mft record size %i (0x%x) exceeds the " 801 ntfs_error(vol->sb, "Mft record size (%i) exceeds the "
790 "page cache size on your system %lu (0x%lx). " 802 "PAGE_CACHE_SIZE on your system (%lu). "
791 "This is not supported. Sorry.", 803 "This is not supported. Sorry.",
792 vol->mft_record_size, vol->mft_record_size, 804 vol->mft_record_size, PAGE_CACHE_SIZE);
793 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE); 805 return FALSE;
806 }
807 /* We cannot support mft record sizes below the sector size. */
808 if (vol->mft_record_size < vol->sector_size) {
809 ntfs_error(vol->sb, "Mft record size (%i) is smaller than the "
810 "sector size (%i). This is not supported. "
811 "Sorry.", vol->mft_record_size,
812 vol->sector_size);
794 return FALSE; 813 return FALSE;
795 } 814 }
796 clusters_per_index_record = b->clusters_per_index_record; 815 clusters_per_index_record = b->clusters_per_index_record;
@@ -816,6 +835,14 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
816 ntfs_debug("vol->index_record_size_bits = %i (0x%x)", 835 ntfs_debug("vol->index_record_size_bits = %i (0x%x)",
817 vol->index_record_size_bits, 836 vol->index_record_size_bits,
818 vol->index_record_size_bits); 837 vol->index_record_size_bits);
838 /* We cannot support index record sizes below the sector size. */
839 if (vol->index_record_size < vol->sector_size) {
840 ntfs_error(vol->sb, "Index record size (%i) is smaller than "
841 "the sector size (%i). This is not "
842 "supported. Sorry.", vol->index_record_size,
843 vol->sector_size);
844 return FALSE;
845 }
819 /* 846 /*
820 * Get the size of the volume in clusters and check for 64-bit-ness. 847 * Get the size of the volume in clusters and check for 64-bit-ness.
821 * Windows currently only uses 32 bits to save the clusters so we do 848 * Windows currently only uses 32 bits to save the clusters so we do
@@ -845,15 +872,18 @@ static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
845 } 872 }
846 ll = sle64_to_cpu(b->mft_lcn); 873 ll = sle64_to_cpu(b->mft_lcn);
847 if (ll >= vol->nr_clusters) { 874 if (ll >= vol->nr_clusters) {
848 ntfs_error(vol->sb, "MFT LCN is beyond end of volume. Weird."); 875 ntfs_error(vol->sb, "MFT LCN (%lli, 0x%llx) is beyond end of "
876 "volume. Weird.", (unsigned long long)ll,
877 (unsigned long long)ll);
849 return FALSE; 878 return FALSE;
850 } 879 }
851 vol->mft_lcn = ll; 880 vol->mft_lcn = ll;
852 ntfs_debug("vol->mft_lcn = 0x%llx", (long long)vol->mft_lcn); 881 ntfs_debug("vol->mft_lcn = 0x%llx", (long long)vol->mft_lcn);
853 ll = sle64_to_cpu(b->mftmirr_lcn); 882 ll = sle64_to_cpu(b->mftmirr_lcn);
854 if (ll >= vol->nr_clusters) { 883 if (ll >= vol->nr_clusters) {
855 ntfs_error(vol->sb, "MFTMirr LCN is beyond end of volume. " 884 ntfs_error(vol->sb, "MFTMirr LCN (%lli, 0x%llx) is beyond end "
856 "Weird."); 885 "of volume. Weird.", (unsigned long long)ll,
886 (unsigned long long)ll);
857 return FALSE; 887 return FALSE;
858 } 888 }
859 vol->mftmirr_lcn = ll; 889 vol->mftmirr_lcn = ll;
@@ -1822,11 +1852,24 @@ get_ctx_vol_failed:
1822 /* Make sure that no unsupported volume flags are set. */ 1852 /* Make sure that no unsupported volume flags are set. */
1823 if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { 1853 if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
1824 static const char *es1a = "Volume is dirty"; 1854 static const char *es1a = "Volume is dirty";
1825 static const char *es1b = "Volume has unsupported flags set"; 1855 static const char *es1b = "Volume has been modified by chkdsk";
1826 static const char *es2 = ". Run chkdsk and mount in Windows."; 1856 static const char *es1c = "Volume has unsupported flags set";
1827 const char *es1; 1857 static const char *es2a = ". Run chkdsk and mount in Windows.";
1828 1858 static const char *es2b = ". Mount in Windows.";
1829 es1 = vol->vol_flags & VOLUME_IS_DIRTY ? es1a : es1b; 1859 const char *es1, *es2;
1860
1861 es2 = es2a;
1862 if (vol->vol_flags & VOLUME_IS_DIRTY)
1863 es1 = es1a;
1864 else if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
1865 es1 = es1b;
1866 es2 = es2b;
1867 } else {
1868 es1 = es1c;
1869 ntfs_warning(sb, "Unsupported volume flags 0x%x "
1870 "encountered.",
1871 (unsigned)le16_to_cpu(vol->vol_flags));
1872 }
1830 /* If a read-write mount, convert it to a read-only mount. */ 1873 /* If a read-write mount, convert it to a read-only mount. */
1831 if (!(sb->s_flags & MS_RDONLY)) { 1874 if (!(sb->s_flags & MS_RDONLY)) {
1832 if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO | 1875 if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
@@ -2685,7 +2728,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2685 ntfs_volume *vol; 2728 ntfs_volume *vol;
2686 struct buffer_head *bh; 2729 struct buffer_head *bh;
2687 struct inode *tmp_ino; 2730 struct inode *tmp_ino;
2688 int result; 2731 int blocksize, result;
2689 2732
2690 ntfs_debug("Entering."); 2733 ntfs_debug("Entering.");
2691#ifndef NTFS_RW 2734#ifndef NTFS_RW
@@ -2724,60 +2767,85 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2724 if (!parse_options(vol, (char*)opt)) 2767 if (!parse_options(vol, (char*)opt))
2725 goto err_out_now; 2768 goto err_out_now;
2726 2769
2770 /* We support sector sizes up to the PAGE_CACHE_SIZE. */
2771 if (bdev_hardsect_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
2772 if (!silent)
2773 ntfs_error(sb, "Device has unsupported sector size "
2774 "(%i). The maximum supported sector "
2775 "size on this architecture is %lu "
2776 "bytes.",
2777 bdev_hardsect_size(sb->s_bdev),
2778 PAGE_CACHE_SIZE);
2779 goto err_out_now;
2780 }
2727 /* 2781 /*
2728 * TODO: Fail safety check. In the future we should really be able to 2782 * Setup the device access block size to NTFS_BLOCK_SIZE or the hard
2729 * cope with this being the case, but for now just bail out. 2783 * sector size, whichever is bigger.
2730 */ 2784 */
2731 if (bdev_hardsect_size(sb->s_bdev) > NTFS_BLOCK_SIZE) { 2785 blocksize = sb_min_blocksize(sb, NTFS_BLOCK_SIZE);
2786 if (blocksize < NTFS_BLOCK_SIZE) {
2732 if (!silent) 2787 if (!silent)
2733 ntfs_error(sb, "Device has unsupported hardsect_size."); 2788 ntfs_error(sb, "Unable to set device block size.");
2734 goto err_out_now; 2789 goto err_out_now;
2735 } 2790 }
2736 2791 BUG_ON(blocksize != sb->s_blocksize);
2737 /* Setup the device access block size to NTFS_BLOCK_SIZE. */ 2792 ntfs_debug("Set device block size to %i bytes (block size bits %i).",
2738 if (sb_set_blocksize(sb, NTFS_BLOCK_SIZE) != NTFS_BLOCK_SIZE) { 2793 blocksize, sb->s_blocksize_bits);
2794 /* Determine the size of the device in units of block_size bytes. */
2795 if (!i_size_read(sb->s_bdev->bd_inode)) {
2739 if (!silent) 2796 if (!silent)
2740 ntfs_error(sb, "Unable to set block size."); 2797 ntfs_error(sb, "Unable to determine device size.");
2741 goto err_out_now; 2798 goto err_out_now;
2742 } 2799 }
2743
2744 /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */
2745 vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >> 2800 vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >>
2746 NTFS_BLOCK_SIZE_BITS; 2801 sb->s_blocksize_bits;
2747
2748 /* Read the boot sector and return unlocked buffer head to it. */ 2802 /* Read the boot sector and return unlocked buffer head to it. */
2749 if (!(bh = read_ntfs_boot_sector(sb, silent))) { 2803 if (!(bh = read_ntfs_boot_sector(sb, silent))) {
2750 if (!silent) 2804 if (!silent)
2751 ntfs_error(sb, "Not an NTFS volume."); 2805 ntfs_error(sb, "Not an NTFS volume.");
2752 goto err_out_now; 2806 goto err_out_now;
2753 } 2807 }
2754
2755 /* 2808 /*
2756 * Extract the data from the boot sector and setup the ntfs super block 2809 * Extract the data from the boot sector and setup the ntfs volume
2757 * using it. 2810 * using it.
2758 */ 2811 */
2759 result = parse_ntfs_boot_sector(vol, (NTFS_BOOT_SECTOR*)bh->b_data); 2812 result = parse_ntfs_boot_sector(vol, (NTFS_BOOT_SECTOR*)bh->b_data);
2760
2761 /* Initialize the cluster and mft allocators. */
2762 ntfs_setup_allocators(vol);
2763
2764 brelse(bh); 2813 brelse(bh);
2765
2766 if (!result) { 2814 if (!result) {
2767 if (!silent) 2815 if (!silent)
2768 ntfs_error(sb, "Unsupported NTFS filesystem."); 2816 ntfs_error(sb, "Unsupported NTFS filesystem.");
2769 goto err_out_now; 2817 goto err_out_now;
2770 } 2818 }
2771
2772 /* 2819 /*
2773 * TODO: When we start coping with sector sizes different from 2820 * If the boot sector indicates a sector size bigger than the current
2774 * NTFS_BLOCK_SIZE, we now probably need to set the blocksize of the 2821 * device block size, switch the device block size to the sector size.
2775 * device (probably to NTFS_BLOCK_SIZE). 2822 * TODO: It may be possible to support this case even when the set
2823 * below fails, we would just be breaking up the i/o for each sector
2824 * into multiple blocks for i/o purposes but otherwise it should just
2825 * work. However it is safer to leave disabled until someone hits this
2826 * error message and then we can get them to try it without the setting
2827 * so we know for sure that it works.
2776 */ 2828 */
2777 2829 if (vol->sector_size > blocksize) {
2830 blocksize = sb_set_blocksize(sb, vol->sector_size);
2831 if (blocksize != vol->sector_size) {
2832 if (!silent)
2833 ntfs_error(sb, "Unable to set device block "
2834 "size to sector size (%i).",
2835 vol->sector_size);
2836 goto err_out_now;
2837 }
2838 BUG_ON(blocksize != sb->s_blocksize);
2839 vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >>
2840 sb->s_blocksize_bits;
2841 ntfs_debug("Changed device block size to %i bytes (block size "
2842 "bits %i) to match volume sector size.",
2843 blocksize, sb->s_blocksize_bits);
2844 }
2845 /* Initialize the cluster and mft allocators. */
2846 ntfs_setup_allocators(vol);
2778 /* Setup remaining fields in the super block. */ 2847 /* Setup remaining fields in the super block. */
2779 sb->s_magic = NTFS_SB_MAGIC; 2848 sb->s_magic = NTFS_SB_MAGIC;
2780
2781 /* 2849 /*
2782 * Ntfs allows 63 bits for the file size, i.e. correct would be: 2850 * Ntfs allows 63 bits for the file size, i.e. correct would be:
2783 * sb->s_maxbytes = ~0ULL >> 1; 2851 * sb->s_maxbytes = ~0ULL >> 1;
@@ -2787,9 +2855,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2787 * without overflowing the index or to 2^63 - 1, whichever is smaller. 2855 * without overflowing the index or to 2^63 - 1, whichever is smaller.
2788 */ 2856 */
2789 sb->s_maxbytes = MAX_LFS_FILESIZE; 2857 sb->s_maxbytes = MAX_LFS_FILESIZE;
2790 2858 /* Ntfs measures time in 100ns intervals. */
2791 sb->s_time_gran = 100; 2859 sb->s_time_gran = 100;
2792
2793 /* 2860 /*
2794 * Now load the metadata required for the page cache and our address 2861 * Now load the metadata required for the page cache and our address
2795 * space operations to function. We do this by setting up a specialised 2862 * space operations to function. We do this by setting up a specialised
@@ -2987,14 +3054,14 @@ err_out_now:
2987 * strings of the maximum length allowed by NTFS, which is NTFS_MAX_NAME_LEN 3054 * strings of the maximum length allowed by NTFS, which is NTFS_MAX_NAME_LEN
2988 * (255) Unicode characters + a terminating NULL Unicode character. 3055 * (255) Unicode characters + a terminating NULL Unicode character.
2989 */ 3056 */
2990kmem_cache_t *ntfs_name_cache; 3057struct kmem_cache *ntfs_name_cache;
2991 3058
2992/* Slab caches for efficient allocation/deallocation of inodes. */ 3059/* Slab caches for efficient allocation/deallocation of inodes. */
2993kmem_cache_t *ntfs_inode_cache; 3060struct kmem_cache *ntfs_inode_cache;
2994kmem_cache_t *ntfs_big_inode_cache; 3061struct kmem_cache *ntfs_big_inode_cache;
2995 3062
2996/* Init once constructor for the inode slab cache. */ 3063/* Init once constructor for the inode slab cache. */
2997static void ntfs_big_inode_init_once(void *foo, kmem_cache_t *cachep, 3064static void ntfs_big_inode_init_once(void *foo, struct kmem_cache *cachep,
2998 unsigned long flags) 3065 unsigned long flags)
2999{ 3066{
3000 ntfs_inode *ni = (ntfs_inode *)foo; 3067 ntfs_inode *ni = (ntfs_inode *)foo;
@@ -3008,8 +3075,8 @@ static void ntfs_big_inode_init_once(void *foo, kmem_cache_t *cachep,
3008 * Slab caches to optimize allocations and deallocations of attribute search 3075 * Slab caches to optimize allocations and deallocations of attribute search
3009 * contexts and index contexts, respectively. 3076 * contexts and index contexts, respectively.
3010 */ 3077 */
3011kmem_cache_t *ntfs_attr_ctx_cache; 3078struct kmem_cache *ntfs_attr_ctx_cache;
3012kmem_cache_t *ntfs_index_ctx_cache; 3079struct kmem_cache *ntfs_index_ctx_cache;
3013 3080
3014/* Driver wide semaphore. */ 3081/* Driver wide semaphore. */
3015DECLARE_MUTEX(ntfs_lock); 3082DECLARE_MUTEX(ntfs_lock);
diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c
index 879cdf1d5b..9101807dc8 100644
--- a/fs/ntfs/upcase.c
+++ b/fs/ntfs/upcase.c
@@ -3,10 +3,7 @@
3 * Part of the Linux-NTFS project. 3 * Part of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001 Richard Russon <ntfs@flatcap.org> 5 * Copyright (c) 2001 Richard Russon <ntfs@flatcap.org>
6 * Copyright (c) 2001-2004 Anton Altaparmakov 6 * Copyright (c) 2001-2006 Anton Altaparmakov
7 *
8 * Modified for mkntfs inclusion 9 June 2001 by Anton Altaparmakov.
9 * Modified for kernel inclusion 10 September 2001 by Anton Altparmakov.
10 * 7 *
11 * This program is free software; you can redistribute it and/or modify it 8 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the Free 9 * under the terms of the GNU General Public License as published by the Free
@@ -75,12 +72,13 @@ ntfschar *generate_default_upcase(void)
75 if (!uc) 72 if (!uc)
76 return uc; 73 return uc;
77 memset(uc, 0, default_upcase_len * sizeof(ntfschar)); 74 memset(uc, 0, default_upcase_len * sizeof(ntfschar));
75 /* Generate the little endian Unicode upcase table used by ntfs. */
78 for (i = 0; i < default_upcase_len; i++) 76 for (i = 0; i < default_upcase_len; i++)
79 uc[i] = cpu_to_le16(i); 77 uc[i] = cpu_to_le16(i);
80 for (r = 0; uc_run_table[r][0]; r++) 78 for (r = 0; uc_run_table[r][0]; r++)
81 for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) 79 for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
82 uc[i] = cpu_to_le16((le16_to_cpu(uc[i]) + 80 uc[i] = cpu_to_le16(le16_to_cpu(uc[i]) +
83 uc_run_table[r][2])); 81 uc_run_table[r][2]);
84 for (r = 0; uc_dup_table[r][0]; r++) 82 for (r = 0; uc_dup_table[r][0]; r++)
85 for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) 83 for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
86 uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1); 84 uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1);
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 375cd20a9f..406ab55dfb 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -2,7 +2,7 @@
2 * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part 2 * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part
3 * of the Linux-NTFS project. 3 * of the Linux-NTFS project.
4 * 4 *
5 * Copyright (c) 2001-2005 Anton Altaparmakov 5 * Copyright (c) 2001-2006 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon 6 * Copyright (c) 2002 Richard Russon
7 * 7 *
8 * This program/include file is free software; you can redistribute it and/or 8 * This program/include file is free software; you can redistribute it and/or
@@ -41,10 +41,8 @@ typedef struct {
41 * structure has stabilized... (AIA) 41 * structure has stabilized... (AIA)
42 */ 42 */
43 /* Device specifics. */ 43 /* Device specifics. */
44 struct super_block *sb; /* Pointer back to the super_block, 44 struct super_block *sb; /* Pointer back to the super_block. */
45 so we don't have to get the offset 45 LCN nr_blocks; /* Number of sb->s_blocksize bytes
46 every time. */
47 LCN nr_blocks; /* Number of NTFS_BLOCK_SIZE bytes
48 sized blocks on the device. */ 46 sized blocks on the device. */
49 /* Configuration provided by user at mount time. */ 47 /* Configuration provided by user at mount time. */
50 unsigned long flags; /* Miscellaneous flags, see below. */ 48 unsigned long flags; /* Miscellaneous flags, see below. */
@@ -141,8 +139,8 @@ typedef enum {
141 NV_ShowSystemFiles, /* 1: Return system files in ntfs_readdir(). */ 139 NV_ShowSystemFiles, /* 1: Return system files in ntfs_readdir(). */
142 NV_CaseSensitive, /* 1: Treat file names as case sensitive and 140 NV_CaseSensitive, /* 1: Treat file names as case sensitive and
143 create filenames in the POSIX namespace. 141 create filenames in the POSIX namespace.
144 Otherwise be case insensitive and create 142 Otherwise be case insensitive but still
145 file names in WIN32 namespace. */ 143 create file names in POSIX namespace. */
146 NV_LogFileEmpty, /* 1: $LogFile journal is empty. */ 144 NV_LogFileEmpty, /* 1: $LogFile journal is empty. */
147 NV_QuotaOutOfDate, /* 1: $Quota is out of date. */ 145 NV_QuotaOutOfDate, /* 1: $Quota is out of date. */
148 NV_UsnJrnlStamped, /* 1: $UsnJrnl has been stamped. */ 146 NV_UsnJrnlStamped, /* 1: $UsnJrnl has been stamped. */
@@ -153,7 +151,7 @@ typedef enum {
153 * Macro tricks to expand the NVolFoo(), NVolSetFoo(), and NVolClearFoo() 151 * Macro tricks to expand the NVolFoo(), NVolSetFoo(), and NVolClearFoo()
154 * functions. 152 * functions.
155 */ 153 */
156#define NVOL_FNS(flag) \ 154#define DEFINE_NVOL_BIT_OPS(flag) \
157static inline int NVol##flag(ntfs_volume *vol) \ 155static inline int NVol##flag(ntfs_volume *vol) \
158{ \ 156{ \
159 return test_bit(NV_##flag, &(vol)->flags); \ 157 return test_bit(NV_##flag, &(vol)->flags); \
@@ -168,12 +166,12 @@ static inline void NVolClear##flag(ntfs_volume *vol) \
168} 166}
169 167
170/* Emit the ntfs volume bitops functions. */ 168/* Emit the ntfs volume bitops functions. */
171NVOL_FNS(Errors) 169DEFINE_NVOL_BIT_OPS(Errors)
172NVOL_FNS(ShowSystemFiles) 170DEFINE_NVOL_BIT_OPS(ShowSystemFiles)
173NVOL_FNS(CaseSensitive) 171DEFINE_NVOL_BIT_OPS(CaseSensitive)
174NVOL_FNS(LogFileEmpty) 172DEFINE_NVOL_BIT_OPS(LogFileEmpty)
175NVOL_FNS(QuotaOutOfDate) 173DEFINE_NVOL_BIT_OPS(QuotaOutOfDate)
176NVOL_FNS(UsnJrnlStamped) 174DEFINE_NVOL_BIT_OPS(UsnJrnlStamped)
177NVOL_FNS(SparseEnabled) 175DEFINE_NVOL_BIT_OPS(SparseEnabled)
178 176
179#endif /* _LINUX_NTFS_VOLUME_H */ 177#endif /* _LINUX_NTFS_VOLUME_H */
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 42eb53b529..23ceaa7127 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -208,6 +208,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
208#define DLM_LOCK_RES_IN_PROGRESS 0x00000010 208#define DLM_LOCK_RES_IN_PROGRESS 0x00000010
209#define DLM_LOCK_RES_MIGRATING 0x00000020 209#define DLM_LOCK_RES_MIGRATING 0x00000020
210 210
211/* max milliseconds to wait to sync up a network failure with a node death */
212#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
213
211#define DLM_PURGE_INTERVAL_MS (8 * 1000) 214#define DLM_PURGE_INTERVAL_MS (8 * 1000)
212 215
213struct dlm_lock_resource 216struct dlm_lock_resource
@@ -658,6 +661,7 @@ int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
658void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); 661void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
659void dlm_wait_for_recovery(struct dlm_ctxt *dlm); 662void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
660int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); 663int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
664int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
661 665
662void dlm_put(struct dlm_ctxt *dlm); 666void dlm_put(struct dlm_ctxt *dlm);
663struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); 667struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 6001b22a99..f66e2d818c 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -392,6 +392,11 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
392 } else { 392 } else {
393 mlog_errno(tmpret); 393 mlog_errno(tmpret);
394 if (dlm_is_host_down(tmpret)) { 394 if (dlm_is_host_down(tmpret)) {
395 /* instead of logging the same network error over
396 * and over, sleep here and wait for the heartbeat
397 * to notice the node is dead. times out after 5s. */
398 dlm_wait_for_node_death(dlm, res->owner,
399 DLM_NODE_DEATH_WAIT_MAX);
395 ret = DLM_RECOVERING; 400 ret = DLM_RECOVERING;
396 mlog(0, "node %u died so returning DLM_RECOVERING " 401 mlog(0, "node %u died so returning DLM_RECOVERING "
397 "from convert message!\n", res->owner); 402 "from convert message!\n", res->owner);
@@ -421,7 +426,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
421 struct dlm_lockstatus *lksb; 426 struct dlm_lockstatus *lksb;
422 enum dlm_status status = DLM_NORMAL; 427 enum dlm_status status = DLM_NORMAL;
423 u32 flags; 428 u32 flags;
424 int call_ast = 0, kick_thread = 0; 429 int call_ast = 0, kick_thread = 0, ast_reserved = 0;
425 430
426 if (!dlm_grab(dlm)) { 431 if (!dlm_grab(dlm)) {
427 dlm_error(DLM_REJECTED); 432 dlm_error(DLM_REJECTED);
@@ -490,6 +495,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
490 status = __dlm_lockres_state_to_status(res); 495 status = __dlm_lockres_state_to_status(res);
491 if (status == DLM_NORMAL) { 496 if (status == DLM_NORMAL) {
492 __dlm_lockres_reserve_ast(res); 497 __dlm_lockres_reserve_ast(res);
498 ast_reserved = 1;
493 res->state |= DLM_LOCK_RES_IN_PROGRESS; 499 res->state |= DLM_LOCK_RES_IN_PROGRESS;
494 status = __dlmconvert_master(dlm, res, lock, flags, 500 status = __dlmconvert_master(dlm, res, lock, flags,
495 cnv->requested_type, 501 cnv->requested_type,
@@ -512,10 +518,10 @@ leave:
512 else 518 else
513 dlm_lock_put(lock); 519 dlm_lock_put(lock);
514 520
515 /* either queue the ast or release it */ 521 /* either queue the ast or release it, if reserved */
516 if (call_ast) 522 if (call_ast)
517 dlm_queue_ast(dlm, lock); 523 dlm_queue_ast(dlm, lock);
518 else 524 else if (ast_reserved)
519 dlm_lockres_release_ast(dlm, res); 525 dlm_lockres_release_ast(dlm, res);
520 526
521 if (kick_thread) 527 if (kick_thread)
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index d1a0038557..671d4ff222 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -220,6 +220,17 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
220 dlm_error(status); 220 dlm_error(status);
221 dlm_revert_pending_lock(res, lock); 221 dlm_revert_pending_lock(res, lock);
222 dlm_lock_put(lock); 222 dlm_lock_put(lock);
223 } else if (dlm_is_recovery_lock(res->lockname.name,
224 res->lockname.len)) {
225 /* special case for the $RECOVERY lock.
226 * there will never be an AST delivered to put
227 * this lock on the proper secondary queue
228 * (granted), so do it manually. */
229 mlog(0, "%s: $RECOVERY lock for this node (%u) is "
230 "mastered by %u; got lock, manually granting (no ast)\n",
231 dlm->name, dlm->node_num, res->owner);
232 list_del_init(&lock->list);
233 list_add_tail(&lock->list, &res->granted);
223 } 234 }
224 spin_unlock(&res->spinlock); 235 spin_unlock(&res->spinlock);
225 236
@@ -646,7 +657,19 @@ retry_lock:
646 mlog(0, "retrying lock with migration/" 657 mlog(0, "retrying lock with migration/"
647 "recovery/in progress\n"); 658 "recovery/in progress\n");
648 msleep(100); 659 msleep(100);
649 dlm_wait_for_recovery(dlm); 660 /* no waiting for dlm_reco_thread */
661 if (recovery) {
662 if (status == DLM_RECOVERING) {
663 mlog(0, "%s: got RECOVERING "
664 "for $REOCVERY lock, master "
665 "was %u\n", dlm->name,
666 res->owner);
667 dlm_wait_for_node_death(dlm, res->owner,
668 DLM_NODE_DEATH_WAIT_MAX);
669 }
670 } else {
671 dlm_wait_for_recovery(dlm);
672 }
650 goto retry_lock; 673 goto retry_lock;
651 } 674 }
652 675
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a3194fe173..2e2e95e694 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2482,7 +2482,9 @@ top:
2482 atomic_set(&mle->woken, 1); 2482 atomic_set(&mle->woken, 1);
2483 spin_unlock(&mle->spinlock); 2483 spin_unlock(&mle->spinlock);
2484 wake_up(&mle->wq); 2484 wake_up(&mle->wq);
2485 /* final put will take care of list removal */ 2485 /* do not need events any longer, so detach
2486 * from heartbeat */
2487 __dlm_mle_detach_hb_events(dlm, mle);
2486 __dlm_put_mle(mle); 2488 __dlm_put_mle(mle);
2487 } 2489 }
2488 continue; 2490 continue;
@@ -2537,6 +2539,9 @@ top:
2537 spin_unlock(&res->spinlock); 2539 spin_unlock(&res->spinlock);
2538 dlm_lockres_put(res); 2540 dlm_lockres_put(res);
2539 2541
2542 /* about to get rid of mle, detach from heartbeat */
2543 __dlm_mle_detach_hb_events(dlm, mle);
2544
2540 /* dump the mle */ 2545 /* dump the mle */
2541 spin_lock(&dlm->master_lock); 2546 spin_lock(&dlm->master_lock);
2542 __dlm_put_mle(mle); 2547 __dlm_put_mle(mle);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 186e9a76aa..ed76bda1a5 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -278,6 +278,24 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
278 return dead; 278 return dead;
279} 279}
280 280
281int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
282{
283 if (timeout) {
284 mlog(ML_NOTICE, "%s: waiting %dms for notification of "
285 "death of node %u\n", dlm->name, timeout, node);
286 wait_event_timeout(dlm->dlm_reco_thread_wq,
287 dlm_is_node_dead(dlm, node),
288 msecs_to_jiffies(timeout));
289 } else {
290 mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
291 "of death of node %u\n", dlm->name, node);
292 wait_event(dlm->dlm_reco_thread_wq,
293 dlm_is_node_dead(dlm, node));
294 }
295 /* for now, return 0 */
296 return 0;
297}
298
281/* callers of the top-level api calls (dlmlock/dlmunlock) should 299/* callers of the top-level api calls (dlmlock/dlmunlock) should
282 * block on the dlm->reco.event when recovery is in progress. 300 * block on the dlm->reco.event when recovery is in progress.
283 * the dlm recovery thread will set this state when it begins 301 * the dlm recovery thread will set this state when it begins
@@ -2032,6 +2050,30 @@ again:
2032 dlm->reco.new_master); 2050 dlm->reco.new_master);
2033 status = -EEXIST; 2051 status = -EEXIST;
2034 } else { 2052 } else {
2053 status = 0;
2054
2055 /* see if recovery was already finished elsewhere */
2056 spin_lock(&dlm->spinlock);
2057 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
2058 status = -EINVAL;
2059 mlog(0, "%s: got reco EX lock, but "
2060 "node got recovered already\n", dlm->name);
2061 if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
2062 mlog(ML_ERROR, "%s: new master is %u "
2063 "but no dead node!\n",
2064 dlm->name, dlm->reco.new_master);
2065 BUG();
2066 }
2067 }
2068 spin_unlock(&dlm->spinlock);
2069 }
2070
2071 /* if this node has actually become the recovery master,
2072 * set the master and send the messages to begin recovery */
2073 if (!status) {
2074 mlog(0, "%s: dead=%u, this=%u, sending "
2075 "begin_reco now\n", dlm->name,
2076 dlm->reco.dead_node, dlm->node_num);
2035 status = dlm_send_begin_reco_message(dlm, 2077 status = dlm_send_begin_reco_message(dlm,
2036 dlm->reco.dead_node); 2078 dlm->reco.dead_node);
2037 /* this always succeeds */ 2079 /* this always succeeds */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index fa0bcac5ce..d329c9df90 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1584,10 +1584,9 @@ static int ocfs2_commit_thread(void *arg)
1584 while (!(kthread_should_stop() && 1584 while (!(kthread_should_stop() &&
1585 atomic_read(&journal->j_num_trans) == 0)) { 1585 atomic_read(&journal->j_num_trans) == 0)) {
1586 1586
1587 wait_event_interruptible_timeout(osb->checkpoint_event, 1587 wait_event_interruptible(osb->checkpoint_event,
1588 atomic_read(&journal->j_num_trans) 1588 atomic_read(&journal->j_num_trans)
1589 || kthread_should_stop(), 1589 || kthread_should_stop());
1590 OCFS2_CHECKPOINT_INTERVAL);
1591 1590
1592 status = ocfs2_commit_cache(osb); 1591 status = ocfs2_commit_cache(osb);
1593 if (status < 0) 1592 if (status < 0)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 7d0a816184..2f3a6acdac 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -29,8 +29,6 @@
29#include <linux/fs.h> 29#include <linux/fs.h>
30#include <linux/jbd.h> 30#include <linux/jbd.h>
31 31
32#define OCFS2_CHECKPOINT_INTERVAL (8 * HZ)
33
34enum ocfs2_journal_state { 32enum ocfs2_journal_state {
35 OCFS2_JOURNAL_FREE = 0, 33 OCFS2_JOURNAL_FREE = 0,
36 OCFS2_JOURNAL_LOADED, 34 OCFS2_JOURNAL_LOADED,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6573f31f1f..075d3e9456 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -204,10 +204,6 @@ int proc_fill_super(struct super_block *s, void *data, int silent)
204 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); 204 root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
205 if (!root_inode) 205 if (!root_inode)
206 goto out_no_root; 206 goto out_no_root;
207 /*
208 * Fixup the root inode's nlink value
209 */
210 root_inode->i_nlink += nr_processes();
211 root_inode->i_uid = 0; 207 root_inode->i_uid = 0;
212 root_inode->i_gid = 0; 208 root_inode->i_gid = 0;
213 s->s_root = d_alloc_root(root_inode); 209 s->s_root = d_alloc_root(root_inode);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 68896283c8..c3fd361111 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -80,16 +80,16 @@ void __init proc_root_init(void)
80 proc_bus = proc_mkdir("bus", NULL); 80 proc_bus = proc_mkdir("bus", NULL);
81} 81}
82 82
83static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 83static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
84)
84{ 85{
85 /* 86 generic_fillattr(dentry->d_inode, stat);
86 * nr_threads is actually protected by the tasklist_lock; 87 stat->nlink = proc_root.nlink + nr_processes();
87 * however, it's conventional to do reads, especially for 88 return 0;
88 * reporting, without any locking whatsoever. 89}
89 */
90 if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */
91 dir->i_nlink = proc_root.nlink + nr_threads;
92 90
91static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
92{
93 if (!proc_lookup(dir, dentry, nd)) { 93 if (!proc_lookup(dir, dentry, nd)) {
94 return NULL; 94 return NULL;
95 } 95 }
@@ -134,6 +134,7 @@ static struct file_operations proc_root_operations = {
134 */ 134 */
135static struct inode_operations proc_root_inode_operations = { 135static struct inode_operations proc_root_inode_operations = {
136 .lookup = proc_root_lookup, 136 .lookup = proc_root_lookup,
137 .getattr = proc_root_getattr,
137}; 138};
138 139
139/* 140/*
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index c66bd5e4c0..cde5d48994 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -27,6 +27,7 @@
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/time.h>
30#include <linux/init.h> 31#include <linux/init.h>
31#include <linux/string.h> 32#include <linux/string.h>
32#include <linux/smp_lock.h> 33#include <linux/smp_lock.h>
@@ -104,6 +105,7 @@ ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
104 d_instantiate(dentry, inode); 105 d_instantiate(dentry, inode);
105 dget(dentry); /* Extra count - pin the dentry in core */ 106 dget(dentry); /* Extra count - pin the dentry in core */
106 error = 0; 107 error = 0;
108 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
107 } 109 }
108 return error; 110 return error;
109} 111}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index ef5e5414e7..d63da756eb 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1124,8 +1124,6 @@ static void handle_attrs(struct super_block *s)
1124 "reiserfs: cannot support attributes until flag is set in super-block"); 1124 "reiserfs: cannot support attributes until flag is set in super-block");
1125 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); 1125 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1126 } 1126 }
1127 } else if (le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared) {
1128 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ATTRS);
1129 } 1127 }
1130} 1128}
1131 1129
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 43de3ba833..ab8894c3b9 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -228,7 +228,8 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
228 acl = ERR_PTR(retval); 228 acl = ERR_PTR(retval);
229 } else { 229 } else {
230 acl = posix_acl_from_disk(value, retval); 230 acl = posix_acl_from_disk(value, retval);
231 *p_acl = posix_acl_dup(acl); 231 if (!IS_ERR(acl))
232 *p_acl = posix_acl_dup(acl);
232 } 233 }
233 234
234 kfree(value); 235 kfree(value);
diff --git a/fs/select.c b/fs/select.c
index bc60a3e14e..1815a57d22 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -398,11 +398,15 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
398 ret = core_sys_select(n, inp, outp, exp, &timeout); 398 ret = core_sys_select(n, inp, outp, exp, &timeout);
399 399
400 if (tvp) { 400 if (tvp) {
401 struct timeval rtv;
402
401 if (current->personality & STICKY_TIMEOUTS) 403 if (current->personality & STICKY_TIMEOUTS)
402 goto sticky; 404 goto sticky;
403 tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); 405 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
404 tv.tv_sec = timeout; 406 rtv.tv_sec = timeout;
405 if (copy_to_user(tvp, &tv, sizeof(tv))) { 407 if (timeval_compare(&rtv, &tv) >= 0)
408 rtv = tv;
409 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
406sticky: 410sticky:
407 /* 411 /*
408 * If an application puts its timeval in read-only 412 * If an application puts its timeval in read-only
@@ -460,11 +464,16 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
460 ret = core_sys_select(n, inp, outp, exp, &timeout); 464 ret = core_sys_select(n, inp, outp, exp, &timeout);
461 465
462 if (tsp) { 466 if (tsp) {
467 struct timespec rts;
468
463 if (current->personality & STICKY_TIMEOUTS) 469 if (current->personality & STICKY_TIMEOUTS)
464 goto sticky; 470 goto sticky;
465 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; 471 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
466 ts.tv_sec = timeout; 472 1000;
467 if (copy_to_user(tsp, &ts, sizeof(ts))) { 473 rts.tv_sec = timeout;
474 if (timespec_compare(&rts, &ts) >= 0)
475 rts = ts;
476 if (copy_to_user(tsp, &rts, sizeof(rts))) {
468sticky: 477sticky:
469 /* 478 /*
470 * If an application puts its timeval in read-only 479 * If an application puts its timeval in read-only
@@ -758,12 +767,17 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
758 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 767 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
759 768
760 if (tsp && timeout >= 0) { 769 if (tsp && timeout >= 0) {
770 struct timespec rts;
771
761 if (current->personality & STICKY_TIMEOUTS) 772 if (current->personality & STICKY_TIMEOUTS)
762 goto sticky; 773 goto sticky;
763 /* Yes, we know it's actually an s64, but it's also positive. */ 774 /* Yes, we know it's actually an s64, but it's also positive. */
764 ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; 775 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
765 ts.tv_sec = timeout; 776 1000;
766 if (copy_to_user(tsp, &ts, sizeof(ts))) { 777 rts.tv_sec = timeout;
778 if (timespec_compare(&rts, &ts) >= 0)
779 rts = ts;
780 if (copy_to_user(tsp, &rts, sizeof(rts))) {
767 sticky: 781 sticky:
768 /* 782 /*
769 * If an application puts its timeval in read-only 783 * If an application puts its timeval in read-only
diff --git a/fs/stat.c b/fs/stat.c
index 24211b030f..9948cc1685 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -261,6 +261,7 @@ asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf)
261 return error; 261 return error;
262} 262}
263 263
264#ifndef __ARCH_WANT_STAT64
264asmlinkage long sys_newfstatat(int dfd, char __user *filename, 265asmlinkage long sys_newfstatat(int dfd, char __user *filename,
265 struct stat __user *statbuf, int flag) 266 struct stat __user *statbuf, int flag)
266{ 267{
@@ -281,6 +282,7 @@ asmlinkage long sys_newfstatat(int dfd, char __user *filename,
281out: 282out:
282 return error; 283 return error;
283} 284}
285#endif
284 286
285asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf) 287asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
286{ 288{
@@ -395,6 +397,26 @@ asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user * statbuf)
395 return error; 397 return error;
396} 398}
397 399
400asmlinkage long sys_fstatat64(int dfd, char __user *filename,
401 struct stat64 __user *statbuf, int flag)
402{
403 struct kstat stat;
404 int error = -EINVAL;
405
406 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
407 goto out;
408
409 if (flag & AT_SYMLINK_NOFOLLOW)
410 error = vfs_lstat_fd(dfd, filename, &stat);
411 else
412 error = vfs_stat_fd(dfd, filename, &stat);
413
414 if (!error)
415 error = cp_new_stat64(&stat, statbuf);
416
417out:
418 return error;
419}
398#endif /* __ARCH_WANT_STAT64 */ 420#endif /* __ARCH_WANT_STAT64 */
399 421
400void inode_add_bytes(struct inode *inode, loff_t bytes) 422void inode_add_bytes(struct inode *inode, loff_t bytes)
diff --git a/fs/super.c b/fs/super.c
index 30294218fa..e20b5580af 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -666,6 +666,16 @@ static int test_bdev_super(struct super_block *s, void *data)
666 return (void *)s->s_bdev == data; 666 return (void *)s->s_bdev == data;
667} 667}
668 668
669static void bdev_uevent(struct block_device *bdev, enum kobject_action action)
670{
671 if (bdev->bd_disk) {
672 if (bdev->bd_part)
673 kobject_uevent(&bdev->bd_part->kobj, action);
674 else
675 kobject_uevent(&bdev->bd_disk->kobj, action);
676 }
677}
678
669struct super_block *get_sb_bdev(struct file_system_type *fs_type, 679struct super_block *get_sb_bdev(struct file_system_type *fs_type,
670 int flags, const char *dev_name, void *data, 680 int flags, const char *dev_name, void *data,
671 int (*fill_super)(struct super_block *, void *, int)) 681 int (*fill_super)(struct super_block *, void *, int))
@@ -707,8 +717,10 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
707 up_write(&s->s_umount); 717 up_write(&s->s_umount);
708 deactivate_super(s); 718 deactivate_super(s);
709 s = ERR_PTR(error); 719 s = ERR_PTR(error);
710 } else 720 } else {
711 s->s_flags |= MS_ACTIVE; 721 s->s_flags |= MS_ACTIVE;
722 bdev_uevent(bdev, KOBJ_MOUNT);
723 }
712 } 724 }
713 725
714 return s; 726 return s;
@@ -724,6 +736,7 @@ void kill_block_super(struct super_block *sb)
724{ 736{
725 struct block_device *bdev = sb->s_bdev; 737 struct block_device *bdev = sb->s_bdev;
726 738
739 bdev_uevent(bdev, KOBJ_UMOUNT);
727 generic_shutdown_super(sb); 740 generic_shutdown_super(sb);
728 sync_blockdev(bdev); 741 sync_blockdev(bdev);
729 close_bdev_excl(bdev); 742 close_bdev_excl(bdev);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 8f2beec526..74d8be87f9 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -540,7 +540,7 @@ xfs_probe_cluster(
540 540
541 /* First sum forwards in this page */ 541 /* First sum forwards in this page */
542 do { 542 do {
543 if (mapped != buffer_mapped(bh)) 543 if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh)))
544 return total; 544 return total;
545 total += bh->b_size; 545 total += bh->b_size;
546 } while ((bh = bh->b_this_page) != head); 546 } while ((bh = bh->b_this_page) != head);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 53a00fb217..7c0e39dc61 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -68,6 +68,9 @@ kmem_zone_t *qm_dqzone;
68kmem_zone_t *qm_dqtrxzone; 68kmem_zone_t *qm_dqtrxzone;
69STATIC kmem_shaker_t xfs_qm_shaker; 69STATIC kmem_shaker_t xfs_qm_shaker;
70 70
71STATIC cred_t xfs_zerocr;
72STATIC xfs_inode_t xfs_zeroino;
73
71STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); 74STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
72STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); 75STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
73 76
@@ -1393,8 +1396,6 @@ xfs_qm_qino_alloc(
1393 xfs_trans_t *tp; 1396 xfs_trans_t *tp;
1394 int error; 1397 int error;
1395 unsigned long s; 1398 unsigned long s;
1396 cred_t zerocr;
1397 xfs_inode_t zeroino;
1398 int committed; 1399 int committed;
1399 1400
1400 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); 1401 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
@@ -1406,11 +1407,9 @@ xfs_qm_qino_alloc(
1406 xfs_trans_cancel(tp, 0); 1407 xfs_trans_cancel(tp, 0);
1407 return error; 1408 return error;
1408 } 1409 }
1409 memset(&zerocr, 0, sizeof(zerocr));
1410 memset(&zeroino, 0, sizeof(zeroino));
1411 1410
1412 if ((error = xfs_dir_ialloc(&tp, &zeroino, S_IFREG, 1, 0, 1411 if ((error = xfs_dir_ialloc(&tp, &xfs_zeroino, S_IFREG, 1, 0,
1413 &zerocr, 0, 1, ip, &committed))) { 1412 &xfs_zerocr, 0, 1, ip, &committed))) {
1414 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 1413 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1415 XFS_TRANS_ABORT); 1414 XFS_TRANS_ABORT);
1416 return error; 1415 return error;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 06fc061c50..5b413946b1 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -130,7 +130,8 @@ xfs_growfs_rt_alloc(
130 /* 130 /*
131 * Lock the inode. 131 * Lock the inode.
132 */ 132 */
133 if ((error = xfs_trans_iget(mp, tp, ino, 0, XFS_ILOCK_EXCL, &ip))) 133 if ((error = xfs_trans_iget(mp, tp, ino, 0,
134 XFS_ILOCK_EXCL, &ip)))
134 goto error_exit; 135 goto error_exit;
135 XFS_BMAP_INIT(&flist, &firstblock); 136 XFS_BMAP_INIT(&flist, &firstblock);
136 /* 137 /*
@@ -170,8 +171,8 @@ xfs_growfs_rt_alloc(
170 /* 171 /*
171 * Lock the bitmap inode. 172 * Lock the bitmap inode.
172 */ 173 */
173 if ((error = xfs_trans_iget(mp, tp, ino, 0, XFS_ILOCK_EXCL, 174 if ((error = xfs_trans_iget(mp, tp, ino, 0,
174 &ip))) 175 XFS_ILOCK_EXCL, &ip)))
175 goto error_exit; 176 goto error_exit;
176 /* 177 /*
177 * Get a buffer for the block. 178 * Get a buffer for the block.
@@ -2023,8 +2024,8 @@ xfs_growfs_rt(
2023 /* 2024 /*
2024 * Lock out other callers by grabbing the bitmap inode lock. 2025 * Lock out other callers by grabbing the bitmap inode lock.
2025 */ 2026 */
2026 if ((error = xfs_trans_iget(mp, tp, 0, mp->m_sb.sb_rbmino, 2027 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
2027 XFS_ILOCK_EXCL, &ip))) 2028 XFS_ILOCK_EXCL, &ip)))
2028 goto error_exit; 2029 goto error_exit;
2029 ASSERT(ip == mp->m_rbmip); 2030 ASSERT(ip == mp->m_rbmip);
2030 /* 2031 /*
@@ -2037,8 +2038,8 @@ xfs_growfs_rt(
2037 /* 2038 /*
2038 * Get the summary inode into the transaction. 2039 * Get the summary inode into the transaction.
2039 */ 2040 */
2040 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 2041 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0,
2041 0, XFS_ILOCK_EXCL, &ip))) 2042 XFS_ILOCK_EXCL, &ip)))
2042 goto error_exit; 2043 goto error_exit;
2043 ASSERT(ip == mp->m_rsumip); 2044 ASSERT(ip == mp->m_rsumip);
2044 /* 2045 /*
@@ -2158,10 +2159,9 @@ xfs_rtallocate_extent(
2158 /* 2159 /*
2159 * Lock out other callers by grabbing the bitmap inode lock. 2160 * Lock out other callers by grabbing the bitmap inode lock.
2160 */ 2161 */
2161 error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, XFS_ILOCK_EXCL, &ip); 2162 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
2162 if (error) { 2163 XFS_ILOCK_EXCL, &ip)))
2163 return error; 2164 return error;
2164 }
2165 sumbp = NULL; 2165 sumbp = NULL;
2166 /* 2166 /*
2167 * Allocate by size, or near another block, or exactly at some block. 2167 * Allocate by size, or near another block, or exactly at some block.
@@ -2221,10 +2221,9 @@ xfs_rtfree_extent(
2221 /* 2221 /*
2222 * Synchronize by locking the bitmap inode. 2222 * Synchronize by locking the bitmap inode.
2223 */ 2223 */
2224 error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, XFS_ILOCK_EXCL, &ip); 2224 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
2225 if (error) { 2225 XFS_ILOCK_EXCL, &ip)))
2226 return error; 2226 return error;
2227 }
2228#if defined(__KERNEL__) && defined(DEBUG) 2227#if defined(__KERNEL__) && defined(DEBUG)
2229 /* 2228 /*
2230 * Check to see that this whole range is currently allocated. 2229 * Check to see that this whole range is currently allocated.
@@ -2365,8 +2364,8 @@ xfs_rtpick_extent(
2365 __uint64_t seq; /* sequence number of file creation */ 2364 __uint64_t seq; /* sequence number of file creation */
2366 __uint64_t *seqp; /* pointer to seqno in inode */ 2365 __uint64_t *seqp; /* pointer to seqno in inode */
2367 2366
2368 error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, XFS_ILOCK_EXCL, &ip); 2367 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
2369 if (error) 2368 XFS_ILOCK_EXCL, &ip)))
2370 return error; 2369 return error;
2371 ASSERT(ip == mp->m_rbmip); 2370 ASSERT(ip == mp->m_rbmip);
2372 seqp = (__uint64_t *)&ip->i_d.di_atime; 2371 seqp = (__uint64_t *)&ip->i_d.di_atime;