Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/v9fs.c               |    2
-rw-r--r--  fs/cifs/cifssmb.c          |    7
-rw-r--r--  fs/cifs/connect.c          |    8
-rw-r--r--  fs/cifs/file.c             |   14
-rw-r--r--  fs/compat.c                |   37
-rw-r--r--  fs/exec.c                  |    2
-rw-r--r--  fs/ext2/xattr.c            |    6
-rw-r--r--  fs/fuse/dev.c              |    6
-rw-r--r--  fs/fuse/file.c             |   11
-rw-r--r--  fs/jbd/checkpoint.c        |  418
-rw-r--r--  fs/jbd/commit.c            |    3
-rw-r--r--  fs/lockd/clntlock.c        |   27
-rw-r--r--  fs/lockd/svc4proc.c        |    2
-rw-r--r--  fs/lockd/svcproc.c         |    2
-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h   |    4
-rw-r--r--  fs/ocfs2/dlm/dlmconvert.c  |   12
-rw-r--r--  fs/ocfs2/dlm/dlmlock.c     |   25
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c   |    7
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c |   42
-rw-r--r--  fs/ocfs2/journal.c         |    7
-rw-r--r--  fs/ocfs2/journal.h         |    2
-rw-r--r--  fs/proc/inode.c            |    4
-rw-r--r--  fs/proc/root.c             |   17
-rw-r--r--  fs/reiserfs/super.c        |    2
-rw-r--r--  fs/reiserfs/xattr_acl.c    |    3
-rw-r--r--  fs/select.c                |   32
-rw-r--r--  fs/stat.c                  |   22
-rw-r--r--  fs/super.c                 |   15
28 files changed, 416 insertions, 323 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 5250c428fc1f..ef3386549140 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -66,7 +66,7 @@ static match_table_t tokens = {
 	{Opt_afid, "afid=%u"},
 	{Opt_rfdno, "rfdno=%u"},
 	{Opt_wfdno, "wfdno=%u"},
-	{Opt_debug, "debug=%u"},
+	{Opt_debug, "debug=%x"},
 	{Opt_name, "name=%s"},
 	{Opt_remotename, "aname=%s"},
 	{Opt_unix, "proto=unix"},
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 217323b0c896..b41e8b379652 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1048,13 +1048,14 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon,
 			cifs_small_buf_release(iov[0].iov_base);
 		else if(resp_buf_type == CIFS_LARGE_BUFFER)
 			cifs_buf_release(iov[0].iov_base);
-	} else /* return buffer to caller to free */ /* BB FIXME how do we tell caller if it is not a large buffer */ {
-		*buf = iov[0].iov_base;
+	} else if(resp_buf_type != CIFS_NO_BUFFER) {
+		/* return buffer to caller to free */
+		*buf = iov[0].iov_base;
 		if(resp_buf_type == CIFS_SMALL_BUFFER)
 			*pbuf_type = CIFS_SMALL_BUFFER;
 		else if(resp_buf_type == CIFS_LARGE_BUFFER)
 			*pbuf_type = CIFS_LARGE_BUFFER;
-	}
+	} /* else no valid buffer on return - leave as null */
 
 /* Note: On -EAGAIN error only caller can retry on handle based calls
 	since file handle passed in no longer valid */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e488603fb1e7..ef5ae6f93c75 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1795,10 +1795,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	   conjunction with 52K kvec constraint on arch with 4K
 	   page size */
 
-	if(cifs_sb->rsize < PAGE_CACHE_SIZE) {
-		cifs_sb->rsize = PAGE_CACHE_SIZE;
-		/* Windows ME does this */
-		cFYI(1,("Attempt to set readsize for mount to less than one page (4096)"));
+	if(cifs_sb->rsize < 2048) {
+		cifs_sb->rsize = 2048;
+		/* Windows ME may prefer this */
+		cFYI(1,("readsize set to minimum 2048"));
 	}
 	cifs_sb->mnt_uid = volume_info.linux_uid;
 	cifs_sb->mnt_gid = volume_info.linux_gid;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index d17c97d07c80..675bd2568297 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1442,13 +1442,15 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
 					 &bytes_read, &smb_read_data,
 					 &buf_type);
 			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
-			if (copy_to_user(current_offset,
-				smb_read_data + 4 /* RFC1001 hdr */
-				+ le16_to_cpu(pSMBr->DataOffset),
-				bytes_read)) {
-				rc = -EFAULT;
-			}
 			if (smb_read_data) {
+				if (copy_to_user(current_offset,
+						smb_read_data +
+						4 /* RFC1001 length field */ +
+						le16_to_cpu(pSMBr->DataOffset),
+						bytes_read)) {
+					rc = -EFAULT;
+				}
+
 				if(buf_type == CIFS_SMALL_BUFFER)
 					cifs_small_buf_release(smb_read_data);
 				else if(buf_type == CIFS_LARGE_BUFFER)
diff --git a/fs/compat.c b/fs/compat.c
index 70c5af4cc270..5333c7d7427f 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1751,11 +1751,15 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 	ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
 
 	if (tvp) {
+		struct compat_timeval rtv;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
-		tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
-		tv.tv_sec = timeout;
-		if (copy_to_user(tvp, &tv, sizeof(tv))) {
+		rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
+		rtv.tv_sec = timeout;
+		if (compat_timeval_compare(&rtv, &tv) >= 0)
+			rtv = tv;
+		if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
@@ -1822,13 +1826,17 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
 	} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
 
 	if (tsp && !(current->personality & STICKY_TIMEOUTS)) {
-		ts.tv_sec += timeout / HZ;
-		ts.tv_nsec += (timeout % HZ) * (1000000000/HZ);
-		if (ts.tv_nsec >= 1000000000) {
-			ts.tv_sec++;
-			ts.tv_nsec -= 1000000000;
+		struct compat_timespec rts;
+
+		rts.tv_sec = timeout / HZ;
+		rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
+		if (rts.tv_nsec >= NSEC_PER_SEC) {
+			rts.tv_sec++;
+			rts.tv_nsec -= NSEC_PER_SEC;
 		}
-		(void)copy_to_user(tsp, &ts, sizeof(ts));
+		if (compat_timespec_compare(&rts, &ts) >= 0)
+			rts = ts;
+		copy_to_user(tsp, &rts, sizeof(rts));
 	}
 
 	if (ret == -ERESTARTNOHAND) {
@@ -1918,12 +1926,17 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 	sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
 	if (tsp && timeout >= 0) {
+		struct compat_timespec rts;
+
 		if (current->personality & STICKY_TIMEOUTS)
 			goto sticky;
 		/* Yes, we know it's actually an s64, but it's also positive. */
-		ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000;
-		ts.tv_sec = timeout;
-		if (copy_to_user(tsp, &ts, sizeof(ts))) {
+		rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
+						1000;
+		rts.tv_sec = timeout;
+		if (compat_timespec_compare(&rts, &ts) >= 0)
+			rts = ts;
+		if (copy_to_user(tsp, &rts, sizeof(rts))) {
 sticky:
 			/*
 			 * If an application puts its timeval in read-only
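[Note: the fs/compat.c hunks above all lean on the same do_div() idiom to split a 64-bit jiffies count into whole seconds and a sub-second remainder: do_div(n, base) divides n in place and returns n % base. A minimal sketch of the conversion as the patched code performs it — illustrative only, using the kernel's do_div() and jiffies_to_usecs() helpers; the function name here is hypothetical, not from the patch:

	/* Sketch: split remaining jiffies into a compat_timeval.
	 * do_div() divides 'left' in place and returns the remainder. */
	static void jiffies_left_to_compat_timeval(u64 left, struct compat_timeval *rtv)
	{
		u32 rem = do_div(left, HZ);	/* 'left' now holds whole seconds */

		rtv->tv_sec = left;
		rtv->tv_usec = jiffies_to_usecs(rem);
	}

The added compat_timeval_compare()/compat_timespec_compare() clamps then ensure the remaining time copied back to userspace never exceeds the timeout the caller originally passed in, which the jiffies rounding could otherwise produce.]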
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1403,7 +1403,7 @@ static void zap_threads (struct mm_struct *mm)
 	do_each_thread(g,p) {
 		if (mm == p->mm && p != tsk &&
 		    p->ptrace && p->parent->mm == mm) {
-			__ptrace_unlink(p);
+			__ptrace_detach(p, 0);
 		}
 	} while_each_thread(g,p);
 	write_unlock_irq(&tasklist_lock);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index a2ca3107d475..86ae8e93adb9 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -792,18 +792,20 @@ ext2_xattr_delete_inode(struct inode *inode)
 		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
 		get_bh(bh);
 		bforget(bh);
+		unlock_buffer(bh);
 	} else {
 		HDR(bh)->h_refcount = cpu_to_le32(
 			le32_to_cpu(HDR(bh)->h_refcount) - 1);
 		if (ce)
 			mb_cache_entry_release(ce);
+		ea_bdebug(bh, "refcount now=%d",
+			  le32_to_cpu(HDR(bh)->h_refcount));
+		unlock_buffer(bh);
 		mark_buffer_dirty(bh);
 		if (IS_SYNC(inode))
 			sync_dirty_buffer(bh);
 		DQUOT_FREE_BLOCK(inode, 1);
 	}
-	ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
-	unlock_buffer(bh);
 	EXT2_I(inode)->i_file_acl = 0;
 
 cleanup:
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f556a0d5c0d3..0c9a2ee54c91 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -66,6 +66,12 @@ static void restore_sigs(sigset_t *oldset)
 	sigprocmask(SIG_SETMASK, oldset, NULL);
 }
 
+/*
+ * Reset request, so that it can be reused
+ *
+ * The caller must be _very_ careful to make sure, that it is holding
+ * the only reference to req
+ */
 void fuse_reset_request(struct fuse_req *req)
 {
 	int preallocated = req->preallocated;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 296351615b00..6f05379b0a0d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -116,9 +116,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
 /* Special case for failed iget in CREATE */
 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-	u64 nodeid = req->in.h.nodeid;
-	fuse_reset_request(req);
-	fuse_send_forget(fc, req, nodeid, 1);
+	/* If called from end_io_requests(), req has more than one
+	   reference and fuse_reset_request() cannot work */
+	if (fc->connected) {
+		u64 nodeid = req->in.h.nodeid;
+		fuse_reset_request(req);
+		fuse_send_forget(fc, req, nodeid, 1);
+	} else
+		fuse_put_request(fc, req);
 }
 
 void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff,
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index e6265a0b56b8..543ed543d1e5 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,75 +24,29 @@
 #include <linux/slab.h>
 
 /*
- * Unlink a buffer from a transaction checkpoint list.
+ * Unlink a buffer from a transaction.
  *
  * Called with j_list_lock held.
 */
 
-static void __buffer_unlink_first(struct journal_head *jh)
+static inline void __buffer_unlink(struct journal_head *jh)
 {
 	transaction_t *transaction;
 
 	transaction = jh->b_cp_transaction;
+	jh->b_cp_transaction = NULL;
 
 	jh->b_cpnext->b_cpprev = jh->b_cpprev;
 	jh->b_cpprev->b_cpnext = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh) {
-		transaction->t_checkpoint_list = jh->b_cpnext;
-		if (transaction->t_checkpoint_list == jh)
-			transaction->t_checkpoint_list = NULL;
-	}
-}
-
-/*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-
-static inline void __buffer_unlink(struct journal_head *jh)
-{
-	transaction_t *transaction;
-
-	transaction = jh->b_cp_transaction;
-
-	__buffer_unlink_first(jh);
-	if (transaction->t_checkpoint_io_list == jh) {
-		transaction->t_checkpoint_io_list = jh->b_cpnext;
-		if (transaction->t_checkpoint_io_list == jh)
-			transaction->t_checkpoint_io_list = NULL;
-	}
-}
-
-/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
-	transaction_t *transaction;
-
-	transaction = jh->b_cp_transaction;
-	__buffer_unlink_first(jh);
-
-	if (!transaction->t_checkpoint_io_list) {
-		jh->b_cpnext = jh->b_cpprev = jh;
-	} else {
-		jh->b_cpnext = transaction->t_checkpoint_io_list;
-		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
-		jh->b_cpprev->b_cpnext = jh;
-		jh->b_cpnext->b_cpprev = jh;
-	}
-	transaction->t_checkpoint_io_list = jh;
+	if (transaction->t_checkpoint_list == jh)
+		transaction->t_checkpoint_list = jh->b_cpnext;
+	if (transaction->t_checkpoint_list == jh)
+		transaction->t_checkpoint_list = NULL;
 }
 
 /*
  * Try to release a checkpointed buffer from its transaction.
- * Returns 1 if we released it and 2 if we also released the
- * whole transaction.
- *
+ * Returns 1 if we released it.
  * Requires j_list_lock
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
@@ -103,11 +57,12 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		ret = __journal_remove_checkpoint(jh) + 1;
+		__journal_remove_checkpoint(jh);
 		jbd_unlock_bh_state(bh);
 		journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
+		ret = 1;
 	} else {
 		jbd_unlock_bh_state(bh);
 	}
@@ -162,53 +117,83 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
 }
 
 /*
- * Clean up transaction's list of buffers submitted for io.
- * We wait for any pending IO to complete and remove any clean
- * buffers. Note that we take the buffers in the opposite ordering
- * from the one in which they were submitted for IO.
+ * Clean up a transaction's checkpoint list.
+ *
+ * We wait for any pending IO to complete and make sure any clean
+ * buffers are removed from the transaction.
+ *
+ * Return 1 if we performed any actions which might have destroyed the
+ * checkpoint. (journal_remove_checkpoint() deletes the transaction when
+ * the last checkpoint buffer is cleansed)
  *
  * Called with j_list_lock held.
 */
-
-static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
 {
-	struct journal_head *jh;
+	struct journal_head *jh, *next_jh, *last_jh;
 	struct buffer_head *bh;
-	tid_t this_tid;
-	int released = 0;
-
-	this_tid = transaction->t_tid;
-restart:
-	/* Didn't somebody clean up the transaction in the meanwhile */
-	if (journal->j_checkpoint_transactions != transaction ||
-		transaction->t_tid != this_tid)
-		return;
-	while (!released && transaction->t_checkpoint_io_list) {
-		jh = transaction->t_checkpoint_io_list;
+	int ret = 0;
+
+	assert_spin_locked(&journal->j_list_lock);
+	jh = transaction->t_checkpoint_list;
+	if (!jh)
+		return 0;
+
+	last_jh = jh->b_cpprev;
+	next_jh = jh;
+	do {
+		jh = next_jh;
 		bh = jh2bh(jh);
-		if (!jbd_trylock_bh_state(bh)) {
-			jbd_sync_bh(journal, bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart;
-		}
 		if (buffer_locked(bh)) {
 			atomic_inc(&bh->b_count);
 			spin_unlock(&journal->j_list_lock);
-			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
 			/* the journal_head may have gone by now */
 			BUFFER_TRACE(bh, "brelse");
 			__brelse(bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart;
+			goto out_return_1;
 		}
+
 		/*
-		 * Now in whatever state the buffer currently is, we know that
-		 * it has been written out and so we can drop it from the list
+		 * This is foul
 		 */
-		released = __journal_remove_checkpoint(jh);
-		jbd_unlock_bh_state(bh);
-	}
+		if (!jbd_trylock_bh_state(bh)) {
+			jbd_sync_bh(journal, bh);
+			goto out_return_1;
+		}
+
+		if (jh->b_transaction != NULL) {
+			transaction_t *t = jh->b_transaction;
+			tid_t tid = t->t_tid;
+
+			spin_unlock(&journal->j_list_lock);
+			jbd_unlock_bh_state(bh);
+			log_start_commit(journal, tid);
+			log_wait_commit(journal, tid);
+			goto out_return_1;
+		}
+
+		/*
+		 * AKPM: I think the buffer_jbddirty test is redundant - it
+		 * shouldn't have NULL b_transaction?
+		 */
+		next_jh = jh->b_cpnext;
+		if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
+			BUFFER_TRACE(bh, "remove from checkpoint");
+			__journal_remove_checkpoint(jh);
+			jbd_unlock_bh_state(bh);
+			journal_remove_journal_head(bh);
+			__brelse(bh);
+			ret = 1;
+		} else {
+			jbd_unlock_bh_state(bh);
+		}
+	} while (jh != last_jh);
+
+	return ret;
+out_return_1:
+	spin_lock(&journal->j_list_lock);
+	return 1;
 }
 
 #define NR_BATCH	64
@@ -218,7 +203,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 {
 	int i;
 
+	spin_unlock(&journal->j_list_lock);
 	ll_rw_block(SWRITE, *batch_count, bhs);
+	spin_lock(&journal->j_list_lock);
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = bhs[i];
 		clear_buffer_jwrite(bh);
@@ -234,46 +221,19 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 * Return 1 if something happened which requires us to abort the current
 * scan of the checkpoint list.
 *
- * Called with j_list_lock held and drops it if 1 is returned
+ * Called with j_list_lock held.
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 */
-static int __process_buffer(journal_t *journal, struct journal_head *jh,
-			struct buffer_head **bhs, int *batch_count)
+static int __flush_buffer(journal_t *journal, struct journal_head *jh,
+			struct buffer_head **bhs, int *batch_count,
+			int *drop_count)
 {
 	struct buffer_head *bh = jh2bh(jh);
 	int ret = 0;
 
-	if (buffer_locked(bh)) {
-		get_bh(bh);
-		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
-		wait_on_buffer(bh);
-		/* the journal_head may have gone by now */
-		BUFFER_TRACE(bh, "brelse");
-		put_bh(bh);
-		ret = 1;
-	}
-	else if (jh->b_transaction != NULL) {
-		transaction_t *t = jh->b_transaction;
-		tid_t tid = t->t_tid;
+	if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
+		J_ASSERT_JH(jh, jh->b_transaction == NULL);
 
-		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
-		log_start_commit(journal, tid);
-		log_wait_commit(journal, tid);
-		ret = 1;
-	}
-	else if (!buffer_dirty(bh)) {
-		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
-		BUFFER_TRACE(bh, "remove from checkpoint");
-		__journal_remove_checkpoint(jh);
-		spin_unlock(&journal->j_list_lock);
-		jbd_unlock_bh_state(bh);
-		journal_remove_journal_head(bh);
-		put_bh(bh);
-		ret = 1;
-	}
-	else {
 		/*
 		 * Important: we are about to write the buffer, and
 		 * possibly block, while still holding the journal lock.
@@ -286,30 +246,45 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		J_ASSERT_BH(bh, !buffer_jwrite(bh));
 		set_buffer_jwrite(bh);
 		bhs[*batch_count] = bh;
-		__buffer_relink_io(jh);
 		jbd_unlock_bh_state(bh);
 		(*batch_count)++;
 		if (*batch_count == NR_BATCH) {
-			spin_unlock(&journal->j_list_lock);
 			__flush_batch(journal, bhs, batch_count);
 			ret = 1;
 		}
+	} else {
+		int last_buffer = 0;
+		if (jh->b_cpnext == jh) {
+			/* We may be about to drop the transaction. Tell the
+			 * caller that the lists have changed.
+			 */
+			last_buffer = 1;
+		}
+		if (__try_to_free_cp_buf(jh)) {
+			(*drop_count)++;
+			ret = last_buffer;
+		}
 	}
 	return ret;
 }
 
 /*
- * Perform an actual checkpoint. We take the first transaction on the
- * list of transactions to be checkpointed and send all its buffers
- * to disk. We submit larger chunks of data at once.
+ * Perform an actual checkpoint. We don't write out only enough to
+ * satisfy the current blocked requests: rather we submit a reasonably
+ * sized chunk of the outstanding data to disk at once for
+ * efficiency. __log_wait_for_space() will retry if we didn't free enough.
 *
+ * However, we _do_ take into account the amount requested so that once
+ * the IO has been queued, we can return as soon as enough of it has
+ * completed to disk.
+ *
 * The journal should be locked before calling this function.
 */
 int log_do_checkpoint(journal_t *journal)
 {
-	transaction_t *transaction;
-	tid_t this_tid;
 	int result;
+	int batch_count = 0;
+	struct buffer_head *bhs[NR_BATCH];
 
 	jbd_debug(1, "Start checkpoint\n");
 
@@ -324,70 +299,79 @@ int log_do_checkpoint(journal_t *journal)
 		return result;
 
 	/*
-	 * OK, we need to start writing disk blocks. Take one transaction
-	 * and write it.
+	 * OK, we need to start writing disk blocks. Try to free up a
+	 * quarter of the log in a single checkpoint if we can.
 	 */
-	spin_lock(&journal->j_list_lock);
-	if (!journal->j_checkpoint_transactions)
-		goto out;
-	transaction = journal->j_checkpoint_transactions;
-	this_tid = transaction->t_tid;
-restart:
 	/*
-	 * If someone cleaned up this transaction while we slept, we're
-	 * done (maybe it's a new transaction, but it fell at the same
-	 * address).
+	 * AKPM: check this code. I had a feeling a while back that it
+	 * degenerates into a busy loop at unmount time.
 	 */
-	if (journal->j_checkpoint_transactions == transaction &&
-			transaction->t_tid == this_tid) {
-		int batch_count = 0;
-		struct buffer_head *bhs[NR_BATCH];
-		struct journal_head *jh;
-		int retry = 0;
-
-		while (!retry && transaction->t_checkpoint_list) {
+	spin_lock(&journal->j_list_lock);
+	while (journal->j_checkpoint_transactions) {
+		transaction_t *transaction;
+		struct journal_head *jh, *last_jh, *next_jh;
+		int drop_count = 0;
+		int cleanup_ret, retry = 0;
+		tid_t this_tid;
+
+		transaction = journal->j_checkpoint_transactions;
+		this_tid = transaction->t_tid;
+		jh = transaction->t_checkpoint_list;
+		last_jh = jh->b_cpprev;
+		next_jh = jh;
+		do {
 			struct buffer_head *bh;
 
-			jh = transaction->t_checkpoint_list;
+			jh = next_jh;
+			next_jh = jh->b_cpnext;
 			bh = jh2bh(jh);
 			if (!jbd_trylock_bh_state(bh)) {
 				jbd_sync_bh(journal, bh);
+				spin_lock(&journal->j_list_lock);
 				retry = 1;
 				break;
 			}
-			retry = __process_buffer(journal, jh, bhs,
-						&batch_count);
-			if (!retry &&
-			    lock_need_resched(&journal->j_list_lock)) {
-				spin_unlock(&journal->j_list_lock);
+			retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
+			if (cond_resched_lock(&journal->j_list_lock)) {
 				retry = 1;
 				break;
 			}
-		}
+		} while (jh != last_jh && !retry);
 
 		if (batch_count) {
-			if (!retry) {
-				spin_unlock(&journal->j_list_lock);
-				retry = 1;
-			}
 			__flush_batch(journal, bhs, &batch_count);
+			retry = 1;
 		}
 
-		if (retry) {
-			spin_lock(&journal->j_list_lock);
-			goto restart;
-		}
 		/*
-		 * Now we have cleaned up the first transaction's checkpoint
-		 * list. Let's clean up the second one.
+		 * If someone cleaned up this transaction while we slept, we're
+		 * done
+		 */
+		if (journal->j_checkpoint_transactions != transaction)
+			break;
+		if (retry)
+			continue;
+		/*
+		 * Maybe it's a new transaction, but it fell at the same
+		 * address
 		 */
-		__wait_cp_io(journal, transaction);
+		if (transaction->t_tid != this_tid)
+			continue;
+		/*
+		 * We have walked the whole transaction list without
+		 * finding anything to write to disk. We had better be
+		 * able to make some progress or we are in trouble.
+		 */
+		cleanup_ret = __cleanup_transaction(journal, transaction);
+		J_ASSERT(drop_count != 0 || cleanup_ret != 0);
+		if (journal->j_checkpoint_transactions != transaction)
+			break;
 	}
-out:
 	spin_unlock(&journal->j_list_lock);
 	result = cleanup_journal_tail(journal);
 	if (result < 0)
 		return result;
+
 	return 0;
 }
 
@@ -472,91 +456,52 @@ int cleanup_journal_tail(journal_t *journal)
 /* Checkpoint list management */
 
 /*
- * journal_clean_one_cp_list
- *
- * Find all the written-back checkpoint buffers in the given list and release them.
- *
- * Called with the journal locked.
- * Called with j_list_lock held.
- * Returns number of bufers reaped (for debug)
- */
-
-static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
-{
-	struct journal_head *last_jh;
-	struct journal_head *next_jh = jh;
-	int ret, freed = 0;
-
-	*released = 0;
-	if (!jh)
-		return 0;
-
-	last_jh = jh->b_cpprev;
-	do {
-		jh = next_jh;
-		next_jh = jh->b_cpnext;
-		/* Use trylock because of the ranking */
-		if (jbd_trylock_bh_state(jh2bh(jh))) {
-			ret = __try_to_free_cp_buf(jh);
-			if (ret) {
-				freed++;
-				if (ret == 2) {
-					*released = 1;
-					return freed;
-				}
-			}
-		}
-		/*
-		 * This function only frees up some memory if possible so we
-		 * dont have an obligation to finish processing. Bail out if
-		 * preemption requested:
-		 */
-		if (need_resched())
-			return freed;
-	} while (jh != last_jh);
-
-	return freed;
-}
-
-/*
 * journal_clean_checkpoint_list
 *
 * Find all the written-back checkpoint buffers in the journal and release them.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
+ * Returns number of bufers reaped (for debug)
 */
 
 int __journal_clean_checkpoint_list(journal_t *journal)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
-	int ret = 0, released;
+	int ret = 0;
 
 	transaction = journal->j_checkpoint_transactions;
-	if (!transaction)
+	if (transaction == 0)
 		goto out;
 
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
+		struct journal_head *jh;
+
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_list, &released);
-		if (need_resched())
-			goto out;
-		if (released)
-			continue;
-		/*
-		 * It is essential that we are as careful as in the case of
-		 * t_checkpoint_list with removing the buffer from the list as
-		 * we can possibly see not yet submitted buffers on io_list
-		 */
-		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_io_list, &released);
-		if (need_resched())
-			goto out;
+		jh = transaction->t_checkpoint_list;
+		if (jh) {
+			struct journal_head *last_jh = jh->b_cpprev;
+			struct journal_head *next_jh = jh;
+
+			do {
+				jh = next_jh;
+				next_jh = jh->b_cpnext;
+				/* Use trylock because of the ranknig */
+				if (jbd_trylock_bh_state(jh2bh(jh)))
+					ret += __try_to_free_cp_buf(jh);
+				/*
+				 * This function only frees up some memory
+				 * if possible so we dont have an obligation
+				 * to finish processing. Bail out if preemption
+				 * requested:
+				 */
+				if (need_resched())
+					goto out;
+			} while (jh != last_jh);
+		}
 	} while (transaction != last_transaction);
 out:
 	return ret;
@@ -571,22 +516,18 @@ out:
 * buffer updates committed in that transaction have safely been stored
 * elsewhere on disk. To achieve this, all of the buffers in a
 * transaction need to be maintained on the transaction's checkpoint
- * lists until they have been rewritten, at which point this function is
+ * list until they have been rewritten, at which point this function is
 * called to remove the buffer from the existing transaction's
- * checkpoint lists.
- *
- * The function returns 1 if it frees the transaction, 0 otherwise.
+ * checkpoint list.
 *
 * This function is called with the journal locked.
 * This function is called with j_list_lock held.
- * This function is called with jbd_lock_bh_state(jh2bh(jh))
 */
 
-int __journal_remove_checkpoint(struct journal_head *jh)
+void __journal_remove_checkpoint(struct journal_head *jh)
 {
 	transaction_t *transaction;
 	journal_t *journal;
-	int ret = 0;
 
 	JBUFFER_TRACE(jh, "entry");
 
@@ -597,10 +538,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
 	journal = transaction->t_journal;
 
 	__buffer_unlink(jh);
-	jh->b_cp_transaction = NULL;
 
-	if (transaction->t_checkpoint_list != NULL ||
-	    transaction->t_checkpoint_io_list != NULL)
+	if (transaction->t_checkpoint_list != NULL)
 		goto out;
 	JBUFFER_TRACE(jh, "transaction has no more buffers");
 
@@ -626,10 +565,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
 	/* Just in case anybody was waiting for more transactions to be
 	   checkpointed... */
 	wake_up(&journal->j_wait_logspace);
-	ret = 1;
 out:
 	JBUFFER_TRACE(jh, "exit");
-	return ret;
 }
 
 /*
@@ -691,7 +628,6 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 	J_ASSERT(transaction->t_shadow_list == NULL);
 	J_ASSERT(transaction->t_log_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
-	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 	J_ASSERT(transaction->t_updates == 0);
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
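[Note on the list manipulation restored above: the checkpoint list is circular and doubly linked through b_cpnext/b_cpprev, and a single-element list points at itself. That is why __buffer_unlink() tests t_checkpoint_list == jh twice — once to advance the head past the buffer being removed, and again to detect that the buffer was the last element, leaving the list empty. A small user-space model of the same unlink, an illustrative sketch only, not kernel code:

	struct node {
		struct node *next, *prev;	/* circular: a singleton points at itself */
	};

	/* Model of __buffer_unlink(): remove n from the circular list whose
	 * head pointer is *head, emptying the list if n was the only node. */
	static void unlink_node(struct node **head, struct node *n)
	{
		n->next->prev = n->prev;
		n->prev->next = n->next;
		if (*head == n)
			*head = n->next;	/* step the head past the victim */
		if (*head == n)
			*head = NULL;		/* n pointed at itself: list now empty */
	}
]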
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 29e62d98bae6..002ad2bbc769 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -829,8 +829,7 @@ restart_loop:
 	journal->j_committing_transaction = NULL;
 	spin_unlock(&journal->j_state_lock);
 
-	if (commit_transaction->t_checkpoint_list == NULL &&
-	    commit_transaction->t_checkpoint_io_list == NULL) {
+	if (commit_transaction->t_checkpoint_list == NULL) {
 		__journal_drop_transaction(journal, commit_transaction);
 	} else {
 		if (journal->j_checkpoint_transactions == NULL) {
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 3eaf6e701087..da6354baa0b8 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -111,9 +111,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
 /*
 * The server lockd has called us back to tell us the lock was granted
 */
-u32
-nlmclnt_grant(struct nlm_lock *lock)
+u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
 {
+	const struct file_lock *fl = &lock->fl;
+	const struct nfs_fh *fh = &lock->fh;
 	struct nlm_wait	*block;
 	u32 res = nlm_lck_denied;
 
@@ -122,14 +123,20 @@ nlmclnt_grant(struct nlm_lock *lock)
 	 * Warning: must not use cookie to match it!
 	 */
 	list_for_each_entry(block, &nlm_blocked, b_list) {
-		if (nlm_compare_locks(block->b_lock, &lock->fl)) {
-			/* Alright, we found a lock. Set the return status
-			 * and wake up the caller
-			 */
-			block->b_status = NLM_LCK_GRANTED;
-			wake_up(&block->b_wait);
-			res = nlm_granted;
-		}
+		struct file_lock *fl_blocked = block->b_lock;
+
+		if (!nlm_compare_locks(fl_blocked, fl))
+			continue;
+		if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
+			continue;
+		if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_dentry->d_inode) ,fh) != 0)
+			continue;
+		/* Alright, we found a lock. Set the return status
+		 * and wake up the caller
+		 */
+		block->b_status = NLM_LCK_GRANTED;
+		wake_up(&block->b_wait);
+		res = nlm_granted;
 	}
 	return res;
 }
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4063095d849e..b10f913aa06a 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -228,7 +228,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED called\n");
-	resp->status = nlmclnt_grant(&argp->lock);
+	resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
 	dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
 	return rpc_success;
 }
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3bc437e0cf5b..35681d9cf1fc 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -256,7 +256,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
 	resp->cookie = argp->cookie;
 
 	dprintk("lockd: GRANTED called\n");
-	resp->status = nlmclnt_grant(&argp->lock);
+	resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
 	dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
 	return rpc_success;
 }
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 42eb53b5293b..23ceaa7127b4 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -208,6 +208,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
 #define DLM_LOCK_RES_IN_PROGRESS          0x00000010
 #define DLM_LOCK_RES_MIGRATING            0x00000020
 
+/* max milliseconds to wait to sync up a network failure with a node death */
+#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
+
 #define DLM_PURGE_INTERVAL_MS   (8 * 1000)
 
 struct dlm_lock_resource
@@ -658,6 +661,7 @@ int dlm_launch_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
 
 void dlm_put(struct dlm_ctxt *dlm);
 struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 6001b22a997d..f66e2d818ccd 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -392,6 +392,11 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
 	} else {
 		mlog_errno(tmpret);
 		if (dlm_is_host_down(tmpret)) {
+			/* instead of logging the same network error over
+			 * and over, sleep here and wait for the heartbeat
+			 * to notice the node is dead. times out after 5s. */
+			dlm_wait_for_node_death(dlm, res->owner,
+						DLM_NODE_DEATH_WAIT_MAX);
 			ret = DLM_RECOVERING;
 			mlog(0, "node %u died so returning DLM_RECOVERING "
 			     "from convert message!\n", res->owner);
@@ -421,7 +426,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 	struct dlm_lockstatus *lksb;
 	enum dlm_status status = DLM_NORMAL;
 	u32 flags;
-	int call_ast = 0, kick_thread = 0;
+	int call_ast = 0, kick_thread = 0, ast_reserved = 0;
 
 	if (!dlm_grab(dlm)) {
 		dlm_error(DLM_REJECTED);
@@ -490,6 +495,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
 	status = __dlm_lockres_state_to_status(res);
 	if (status == DLM_NORMAL) {
 		__dlm_lockres_reserve_ast(res);
+		ast_reserved = 1;
 		res->state |= DLM_LOCK_RES_IN_PROGRESS;
 		status = __dlmconvert_master(dlm, res, lock, flags,
 					     cnv->requested_type,
@@ -512,10 +518,10 @@ leave:
 	else
 		dlm_lock_put(lock);
 
-	/* either queue the ast or release it */
+	/* either queue the ast or release it, if reserved */
 	if (call_ast)
 		dlm_queue_ast(dlm, lock);
-	else
+	else if (ast_reserved)
 		dlm_lockres_release_ast(dlm, res);
 
 	if (kick_thread)
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index d1a0038557a3..671d4ff222cc 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -220,6 +220,17 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 		dlm_error(status);
 		dlm_revert_pending_lock(res, lock);
 		dlm_lock_put(lock);
+	} else if (dlm_is_recovery_lock(res->lockname.name,
+					res->lockname.len)) {
+		/* special case for the $RECOVERY lock.
+		 * there will never be an AST delivered to put
+		 * this lock on the proper secondary queue
+		 * (granted), so do it manually. */
+		mlog(0, "%s: $RECOVERY lock for this node (%u) is "
+		     "mastered by %u; got lock, manually granting (no ast)\n",
+		     dlm->name, dlm->node_num, res->owner);
+		list_del_init(&lock->list);
+		list_add_tail(&lock->list, &res->granted);
 	}
 	spin_unlock(&res->spinlock);
 
@@ -646,7 +657,19 @@ retry_lock:
 			mlog(0, "retrying lock with migration/"
 			     "recovery/in progress\n");
 			msleep(100);
-			dlm_wait_for_recovery(dlm);
+			/* no waiting for dlm_reco_thread */
+			if (recovery) {
+				if (status == DLM_RECOVERING) {
+					mlog(0, "%s: got RECOVERING "
+					     "for $REOCVERY lock, master "
+					     "was %u\n", dlm->name,
+					     res->owner);
+					dlm_wait_for_node_death(dlm, res->owner,
+							DLM_NODE_DEATH_WAIT_MAX);
+				}
+			} else {
+				dlm_wait_for_recovery(dlm);
+			}
 			goto retry_lock;
 		}
 
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index a3194fe173d9..2e2e95e69499 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2482,7 +2482,9 @@ top:
 			atomic_set(&mle->woken, 1);
 			spin_unlock(&mle->spinlock);
 			wake_up(&mle->wq);
-			/* final put will take care of list removal */
+			/* do not need events any longer, so detach
+			 * from heartbeat */
+			__dlm_mle_detach_hb_events(dlm, mle);
 			__dlm_put_mle(mle);
 		}
 		continue;
@@ -2537,6 +2539,9 @@ top:
 		spin_unlock(&res->spinlock);
 		dlm_lockres_put(res);
 
+		/* about to get rid of mle, detach from heartbeat */
+		__dlm_mle_detach_hb_events(dlm, mle);
+
 		/* dump the mle */
 		spin_lock(&dlm->master_lock);
 		__dlm_put_mle(mle);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 186e9a76aa58..ed76bda1a534 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -278,6 +278,24 @@ int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node)
 	return dead;
 }
 
+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
+{
+	if (timeout) {
+		mlog(ML_NOTICE, "%s: waiting %dms for notification of "
+		     "death of node %u\n", dlm->name, timeout, node);
+		wait_event_timeout(dlm->dlm_reco_thread_wq,
+			   dlm_is_node_dead(dlm, node),
+			   msecs_to_jiffies(timeout));
+	} else {
+		mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
+		     "of death of node %u\n", dlm->name, node);
+		wait_event(dlm->dlm_reco_thread_wq,
+			   dlm_is_node_dead(dlm, node));
+	}
+	/* for now, return 0 */
+	return 0;
+}
+
 /* callers of the top-level api calls (dlmlock/dlmunlock) should
 * block on the dlm->reco.event when recovery is in progress.
 * the dlm recovery thread will set this state when it begins
@@ -2032,6 +2050,30 @@ again:
 			 dlm->reco.new_master);
 		status = -EEXIST;
 	} else {
+		status = 0;
+
+		/* see if recovery was already finished elsewhere */
+		spin_lock(&dlm->spinlock);
+		if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
+			status = -EINVAL;
+			mlog(0, "%s: got reco EX lock, but "
+			     "node got recovered already\n", dlm->name);
+			if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
+				mlog(ML_ERROR, "%s: new master is %u "
+				     "but no dead node!\n",
+				     dlm->name, dlm->reco.new_master);
+				BUG();
+			}
+		}
+		spin_unlock(&dlm->spinlock);
+	}
+
+	/* if this node has actually become the recovery master,
+	 * set the master and send the messages to begin recovery */
+	if (!status) {
+		mlog(0, "%s: dead=%u, this=%u, sending "
+		     "begin_reco now\n", dlm->name,
+		     dlm->reco.dead_node, dlm->node_num);
 		status = dlm_send_begin_reco_message(dlm,
 			      dlm->reco.dead_node);
 		/* this always succeeds */
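[Note: dlm_wait_for_node_death() added above is a thin wrapper around the kernel's wait_event()/wait_event_timeout() primitives — the predicate dlm_is_node_dead() is re-evaluated every time dlm_reco_thread_wq is woken, and a zero timeout means wait forever. A user-space analogue of that pattern using POSIX condition variables, a sketch only, with illustrative names not taken from the patch:

	#include <pthread.h>
	#include <stdbool.h>
	#include <time.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	static bool node_dead;	/* stands in for dlm_is_node_dead() */

	/* Like wait_event_timeout(): sleep until the predicate becomes true
	 * or timeout_ms elapses; timeout_ms == 0 means wait indefinitely. */
	static void wait_for_node_death(int timeout_ms)
	{
		pthread_mutex_lock(&lock);
		if (timeout_ms) {
			struct timespec dl;

			clock_gettime(CLOCK_REALTIME, &dl);
			dl.tv_sec += timeout_ms / 1000;
			dl.tv_nsec += (timeout_ms % 1000) * 1000000L;
			if (dl.tv_nsec >= 1000000000L) {
				dl.tv_sec++;
				dl.tv_nsec -= 1000000000L;
			}
			/* re-check the predicate on every wakeup; stop at the deadline */
			while (!node_dead &&
			       pthread_cond_timedwait(&cond, &lock, &dl) == 0)
				;
		} else {
			while (!node_dead)
				pthread_cond_wait(&cond, &lock);
		}
		pthread_mutex_unlock(&lock);
	}

	/* The heartbeat side: like wake_up(&dlm->dlm_reco_thread_wq). */
	static void mark_node_dead(void)
	{
		pthread_mutex_lock(&lock);
		node_dead = true;
		pthread_cond_broadcast(&cond);
		pthread_mutex_unlock(&lock);
	}
]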
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index fa0bcac5ceae..d329c9df90ae 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1584,10 +1584,9 @@ static int ocfs2_commit_thread(void *arg)
 	while (!(kthread_should_stop() &&
 		 atomic_read(&journal->j_num_trans) == 0)) {
 
-		wait_event_interruptible_timeout(osb->checkpoint_event,
-						 atomic_read(&journal->j_num_trans)
-						 || kthread_should_stop(),
-						 OCFS2_CHECKPOINT_INTERVAL);
+		wait_event_interruptible(osb->checkpoint_event,
+					 atomic_read(&journal->j_num_trans)
+					 || kthread_should_stop());
 
 		status = ocfs2_commit_cache(osb);
 		if (status < 0)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 7d0a816184fa..2f3a6acdac45 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -29,8 +29,6 @@
 #include <linux/fs.h>
 #include <linux/jbd.h>
 
-#define OCFS2_CHECKPOINT_INTERVAL        (8 * HZ)
-
 enum ocfs2_journal_state {
 	OCFS2_JOURNAL_FREE = 0,
 	OCFS2_JOURNAL_LOADED,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6573f31f1fd9..075d3e945602 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -204,10 +204,6 @@ int proc_fill_super(struct super_block *s, void *data, int silent)
 	root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root);
 	if (!root_inode)
 		goto out_no_root;
-	/*
-	 * Fixup the root inode's nlink value
-	 */
-	root_inode->i_nlink += nr_processes();
 	root_inode->i_uid = 0;
 	root_inode->i_gid = 0;
 	s->s_root = d_alloc_root(root_inode);
diff --git a/fs/proc/root.c b/fs/proc/root.c index 68896283c8ae..c3fd3611112f 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -80,16 +80,16 @@ void __init proc_root_init(void) | |||
80 | proc_bus = proc_mkdir("bus", NULL); | 80 | proc_bus = proc_mkdir("bus", NULL); |
81 | } | 81 | } |
82 | 82 | ||
83 | static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | 83 | static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat |
84 | ) | ||
84 | { | 85 | { |
85 | /* | 86 | generic_fillattr(dentry->d_inode, stat); |
86 | * nr_threads is actually protected by the tasklist_lock; | 87 | stat->nlink = proc_root.nlink + nr_processes(); |
87 | * however, it's conventional to do reads, especially for | 88 | return 0; |
88 | * reporting, without any locking whatsoever. | 89 | } |
89 | */ | ||
90 | if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */ | ||
91 | dir->i_nlink = proc_root.nlink + nr_threads; | ||
92 | 90 | ||
91 | static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) | ||
92 | { | ||
93 | if (!proc_lookup(dir, dentry, nd)) { | 93 | if (!proc_lookup(dir, dentry, nd)) { |
94 | return NULL; | 94 | return NULL; |
95 | } | 95 | } |
@@ -134,6 +134,7 @@ static struct file_operations proc_root_operations = { | |||
134 | */ | 134 | */ |
135 | static struct inode_operations proc_root_inode_operations = { | 135 | static struct inode_operations proc_root_inode_operations = { |
136 | .lookup = proc_root_lookup, | 136 | .lookup = proc_root_lookup, |
137 | .getattr = proc_root_getattr, | ||
137 | }; | 138 | }; |
138 | 139 | ||
139 | /* | 140 | /* |
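Together with the fs/proc/inode.c hunk above, this moves /proc's link count from a value patched into the inode at mount and lookup time to one computed on demand in ->getattr, so each stat(2) sees a fresh process count without mutating shared inode state. The behaviour is easy to observe from userspace:

/* Quick check of what the patch implements: /proc's link count
 * should track the number of running processes. */
#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
	struct stat st;

	if (stat("/proc", &st) != 0) {
		perror("stat");
		return 1;
	}
	/* nlink = fixed entries plus one subdirectory per PID */
	printf("/proc nlink: %lu\n", (unsigned long)st.st_nlink);
	return 0;
}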
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index ef5e5414e7a8..d63da756eb49 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -1124,8 +1124,6 @@ static void handle_attrs(struct super_block *s) | |||
1124 | "reiserfs: cannot support attributes until flag is set in super-block"); | 1124 | "reiserfs: cannot support attributes until flag is set in super-block"); |
1125 | REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); | 1125 | REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS); |
1126 | } | 1126 | } |
1127 | } else if (le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared) { | ||
1128 | REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ATTRS); | ||
1129 | } | 1127 | } |
1130 | } | 1128 | } |
1131 | 1129 | ||
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 43de3ba83332..ab8894c3b9e5 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -228,7 +228,8 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) | |||
228 | acl = ERR_PTR(retval); | 228 | acl = ERR_PTR(retval); |
229 | } else { | 229 | } else { |
230 | acl = posix_acl_from_disk(value, retval); | 230 | acl = posix_acl_from_disk(value, retval); |
231 | *p_acl = posix_acl_dup(acl); | 231 | if (!IS_ERR(acl)) |
232 | *p_acl = posix_acl_dup(acl); | ||
232 | } | 233 | } |
233 | 234 | ||
234 | kfree(value); | 235 | kfree(value); |
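posix_acl_from_disk() reports failure by returning an error pointer, so duplicating its result unconditionally would treat an encoded errno as a valid ACL. A self-contained sketch of the ERR_PTR/IS_ERR convention the fix relies on; the real macros live in include/linux/err.h, and the parser here is made up:

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;	/* encode a negative errno as a pointer */
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	/* error pointers live in the top page of the address space */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* A parser that reports failure through the pointer itself. */
static void *parse_acl(const void *buf, int len)
{
	if (len < 4)
		return ERR_PTR(-22);	/* -EINVAL */
	return (void *)buf;
}

int main(void)
{
	void *acl = parse_acl("xx", 2);

	if (IS_ERR(acl))		/* must check before using acl */
		printf("parse failed: %ld\n", PTR_ERR(acl));
	return 0;
}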
diff --git a/fs/select.c b/fs/select.c index bc60a3e14ef3..1815a57d2255 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -398,11 +398,15 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, | |||
398 | ret = core_sys_select(n, inp, outp, exp, &timeout); | 398 | ret = core_sys_select(n, inp, outp, exp, &timeout); |
399 | 399 | ||
400 | if (tvp) { | 400 | if (tvp) { |
401 | struct timeval rtv; | ||
402 | |||
401 | if (current->personality & STICKY_TIMEOUTS) | 403 | if (current->personality & STICKY_TIMEOUTS) |
402 | goto sticky; | 404 | goto sticky; |
403 | tv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); | 405 | rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); |
404 | tv.tv_sec = timeout; | 406 | rtv.tv_sec = timeout; |
405 | if (copy_to_user(tvp, &tv, sizeof(tv))) { | 407 | if (timeval_compare(&rtv, &tv) >= 0) |
408 | rtv = tv; | ||
409 | if (copy_to_user(tvp, &rtv, sizeof(rtv))) { | ||
406 | sticky: | 410 | sticky: |
407 | /* | 411 | /* |
408 | * If an application puts its timeval in read-only | 412 | * If an application puts its timeval in read-only |
@@ -460,11 +464,16 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, | |||
460 | ret = core_sys_select(n, inp, outp, exp, &timeout); | 464 | ret = core_sys_select(n, inp, outp, exp, &timeout); |
461 | 465 | ||
462 | if (tsp) { | 466 | if (tsp) { |
467 | struct timespec rts; | ||
468 | |||
463 | if (current->personality & STICKY_TIMEOUTS) | 469 | if (current->personality & STICKY_TIMEOUTS) |
464 | goto sticky; | 470 | goto sticky; |
465 | ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; | 471 | rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * |
466 | ts.tv_sec = timeout; | 472 | 1000; |
467 | if (copy_to_user(tsp, &ts, sizeof(ts))) { | 473 | rts.tv_sec = timeout; |
474 | if (timespec_compare(&rts, &ts) >= 0) | ||
475 | rts = ts; | ||
476 | if (copy_to_user(tsp, &rts, sizeof(rts))) { | ||
468 | sticky: | 477 | sticky: |
469 | /* | 478 | /* |
470 | * If an application puts its timeval in read-only | 479 | * If an application puts its timeval in read-only |
@@ -758,12 +767,17 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, | |||
758 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 767 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
759 | 768 | ||
760 | if (tsp && timeout >= 0) { | 769 | if (tsp && timeout >= 0) { |
770 | struct timespec rts; | ||
771 | |||
761 | if (current->personality & STICKY_TIMEOUTS) | 772 | if (current->personality & STICKY_TIMEOUTS) |
762 | goto sticky; | 773 | goto sticky; |
763 | /* Yes, we know it's actually an s64, but it's also positive. */ | 774 | /* Yes, we know it's actually an s64, but it's also positive. */ |
764 | ts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * 1000; | 775 | rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * |
765 | ts.tv_sec = timeout; | 776 | 1000; |
766 | if (copy_to_user(tsp, &ts, sizeof(ts))) { | 777 | rts.tv_sec = timeout; |
778 | if (timespec_compare(&rts, &ts) >= 0) | ||
779 | rts = ts; | ||
780 | if (copy_to_user(tsp, &rts, sizeof(rts))) { | ||
767 | sticky: | 781 | sticky: |
768 | /* | 782 | /* |
769 | * If an application puts its timeval in read-only | 783 | * If an application puts its timeval in read-only |
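All three select.c hunks make the same fix: the remaining timeout written back to userspace is derived from jiffies, and the conversion can round up, so the result is clamped to the timeout the caller originally passed in. A standalone sketch of that clamp, with HZ and the jiffies conversion as simplified stand-ins for the kernel's:

#include <stdio.h>
#include <sys/time.h>

#define HZ 100	/* illustrative tick rate, not the kernel's config */

static struct timeval jiffies_to_tv(unsigned long long j)
{
	struct timeval tv;

	tv.tv_sec = j / HZ;
	tv.tv_usec = (j % HZ) * (1000000 / HZ);
	return tv;
}

int main(void)
{
	struct timeval requested = { .tv_sec = 1, .tv_usec = 0 };
	/* 101 ticks is just over one second: without the clamp the
	 * caller would be told more time remains than it asked for. */
	struct timeval remaining = jiffies_to_tv(101);

	if (remaining.tv_sec > requested.tv_sec ||
	    (remaining.tv_sec == requested.tv_sec &&
	     remaining.tv_usec >= requested.tv_usec))
		remaining = requested;	/* the clamp the hunks add */

	printf("report %ld.%06ld s remaining\n",
	       (long)remaining.tv_sec, (long)remaining.tv_usec);
	return 0;
}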
diff --git a/fs/stat.c b/fs/stat.c --- a/fs/stat.c +++ b/fs/stat.c | |||
@@ -261,6 +261,7 @@ asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf) | |||
261 | return error; | 261 | return error; |
262 | } | 262 | } |
263 | 263 | ||
264 | #ifndef __ARCH_WANT_STAT64 | ||
264 | asmlinkage long sys_newfstatat(int dfd, char __user *filename, | 265 | asmlinkage long sys_newfstatat(int dfd, char __user *filename, |
265 | struct stat __user *statbuf, int flag) | 266 | struct stat __user *statbuf, int flag) |
266 | { | 267 | { |
@@ -281,6 +282,7 @@ asmlinkage long sys_newfstatat(int dfd, char __user *filename, | |||
281 | out: | 282 | out: |
282 | return error; | 283 | return error; |
283 | } | 284 | } |
285 | #endif | ||
284 | 286 | ||
285 | asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf) | 287 | asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf) |
286 | { | 288 | { |
@@ -395,6 +397,26 @@ asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user * statbuf) | |||
395 | return error; | 397 | return error; |
396 | } | 398 | } |
397 | 399 | ||
400 | asmlinkage long sys_fstatat64(int dfd, char __user *filename, | ||
401 | struct stat64 __user *statbuf, int flag) | ||
402 | { | ||
403 | struct kstat stat; | ||
404 | int error = -EINVAL; | ||
405 | |||
406 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
407 | goto out; | ||
408 | |||
409 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
410 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
411 | else | ||
412 | error = vfs_stat_fd(dfd, filename, &stat); | ||
413 | |||
414 | if (!error) | ||
415 | error = cp_new_stat64(&stat, statbuf); | ||
416 | |||
417 | out: | ||
418 | return error; | ||
419 | } | ||
398 | #endif /* __ARCH_WANT_STAT64 */ | 420 | #endif /* __ARCH_WANT_STAT64 */ |
399 | 421 | ||
400 | void inode_add_bytes(struct inode *inode, loff_t bytes) | 422 | void inode_add_bytes(struct inode *inode, loff_t bytes) |
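The new sys_fstatat64() mirrors sys_newfstatat() for the stat64 layout: it rejects unknown flags, then dispatches to the lstat or stat path depending on AT_SYMLINK_NOFOLLOW. From userspace the syscall is reached through fstatat(); a small usage example, with the path and fd choices arbitrary:

/* Userspace view of the syscall added above: fstatat() resolves the
 * path relative to a directory fd and honours AT_SYMLINK_NOFOLLOW. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	int dfd = open("/etc", O_RDONLY | O_DIRECTORY);

	if (dfd < 0)
		return 1;
	/* stat /etc/hostname without following a final symlink */
	if (fstatat(dfd, "hostname", &st, AT_SYMLINK_NOFOLLOW) == 0)
		printf("size: %lld\n", (long long)st.st_size);
	close(dfd);
	return 0;
}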
diff --git a/fs/super.c b/fs/super.c index 30294218fa63..e20b5580afd5 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -666,6 +666,16 @@ static int test_bdev_super(struct super_block *s, void *data) | |||
666 | return (void *)s->s_bdev == data; | 666 | return (void *)s->s_bdev == data; |
667 | } | 667 | } |
668 | 668 | ||
669 | static void bdev_uevent(struct block_device *bdev, enum kobject_action action) | ||
670 | { | ||
671 | if (bdev->bd_disk) { | ||
672 | if (bdev->bd_part) | ||
673 | kobject_uevent(&bdev->bd_part->kobj, action); | ||
674 | else | ||
675 | kobject_uevent(&bdev->bd_disk->kobj, action); | ||
676 | } | ||
677 | } | ||
678 | |||
669 | struct super_block *get_sb_bdev(struct file_system_type *fs_type, | 679 | struct super_block *get_sb_bdev(struct file_system_type *fs_type, |
670 | int flags, const char *dev_name, void *data, | 680 | int flags, const char *dev_name, void *data, |
671 | int (*fill_super)(struct super_block *, void *, int)) | 681 | int (*fill_super)(struct super_block *, void *, int)) |
@@ -707,8 +717,10 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, | |||
707 | up_write(&s->s_umount); | 717 | up_write(&s->s_umount); |
708 | deactivate_super(s); | 718 | deactivate_super(s); |
709 | s = ERR_PTR(error); | 719 | s = ERR_PTR(error); |
710 | } else | 720 | } else { |
711 | s->s_flags |= MS_ACTIVE; | 721 | s->s_flags |= MS_ACTIVE; |
722 | bdev_uevent(bdev, KOBJ_MOUNT); | ||
723 | } | ||
712 | } | 724 | } |
713 | 725 | ||
714 | return s; | 726 | return s; |
@@ -724,6 +736,7 @@ void kill_block_super(struct super_block *sb) | |||
724 | { | 736 | { |
725 | struct block_device *bdev = sb->s_bdev; | 737 | struct block_device *bdev = sb->s_bdev; |
726 | 738 | ||
739 | bdev_uevent(bdev, KOBJ_UMOUNT); | ||
727 | generic_shutdown_super(sb); | 740 | generic_shutdown_super(sb); |
728 | sync_blockdev(bdev); | 741 | sync_blockdev(bdev); |
729 | close_bdev_excl(bdev); | 742 | close_bdev_excl(bdev); |
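bdev_uevent() notifies userspace on the most specific kobject available: the partition if one is bound, otherwise the whole disk, and nothing at all if no gendisk is attached. A toy userspace sketch of that fallback dispatch, with every type here a hypothetical stand-in for the kernel's:

#include <stdio.h>

struct kobj { const char *name; };
struct gendisk { struct kobj kobj; };
struct hd_part { struct kobj kobj; };

struct blockdev {
	struct gendisk *bd_disk;	/* NULL until the device is set up */
	struct hd_part *bd_part;	/* NULL for whole-disk devices */
};

static void notify(struct kobj *k, const char *action)
{
	/* stand-in for kobject_uevent() */
	printf("uevent: %s %s\n", action, k->name);
}

static void blockdev_event(struct blockdev *bdev, const char *action)
{
	if (!bdev->bd_disk)
		return;				/* no gendisk, nobody to notify */
	if (bdev->bd_part)
		notify(&bdev->bd_part->kobj, action);	/* partition is more specific */
	else
		notify(&bdev->bd_disk->kobj, action);
}

int main(void)
{
	struct gendisk sda = { { "sda" } };
	struct hd_part sda1 = { { "sda1" } };
	struct blockdev whole = { &sda, NULL };
	struct blockdev part = { &sda, &sda1 };

	blockdev_event(&whole, "mount");	/* -> sda */
	blockdev_event(&part, "mount");		/* -> sda1 */
	return 0;
}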