aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/aops.c9
-rw-r--r--fs/ocfs2/aops.h3
-rw-r--r--fs/ocfs2/dcache.c33
-rw-r--r--fs/ocfs2/dcache.h1
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c8
-rw-r--r--fs/ocfs2/dlmglue.c8
-rw-r--r--fs/ocfs2/file.c73
-rw-r--r--fs/ocfs2/inode.c1
-rw-r--r--fs/ocfs2/inode.h12
-rw-r--r--fs/ocfs2/ioctl.c356
-rw-r--r--fs/ocfs2/journal.c9
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/mmap.c7
-rw-r--r--fs/ocfs2/namei.c3
-rw-r--r--fs/ocfs2/ocfs2.h32
-rw-r--r--fs/ocfs2/ocfs2_fs.h5
-rw-r--r--fs/ocfs2/ocfs2_ioctl.h95
-rw-r--r--fs/ocfs2/refcounttree.c43
-rw-r--r--fs/ocfs2/refcounttree.h7
-rw-r--r--fs/ocfs2/slot_map.c2
-rw-r--r--fs/ocfs2/suballoc.c16
-rw-r--r--fs/ocfs2/super.c104
-rw-r--r--fs/ocfs2/sysfile.c60
23 files changed, 782 insertions, 108 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0de69c9a08be..5cfeee118158 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -883,8 +883,8 @@ struct ocfs2_write_ctxt {
883 * out in so that future reads from that region will get 883 * out in so that future reads from that region will get
884 * zero's. 884 * zero's.
885 */ 885 */
886 struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
887 unsigned int w_num_pages; 886 unsigned int w_num_pages;
887 struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
888 struct page *w_target_page; 888 struct page *w_target_page;
889 889
890 /* 890 /*
@@ -1642,7 +1642,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
1642 return ret; 1642 return ret;
1643} 1643}
1644 1644
1645int ocfs2_write_begin_nolock(struct address_space *mapping, 1645int ocfs2_write_begin_nolock(struct file *filp,
1646 struct address_space *mapping,
1646 loff_t pos, unsigned len, unsigned flags, 1647 loff_t pos, unsigned len, unsigned flags,
1647 struct page **pagep, void **fsdata, 1648 struct page **pagep, void **fsdata,
1648 struct buffer_head *di_bh, struct page *mmap_page) 1649 struct buffer_head *di_bh, struct page *mmap_page)
@@ -1692,7 +1693,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1692 mlog_errno(ret); 1693 mlog_errno(ret);
1693 goto out; 1694 goto out;
1694 } else if (ret == 1) { 1695 } else if (ret == 1) {
1695 ret = ocfs2_refcount_cow(inode, di_bh, 1696 ret = ocfs2_refcount_cow(inode, filp, di_bh,
1696 wc->w_cpos, wc->w_clen, UINT_MAX); 1697 wc->w_cpos, wc->w_clen, UINT_MAX);
1697 if (ret) { 1698 if (ret) {
1698 mlog_errno(ret); 1699 mlog_errno(ret);
@@ -1854,7 +1855,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
1854 */ 1855 */
1855 down_write(&OCFS2_I(inode)->ip_alloc_sem); 1856 down_write(&OCFS2_I(inode)->ip_alloc_sem);
1856 1857
1857 ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, 1858 ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep,
1858 fsdata, di_bh, NULL); 1859 fsdata, di_bh, NULL);
1859 if (ret) { 1860 if (ret) {
1860 mlog_errno(ret); 1861 mlog_errno(ret);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index c48e93ffc513..7606f663da6d 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -48,7 +48,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
48 loff_t pos, unsigned len, unsigned copied, 48 loff_t pos, unsigned len, unsigned copied,
49 struct page *page, void *fsdata); 49 struct page *page, void *fsdata);
50 50
51int ocfs2_write_begin_nolock(struct address_space *mapping, 51int ocfs2_write_begin_nolock(struct file *filp,
52 struct address_space *mapping,
52 loff_t pos, unsigned len, unsigned flags, 53 loff_t pos, unsigned len, unsigned flags,
53 struct page **pagep, void **fsdata, 54 struct page **pagep, void **fsdata,
54 struct buffer_head *di_bh, struct page *mmap_page); 55 struct buffer_head *di_bh, struct page *mmap_page);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b4957c7d9fe2..edaded48e7e9 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -40,6 +40,14 @@
40#include "inode.h" 40#include "inode.h"
41#include "super.h" 41#include "super.h"
42 42
43void ocfs2_dentry_attach_gen(struct dentry *dentry)
44{
45 unsigned long gen =
46 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
47 BUG_ON(dentry->d_inode);
48 dentry->d_fsdata = (void *)gen;
49}
50
43 51
44static int ocfs2_dentry_revalidate(struct dentry *dentry, 52static int ocfs2_dentry_revalidate(struct dentry *dentry,
45 struct nameidata *nd) 53 struct nameidata *nd)
@@ -51,11 +59,20 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
51 mlog_entry("(0x%p, '%.*s')\n", dentry, 59 mlog_entry("(0x%p, '%.*s')\n", dentry,
52 dentry->d_name.len, dentry->d_name.name); 60 dentry->d_name.len, dentry->d_name.name);
53 61
54 /* Never trust a negative dentry - force a new lookup. */ 62 /* For a negative dentry -
63 * check the generation number of the parent and compare it with the
64 * one stored in the dentry (d_fsdata).
65 */
55 if (inode == NULL) { 66 if (inode == NULL) {
56 mlog(0, "negative dentry: %.*s\n", dentry->d_name.len, 67 unsigned long gen = (unsigned long) dentry->d_fsdata;
57 dentry->d_name.name); 68 unsigned long pgen =
58 goto bail; 69 OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
70 mlog(0, "negative dentry: %.*s parent gen: %lu "
71 "dentry gen: %lu\n",
72 dentry->d_name.len, dentry->d_name.name, pgen, gen);
73 if (gen != pgen)
74 goto bail;
75 goto valid;
59 } 76 }
60 77
61 BUG_ON(!osb); 78 BUG_ON(!osb);
@@ -96,6 +113,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
96 goto bail; 113 goto bail;
97 } 114 }
98 115
116valid:
99 ret = 1; 117 ret = 1;
100 118
101bail: 119bail:
@@ -227,6 +245,12 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
227 if (!inode) 245 if (!inode)
228 return 0; 246 return 0;
229 247
248 if (!dentry->d_inode && dentry->d_fsdata) {
249 /* Converting a negative dentry to positive
250 Clear dentry->d_fsdata */
251 dentry->d_fsdata = dl = NULL;
252 }
253
230 if (dl) { 254 if (dl) {
231 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, 255 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
232 " \"%.*s\": old parent: %llu, new: %llu\n", 256 " \"%.*s\": old parent: %llu, new: %llu\n",
@@ -452,6 +476,7 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
452 476
453out: 477out:
454 iput(inode); 478 iput(inode);
479 ocfs2_dentry_attach_gen(dentry);
455} 480}
456 481
457/* 482/*
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index f5dd1789acf1..b79eff709958 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -64,5 +64,6 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
64 struct inode *old_dir, struct inode *new_dir); 64 struct inode *old_dir, struct inode *new_dir);
65 65
66extern spinlock_t dentry_attach_lock; 66extern spinlock_t dentry_attach_lock;
67void ocfs2_dentry_attach_gen(struct dentry *dentry);
67 68
68#endif /* OCFS2_DCACHE_H */ 69#endif /* OCFS2_DCACHE_H */
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index f693ab812f3e..272ec8631a51 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -493,7 +493,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
493 struct hlist_head *bucket; 493 struct hlist_head *bucket;
494 struct hlist_node *list; 494 struct hlist_node *list;
495 int i, out = 0; 495 int i, out = 0;
496 unsigned long total = 0, longest = 0, bktcnt; 496 unsigned long total = 0, longest = 0, bucket_count = 0;
497 497
498 out += snprintf(db->buf + out, db->len - out, 498 out += snprintf(db->buf + out, db->len - out,
499 "Dumping MLEs for Domain: %s\n", dlm->name); 499 "Dumping MLEs for Domain: %s\n", dlm->name);
@@ -505,13 +505,13 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db)
505 mle = hlist_entry(list, struct dlm_master_list_entry, 505 mle = hlist_entry(list, struct dlm_master_list_entry,
506 master_hash_node); 506 master_hash_node);
507 ++total; 507 ++total;
508 ++bktcnt; 508 ++bucket_count;
509 if (db->len - out < 200) 509 if (db->len - out < 200)
510 continue; 510 continue;
511 out += dump_mle(mle, db->buf + out, db->len - out); 511 out += dump_mle(mle, db->buf + out, db->len - out);
512 } 512 }
513 longest = max(longest, bktcnt); 513 longest = max(longest, bucket_count);
514 bktcnt = 0; 514 bucket_count = 0;
515 } 515 }
516 spin_unlock(&dlm->master_lock); 516 spin_unlock(&dlm->master_lock);
517 517
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 5e02a893f46e..e8d94d722ecb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3635,10 +3635,18 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
3635{ 3635{
3636 struct inode *inode; 3636 struct inode *inode;
3637 struct address_space *mapping; 3637 struct address_space *mapping;
3638 struct ocfs2_inode_info *oi;
3638 3639
3639 inode = ocfs2_lock_res_inode(lockres); 3640 inode = ocfs2_lock_res_inode(lockres);
3640 mapping = inode->i_mapping; 3641 mapping = inode->i_mapping;
3641 3642
3643 if (S_ISDIR(inode->i_mode)) {
3644 oi = OCFS2_I(inode);
3645 oi->ip_dir_lock_gen++;
3646 mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
3647 goto out;
3648 }
3649
3642 if (!S_ISREG(inode->i_mode)) 3650 if (!S_ISREG(inode->i_mode))
3643 goto out; 3651 goto out;
3644 3652
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9a03c151b5ce..9e8cc4346b76 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -64,12 +64,6 @@
64 64
65#include "buffer_head_io.h" 65#include "buffer_head_io.h"
66 66
67static int ocfs2_sync_inode(struct inode *inode)
68{
69 filemap_fdatawrite(inode->i_mapping);
70 return sync_mapping_buffers(inode->i_mapping);
71}
72
73static int ocfs2_init_file_private(struct inode *inode, struct file *file) 67static int ocfs2_init_file_private(struct inode *inode, struct file *file)
74{ 68{
75 struct ocfs2_file_private *fp; 69 struct ocfs2_file_private *fp;
@@ -180,16 +174,12 @@ static int ocfs2_sync_file(struct file *file, int datasync)
180{ 174{
181 int err = 0; 175 int err = 0;
182 journal_t *journal; 176 journal_t *journal;
183 struct dentry *dentry = file->f_path.dentry;
184 struct inode *inode = file->f_mapping->host; 177 struct inode *inode = file->f_mapping->host;
185 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 178 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
186 179
187 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, 180 mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync,
188 dentry->d_name.len, dentry->d_name.name); 181 file->f_path.dentry, file->f_path.dentry->d_name.len,
189 182 file->f_path.dentry->d_name.name);
190 err = ocfs2_sync_inode(dentry->d_inode);
191 if (err)
192 goto bail;
193 183
194 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { 184 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
195 /* 185 /*
@@ -370,7 +360,7 @@ static int ocfs2_cow_file_pos(struct inode *inode,
370 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 360 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
371 goto out; 361 goto out;
372 362
373 return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); 363 return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
374 364
375out: 365out:
376 return status; 366 return status;
@@ -913,8 +903,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode,
913 zero_clusters = last_cpos - zero_cpos; 903 zero_clusters = last_cpos - zero_cpos;
914 904
915 if (needs_cow) { 905 if (needs_cow) {
916 rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, 906 rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
917 UINT_MAX); 907 zero_clusters, UINT_MAX);
918 if (rc) { 908 if (rc) {
919 mlog_errno(rc); 909 mlog_errno(rc);
920 goto out; 910 goto out;
@@ -2062,6 +2052,7 @@ out:
2062} 2052}
2063 2053
2064static int ocfs2_prepare_inode_for_refcount(struct inode *inode, 2054static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
2055 struct file *file,
2065 loff_t pos, size_t count, 2056 loff_t pos, size_t count,
2066 int *meta_level) 2057 int *meta_level)
2067{ 2058{
@@ -2079,7 +2070,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
2079 2070
2080 *meta_level = 1; 2071 *meta_level = 1;
2081 2072
2082 ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); 2073 ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
2083 if (ret) 2074 if (ret)
2084 mlog_errno(ret); 2075 mlog_errno(ret);
2085out: 2076out:
@@ -2087,7 +2078,7 @@ out:
2087 return ret; 2078 return ret;
2088} 2079}
2089 2080
2090static int ocfs2_prepare_inode_for_write(struct dentry *dentry, 2081static int ocfs2_prepare_inode_for_write(struct file *file,
2091 loff_t *ppos, 2082 loff_t *ppos,
2092 size_t count, 2083 size_t count,
2093 int appending, 2084 int appending,
@@ -2095,6 +2086,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
2095 int *has_refcount) 2086 int *has_refcount)
2096{ 2087{
2097 int ret = 0, meta_level = 0; 2088 int ret = 0, meta_level = 0;
2089 struct dentry *dentry = file->f_path.dentry;
2098 struct inode *inode = dentry->d_inode; 2090 struct inode *inode = dentry->d_inode;
2099 loff_t saved_pos, end; 2091 loff_t saved_pos, end;
2100 2092
@@ -2150,6 +2142,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
2150 meta_level = -1; 2142 meta_level = -1;
2151 2143
2152 ret = ocfs2_prepare_inode_for_refcount(inode, 2144 ret = ocfs2_prepare_inode_for_refcount(inode,
2145 file,
2153 saved_pos, 2146 saved_pos,
2154 count, 2147 count,
2155 &meta_level); 2148 &meta_level);
@@ -2232,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2232 struct file *file = iocb->ki_filp; 2225 struct file *file = iocb->ki_filp;
2233 struct inode *inode = file->f_path.dentry->d_inode; 2226 struct inode *inode = file->f_path.dentry->d_inode;
2234 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2227 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2228 int full_coherency = !(osb->s_mount_opt &
2229 OCFS2_MOUNT_COHERENCY_BUFFERED);
2235 2230
2236 mlog_entry("(0x%p, %u, '%.*s')\n", file, 2231 mlog_entry("(0x%p, %u, '%.*s')\n", file,
2237 (unsigned int)nr_segs, 2232 (unsigned int)nr_segs,
@@ -2255,16 +2250,39 @@ relock:
2255 have_alloc_sem = 1; 2250 have_alloc_sem = 1;
2256 } 2251 }
2257 2252
2258 /* concurrent O_DIRECT writes are allowed */ 2253 /*
2259 rw_level = !direct_io; 2254 * Concurrent O_DIRECT writes are allowed with
2255 * mount_option "coherency=buffered".
2256 */
2257 rw_level = (!direct_io || full_coherency);
2258
2260 ret = ocfs2_rw_lock(inode, rw_level); 2259 ret = ocfs2_rw_lock(inode, rw_level);
2261 if (ret < 0) { 2260 if (ret < 0) {
2262 mlog_errno(ret); 2261 mlog_errno(ret);
2263 goto out_sems; 2262 goto out_sems;
2264 } 2263 }
2265 2264
2265 /*
2266 * O_DIRECT writes with "coherency=full" need to take EX cluster
2267 * inode_lock to guarantee coherency.
2268 */
2269 if (direct_io && full_coherency) {
2270 /*
2271 * We need to take and drop the inode lock to force
2272 * other nodes to drop their caches. Buffered I/O
2273 * already does this in write_begin().
2274 */
2275 ret = ocfs2_inode_lock(inode, NULL, 1);
2276 if (ret < 0) {
2277 mlog_errno(ret);
2278 goto out_sems;
2279 }
2280
2281 ocfs2_inode_unlock(inode, 1);
2282 }
2283
2266 can_do_direct = direct_io; 2284 can_do_direct = direct_io;
2267 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, 2285 ret = ocfs2_prepare_inode_for_write(file, ppos,
2268 iocb->ki_left, appending, 2286 iocb->ki_left, appending,
2269 &can_do_direct, &has_refcount); 2287 &can_do_direct, &has_refcount);
2270 if (ret < 0) { 2288 if (ret < 0) {
@@ -2312,17 +2330,6 @@ relock:
2312 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 2330 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
2313 ppos, count, ocount); 2331 ppos, count, ocount);
2314 if (written < 0) { 2332 if (written < 0) {
2315 /*
2316 * direct write may have instantiated a few
2317 * blocks outside i_size. Trim these off again.
2318 * Don't need i_size_read because we hold i_mutex.
2319 *
2320 * XXX(truncate): this looks buggy because ocfs2 did not
2321 * actually implement ->truncate. Take a look at
2322 * the new truncate sequence and update this accordingly
2323 */
2324 if (*ppos + count > inode->i_size)
2325 truncate_setsize(inode, inode->i_size);
2326 ret = written; 2333 ret = written;
2327 goto out_dio; 2334 goto out_dio;
2328 } 2335 }
@@ -2394,7 +2401,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
2394{ 2401{
2395 int ret; 2402 int ret;
2396 2403
2397 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, 2404 ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
2398 sd->total_len, 0, NULL, NULL); 2405 sd->total_len, 0, NULL, NULL);
2399 if (ret < 0) { 2406 if (ret < 0) {
2400 mlog_errno(ret); 2407 mlog_errno(ret);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index eece3e05d9d0..f935fd6600dd 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -335,6 +335,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
335 else 335 else
336 inode->i_fop = &ocfs2_dops_no_plocks; 336 inode->i_fop = &ocfs2_dops_no_plocks;
337 i_size_write(inode, le64_to_cpu(fe->i_size)); 337 i_size_write(inode, le64_to_cpu(fe->i_size));
338 OCFS2_I(inode)->ip_dir_lock_gen = 1;
338 break; 339 break;
339 case S_IFLNK: 340 case S_IFLNK:
340 if (ocfs2_inode_is_fast_symlink(inode)) 341 if (ocfs2_inode_is_fast_symlink(inode))
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 6de5a869db30..1c508b149b3a 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -46,30 +46,28 @@ struct ocfs2_inode_info
46 /* These fields are protected by ip_lock */ 46 /* These fields are protected by ip_lock */
47 spinlock_t ip_lock; 47 spinlock_t ip_lock;
48 u32 ip_open_count; 48 u32 ip_open_count;
49 u32 ip_clusters;
50 struct list_head ip_io_markers; 49 struct list_head ip_io_markers;
50 u32 ip_clusters;
51 51
52 u16 ip_dyn_features;
52 struct mutex ip_io_mutex; 53 struct mutex ip_io_mutex;
53
54 u32 ip_flags; /* see below */ 54 u32 ip_flags; /* see below */
55 u32 ip_attr; /* inode attributes */ 55 u32 ip_attr; /* inode attributes */
56 u16 ip_dyn_features;
57 56
58 /* protected by recovery_lock. */ 57 /* protected by recovery_lock. */
59 struct inode *ip_next_orphan; 58 struct inode *ip_next_orphan;
60 59
61 u32 ip_dir_start_lookup;
62
63 struct ocfs2_caching_info ip_metadata_cache; 60 struct ocfs2_caching_info ip_metadata_cache;
64
65 struct ocfs2_extent_map ip_extent_map; 61 struct ocfs2_extent_map ip_extent_map;
66
67 struct inode vfs_inode; 62 struct inode vfs_inode;
68 struct jbd2_inode ip_jinode; 63 struct jbd2_inode ip_jinode;
69 64
65 u32 ip_dir_start_lookup;
66
70 /* Only valid if the inode is the dir. */ 67 /* Only valid if the inode is the dir. */
71 u32 ip_last_used_slot; 68 u32 ip_last_used_slot;
72 u64 ip_last_used_group; 69 u64 ip_last_used_group;
70 u32 ip_dir_lock_gen;
73 71
74 struct ocfs2_alloc_reservation ip_la_data_resv; 72 struct ocfs2_alloc_reservation ip_la_data_resv;
75}; 73};
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 7d9d9c132cef..7a4868196152 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -26,6 +26,26 @@
26 26
27#include <linux/ext2_fs.h> 27#include <linux/ext2_fs.h>
28 28
29#define o2info_from_user(a, b) \
30 copy_from_user(&(a), (b), sizeof(a))
31#define o2info_to_user(a, b) \
32 copy_to_user((typeof(a) __user *)b, &(a), sizeof(a))
33
34/*
35 * This call is void because we are already reporting an error that may
36 * be -EFAULT. The error will be returned from the ioctl(2) call. It's
37 * just a best-effort to tell userspace that this request caused the error.
38 */
39static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq,
40 struct ocfs2_info_request __user *req)
41{
42 kreq->ir_flags |= OCFS2_INFO_FL_ERROR;
43 (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags));
44}
45
46#define o2info_set_request_error(a, b) \
47 __o2info_set_request_error((struct ocfs2_info_request *)&(a), b)
48
29static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) 49static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
30{ 50{
31 int status; 51 int status;
@@ -109,6 +129,328 @@ bail:
109 return status; 129 return status;
110} 130}
111 131
132int ocfs2_info_handle_blocksize(struct inode *inode,
133 struct ocfs2_info_request __user *req)
134{
135 int status = -EFAULT;
136 struct ocfs2_info_blocksize oib;
137
138 if (o2info_from_user(oib, req))
139 goto bail;
140
141 oib.ib_blocksize = inode->i_sb->s_blocksize;
142 oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED;
143
144 if (o2info_to_user(oib, req))
145 goto bail;
146
147 status = 0;
148bail:
149 if (status)
150 o2info_set_request_error(oib, req);
151
152 return status;
153}
154
155int ocfs2_info_handle_clustersize(struct inode *inode,
156 struct ocfs2_info_request __user *req)
157{
158 int status = -EFAULT;
159 struct ocfs2_info_clustersize oic;
160 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
161
162 if (o2info_from_user(oic, req))
163 goto bail;
164
165 oic.ic_clustersize = osb->s_clustersize;
166 oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED;
167
168 if (o2info_to_user(oic, req))
169 goto bail;
170
171 status = 0;
172bail:
173 if (status)
174 o2info_set_request_error(oic, req);
175
176 return status;
177}
178
179int ocfs2_info_handle_maxslots(struct inode *inode,
180 struct ocfs2_info_request __user *req)
181{
182 int status = -EFAULT;
183 struct ocfs2_info_maxslots oim;
184 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
185
186 if (o2info_from_user(oim, req))
187 goto bail;
188
189 oim.im_max_slots = osb->max_slots;
190 oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED;
191
192 if (o2info_to_user(oim, req))
193 goto bail;
194
195 status = 0;
196bail:
197 if (status)
198 o2info_set_request_error(oim, req);
199
200 return status;
201}
202
203int ocfs2_info_handle_label(struct inode *inode,
204 struct ocfs2_info_request __user *req)
205{
206 int status = -EFAULT;
207 struct ocfs2_info_label oil;
208 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
209
210 if (o2info_from_user(oil, req))
211 goto bail;
212
213 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
214 oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED;
215
216 if (o2info_to_user(oil, req))
217 goto bail;
218
219 status = 0;
220bail:
221 if (status)
222 o2info_set_request_error(oil, req);
223
224 return status;
225}
226
227int ocfs2_info_handle_uuid(struct inode *inode,
228 struct ocfs2_info_request __user *req)
229{
230 int status = -EFAULT;
231 struct ocfs2_info_uuid oiu;
232 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
233
234 if (o2info_from_user(oiu, req))
235 goto bail;
236
237 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
238 oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED;
239
240 if (o2info_to_user(oiu, req))
241 goto bail;
242
243 status = 0;
244bail:
245 if (status)
246 o2info_set_request_error(oiu, req);
247
248 return status;
249}
250
251int ocfs2_info_handle_fs_features(struct inode *inode,
252 struct ocfs2_info_request __user *req)
253{
254 int status = -EFAULT;
255 struct ocfs2_info_fs_features oif;
256 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
257
258 if (o2info_from_user(oif, req))
259 goto bail;
260
261 oif.if_compat_features = osb->s_feature_compat;
262 oif.if_incompat_features = osb->s_feature_incompat;
263 oif.if_ro_compat_features = osb->s_feature_ro_compat;
264 oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED;
265
266 if (o2info_to_user(oif, req))
267 goto bail;
268
269 status = 0;
270bail:
271 if (status)
272 o2info_set_request_error(oif, req);
273
274 return status;
275}
276
277int ocfs2_info_handle_journal_size(struct inode *inode,
278 struct ocfs2_info_request __user *req)
279{
280 int status = -EFAULT;
281 struct ocfs2_info_journal_size oij;
282 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
283
284 if (o2info_from_user(oij, req))
285 goto bail;
286
287 oij.ij_journal_size = osb->journal->j_inode->i_size;
288
289 oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED;
290
291 if (o2info_to_user(oij, req))
292 goto bail;
293
294 status = 0;
295bail:
296 if (status)
297 o2info_set_request_error(oij, req);
298
299 return status;
300}
301
302int ocfs2_info_handle_unknown(struct inode *inode,
303 struct ocfs2_info_request __user *req)
304{
305 int status = -EFAULT;
306 struct ocfs2_info_request oir;
307
308 if (o2info_from_user(oir, req))
309 goto bail;
310
311 oir.ir_flags &= ~OCFS2_INFO_FL_FILLED;
312
313 if (o2info_to_user(oir, req))
314 goto bail;
315
316 status = 0;
317bail:
318 if (status)
319 o2info_set_request_error(oir, req);
320
321 return status;
322}
323
324/*
325 * Validate and distinguish OCFS2_IOC_INFO requests.
326 *
327 * - validate the magic number.
328 * - distinguish different requests.
329 * - validate size of different requests.
330 */
331int ocfs2_info_handle_request(struct inode *inode,
332 struct ocfs2_info_request __user *req)
333{
334 int status = -EFAULT;
335 struct ocfs2_info_request oir;
336
337 if (o2info_from_user(oir, req))
338 goto bail;
339
340 status = -EINVAL;
341 if (oir.ir_magic != OCFS2_INFO_MAGIC)
342 goto bail;
343
344 switch (oir.ir_code) {
345 case OCFS2_INFO_BLOCKSIZE:
346 if (oir.ir_size == sizeof(struct ocfs2_info_blocksize))
347 status = ocfs2_info_handle_blocksize(inode, req);
348 break;
349 case OCFS2_INFO_CLUSTERSIZE:
350 if (oir.ir_size == sizeof(struct ocfs2_info_clustersize))
351 status = ocfs2_info_handle_clustersize(inode, req);
352 break;
353 case OCFS2_INFO_MAXSLOTS:
354 if (oir.ir_size == sizeof(struct ocfs2_info_maxslots))
355 status = ocfs2_info_handle_maxslots(inode, req);
356 break;
357 case OCFS2_INFO_LABEL:
358 if (oir.ir_size == sizeof(struct ocfs2_info_label))
359 status = ocfs2_info_handle_label(inode, req);
360 break;
361 case OCFS2_INFO_UUID:
362 if (oir.ir_size == sizeof(struct ocfs2_info_uuid))
363 status = ocfs2_info_handle_uuid(inode, req);
364 break;
365 case OCFS2_INFO_FS_FEATURES:
366 if (oir.ir_size == sizeof(struct ocfs2_info_fs_features))
367 status = ocfs2_info_handle_fs_features(inode, req);
368 break;
369 case OCFS2_INFO_JOURNAL_SIZE:
370 if (oir.ir_size == sizeof(struct ocfs2_info_journal_size))
371 status = ocfs2_info_handle_journal_size(inode, req);
372 break;
373 default:
374 status = ocfs2_info_handle_unknown(inode, req);
375 break;
376 }
377
378bail:
379 return status;
380}
381
382int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx,
383 u64 *req_addr, int compat_flag)
384{
385 int status = -EFAULT;
386 u64 __user *bp = NULL;
387
388 if (compat_flag) {
389#ifdef CONFIG_COMPAT
390 /*
391 * pointer bp stores the base address of an array of pointers,
392 * which holds the address of each separate request.
393 */
394 bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests);
395#else
396 BUG();
397#endif
398 } else
399 bp = (u64 __user *)(unsigned long)(info->oi_requests);
400
401 if (o2info_from_user(*req_addr, bp + idx))
402 goto bail;
403
404 status = 0;
405bail:
406 return status;
407}
408
409/*
410 * OCFS2_IOC_INFO handles an array of requests passed from userspace.
411 *
412 * ocfs2_info_handle() receives a large info aggregation, grabs and
413 * validates the request count from the header, then breaks it into small
414 * pieces so that specific handlers can later handle them one by one.
415 *
416 * The idea here is to make each separate request small enough to ensure
417 * better backward and forward compatibility, since a small request
418 * is less likely to be broken if the disk layout gets changed.
419 */
420int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info,
421 int compat_flag)
422{
423 int i, status = 0;
424 u64 req_addr;
425 struct ocfs2_info_request __user *reqp;
426
427 if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) ||
428 (!info->oi_requests)) {
429 status = -EINVAL;
430 goto bail;
431 }
432
433 for (i = 0; i < info->oi_count; i++) {
434
435 status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag);
436 if (status)
437 break;
438
439 reqp = (struct ocfs2_info_request *)(unsigned long)req_addr;
440 if (!reqp) {
441 status = -EINVAL;
442 goto bail;
443 }
444
445 status = ocfs2_info_handle_request(inode, reqp);
446 if (status)
447 break;
448 }
449
450bail:
451 return status;
452}
453
112long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 454long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
113{ 455{
114 struct inode *inode = filp->f_path.dentry->d_inode; 456 struct inode *inode = filp->f_path.dentry->d_inode;
@@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
120 struct reflink_arguments args; 462 struct reflink_arguments args;
121 const char *old_path, *new_path; 463 const char *old_path, *new_path;
122 bool preserve; 464 bool preserve;
465 struct ocfs2_info info;
123 466
124 switch (cmd) { 467 switch (cmd) {
125 case OCFS2_IOC_GETFLAGS: 468 case OCFS2_IOC_GETFLAGS:
@@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
174 preserve = (args.preserve != 0); 517 preserve = (args.preserve != 0);
175 518
176 return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); 519 return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve);
520 case OCFS2_IOC_INFO:
521 if (copy_from_user(&info, (struct ocfs2_info __user *)arg,
522 sizeof(struct ocfs2_info)))
523 return -EFAULT;
524
525 return ocfs2_info_handle(inode, &info, 0);
177 default: 526 default:
178 return -ENOTTY; 527 return -ENOTTY;
179 } 528 }
@@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
185 bool preserve; 534 bool preserve;
186 struct reflink_arguments args; 535 struct reflink_arguments args;
187 struct inode *inode = file->f_path.dentry->d_inode; 536 struct inode *inode = file->f_path.dentry->d_inode;
537 struct ocfs2_info info;
188 538
189 switch (cmd) { 539 switch (cmd) {
190 case OCFS2_IOC32_GETFLAGS: 540 case OCFS2_IOC32_GETFLAGS:
@@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
209 559
210 return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), 560 return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path),
211 compat_ptr(args.new_path), preserve); 561 compat_ptr(args.new_path), preserve);
562 case OCFS2_IOC_INFO:
563 if (copy_from_user(&info, (struct ocfs2_info __user *)arg,
564 sizeof(struct ocfs2_info)))
565 return -EFAULT;
566
567 return ocfs2_info_handle(inode, &info, 1);
212 default: 568 default:
213 return -ENOIOCTLCMD; 569 return -ENOIOCTLCMD;
214 } 570 }
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 9b57c0350ff9..faa2303dbf0a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
301{ 301{
302 int status = 0; 302 int status = 0;
303 unsigned int flushed; 303 unsigned int flushed;
304 unsigned long old_id;
305 struct ocfs2_journal *journal = NULL; 304 struct ocfs2_journal *journal = NULL;
306 305
307 mlog_entry_void(); 306 mlog_entry_void();
@@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
326 goto finally; 325 goto finally;
327 } 326 }
328 327
329 old_id = ocfs2_inc_trans_id(journal); 328 ocfs2_inc_trans_id(journal);
330 329
331 flushed = atomic_read(&journal->j_num_trans); 330 flushed = atomic_read(&journal->j_num_trans);
332 atomic_set(&journal->j_num_trans, 0); 331 atomic_set(&journal->j_num_trans, 0);
@@ -342,9 +341,6 @@ finally:
342 return status; 341 return status;
343} 342}
344 343
345/* pass it NULL and it will allocate a new handle object for you. If
346 * you pass it a handle however, it may still return error, in which
347 * case it has free'd the passed handle for you. */
348handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) 344handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
349{ 345{
350 journal_t *journal = osb->journal->j_journal; 346 journal_t *journal = osb->journal->j_journal;
@@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1888 1884
1889 os = &osb->osb_orphan_scan; 1885 os = &osb->osb_orphan_scan;
1890 1886
1887 mlog(0, "Begin orphan scan\n");
1888
1891 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) 1889 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1892 goto out; 1890 goto out;
1893 1891
@@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1920unlock: 1918unlock:
1921 ocfs2_orphan_scan_unlock(osb, seqno); 1919 ocfs2_orphan_scan_unlock(osb, seqno);
1922out: 1920out:
1921 mlog(0, "Orphan scan completed\n");
1923 return; 1922 return;
1924} 1923}
1925 1924
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index b5baaa8e710f..43e56b97f9c0 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -67,11 +67,12 @@ struct ocfs2_journal {
67 struct buffer_head *j_bh; /* Journal disk inode block */ 67 struct buffer_head *j_bh; /* Journal disk inode block */
68 atomic_t j_num_trans; /* Number of transactions 68 atomic_t j_num_trans; /* Number of transactions
69 * currently in the system. */ 69 * currently in the system. */
70 spinlock_t j_lock;
70 unsigned long j_trans_id; 71 unsigned long j_trans_id;
71 struct rw_semaphore j_trans_barrier; 72 struct rw_semaphore j_trans_barrier;
72 wait_queue_head_t j_checkpointed; 73 wait_queue_head_t j_checkpointed;
73 74
74 spinlock_t j_lock; 75 /* both fields protected by j_lock*/
75 struct list_head j_la_cleanups; 76 struct list_head j_la_cleanups;
76 struct work_struct j_recovery_work; 77 struct work_struct j_recovery_work;
77}; 78};
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 4c18f4ad93b4..7e32db9c2c99 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
59 return ret; 59 return ret;
60} 60}
61 61
62static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, 62static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
63 struct page *page) 63 struct page *page)
64{ 64{
65 int ret; 65 int ret;
66 struct inode *inode = file->f_path.dentry->d_inode;
66 struct address_space *mapping = inode->i_mapping; 67 struct address_space *mapping = inode->i_mapping;
67 loff_t pos = page_offset(page); 68 loff_t pos = page_offset(page);
68 unsigned int len = PAGE_CACHE_SIZE; 69 unsigned int len = PAGE_CACHE_SIZE;
@@ -111,7 +112,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
111 if (page->index == last_index) 112 if (page->index == last_index)
112 len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; 113 len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
113 114
114 ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, 115 ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page,
115 &fsdata, di_bh, page); 116 &fsdata, di_bh, page);
116 if (ret) { 117 if (ret) {
117 if (ret != -ENOSPC) 118 if (ret != -ENOSPC)
@@ -159,7 +160,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
159 */ 160 */
160 down_write(&OCFS2_I(inode)->ip_alloc_sem); 161 down_write(&OCFS2_I(inode)->ip_alloc_sem);
161 162
162 ret = __ocfs2_page_mkwrite(inode, di_bh, page); 163 ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page);
163 164
164 up_write(&OCFS2_I(inode)->ip_alloc_sem); 165 up_write(&OCFS2_I(inode)->ip_alloc_sem);
165 166
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index a00dda2e4f16..e7bde21149ae 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -171,7 +171,8 @@ bail_add:
171 ret = ERR_PTR(status); 171 ret = ERR_PTR(status);
172 goto bail_unlock; 172 goto bail_unlock;
173 } 173 }
174 } 174 } else
175 ocfs2_dentry_attach_gen(dentry);
175 176
176bail_unlock: 177bail_unlock:
177 /* Don't drop the cluster lock until *after* the d_add -- 178 /* Don't drop the cluster lock until *after* the d_add --
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 481387b90b21..d8408217e3bd 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
150struct ocfs2_lock_res { 150struct ocfs2_lock_res {
151 void *l_priv; 151 void *l_priv;
152 struct ocfs2_lock_res_ops *l_ops; 152 struct ocfs2_lock_res_ops *l_ops;
153 spinlock_t l_lock; 153
154 154
155 struct list_head l_blocked_list; 155 struct list_head l_blocked_list;
156 struct list_head l_mask_waiters; 156 struct list_head l_mask_waiters;
157 157
158 enum ocfs2_lock_type l_type;
159 unsigned long l_flags; 158 unsigned long l_flags;
160 char l_name[OCFS2_LOCK_ID_MAX_LEN]; 159 char l_name[OCFS2_LOCK_ID_MAX_LEN];
161 int l_level;
162 unsigned int l_ro_holders; 160 unsigned int l_ro_holders;
163 unsigned int l_ex_holders; 161 unsigned int l_ex_holders;
164 struct ocfs2_dlm_lksb l_lksb; 162 unsigned char l_level;
163
164 /* Data packed - type enum ocfs2_lock_type */
165 unsigned char l_type;
165 166
166 /* used from AST/BAST funcs. */ 167 /* used from AST/BAST funcs. */
167 enum ocfs2_ast_action l_action; 168 /* Data packed - enum type ocfs2_ast_action */
168 enum ocfs2_unlock_action l_unlock_action; 169 unsigned char l_action;
169 int l_requested; 170 /* Data packed - enum type ocfs2_unlock_action */
170 int l_blocking; 171 unsigned char l_unlock_action;
172 unsigned char l_requested;
173 unsigned char l_blocking;
171 unsigned int l_pending_gen; 174 unsigned int l_pending_gen;
172 175
176 spinlock_t l_lock;
177
178 struct ocfs2_dlm_lksb l_lksb;
179
173 wait_queue_head_t l_event; 180 wait_queue_head_t l_event;
174 181
175 struct list_head l_debug_list; 182 struct list_head l_debug_list;
@@ -256,8 +263,10 @@ enum ocfs2_mount_options
256 control lists */ 263 control lists */
257 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ 264 OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
258 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ 265 OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
259 OCFS2_MOUNT_HB_NONE = 1 << 12, /* No heartbeat */ 266 OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT
260 OCFS2_MOUNT_HB_GLOBAL = 1 << 13, /* Global heartbeat */ 267 writes */
268 OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */
269 OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
261}; 270};
262 271
263#define OCFS2_OSB_SOFT_RO 0x0001 272#define OCFS2_OSB_SOFT_RO 0x0001
@@ -279,7 +288,8 @@ struct ocfs2_super
279 struct super_block *sb; 288 struct super_block *sb;
280 struct inode *root_inode; 289 struct inode *root_inode;
281 struct inode *sys_root_inode; 290 struct inode *sys_root_inode;
282 struct inode *system_inodes[NUM_SYSTEM_INODES]; 291 struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES];
292 struct inode **local_system_inodes;
283 293
284 struct ocfs2_slot_info *slot_info; 294 struct ocfs2_slot_info *slot_info;
285 295
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 28ff536b4f8d..c2e4f8222e2f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -338,6 +338,7 @@ enum {
338 USER_QUOTA_SYSTEM_INODE, 338 USER_QUOTA_SYSTEM_INODE,
339 GROUP_QUOTA_SYSTEM_INODE, 339 GROUP_QUOTA_SYSTEM_INODE,
340#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE 340#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE
341#define OCFS2_FIRST_LOCAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE
341 ORPHAN_DIR_SYSTEM_INODE, 342 ORPHAN_DIR_SYSTEM_INODE,
342 EXTENT_ALLOC_SYSTEM_INODE, 343 EXTENT_ALLOC_SYSTEM_INODE,
343 INODE_ALLOC_SYSTEM_INODE, 344 INODE_ALLOC_SYSTEM_INODE,
@@ -346,8 +347,12 @@ enum {
346 TRUNCATE_LOG_SYSTEM_INODE, 347 TRUNCATE_LOG_SYSTEM_INODE,
347 LOCAL_USER_QUOTA_SYSTEM_INODE, 348 LOCAL_USER_QUOTA_SYSTEM_INODE,
348 LOCAL_GROUP_QUOTA_SYSTEM_INODE, 349 LOCAL_GROUP_QUOTA_SYSTEM_INODE,
350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
349 NUM_SYSTEM_INODES 351 NUM_SYSTEM_INODES
350}; 352};
/*
 * Sizes of the two system inode classes.
 *
 * Global system inodes are looked up by their raw enum value, up to and
 * including OCFS2_LAST_GLOBAL_SYSTEM_INODE, so an array holding them needs
 * (last index + 1) entries.  The first local value immediately follows the
 * last global one in the enum, which makes it the correct element count.
 */
#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
#define NUM_LOCAL_SYSTEM_INODES	\
		(NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
351 356
352static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { 357static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
353 /* Global system inodes (single copy) */ 358 /* Global system inodes (single copy) */
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 5d241505690b..b46f39bf7438 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -76,4 +76,99 @@ struct reflink_arguments {
76}; 76};
77#define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) 77#define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments)
78 78
79/* Following definitions dedicated for ocfs2_info_request ioctls. */
80#define OCFS2_INFO_MAX_REQUEST (50)
81#define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2)
82
83/* Magic number of all requests */
84#define OCFS2_INFO_MAGIC (0x4F32494E)
85
86/*
87 * Always try to separate info request into small pieces to
88 * guarantee the backward&forward compatibility.
89 */
90struct ocfs2_info {
91 __u64 oi_requests; /* Array of __u64 pointers to requests */
92 __u32 oi_count; /* Number of requests in info_requests */
93 __u32 oi_pad;
94};
95
96struct ocfs2_info_request {
97/*00*/ __u32 ir_magic; /* Magic number */
98 __u32 ir_code; /* Info request code */
99 __u32 ir_size; /* Size of request */
100 __u32 ir_flags; /* Request flags */
101/*10*/ /* Request specific fields */
102};
103
104struct ocfs2_info_clustersize {
105 struct ocfs2_info_request ic_req;
106 __u32 ic_clustersize;
107 __u32 ic_pad;
108};
109
110struct ocfs2_info_blocksize {
111 struct ocfs2_info_request ib_req;
112 __u32 ib_blocksize;
113 __u32 ib_pad;
114};
115
116struct ocfs2_info_maxslots {
117 struct ocfs2_info_request im_req;
118 __u32 im_max_slots;
119 __u32 im_pad;
120};
121
122struct ocfs2_info_label {
123 struct ocfs2_info_request il_req;
124 __u8 il_label[OCFS2_MAX_VOL_LABEL_LEN];
125} __attribute__ ((packed));
126
127struct ocfs2_info_uuid {
128 struct ocfs2_info_request iu_req;
129 __u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1];
130} __attribute__ ((packed));
131
132struct ocfs2_info_fs_features {
133 struct ocfs2_info_request if_req;
134 __u32 if_compat_features;
135 __u32 if_incompat_features;
136 __u32 if_ro_compat_features;
137 __u32 if_pad;
138};
139
140struct ocfs2_info_journal_size {
141 struct ocfs2_info_request ij_req;
142 __u64 ij_journal_size;
143};
144
145/* Codes for ocfs2_info_request */
146enum ocfs2_info_type {
147 OCFS2_INFO_CLUSTERSIZE = 1,
148 OCFS2_INFO_BLOCKSIZE,
149 OCFS2_INFO_MAXSLOTS,
150 OCFS2_INFO_LABEL,
151 OCFS2_INFO_UUID,
152 OCFS2_INFO_FS_FEATURES,
153 OCFS2_INFO_JOURNAL_SIZE,
154 OCFS2_INFO_NUM_TYPES
155};
156
157/* Flags for struct ocfs2_info_request */
158/* Filled by the caller */
159#define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not
160 required. This is a hint.
161 It is up to ocfs2 whether
162 the request can be fulfilled
163 without locking. */
164/* Filled by ocfs2 */
165#define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood
166 this request and
167 filled in the answer */
168
169#define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during
170 request handling. */
171
172#define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info)
173
79#endif /* OCFS2_IOCTL_H */ 174#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index efdd75607406..b5f9160e93e9 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -49,6 +49,7 @@
49 49
50struct ocfs2_cow_context { 50struct ocfs2_cow_context {
51 struct inode *inode; 51 struct inode *inode;
52 struct file *file;
52 u32 cow_start; 53 u32 cow_start;
53 u32 cow_len; 54 u32 cow_len;
54 struct ocfs2_extent_tree data_et; 55 struct ocfs2_extent_tree data_et;
@@ -2932,13 +2933,16 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2932 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 2933 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
2933 struct page *page; 2934 struct page *page;
2934 pgoff_t page_index; 2935 pgoff_t page_index;
2935 unsigned int from, to; 2936 unsigned int from, to, readahead_pages;
2936 loff_t offset, end, map_end; 2937 loff_t offset, end, map_end;
2937 struct address_space *mapping = context->inode->i_mapping; 2938 struct address_space *mapping = context->inode->i_mapping;
2938 2939
2939 mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, 2940 mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
2940 new_cluster, new_len, cpos); 2941 new_cluster, new_len, cpos);
2941 2942
2943 readahead_pages =
2944 (ocfs2_cow_contig_clusters(sb) <<
2945 OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT;
2942 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; 2946 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
2943 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); 2947 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
2944 /* 2948 /*
@@ -2969,6 +2973,14 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2969 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) 2973 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
2970 BUG_ON(PageDirty(page)); 2974 BUG_ON(PageDirty(page));
2971 2975
2976 if (PageReadahead(page) && context->file) {
2977 page_cache_async_readahead(mapping,
2978 &context->file->f_ra,
2979 context->file,
2980 page, page_index,
2981 readahead_pages);
2982 }
2983
2972 if (!PageUptodate(page)) { 2984 if (!PageUptodate(page)) {
2973 ret = block_read_full_page(page, ocfs2_get_block); 2985 ret = block_read_full_page(page, ocfs2_get_block);
2974 if (ret) { 2986 if (ret) {
@@ -3409,12 +3421,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
3409 return ret; 3421 return ret;
3410} 3422}
3411 3423
3424static void ocfs2_readahead_for_cow(struct inode *inode,
3425 struct file *file,
3426 u32 start, u32 len)
3427{
3428 struct address_space *mapping;
3429 pgoff_t index;
3430 unsigned long num_pages;
3431 int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
3432
3433 if (!file)
3434 return;
3435
3436 mapping = file->f_mapping;
3437 num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
3438 if (!num_pages)
3439 num_pages = 1;
3440
3441 index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
3442 page_cache_sync_readahead(mapping, &file->f_ra, file,
3443 index, num_pages);
3444}
3445
3412/* 3446/*
3413 * Starting at cpos, try to CoW write_len clusters. Don't CoW 3447 * Starting at cpos, try to CoW write_len clusters. Don't CoW
3414 * past max_cpos. This will stop when it runs into a hole or an 3448 * past max_cpos. This will stop when it runs into a hole or an
3415 * unrefcounted extent. 3449 * unrefcounted extent.
3416 */ 3450 */
3417static int ocfs2_refcount_cow_hunk(struct inode *inode, 3451static int ocfs2_refcount_cow_hunk(struct inode *inode,
3452 struct file *file,
3418 struct buffer_head *di_bh, 3453 struct buffer_head *di_bh,
3419 u32 cpos, u32 write_len, u32 max_cpos) 3454 u32 cpos, u32 write_len, u32 max_cpos)
3420{ 3455{
@@ -3443,6 +3478,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
3443 3478
3444 BUG_ON(cow_len == 0); 3479 BUG_ON(cow_len == 0);
3445 3480
3481 ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
3482
3446 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); 3483 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
3447 if (!context) { 3484 if (!context) {
3448 ret = -ENOMEM; 3485 ret = -ENOMEM;
@@ -3464,6 +3501,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
3464 context->ref_root_bh = ref_root_bh; 3501 context->ref_root_bh = ref_root_bh;
3465 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; 3502 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
3466 context->get_clusters = ocfs2_di_get_clusters; 3503 context->get_clusters = ocfs2_di_get_clusters;
3504 context->file = file;
3467 3505
3468 ocfs2_init_dinode_extent_tree(&context->data_et, 3506 ocfs2_init_dinode_extent_tree(&context->data_et,
3469 INODE_CACHE(inode), di_bh); 3507 INODE_CACHE(inode), di_bh);
@@ -3492,6 +3530,7 @@ out:
3492 * clusters between cpos and cpos+write_len are safe to modify. 3530 * clusters between cpos and cpos+write_len are safe to modify.
3493 */ 3531 */
3494int ocfs2_refcount_cow(struct inode *inode, 3532int ocfs2_refcount_cow(struct inode *inode,
3533 struct file *file,
3495 struct buffer_head *di_bh, 3534 struct buffer_head *di_bh,
3496 u32 cpos, u32 write_len, u32 max_cpos) 3535 u32 cpos, u32 write_len, u32 max_cpos)
3497{ 3536{
@@ -3511,7 +3550,7 @@ int ocfs2_refcount_cow(struct inode *inode,
3511 num_clusters = write_len; 3550 num_clusters = write_len;
3512 3551
3513 if (ext_flags & OCFS2_EXT_REFCOUNTED) { 3552 if (ext_flags & OCFS2_EXT_REFCOUNTED) {
3514 ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, 3553 ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos,
3515 num_clusters, max_cpos); 3554 num_clusters, max_cpos);
3516 if (ret) { 3555 if (ret) {
3517 mlog_errno(ret); 3556 mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 9983ba1570e2..c8ce46f7d8e3 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -21,14 +21,14 @@ struct ocfs2_refcount_tree {
21 struct rb_node rf_node; 21 struct rb_node rf_node;
22 u64 rf_blkno; 22 u64 rf_blkno;
23 u32 rf_generation; 23 u32 rf_generation;
24 struct kref rf_getcnt;
24 struct rw_semaphore rf_sem; 25 struct rw_semaphore rf_sem;
25 struct ocfs2_lock_res rf_lockres; 26 struct ocfs2_lock_res rf_lockres;
26 struct kref rf_getcnt;
27 int rf_removed; 27 int rf_removed;
28 28
29 /* the following 4 fields are used by caching_info. */ 29 /* the following 4 fields are used by caching_info. */
30 struct ocfs2_caching_info rf_ci;
31 spinlock_t rf_lock; 30 spinlock_t rf_lock;
31 struct ocfs2_caching_info rf_ci;
32 struct mutex rf_io_mutex; 32 struct mutex rf_io_mutex;
33 struct super_block *rf_sb; 33 struct super_block *rf_sb;
34}; 34};
@@ -52,7 +52,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
52 u32 clusters, 52 u32 clusters,
53 int *credits, 53 int *credits,
54 int *ref_blocks); 54 int *ref_blocks);
55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, 55int ocfs2_refcount_cow(struct inode *inode,
56 struct file *filep, struct buffer_head *di_bh,
56 u32 cpos, u32 write_len, u32 max_cpos); 57 u32 cpos, u32 write_len, u32 max_cpos);
57 58
58typedef int (ocfs2_post_refcount_func)(struct inode *inode, 59typedef int (ocfs2_post_refcount_func)(struct inode *inode,
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index bfbd7e9e949f..ab4e0172cc1d 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -357,7 +357,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
357{ 357{
358 int status = 0; 358 int status = 0;
359 u64 blkno; 359 u64 blkno;
360 unsigned long long blocks, bytes; 360 unsigned long long blocks, bytes = 0;
361 unsigned int i; 361 unsigned int i;
362 struct buffer_head *bh; 362 struct buffer_head *bh;
363 363
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 849c2f0e0a0e..5fed60de7630 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1380,6 +1380,14 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1380 } 1380 }
1381 1381
1382 le16_add_cpu(&bg->bg_free_bits_count, -num_bits); 1382 le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
1383 if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
1384 ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
1385 " count %u but claims %u are freed. num_bits %d",
1386 (unsigned long long)le64_to_cpu(bg->bg_blkno),
1387 le16_to_cpu(bg->bg_bits),
1388 le16_to_cpu(bg->bg_free_bits_count), num_bits);
1389 return -EROFS;
1390 }
1383 while(num_bits--) 1391 while(num_bits--)
1384 ocfs2_set_bit(bit_off++, bitmap); 1392 ocfs2_set_bit(bit_off++, bitmap);
1385 1393
@@ -2419,6 +2427,14 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
2419 (unsigned long *) undo_bg->bg_bitmap); 2427 (unsigned long *) undo_bg->bg_bitmap);
2420 } 2428 }
2421 le16_add_cpu(&bg->bg_free_bits_count, num_bits); 2429 le16_add_cpu(&bg->bg_free_bits_count, num_bits);
2430 if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
2431 ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
2432 " count %u but claims %u are freed. num_bits %d",
2433 (unsigned long long)le64_to_cpu(bg->bg_blkno),
2434 le16_to_cpu(bg->bg_bits),
2435 le16_to_cpu(bg->bg_free_bits_count), num_bits);
2436 return -EROFS;
2437 }
2422 2438
2423 if (undo_fn) 2439 if (undo_fn)
2424 jbd_unlock_bh_state(group_bh); 2440 jbd_unlock_bh_state(group_bh);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4e009ad303a1..a8a0ca44f88f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -178,6 +178,8 @@ enum {
178 Opt_noacl, 178 Opt_noacl,
179 Opt_usrquota, 179 Opt_usrquota,
180 Opt_grpquota, 180 Opt_grpquota,
181 Opt_coherency_buffered,
182 Opt_coherency_full,
181 Opt_resv_level, 183 Opt_resv_level,
182 Opt_dir_resv_level, 184 Opt_dir_resv_level,
183 Opt_err, 185 Opt_err,
@@ -207,6 +209,8 @@ static const match_table_t tokens = {
207 {Opt_noacl, "noacl"}, 209 {Opt_noacl, "noacl"},
208 {Opt_usrquota, "usrquota"}, 210 {Opt_usrquota, "usrquota"},
209 {Opt_grpquota, "grpquota"}, 211 {Opt_grpquota, "grpquota"},
212 {Opt_coherency_buffered, "coherency=buffered"},
213 {Opt_coherency_full, "coherency=full"},
210 {Opt_resv_level, "resv_level=%u"}, 214 {Opt_resv_level, "resv_level=%u"},
211 {Opt_dir_resv_level, "dir_resv_level=%u"}, 215 {Opt_dir_resv_level, "dir_resv_level=%u"},
212 {Opt_err, NULL} 216 {Opt_err, NULL}
@@ -516,11 +520,11 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
516 520
517 mlog_entry_void(); 521 mlog_entry_void();
518 522
519 for (i = 0; i < NUM_SYSTEM_INODES; i++) { 523 for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) {
520 inode = osb->system_inodes[i]; 524 inode = osb->global_system_inodes[i];
521 if (inode) { 525 if (inode) {
522 iput(inode); 526 iput(inode);
523 osb->system_inodes[i] = NULL; 527 osb->global_system_inodes[i] = NULL;
524 } 528 }
525 } 529 }
526 530
@@ -536,6 +540,20 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
536 osb->root_inode = NULL; 540 osb->root_inode = NULL;
537 } 541 }
538 542
543 if (!osb->local_system_inodes)
544 goto out;
545
546 for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) {
547 if (osb->local_system_inodes[i]) {
548 iput(osb->local_system_inodes[i]);
549 osb->local_system_inodes[i] = NULL;
550 }
551 }
552
553 kfree(osb->local_system_inodes);
554 osb->local_system_inodes = NULL;
555
556out:
539 mlog_exit(0); 557 mlog_exit(0);
540} 558}
541 559
@@ -1452,6 +1470,12 @@ static int ocfs2_parse_options(struct super_block *sb,
1452 case Opt_grpquota: 1470 case Opt_grpquota:
1453 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; 1471 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
1454 break; 1472 break;
1473 case Opt_coherency_buffered:
1474 mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED;
1475 break;
1476 case Opt_coherency_full:
1477 mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED;
1478 break;
1455 case Opt_acl: 1479 case Opt_acl:
1456 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; 1480 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1457 mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; 1481 mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
@@ -1563,6 +1587,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1563 if (opts & OCFS2_MOUNT_GRPQUOTA) 1587 if (opts & OCFS2_MOUNT_GRPQUOTA)
1564 seq_printf(s, ",grpquota"); 1588 seq_printf(s, ",grpquota");
1565 1589
1590 if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED)
1591 seq_printf(s, ",coherency=buffered");
1592 else
1593 seq_printf(s, ",coherency=full");
1594
1566 if (opts & OCFS2_MOUNT_NOUSERXATTR) 1595 if (opts & OCFS2_MOUNT_NOUSERXATTR)
1567 seq_printf(s, ",nouser_xattr"); 1596 seq_printf(s, ",nouser_xattr");
1568 else 1597 else
@@ -2017,6 +2046,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu
2017 return 0; 2046 return 0;
2018} 2047}
2019 2048
2049/* Make sure entire volume is addressable by our journal. Requires
2050 osb_clusters_at_boot to be valid and for the journal to have been
2051 initialized by ocfs2_journal_init(). */
2052static int ocfs2_journal_addressable(struct ocfs2_super *osb)
2053{
2054 int status = 0;
2055 u64 max_block =
2056 ocfs2_clusters_to_blocks(osb->sb,
2057 osb->osb_clusters_at_boot) - 1;
2058
2059 /* 32-bit block number is always OK. */
2060 if (max_block <= (u32)~0ULL)
2061 goto out;
2062
2063 /* Volume is "huge", so see if our journal is new enough to
2064 support it. */
2065 if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb,
2066 OCFS2_FEATURE_COMPAT_JBD2_SB) &&
2067 jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0,
2068 JBD2_FEATURE_INCOMPAT_64BIT))) {
2069 mlog(ML_ERROR, "The journal cannot address the entire volume. "
2070 "Enable the 'block64' journal option with tunefs.ocfs2");
2071 status = -EFBIG;
2072 goto out;
2073 }
2074
2075 out:
2076 return status;
2077}
2078
2020static int ocfs2_initialize_super(struct super_block *sb, 2079static int ocfs2_initialize_super(struct super_block *sb,
2021 struct buffer_head *bh, 2080 struct buffer_head *bh,
2022 int sector_size, 2081 int sector_size,
@@ -2029,6 +2088,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
2029 struct ocfs2_journal *journal; 2088 struct ocfs2_journal *journal;
2030 __le32 uuid_net_key; 2089 __le32 uuid_net_key;
2031 struct ocfs2_super *osb; 2090 struct ocfs2_super *osb;
2091 u64 total_blocks;
2032 2092
2033 mlog_entry_void(); 2093 mlog_entry_void();
2034 2094
@@ -2087,6 +2147,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
2087 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 2147 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
2088 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 2148 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
2089 2149
2150 osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
2151 if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) {
2152 mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
2153 osb->max_slots);
2154 status = -EINVAL;
2155 goto bail;
2156 }
2157 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
2158
2090 ocfs2_orphan_scan_init(osb); 2159 ocfs2_orphan_scan_init(osb);
2091 2160
2092 status = ocfs2_recovery_init(osb); 2161 status = ocfs2_recovery_init(osb);
@@ -2125,15 +2194,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
2125 goto bail; 2194 goto bail;
2126 } 2195 }
2127 2196
2128 osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
2129 if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) {
2130 mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
2131 osb->max_slots);
2132 status = -EINVAL;
2133 goto bail;
2134 }
2135 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
2136
2137 osb->slot_recovery_generations = 2197 osb->slot_recovery_generations =
2138 kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), 2198 kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
2139 GFP_KERNEL); 2199 GFP_KERNEL);
@@ -2243,11 +2303,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
2243 goto bail; 2303 goto bail;
2244 } 2304 }
2245 2305
2246 if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) 2306 total_blocks = ocfs2_clusters_to_blocks(osb->sb,
2247 > (u32)~0UL) { 2307 le32_to_cpu(di->i_clusters));
2248 mlog(ML_ERROR, "Volume might try to write to blocks beyond " 2308
2249 "what jbd can address in 32 bits.\n"); 2309 status = generic_check_addressable(osb->sb->s_blocksize_bits,
2250 status = -EINVAL; 2310 total_blocks);
2311 if (status) {
2312 mlog(ML_ERROR, "Volume too large "
2313 "to mount safely on this system");
2314 status = -EFBIG;
2251 goto bail; 2315 goto bail;
2252 } 2316 }
2253 2317
@@ -2409,6 +2473,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2409 goto finally; 2473 goto finally;
2410 } 2474 }
2411 2475
2476 /* Now that journal has been initialized, check to make sure
2477 entire volume is addressable. */
2478 status = ocfs2_journal_addressable(osb);
2479 if (status)
2480 goto finally;
2481
2412 /* If the journal was unmounted cleanly then we don't want to 2482 /* If the journal was unmounted cleanly then we don't want to
2413 * recover anything. Otherwise, journal_load will do that 2483 * recover anything. Otherwise, journal_load will do that
2414 * dirty work for us :) */ 2484 * dirty work for us :) */
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index bfe7190cdbf1..902efb23b6a6 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -44,11 +44,6 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
44 int type, 44 int type,
45 u32 slot); 45 u32 slot);
46 46
47static inline int is_global_system_inode(int type);
48static inline int is_in_system_inode_array(struct ocfs2_super *osb,
49 int type,
50 u32 slot);
51
52#ifdef CONFIG_DEBUG_LOCK_ALLOC 47#ifdef CONFIG_DEBUG_LOCK_ALLOC
53static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; 48static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
54#endif 49#endif
@@ -59,11 +54,52 @@ static inline int is_global_system_inode(int type)
59 type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; 54 type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE;
60} 55}
61 56
62static inline int is_in_system_inode_array(struct ocfs2_super *osb, 57static struct inode **get_local_system_inode(struct ocfs2_super *osb,
63 int type, 58 int type,
64 u32 slot) 59 u32 slot)
65{ 60{
66 return slot == osb->slot_num || is_global_system_inode(type); 61 int index;
62 struct inode **local_system_inodes, **free = NULL;
63
64 BUG_ON(slot == OCFS2_INVALID_SLOT);
65 BUG_ON(type < OCFS2_FIRST_LOCAL_SYSTEM_INODE ||
66 type > OCFS2_LAST_LOCAL_SYSTEM_INODE);
67
68 spin_lock(&osb->osb_lock);
69 local_system_inodes = osb->local_system_inodes;
70 spin_unlock(&osb->osb_lock);
71
72 if (unlikely(!local_system_inodes)) {
73 local_system_inodes = kzalloc(sizeof(struct inode *) *
74 NUM_LOCAL_SYSTEM_INODES *
75 osb->max_slots,
76 GFP_NOFS);
77 if (!local_system_inodes) {
78 mlog_errno(-ENOMEM);
79 /*
80 * return NULL here so that ocfs2_get_sytem_file_inodes
81 * will try to create an inode and use it. We will try
82 * to initialize local_system_inodes next time.
83 */
84 return NULL;
85 }
86
87 spin_lock(&osb->osb_lock);
88 if (osb->local_system_inodes) {
89 /* Someone has initialized it for us. */
90 free = local_system_inodes;
91 local_system_inodes = osb->local_system_inodes;
92 } else
93 osb->local_system_inodes = local_system_inodes;
94 spin_unlock(&osb->osb_lock);
95 if (unlikely(free))
96 kfree(free);
97 }
98
99 index = (slot * NUM_LOCAL_SYSTEM_INODES) +
100 (type - OCFS2_FIRST_LOCAL_SYSTEM_INODE);
101
102 return &local_system_inodes[index];
67} 103}
68 104
69struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, 105struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
@@ -74,8 +110,10 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb,
74 struct inode **arr = NULL; 110 struct inode **arr = NULL;
75 111
76 /* avoid the lookup if cached in local system file array */ 112 /* avoid the lookup if cached in local system file array */
77 if (is_in_system_inode_array(osb, type, slot)) 113 if (is_global_system_inode(type)) {
78 arr = &(osb->system_inodes[type]); 114 arr = &(osb->global_system_inodes[type]);
115 } else
116 arr = get_local_system_inode(osb, type, slot);
79 117
80 if (arr && ((inode = *arr) != NULL)) { 118 if (arr && ((inode = *arr) != NULL)) {
81 /* get a ref in addition to the array ref */ 119 /* get a ref in addition to the array ref */