Diffstat (limited to 'fs/ocfs2')
-rw-r--r--  fs/ocfs2/acl.c              |  33
-rw-r--r--  fs/ocfs2/alloc.c            |   2
-rw-r--r--  fs/ocfs2/aops.c             |   2
-rw-r--r--  fs/ocfs2/blockcheck.c       |   4
-rw-r--r--  fs/ocfs2/cluster/tcp.c      |  17
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c    |   9
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c  |  22
-rw-r--r--  fs/ocfs2/dlm/dlmthread.c    | 114
-rw-r--r--  fs/ocfs2/file.c             |  36
-rw-r--r--  fs/ocfs2/inode.c            |   6
-rw-r--r--  fs/ocfs2/inode.h            |  11
-rw-r--r--  fs/ocfs2/ioctl.c            | 356
-rw-r--r--  fs/ocfs2/journal.c          |   9
-rw-r--r--  fs/ocfs2/journal.h          |   3
-rw-r--r--  fs/ocfs2/mmap.c             |   8
-rw-r--r--  fs/ocfs2/namei.c            | 302
-rw-r--r--  fs/ocfs2/ocfs2.h            |  23
-rw-r--r--  fs/ocfs2/ocfs2_ioctl.h      |  95
-rw-r--r--  fs/ocfs2/refcounttree.c     |  25
-rw-r--r--  fs/ocfs2/refcounttree.h     |   4
-rw-r--r--  fs/ocfs2/suballoc.c         | 219
-rw-r--r--  fs/ocfs2/suballoc.h         |  21
22 files changed, 1076 insertions, 245 deletions
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index da702294d7e7..a76e0aa5cd3f 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -290,12 +290,30 @@ static int ocfs2_set_acl(handle_t *handle,
290 290
291int ocfs2_check_acl(struct inode *inode, int mask) 291int ocfs2_check_acl(struct inode *inode, int mask)
292{ 292{
293 struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); 293 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
294 struct buffer_head *di_bh = NULL;
295 struct posix_acl *acl;
296 int ret = -EAGAIN;
294 297
295 if (IS_ERR(acl)) 298 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
299 return ret;
300
301 ret = ocfs2_read_inode_block(inode, &di_bh);
302 if (ret < 0) {
303 mlog_errno(ret);
304 return ret;
305 }
306
307 acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, di_bh);
308
309 brelse(di_bh);
310
311 if (IS_ERR(acl)) {
312 mlog_errno(PTR_ERR(acl));
296 return PTR_ERR(acl); 313 return PTR_ERR(acl);
314 }
297 if (acl) { 315 if (acl) {
298 int ret = posix_acl_permission(inode, acl, mask); 316 ret = posix_acl_permission(inode, acl, mask);
299 posix_acl_release(acl); 317 posix_acl_release(acl);
300 return ret; 318 return ret;
301 } 319 }
@@ -344,7 +362,7 @@ int ocfs2_init_acl(handle_t *handle,
344{ 362{
345 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 363 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
346 struct posix_acl *acl = NULL; 364 struct posix_acl *acl = NULL;
347 int ret = 0; 365 int ret = 0, ret2;
348 mode_t mode; 366 mode_t mode;
349 367
350 if (!S_ISLNK(inode->i_mode)) { 368 if (!S_ISLNK(inode->i_mode)) {
@@ -381,7 +399,12 @@ int ocfs2_init_acl(handle_t *handle,
381 mode = inode->i_mode; 399 mode = inode->i_mode;
382 ret = posix_acl_create_masq(clone, &mode); 400 ret = posix_acl_create_masq(clone, &mode);
383 if (ret >= 0) { 401 if (ret >= 0) {
384 ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); 402 ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
403 if (ret2) {
404 mlog_errno(ret2);
405 ret = ret2;
406 goto cleanup;
407 }
385 if (ret > 0) { 408 if (ret > 0) {
386 ret = ocfs2_set_acl(handle, inode, 409 ret = ocfs2_set_acl(handle, inode,
387 di_bh, ACL_TYPE_ACCESS, 410 di_bh, ACL_TYPE_ACCESS,
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 215e12ce1d85..592fae5007d1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6672,7 +6672,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
6672 last_page_bytes = PAGE_ALIGN(end); 6672 last_page_bytes = PAGE_ALIGN(end);
6673 index = start >> PAGE_CACHE_SHIFT; 6673 index = start >> PAGE_CACHE_SHIFT;
6674 do { 6674 do {
6675 pages[numpages] = grab_cache_page(mapping, index); 6675 pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
6676 if (!pages[numpages]) { 6676 if (!pages[numpages]) {
6677 ret = -ENOMEM; 6677 ret = -ENOMEM;
6678 mlog_errno(ret); 6678 mlog_errno(ret);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 7155c5a919d7..5cfeee118158 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -883,8 +883,8 @@ struct ocfs2_write_ctxt {
883 * out in so that future reads from that region will get 883 * out in so that future reads from that region will get
884 * zero's. 884 * zero's.
885 */ 885 */
886 struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
887 unsigned int w_num_pages; 886 unsigned int w_num_pages;
887 struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
888 struct page *w_target_page; 888 struct page *w_target_page;
889 889
890 /* 890 /*
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index ec6d12339593..c7ee03c22226 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
439 439
440 ocfs2_blockcheck_inc_failure(stats); 440 ocfs2_blockcheck_inc_failure(stats);
441 mlog(ML_ERROR, 441 mlog(ML_ERROR,
442 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", 442 "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n",
443 (unsigned int)check.bc_crc32e, (unsigned int)crc); 443 (unsigned int)check.bc_crc32e, (unsigned int)crc);
444 444
445 /* Ok, try ECC fixups */ 445 /* Ok, try ECC fixups */
@@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
453 goto out; 453 goto out;
454 } 454 }
455 455
456 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", 456 mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n",
457 (unsigned int)check.bc_crc32e, (unsigned int)crc); 457 (unsigned int)check.bc_crc32e, (unsigned int)crc);
458 458
459 rc = -EIO; 459 rc = -EIO;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index aa75ca3f78da..1361997cf205 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1759,6 +1759,7 @@ static int o2net_accept_one(struct socket *sock)
1759 struct sockaddr_in sin; 1759 struct sockaddr_in sin;
1760 struct socket *new_sock = NULL; 1760 struct socket *new_sock = NULL;
1761 struct o2nm_node *node = NULL; 1761 struct o2nm_node *node = NULL;
1762 struct o2nm_node *local_node = NULL;
1762 struct o2net_sock_container *sc = NULL; 1763 struct o2net_sock_container *sc = NULL;
1763 struct o2net_node *nn; 1764 struct o2net_node *nn;
1764 1765
@@ -1796,11 +1797,15 @@ static int o2net_accept_one(struct socket *sock)
1796 goto out; 1797 goto out;
1797 } 1798 }
1798 1799
1799 if (o2nm_this_node() > node->nd_num) { 1800 if (o2nm_this_node() >= node->nd_num) {
1800 mlog(ML_NOTICE, "unexpected connect attempted from a lower " 1801 local_node = o2nm_get_node_by_num(o2nm_this_node());
1801 "numbered node '%s' at " "%pI4:%d with num %u\n", 1802 mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' ("
1802 node->nd_name, &sin.sin_addr.s_addr, 1803 "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n",
1803 ntohs(sin.sin_port), node->nd_num); 1804 local_node->nd_name, local_node->nd_num,
1805 &(local_node->nd_ipv4_address),
1806 ntohs(local_node->nd_ipv4_port),
1807 node->nd_name, node->nd_num, &sin.sin_addr.s_addr,
1808 ntohs(sin.sin_port));
1804 ret = -EINVAL; 1809 ret = -EINVAL;
1805 goto out; 1810 goto out;
1806 } 1811 }
@@ -1857,6 +1862,8 @@ out:
1857 sock_release(new_sock); 1862 sock_release(new_sock);
1858 if (node) 1863 if (node)
1859 o2nm_node_put(node); 1864 o2nm_node_put(node);
1865 if (local_node)
1866 o2nm_node_put(local_node);
1860 if (sc) 1867 if (sc)
1861 sc_put(sc); 1868 sc_put(sc);
1862 return ret; 1869 return ret;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 94b97fc6a88e..ffb4c68dafa4 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -511,8 +511,6 @@ static void dlm_lockres_release(struct kref *kref)
511 511
512 atomic_dec(&dlm->res_cur_count); 512 atomic_dec(&dlm->res_cur_count);
513 513
514 dlm_put(dlm);
515
516 if (!hlist_unhashed(&res->hash_node) || 514 if (!hlist_unhashed(&res->hash_node) ||
517 !list_empty(&res->granted) || 515 !list_empty(&res->granted) ||
518 !list_empty(&res->converting) || 516 !list_empty(&res->converting) ||
@@ -585,8 +583,6 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
585 res->migration_pending = 0; 583 res->migration_pending = 0;
586 res->inflight_locks = 0; 584 res->inflight_locks = 0;
587 585
588 /* put in dlm_lockres_release */
589 dlm_grab(dlm);
590 res->dlm = dlm; 586 res->dlm = dlm;
591 587
592 kref_init(&res->refs); 588 kref_init(&res->refs);
@@ -3050,8 +3046,6 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
3050 /* check for pre-existing lock */ 3046 /* check for pre-existing lock */
3051 spin_lock(&dlm->spinlock); 3047 spin_lock(&dlm->spinlock);
3052 res = __dlm_lookup_lockres(dlm, name, namelen, hash); 3048 res = __dlm_lookup_lockres(dlm, name, namelen, hash);
3053 spin_lock(&dlm->master_lock);
3054
3055 if (res) { 3049 if (res) {
3056 spin_lock(&res->spinlock); 3050 spin_lock(&res->spinlock);
3057 if (res->state & DLM_LOCK_RES_RECOVERING) { 3051 if (res->state & DLM_LOCK_RES_RECOVERING) {
@@ -3069,14 +3063,15 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
3069 spin_unlock(&res->spinlock); 3063 spin_unlock(&res->spinlock);
3070 } 3064 }
3071 3065
3066 spin_lock(&dlm->master_lock);
3072 /* ignore status. only nonzero status would BUG. */ 3067 /* ignore status. only nonzero status would BUG. */
3073 ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, 3068 ret = dlm_add_migration_mle(dlm, res, mle, &oldmle,
3074 name, namelen, 3069 name, namelen,
3075 migrate->new_master, 3070 migrate->new_master,
3076 migrate->master); 3071 migrate->master);
3077 3072
3078unlock:
3079 spin_unlock(&dlm->master_lock); 3073 spin_unlock(&dlm->master_lock);
3074unlock:
3080 spin_unlock(&dlm->spinlock); 3075 spin_unlock(&dlm->spinlock);
3081 3076
3082 if (oldmle) { 3077 if (oldmle) {
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 9dfaac73b36d..aaaffbcbe916 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
1997 struct list_head *queue; 1997 struct list_head *queue;
1998 struct dlm_lock *lock, *next; 1998 struct dlm_lock *lock, *next;
1999 1999
2000 assert_spin_locked(&dlm->spinlock);
2001 assert_spin_locked(&res->spinlock);
2000 res->state |= DLM_LOCK_RES_RECOVERING; 2002 res->state |= DLM_LOCK_RES_RECOVERING;
2001 if (!list_empty(&res->recovering)) { 2003 if (!list_empty(&res->recovering)) {
2002 mlog(0, 2004 mlog(0,
@@ -2326,19 +2328,15 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2326 /* zero the lvb if necessary */ 2328 /* zero the lvb if necessary */
2327 dlm_revalidate_lvb(dlm, res, dead_node); 2329 dlm_revalidate_lvb(dlm, res, dead_node);
2328 if (res->owner == dead_node) { 2330 if (res->owner == dead_node) {
2329 if (res->state & DLM_LOCK_RES_DROPPING_REF) 2331 if (res->state & DLM_LOCK_RES_DROPPING_REF) {
2330 mlog(0, "%s:%.*s: owned by " 2332 mlog(ML_NOTICE, "Ignore %.*s for "
2331 "dead node %u, this node was " 2333 "recovery as it is being freed\n",
2332 "dropping its ref when it died. " 2334 res->lockname.len,
2333 "continue, dropping the flag.\n", 2335 res->lockname.name);
2334 dlm->name, res->lockname.len, 2336 } else
2335 res->lockname.name, dead_node); 2337 dlm_move_lockres_to_recovery_list(dlm,
2336 2338 res);
2337 /* the wake_up for this will happen when the
2338 * RECOVERING flag is dropped later */
2339 res->state &= ~DLM_LOCK_RES_DROPPING_REF;
2340 2339
2341 dlm_move_lockres_to_recovery_list(dlm, res);
2342 } else if (res->owner == dlm->node_num) { 2340 } else if (res->owner == dlm->node_num) {
2343 dlm_free_dead_locks(dlm, res, dead_node); 2341 dlm_free_dead_locks(dlm, res, dead_node);
2344 __dlm_lockres_calc_usage(dlm, res); 2342 __dlm_lockres_calc_usage(dlm, res);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d4f73ca68fe5..2211acf33d9b 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -92,19 +92,27 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
92 * truly ready to be freed. */ 92 * truly ready to be freed. */
93int __dlm_lockres_unused(struct dlm_lock_resource *res) 93int __dlm_lockres_unused(struct dlm_lock_resource *res)
94{ 94{
95 if (!__dlm_lockres_has_locks(res) && 95 int bit;
96 (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) { 96
97 /* try not to scan the bitmap unless the first two 97 if (__dlm_lockres_has_locks(res))
98 * conditions are already true */ 98 return 0;
99 int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); 99
100 if (bit >= O2NM_MAX_NODES) { 100 if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY)
101 /* since the bit for dlm->node_num is not 101 return 0;
102 * set, inflight_locks better be zero */ 102
103 BUG_ON(res->inflight_locks != 0); 103 if (res->state & DLM_LOCK_RES_RECOVERING)
104 return 1; 104 return 0;
105 } 105
106 } 106 bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
107 return 0; 107 if (bit < O2NM_MAX_NODES)
108 return 0;
109
110 /*
111 * since the bit for dlm->node_num is not set, inflight_locks better
112 * be zero
113 */
114 BUG_ON(res->inflight_locks != 0);
115 return 1;
108} 116}
109 117
110 118
@@ -152,45 +160,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
152 spin_unlock(&dlm->spinlock); 160 spin_unlock(&dlm->spinlock);
153} 161}
154 162
155static int dlm_purge_lockres(struct dlm_ctxt *dlm, 163static void dlm_purge_lockres(struct dlm_ctxt *dlm,
156 struct dlm_lock_resource *res) 164 struct dlm_lock_resource *res)
157{ 165{
158 int master; 166 int master;
159 int ret = 0; 167 int ret = 0;
160 168
161 spin_lock(&res->spinlock); 169 assert_spin_locked(&dlm->spinlock);
162 if (!__dlm_lockres_unused(res)) { 170 assert_spin_locked(&res->spinlock);
163 mlog(0, "%s:%.*s: tried to purge but not unused\n",
164 dlm->name, res->lockname.len, res->lockname.name);
165 __dlm_print_one_lock_resource(res);
166 spin_unlock(&res->spinlock);
167 BUG();
168 }
169
170 if (res->state & DLM_LOCK_RES_MIGRATING) {
171 mlog(0, "%s:%.*s: Delay dropref as this lockres is "
172 "being remastered\n", dlm->name, res->lockname.len,
173 res->lockname.name);
174 /* Re-add the lockres to the end of the purge list */
175 if (!list_empty(&res->purge)) {
176 list_del_init(&res->purge);
177 list_add_tail(&res->purge, &dlm->purge_list);
178 }
179 spin_unlock(&res->spinlock);
180 return 0;
181 }
182 171
183 master = (res->owner == dlm->node_num); 172 master = (res->owner == dlm->node_num);
184 173
185 if (!master)
186 res->state |= DLM_LOCK_RES_DROPPING_REF;
187 spin_unlock(&res->spinlock);
188 174
189 mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, 175 mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
190 res->lockname.name, master); 176 res->lockname.name, master);
191 177
192 if (!master) { 178 if (!master) {
179 res->state |= DLM_LOCK_RES_DROPPING_REF;
193 /* drop spinlock... retake below */ 180 /* drop spinlock... retake below */
181 spin_unlock(&res->spinlock);
194 spin_unlock(&dlm->spinlock); 182 spin_unlock(&dlm->spinlock);
195 183
196 spin_lock(&res->spinlock); 184 spin_lock(&res->spinlock);
@@ -208,31 +196,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
208 mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", 196 mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
209 dlm->name, res->lockname.len, res->lockname.name, ret); 197 dlm->name, res->lockname.len, res->lockname.name, ret);
210 spin_lock(&dlm->spinlock); 198 spin_lock(&dlm->spinlock);
199 spin_lock(&res->spinlock);
211 } 200 }
212 201
213 spin_lock(&res->spinlock);
214 if (!list_empty(&res->purge)) { 202 if (!list_empty(&res->purge)) {
215 mlog(0, "removing lockres %.*s:%p from purgelist, " 203 mlog(0, "removing lockres %.*s:%p from purgelist, "
216 "master = %d\n", res->lockname.len, res->lockname.name, 204 "master = %d\n", res->lockname.len, res->lockname.name,
217 res, master); 205 res, master);
218 list_del_init(&res->purge); 206 list_del_init(&res->purge);
219 spin_unlock(&res->spinlock);
220 dlm_lockres_put(res); 207 dlm_lockres_put(res);
221 dlm->purge_count--; 208 dlm->purge_count--;
222 } else 209 }
223 spin_unlock(&res->spinlock); 210
211 if (!__dlm_lockres_unused(res)) {
212 mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n",
213 dlm->name, res->lockname.len, res->lockname.name);
214 __dlm_print_one_lock_resource(res);
215 BUG();
216 }
224 217
225 __dlm_unhash_lockres(res); 218 __dlm_unhash_lockres(res);
226 219
227 /* lockres is not in the hash now. drop the flag and wake up 220 /* lockres is not in the hash now. drop the flag and wake up
228 * any processes waiting in dlm_get_lock_resource. */ 221 * any processes waiting in dlm_get_lock_resource. */
229 if (!master) { 222 if (!master) {
230 spin_lock(&res->spinlock);
231 res->state &= ~DLM_LOCK_RES_DROPPING_REF; 223 res->state &= ~DLM_LOCK_RES_DROPPING_REF;
232 spin_unlock(&res->spinlock); 224 spin_unlock(&res->spinlock);
233 wake_up(&res->wq); 225 wake_up(&res->wq);
234 } 226 } else
235 return 0; 227 spin_unlock(&res->spinlock);
236} 228}
237 229
238static void dlm_run_purge_list(struct dlm_ctxt *dlm, 230static void dlm_run_purge_list(struct dlm_ctxt *dlm,
@@ -251,17 +243,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
251 lockres = list_entry(dlm->purge_list.next, 243 lockres = list_entry(dlm->purge_list.next,
252 struct dlm_lock_resource, purge); 244 struct dlm_lock_resource, purge);
253 245
254 /* Status of the lockres *might* change so double
255 * check. If the lockres is unused, holding the dlm
256 * spinlock will prevent people from getting and more
257 * refs on it -- there's no need to keep the lockres
258 * spinlock. */
259 spin_lock(&lockres->spinlock); 246 spin_lock(&lockres->spinlock);
260 unused = __dlm_lockres_unused(lockres);
261 spin_unlock(&lockres->spinlock);
262
263 if (!unused)
264 continue;
265 247
266 purge_jiffies = lockres->last_used + 248 purge_jiffies = lockres->last_used +
267 msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); 249 msecs_to_jiffies(DLM_PURGE_INTERVAL_MS);
@@ -273,15 +255,29 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
273 * in tail order, we can stop at the first 255 * in tail order, we can stop at the first
274 * unpurgable resource -- anyone added after 256 * unpurgable resource -- anyone added after
275 * him will have a greater last_used value */ 257 * him will have a greater last_used value */
258 spin_unlock(&lockres->spinlock);
276 break; 259 break;
277 } 260 }
278 261
262 /* Status of the lockres *might* change so double
263 * check. If the lockres is unused, holding the dlm
264 * spinlock will prevent people from getting and more
265 * refs on it. */
266 unused = __dlm_lockres_unused(lockres);
267 if (!unused ||
268 (lockres->state & DLM_LOCK_RES_MIGRATING)) {
269 mlog(0, "lockres %s:%.*s: is in use or "
270 "being remastered, used %d, state %d\n",
271 dlm->name, lockres->lockname.len,
272 lockres->lockname.name, !unused, lockres->state);
273 list_move_tail(&dlm->purge_list, &lockres->purge);
274 spin_unlock(&lockres->spinlock);
275 continue;
276 }
277
279 dlm_lockres_get(lockres); 278 dlm_lockres_get(lockres);
280 279
281 /* This may drop and reacquire the dlm spinlock if it 280 dlm_purge_lockres(dlm, lockres);
282 * has to do migration. */
283 if (dlm_purge_lockres(dlm, lockres))
284 BUG();
285 281
286 dlm_lockres_put(lockres); 282 dlm_lockres_put(lockres);
287 283
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4331f57e9fde..9a74542e1a05 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -36,6 +36,7 @@
36#include <linux/writeback.h> 36#include <linux/writeback.h>
37#include <linux/falloc.h> 37#include <linux/falloc.h>
38#include <linux/quotaops.h> 38#include <linux/quotaops.h>
39#include <linux/blkdev.h>
39 40
40#define MLOG_MASK_PREFIX ML_INODE 41#define MLOG_MASK_PREFIX ML_INODE
41#include <cluster/masklog.h> 42#include <cluster/masklog.h>
@@ -63,12 +64,6 @@
63 64
64#include "buffer_head_io.h" 65#include "buffer_head_io.h"
65 66
66static int ocfs2_sync_inode(struct inode *inode)
67{
68 filemap_fdatawrite(inode->i_mapping);
69 return sync_mapping_buffers(inode->i_mapping);
70}
71
72static int ocfs2_init_file_private(struct inode *inode, struct file *file) 67static int ocfs2_init_file_private(struct inode *inode, struct file *file)
73{ 68{
74 struct ocfs2_file_private *fp; 69 struct ocfs2_file_private *fp;
@@ -186,12 +181,16 @@ static int ocfs2_sync_file(struct file *file, int datasync)
186 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, 181 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
187 dentry->d_name.len, dentry->d_name.name); 182 dentry->d_name.len, dentry->d_name.name);
188 183
189 err = ocfs2_sync_inode(dentry->d_inode); 184 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
190 if (err) 185 /*
191 goto bail; 186 * We still have to flush drive's caches to get data to the
192 187 * platter
193 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 188 */
189 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
190 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
191 NULL, BLKDEV_IFL_WAIT);
194 goto bail; 192 goto bail;
193 }
195 194
196 journal = osb->journal->j_journal; 195 journal = osb->journal->j_journal;
197 err = jbd2_journal_force_commit(journal); 196 err = jbd2_journal_force_commit(journal);
@@ -774,7 +773,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
774 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); 773 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
775 BUG_ON(abs_from & (inode->i_blkbits - 1)); 774 BUG_ON(abs_from & (inode->i_blkbits - 1));
776 775
777 page = grab_cache_page(mapping, index); 776 page = find_or_create_page(mapping, index, GFP_NOFS);
778 if (!page) { 777 if (!page) {
779 ret = -ENOMEM; 778 ret = -ENOMEM;
780 mlog_errno(ret); 779 mlog_errno(ret);
@@ -2306,17 +2305,6 @@ relock:
2306 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 2305 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
2307 ppos, count, ocount); 2306 ppos, count, ocount);
2308 if (written < 0) { 2307 if (written < 0) {
2309 /*
2310 * direct write may have instantiated a few
2311 * blocks outside i_size. Trim these off again.
2312 * Don't need i_size_read because we hold i_mutex.
2313 *
2314 * XXX(truncate): this looks buggy because ocfs2 did not
2315 * actually implement ->truncate. Take a look at
2316 * the new truncate sequence and update this accordingly
2317 */
2318 if (*ppos + count > inode->i_size)
2319 truncate_setsize(inode, inode->i_size);
2320 ret = written; 2308 ret = written;
2321 goto out_dio; 2309 goto out_dio;
2322 } 2310 }
@@ -2332,7 +2320,7 @@ out_dio:
2332 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2320 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2333 2321
2334 if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || 2322 if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
2335 ((file->f_flags & O_DIRECT) && has_refcount)) { 2323 ((file->f_flags & O_DIRECT) && !direct_io)) {
2336 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2324 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2337 pos + count - 1); 2325 pos + count - 1);
2338 if (ret < 0) 2326 if (ret < 0)
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0492464916b1..eece3e05d9d0 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
488 OCFS2_BH_IGNORE_CACHE); 488 OCFS2_BH_IGNORE_CACHE);
489 } else { 489 } else {
490 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); 490 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
491 if (!status) 491 /*
492 * If buffer is in jbd, then its checksum may not have been
493 * computed as yet.
494 */
495 if (!status && !buffer_jbd(bh))
492 status = ocfs2_validate_inode_block(osb->sb, bh); 496 status = ocfs2_validate_inode_block(osb->sb, bh);
493 } 497 }
494 if (status < 0) { 498 if (status < 0) {
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 6de5a869db30..0bc477a3aeb8 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -46,27 +46,24 @@ struct ocfs2_inode_info
46 /* These fields are protected by ip_lock */ 46 /* These fields are protected by ip_lock */
47 spinlock_t ip_lock; 47 spinlock_t ip_lock;
48 u32 ip_open_count; 48 u32 ip_open_count;
49 u32 ip_clusters;
50 struct list_head ip_io_markers; 49 struct list_head ip_io_markers;
50 u32 ip_clusters;
51 51
52 u16 ip_dyn_features;
52 struct mutex ip_io_mutex; 53 struct mutex ip_io_mutex;
53
54 u32 ip_flags; /* see below */ 54 u32 ip_flags; /* see below */
55 u32 ip_attr; /* inode attributes */ 55 u32 ip_attr; /* inode attributes */
56 u16 ip_dyn_features;
57 56
58 /* protected by recovery_lock. */ 57 /* protected by recovery_lock. */
59 struct inode *ip_next_orphan; 58 struct inode *ip_next_orphan;
60 59
61 u32 ip_dir_start_lookup;
62
63 struct ocfs2_caching_info ip_metadata_cache; 60 struct ocfs2_caching_info ip_metadata_cache;
64
65 struct ocfs2_extent_map ip_extent_map; 61 struct ocfs2_extent_map ip_extent_map;
66
67 struct inode vfs_inode; 62 struct inode vfs_inode;
68 struct jbd2_inode ip_jinode; 63 struct jbd2_inode ip_jinode;
69 64
65 u32 ip_dir_start_lookup;
66
70 /* Only valid if the inode is the dir. */ 67 /* Only valid if the inode is the dir. */
71 u32 ip_last_used_slot; 68 u32 ip_last_used_slot;
72 u64 ip_last_used_group; 69 u64 ip_last_used_group;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 7d9d9c132cef..7a4868196152 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -26,6 +26,26 @@
26 26
27#include <linux/ext2_fs.h> 27#include <linux/ext2_fs.h>
28 28
29#define o2info_from_user(a, b) \
30 copy_from_user(&(a), (b), sizeof(a))
31#define o2info_to_user(a, b) \
32 copy_to_user((typeof(a) __user *)b, &(a), sizeof(a))
33
34/*
35 * This call is void because we are already reporting an error that may
36 * be -EFAULT. The error will be returned from the ioctl(2) call. It's
37 * just a best-effort to tell userspace that this request caused the error.
38 */
39static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq,
40 struct ocfs2_info_request __user *req)
41{
42 kreq->ir_flags |= OCFS2_INFO_FL_ERROR;
43 (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags));
44}
45
46#define o2info_set_request_error(a, b) \
47 __o2info_set_request_error((struct ocfs2_info_request *)&(a), b)
48
29static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) 49static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
30{ 50{
31 int status; 51 int status;
@@ -109,6 +129,328 @@ bail:
109 return status; 129 return status;
110} 130}
111 131
132int ocfs2_info_handle_blocksize(struct inode *inode,
133 struct ocfs2_info_request __user *req)
134{
135 int status = -EFAULT;
136 struct ocfs2_info_blocksize oib;
137
138 if (o2info_from_user(oib, req))
139 goto bail;
140
141 oib.ib_blocksize = inode->i_sb->s_blocksize;
142 oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED;
143
144 if (o2info_to_user(oib, req))
145 goto bail;
146
147 status = 0;
148bail:
149 if (status)
150 o2info_set_request_error(oib, req);
151
152 return status;
153}
154
155int ocfs2_info_handle_clustersize(struct inode *inode,
156 struct ocfs2_info_request __user *req)
157{
158 int status = -EFAULT;
159 struct ocfs2_info_clustersize oic;
160 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
161
162 if (o2info_from_user(oic, req))
163 goto bail;
164
165 oic.ic_clustersize = osb->s_clustersize;
166 oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED;
167
168 if (o2info_to_user(oic, req))
169 goto bail;
170
171 status = 0;
172bail:
173 if (status)
174 o2info_set_request_error(oic, req);
175
176 return status;
177}
178
179int ocfs2_info_handle_maxslots(struct inode *inode,
180 struct ocfs2_info_request __user *req)
181{
182 int status = -EFAULT;
183 struct ocfs2_info_maxslots oim;
184 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
185
186 if (o2info_from_user(oim, req))
187 goto bail;
188
189 oim.im_max_slots = osb->max_slots;
190 oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED;
191
192 if (o2info_to_user(oim, req))
193 goto bail;
194
195 status = 0;
196bail:
197 if (status)
198 o2info_set_request_error(oim, req);
199
200 return status;
201}
202
203int ocfs2_info_handle_label(struct inode *inode,
204 struct ocfs2_info_request __user *req)
205{
206 int status = -EFAULT;
207 struct ocfs2_info_label oil;
208 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
209
210 if (o2info_from_user(oil, req))
211 goto bail;
212
213 memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN);
214 oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED;
215
216 if (o2info_to_user(oil, req))
217 goto bail;
218
219 status = 0;
220bail:
221 if (status)
222 o2info_set_request_error(oil, req);
223
224 return status;
225}
226
227int ocfs2_info_handle_uuid(struct inode *inode,
228 struct ocfs2_info_request __user *req)
229{
230 int status = -EFAULT;
231 struct ocfs2_info_uuid oiu;
232 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
233
234 if (o2info_from_user(oiu, req))
235 goto bail;
236
237 memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1);
238 oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED;
239
240 if (o2info_to_user(oiu, req))
241 goto bail;
242
243 status = 0;
244bail:
245 if (status)
246 o2info_set_request_error(oiu, req);
247
248 return status;
249}
250
251int ocfs2_info_handle_fs_features(struct inode *inode,
252 struct ocfs2_info_request __user *req)
253{
254 int status = -EFAULT;
255 struct ocfs2_info_fs_features oif;
256 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
257
258 if (o2info_from_user(oif, req))
259 goto bail;
260
261 oif.if_compat_features = osb->s_feature_compat;
262 oif.if_incompat_features = osb->s_feature_incompat;
263 oif.if_ro_compat_features = osb->s_feature_ro_compat;
264 oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED;
265
266 if (o2info_to_user(oif, req))
267 goto bail;
268
269 status = 0;
270bail:
271 if (status)
272 o2info_set_request_error(oif, req);
273
274 return status;
275}
276
277int ocfs2_info_handle_journal_size(struct inode *inode,
278 struct ocfs2_info_request __user *req)
279{
280 int status = -EFAULT;
281 struct ocfs2_info_journal_size oij;
282 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
283
284 if (o2info_from_user(oij, req))
285 goto bail;
286
287 oij.ij_journal_size = osb->journal->j_inode->i_size;
288
289 oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED;
290
291 if (o2info_to_user(oij, req))
292 goto bail;
293
294 status = 0;
295bail:
296 if (status)
297 o2info_set_request_error(oij, req);
298
299 return status;
300}
301
302int ocfs2_info_handle_unknown(struct inode *inode,
303 struct ocfs2_info_request __user *req)
304{
305 int status = -EFAULT;
306 struct ocfs2_info_request oir;
307
308 if (o2info_from_user(oir, req))
309 goto bail;
310
311 oir.ir_flags &= ~OCFS2_INFO_FL_FILLED;
312
313 if (o2info_to_user(oir, req))
314 goto bail;
315
316 status = 0;
317bail:
318 if (status)
319 o2info_set_request_error(oir, req);
320
321 return status;
322}
323
324/*
325 * Validate and distinguish OCFS2_IOC_INFO requests.
326 *
327 * - validate the magic number.
328 * - distinguish different requests.
329 * - validate size of different requests.
330 */
331int ocfs2_info_handle_request(struct inode *inode,
332 struct ocfs2_info_request __user *req)
333{
334 int status = -EFAULT;
335 struct ocfs2_info_request oir;
336
337 if (o2info_from_user(oir, req))
338 goto bail;
339
340 status = -EINVAL;
341 if (oir.ir_magic != OCFS2_INFO_MAGIC)
342 goto bail;
343
344 switch (oir.ir_code) {
345 case OCFS2_INFO_BLOCKSIZE:
346 if (oir.ir_size == sizeof(struct ocfs2_info_blocksize))
347 status = ocfs2_info_handle_blocksize(inode, req);
348 break;
349 case OCFS2_INFO_CLUSTERSIZE:
350 if (oir.ir_size == sizeof(struct ocfs2_info_clustersize))
351 status = ocfs2_info_handle_clustersize(inode, req);
352 break;
353 case OCFS2_INFO_MAXSLOTS:
354 if (oir.ir_size == sizeof(struct ocfs2_info_maxslots))
355 status = ocfs2_info_handle_maxslots(inode, req);
356 break;
357 case OCFS2_INFO_LABEL:
358 if (oir.ir_size == sizeof(struct ocfs2_info_label))
359 status = ocfs2_info_handle_label(inode, req);
360 break;
361 case OCFS2_INFO_UUID:
362 if (oir.ir_size == sizeof(struct ocfs2_info_uuid))
363 status = ocfs2_info_handle_uuid(inode, req);
364 break;
365 case OCFS2_INFO_FS_FEATURES:
366 if (oir.ir_size == sizeof(struct ocfs2_info_fs_features))
367 status = ocfs2_info_handle_fs_features(inode, req);
368 break;
369 case OCFS2_INFO_JOURNAL_SIZE:
370 if (oir.ir_size == sizeof(struct ocfs2_info_journal_size))
371 status = ocfs2_info_handle_journal_size(inode, req);
372 break;
373 default:
374 status = ocfs2_info_handle_unknown(inode, req);
375 break;
376 }
377
378bail:
379 return status;
380}
381
382int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx,
383 u64 *req_addr, int compat_flag)
384{
385 int status = -EFAULT;
386 u64 __user *bp = NULL;
387
388 if (compat_flag) {
389#ifdef CONFIG_COMPAT
390 /*
391 * pointer bp stores the base address of a pointers array,
392 * which collects all addresses of separate request.
393 */
394 bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests);
395#else
396 BUG();
397#endif
398 } else
399 bp = (u64 __user *)(unsigned long)(info->oi_requests);
400
401 if (o2info_from_user(*req_addr, bp + idx))
402 goto bail;
403
404 status = 0;
405bail:
406 return status;
407}
408
409/*
410 * OCFS2_IOC_INFO handles an array of requests passed from userspace.
411 *
412 * ocfs2_info_handle() recevies a large info aggregation, grab and
413 * validate the request count from header, then break it into small
414 * pieces, later specific handlers can handle them one by one.
415 *
416 * Idea here is to make each separate request small enough to ensure
417 * a better backward&forward compatibility, since a small piece of
418 * request will be less likely to be broken if disk layout get changed.
419 */
420int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info,
421 int compat_flag)
422{
423 int i, status = 0;
424 u64 req_addr;
425 struct ocfs2_info_request __user *reqp;
426
427 if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) ||
428 (!info->oi_requests)) {
429 status = -EINVAL;
430 goto bail;
431 }
432
433 for (i = 0; i < info->oi_count; i++) {
434
435 status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag);
436 if (status)
437 break;
438
439 reqp = (struct ocfs2_info_request *)(unsigned long)req_addr;
440 if (!reqp) {
441 status = -EINVAL;
442 goto bail;
443 }
444
445 status = ocfs2_info_handle_request(inode, reqp);
446 if (status)
447 break;
448 }
449
450bail:
451 return status;
452}
453
112long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 454long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
113{ 455{
114 struct inode *inode = filp->f_path.dentry->d_inode; 456 struct inode *inode = filp->f_path.dentry->d_inode;
@@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
120 struct reflink_arguments args; 462 struct reflink_arguments args;
121 const char *old_path, *new_path; 463 const char *old_path, *new_path;
122 bool preserve; 464 bool preserve;
465 struct ocfs2_info info;
123 466
124 switch (cmd) { 467 switch (cmd) {
125 case OCFS2_IOC_GETFLAGS: 468 case OCFS2_IOC_GETFLAGS:
@@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
174 preserve = (args.preserve != 0); 517 preserve = (args.preserve != 0);
175 518
176 return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); 519 return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve);
520 case OCFS2_IOC_INFO:
521 if (copy_from_user(&info, (struct ocfs2_info __user *)arg,
522 sizeof(struct ocfs2_info)))
523 return -EFAULT;
524
525 return ocfs2_info_handle(inode, &info, 0);
177 default: 526 default:
178 return -ENOTTY; 527 return -ENOTTY;
179 } 528 }
@@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
185 bool preserve; 534 bool preserve;
186 struct reflink_arguments args; 535 struct reflink_arguments args;
187 struct inode *inode = file->f_path.dentry->d_inode; 536 struct inode *inode = file->f_path.dentry->d_inode;
537 struct ocfs2_info info;
188 538
189 switch (cmd) { 539 switch (cmd) {
190 case OCFS2_IOC32_GETFLAGS: 540 case OCFS2_IOC32_GETFLAGS:
@@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
209 559
210 return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), 560 return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path),
211 compat_ptr(args.new_path), preserve); 561 compat_ptr(args.new_path), preserve);
562 case OCFS2_IOC_INFO:
563 if (copy_from_user(&info, (struct ocfs2_info __user *)arg,
564 sizeof(struct ocfs2_info)))
565 return -EFAULT;
566
567 return ocfs2_info_handle(inode, &info, 1);
212 default: 568 default:
213 return -ENOIOCTLCMD; 569 return -ENOIOCTLCMD;
214 } 570 }
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 9b57c0350ff9..faa2303dbf0a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
301{ 301{
302 int status = 0; 302 int status = 0;
303 unsigned int flushed; 303 unsigned int flushed;
304 unsigned long old_id;
305 struct ocfs2_journal *journal = NULL; 304 struct ocfs2_journal *journal = NULL;
306 305
307 mlog_entry_void(); 306 mlog_entry_void();
@@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
326 goto finally; 325 goto finally;
327 } 326 }
328 327
329 old_id = ocfs2_inc_trans_id(journal); 328 ocfs2_inc_trans_id(journal);
330 329
331 flushed = atomic_read(&journal->j_num_trans); 330 flushed = atomic_read(&journal->j_num_trans);
332 atomic_set(&journal->j_num_trans, 0); 331 atomic_set(&journal->j_num_trans, 0);
@@ -342,9 +341,6 @@ finally:
342 return status; 341 return status;
343} 342}
344 343
345/* pass it NULL and it will allocate a new handle object for you. If
346 * you pass it a handle however, it may still return error, in which
347 * case it has free'd the passed handle for you. */
348handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) 344handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
349{ 345{
350 journal_t *journal = osb->journal->j_journal; 346 journal_t *journal = osb->journal->j_journal;
@@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1888 1884
1889 os = &osb->osb_orphan_scan; 1885 os = &osb->osb_orphan_scan;
1890 1886
1887 mlog(0, "Begin orphan scan\n");
1888
1891 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) 1889 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1892 goto out; 1890 goto out;
1893 1891
@@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1920unlock: 1918unlock:
1921 ocfs2_orphan_scan_unlock(osb, seqno); 1919 ocfs2_orphan_scan_unlock(osb, seqno);
1922out: 1920out:
1921 mlog(0, "Orphan scan completed\n");
1923 return; 1922 return;
1924} 1923}
1925 1924
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index b5baaa8e710f..43e56b97f9c0 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -67,11 +67,12 @@ struct ocfs2_journal {
67 struct buffer_head *j_bh; /* Journal disk inode block */ 67 struct buffer_head *j_bh; /* Journal disk inode block */
68 atomic_t j_num_trans; /* Number of transactions 68 atomic_t j_num_trans; /* Number of transactions
69 * currently in the system. */ 69 * currently in the system. */
70 spinlock_t j_lock;
70 unsigned long j_trans_id; 71 unsigned long j_trans_id;
71 struct rw_semaphore j_trans_barrier; 72 struct rw_semaphore j_trans_barrier;
72 wait_queue_head_t j_checkpointed; 73 wait_queue_head_t j_checkpointed;
73 74
74 spinlock_t j_lock; 75 /* both fields protected by j_lock*/
75 struct list_head j_la_cleanups; 76 struct list_head j_la_cleanups;
76 struct work_struct j_recovery_work; 77 struct work_struct j_recovery_work;
77}; 78};
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index b04d6961c0d4..7e32db9c2c99 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -75,9 +75,11 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
75 /* 75 /*
76 * Another node might have truncated while we were waiting on 76 * Another node might have truncated while we were waiting on
77 * cluster locks. 77 * cluster locks.
78 * We don't check size == 0 before the shift. This is borrowed
79 * from do_generic_file_read.
78 */ 80 */
79 last_index = size >> PAGE_CACHE_SHIFT; 81 last_index = (size - 1) >> PAGE_CACHE_SHIFT;
80 if (page->index > last_index) { 82 if (unlikely(!size || page->index > last_index)) {
81 ret = -EINVAL; 83 ret = -EINVAL;
82 goto out; 84 goto out;
83 } 85 }
@@ -108,7 +110,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
108 * because the "write" would invalidate their data. 110 * because the "write" would invalidate their data.
109 */ 111 */
110 if (page->index == last_index) 112 if (page->index == last_index)
111 len = size & ~PAGE_CACHE_MASK; 113 len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
112 114
113 ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page, 115 ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page,
114 &fsdata, di_bh, page); 116 &fsdata, di_bh, page);
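
[Editor's note, not part of the patch] The mmap.c hunk above replaces the size-based page arithmetic with (size - 1) forms. As a worked illustration only, assuming 4096-byte pages so that PAGE_CACHE_SHIFT is 12 and ~PAGE_CACHE_MASK is 4095, the user-space sketch below shows the off-by-one the change fixes when i_size is an exact multiple of the page size:

#include <stdio.h>

int main(void)
{
	unsigned long long size = 8192;           /* i_size: exactly two 4k pages */
	unsigned int shift = 12, lowmask = 4095;  /* PAGE_CACHE_SHIFT, ~PAGE_CACHE_MASK */

	printf("old last_index = %llu\n", size >> shift);       /* 2: one page past the end */
	printf("new last_index = %llu\n", (size - 1) >> shift); /* 1: index of the real last page */
	printf("old len        = %llu\n", size & lowmask);      /* 0: final page would get a zero-length write */
	printf("new len        = %llu\n", ((size - 1) & lowmask) + 1); /* 4096: the full final page */
	return 0;
}
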
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f171b51a74f7..a00dda2e4f16 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -472,32 +472,23 @@ leave:
472 return status; 472 return status;
473} 473}
474 474
475static int ocfs2_mknod_locked(struct ocfs2_super *osb, 475static int __ocfs2_mknod_locked(struct inode *dir,
476 struct inode *dir, 476 struct inode *inode,
477 struct inode *inode, 477 dev_t dev,
478 dev_t dev, 478 struct buffer_head **new_fe_bh,
479 struct buffer_head **new_fe_bh, 479 struct buffer_head *parent_fe_bh,
480 struct buffer_head *parent_fe_bh, 480 handle_t *handle,
481 handle_t *handle, 481 struct ocfs2_alloc_context *inode_ac,
482 struct ocfs2_alloc_context *inode_ac) 482 u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit)
483{ 483{
484 int status = 0; 484 int status = 0;
485 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
485 struct ocfs2_dinode *fe = NULL; 486 struct ocfs2_dinode *fe = NULL;
486 struct ocfs2_extent_list *fel; 487 struct ocfs2_extent_list *fel;
487 u64 suballoc_loc, fe_blkno = 0;
488 u16 suballoc_bit;
489 u16 feat; 488 u16 feat;
490 489
491 *new_fe_bh = NULL; 490 *new_fe_bh = NULL;
492 491
493 status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
494 inode_ac, &suballoc_loc,
495 &suballoc_bit, &fe_blkno);
496 if (status < 0) {
497 mlog_errno(status);
498 goto leave;
499 }
500
501 /* populate as many fields early on as possible - many of 492 /* populate as many fields early on as possible - many of
502 * these are used by the support functions here and in 493 * these are used by the support functions here and in
503 * callers. */ 494 * callers. */
@@ -591,6 +582,34 @@ leave:
591 return status; 582 return status;
592} 583}
593 584
585static int ocfs2_mknod_locked(struct ocfs2_super *osb,
586 struct inode *dir,
587 struct inode *inode,
588 dev_t dev,
589 struct buffer_head **new_fe_bh,
590 struct buffer_head *parent_fe_bh,
591 handle_t *handle,
592 struct ocfs2_alloc_context *inode_ac)
593{
594 int status = 0;
595 u64 suballoc_loc, fe_blkno = 0;
596 u16 suballoc_bit;
597
598 *new_fe_bh = NULL;
599
600 status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
601 inode_ac, &suballoc_loc,
602 &suballoc_bit, &fe_blkno);
603 if (status < 0) {
604 mlog_errno(status);
605 return status;
606 }
607
608 return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
609 parent_fe_bh, handle, inode_ac,
610 fe_blkno, suballoc_loc, suballoc_bit);
611}
612
594static int ocfs2_mkdir(struct inode *dir, 613static int ocfs2_mkdir(struct inode *dir,
595 struct dentry *dentry, 614 struct dentry *dentry,
596 int mode) 615 int mode)
@@ -1852,61 +1871,117 @@ bail:
1852 return status; 1871 return status;
1853} 1872}
1854 1873
1855static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, 1874static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
1856 struct inode **ret_orphan_dir, 1875 struct inode **ret_orphan_dir,
1857 u64 blkno, 1876 struct buffer_head **ret_orphan_dir_bh)
1858 char *name,
1859 struct ocfs2_dir_lookup_result *lookup)
1860{ 1877{
1861 struct inode *orphan_dir_inode; 1878 struct inode *orphan_dir_inode;
1862 struct buffer_head *orphan_dir_bh = NULL; 1879 struct buffer_head *orphan_dir_bh = NULL;
1863 int status = 0; 1880 int ret = 0;
1864
1865 status = ocfs2_blkno_stringify(blkno, name);
1866 if (status < 0) {
1867 mlog_errno(status);
1868 return status;
1869 }
1870 1881
1871 orphan_dir_inode = ocfs2_get_system_file_inode(osb, 1882 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
1872 ORPHAN_DIR_SYSTEM_INODE, 1883 ORPHAN_DIR_SYSTEM_INODE,
1873 osb->slot_num); 1884 osb->slot_num);
1874 if (!orphan_dir_inode) { 1885 if (!orphan_dir_inode) {
1875 status = -ENOENT; 1886 ret = -ENOENT;
1876 mlog_errno(status); 1887 mlog_errno(ret);
1877 return status; 1888 return ret;
1878 } 1889 }
1879 1890
1880 mutex_lock(&orphan_dir_inode->i_mutex); 1891 mutex_lock(&orphan_dir_inode->i_mutex);
1881 1892
1882 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); 1893 ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
1883 if (status < 0) { 1894 if (ret < 0) {
1884 mlog_errno(status); 1895 mutex_unlock(&orphan_dir_inode->i_mutex);
1885 goto leave; 1896 iput(orphan_dir_inode);
1897
1898 mlog_errno(ret);
1899 return ret;
1886 } 1900 }
1887 1901
1888 status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, 1902 *ret_orphan_dir = orphan_dir_inode;
1889 orphan_dir_bh, name, 1903 *ret_orphan_dir_bh = orphan_dir_bh;
1890 OCFS2_ORPHAN_NAMELEN, lookup);
1891 if (status < 0) {
1892 ocfs2_inode_unlock(orphan_dir_inode, 1);
1893 1904
1894 mlog_errno(status); 1905 return 0;
1895 goto leave; 1906}
1907
1908static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
1909 struct buffer_head *orphan_dir_bh,
1910 u64 blkno,
1911 char *name,
1912 struct ocfs2_dir_lookup_result *lookup)
1913{
1914 int ret;
1915 struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb);
1916
1917 ret = ocfs2_blkno_stringify(blkno, name);
1918 if (ret < 0) {
1919 mlog_errno(ret);
1920 return ret;
1921 }
1922
1923 ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
1924 orphan_dir_bh, name,
1925 OCFS2_ORPHAN_NAMELEN, lookup);
1926 if (ret < 0) {
1927 mlog_errno(ret);
1928 return ret;
1929 }
1930
1931 return 0;
1932}
1933
1934/**
1935 * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for
1936 * insertion of an orphan.
1937 * @osb: ocfs2 file system
1938 * @ret_orphan_dir: Orphan dir inode - returned locked!
1939 * @blkno: Actual block number of the inode to be inserted into orphan dir.
1940 * @lookup: dir lookup result, to be passed back into functions like
1941 * ocfs2_orphan_add
1942 *
1943 * Returns zero on success and the ret_orphan_dir, name and lookup
1944 * fields will be populated.
1945 *
1946 * Returns non-zero on failure.
1947 */
1948static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
1949 struct inode **ret_orphan_dir,
1950 u64 blkno,
1951 char *name,
1952 struct ocfs2_dir_lookup_result *lookup)
1953{
1954 struct inode *orphan_dir_inode = NULL;
1955 struct buffer_head *orphan_dir_bh = NULL;
1956 int ret = 0;
1957
1958 ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode,
1959 &orphan_dir_bh);
1960 if (ret < 0) {
1961 mlog_errno(ret);
1962 return ret;
1963 }
1964
1965 ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh,
1966 blkno, name, lookup);
1967 if (ret < 0) {
1968 mlog_errno(ret);
1969 goto out;
1896 } 1970 }
1897 1971
1898 *ret_orphan_dir = orphan_dir_inode; 1972 *ret_orphan_dir = orphan_dir_inode;
1899 1973
1900leave: 1974out:
1901 if (status) { 1975 brelse(orphan_dir_bh);
1976
1977 if (ret) {
1978 ocfs2_inode_unlock(orphan_dir_inode, 1);
1902 mutex_unlock(&orphan_dir_inode->i_mutex); 1979 mutex_unlock(&orphan_dir_inode->i_mutex);
1903 iput(orphan_dir_inode); 1980 iput(orphan_dir_inode);
1904 } 1981 }
1905 1982
1906 brelse(orphan_dir_bh); 1983 mlog_exit(ret);
1907 1984 return ret;
1908 mlog_exit(status);
1909 return status;
1910} 1985}
1911 1986
1912static int ocfs2_orphan_add(struct ocfs2_super *osb, 1987static int ocfs2_orphan_add(struct ocfs2_super *osb,
@@ -2053,6 +2128,99 @@ leave:
2053 return status; 2128 return status;
2054} 2129}
2055 2130
2131/**
2132 * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to recieve a newly
2133 * allocated file. This is different from the typical 'add to orphan dir'
2134 * operation in that the inode does not yet exist. This is a problem because
2135 * the orphan dir stringifies the inode block number to come up with it's
2136 * dirent. Obviously if the inode does not yet exist we have a chicken and egg
2137 * problem. This function works around it by calling deeper into the orphan
2138 * and suballoc code than other callers. Use this only by necessity.
2139 * @dir: The directory which this inode will ultimately wind up under - not the
2140 * orphan dir!
2141 * @dir_bh: buffer_head the @dir inode block
2142 * @orphan_name: string of length (CFS2_ORPHAN_NAMELEN + 1). Will be filled
2143 * with the string to be used for orphan dirent. Pass back to the orphan dir
2144 * code.
2145 * @ret_orphan_dir: orphan dir inode returned to be passed back into orphan
2146 * dir code.
2147 * @ret_di_blkno: block number where the new inode will be allocated.
2148 * @orphan_insert: Dir insert context to be passed back into orphan dir code.
2149 * @ret_inode_ac: Inode alloc context to be passed back to the allocator.
2150 *
2151 * Returns zero on success and the ret_orphan_dir, name and lookup
2152 * fields will be populated.
2153 *
2154 * Returns non-zero on failure.
2155 */
2156static int ocfs2_prep_new_orphaned_file(struct inode *dir,
2157 struct buffer_head *dir_bh,
2158 char *orphan_name,
2159 struct inode **ret_orphan_dir,
2160 u64 *ret_di_blkno,
2161 struct ocfs2_dir_lookup_result *orphan_insert,
2162 struct ocfs2_alloc_context **ret_inode_ac)
2163{
2164 int ret;
2165 u64 di_blkno;
2166 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2167 struct inode *orphan_dir = NULL;
2168 struct buffer_head *orphan_dir_bh = NULL;
2169 struct ocfs2_alloc_context *inode_ac = NULL;
2170
2171 ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh);
2172 if (ret < 0) {
2173 mlog_errno(ret);
2174 return ret;
2175 }
2176
2177 /* reserve an inode spot */
2178 ret = ocfs2_reserve_new_inode(osb, &inode_ac);
2179 if (ret < 0) {
2180 if (ret != -ENOSPC)
2181 mlog_errno(ret);
2182 goto out;
2183 }
2184
2185 ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac,
2186 &di_blkno);
2187 if (ret) {
2188 mlog_errno(ret);
2189 goto out;
2190 }
2191
2192 ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh,
2193 di_blkno, orphan_name, orphan_insert);
2194 if (ret < 0) {
2195 mlog_errno(ret);
2196 goto out;
2197 }
2198
2199out:
2200 if (ret == 0) {
2201 *ret_orphan_dir = orphan_dir;
2202 *ret_di_blkno = di_blkno;
2203 *ret_inode_ac = inode_ac;
2204 /*
2205 * orphan_name and orphan_insert are already up to
2206 * date via prepare_orphan_dir
2207 */
2208 } else {
2209 /* Unroll reserve_new_inode* */
2210 if (inode_ac)
2211 ocfs2_free_alloc_context(inode_ac);
2212
2213 /* Unroll orphan dir locking */
2214 mutex_unlock(&orphan_dir->i_mutex);
2215 ocfs2_inode_unlock(orphan_dir, 1);
2216 iput(orphan_dir);
2217 }
2218
2219 brelse(orphan_dir_bh);
2220
2221 return 0;
2222}
2223
2056int ocfs2_create_inode_in_orphan(struct inode *dir, 2224int ocfs2_create_inode_in_orphan(struct inode *dir,
2057 int mode, 2225 int mode,
2058 struct inode **new_inode) 2226 struct inode **new_inode)
@@ -2068,6 +2236,8 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2068 struct buffer_head *new_di_bh = NULL; 2236 struct buffer_head *new_di_bh = NULL;
2069 struct ocfs2_alloc_context *inode_ac = NULL; 2237 struct ocfs2_alloc_context *inode_ac = NULL;
2070 struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; 2238 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
2239 u64 uninitialized_var(di_blkno), suballoc_loc;
2240 u16 suballoc_bit;
2071 2241
2072 status = ocfs2_inode_lock(dir, &parent_di_bh, 1); 2242 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2073 if (status < 0) { 2243 if (status < 0) {
@@ -2076,20 +2246,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2076 return status; 2246 return status;
2077 } 2247 }
2078 2248
2079 /* 2249 status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh,
2080 * We give the orphan dir the root blkno to fake an orphan name, 2250 orphan_name, &orphan_dir,
2081 * and allocate enough space for our insertion. 2251 &di_blkno, &orphan_insert, &inode_ac);
2082 */
2083 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
2084 osb->root_blkno,
2085 orphan_name, &orphan_insert);
2086 if (status < 0) {
2087 mlog_errno(status);
2088 goto leave;
2089 }
2090
2091 /* reserve an inode spot */
2092 status = ocfs2_reserve_new_inode(osb, &inode_ac);
2093 if (status < 0) { 2252 if (status < 0) {
2094 if (status != -ENOSPC) 2253 if (status != -ENOSPC)
2095 mlog_errno(status); 2254 mlog_errno(status);
@@ -2116,17 +2275,20 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2116 goto leave; 2275 goto leave;
2117 did_quota_inode = 1; 2276 did_quota_inode = 1;
2118 2277
2119 inode->i_nlink = 0; 2278 status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac,
2120 /* do the real work now. */ 2279 &suballoc_loc,
2121 status = ocfs2_mknod_locked(osb, dir, inode, 2280 &suballoc_bit, di_blkno);
2122 0, &new_di_bh, parent_di_bh, handle,
2123 inode_ac);
2124 if (status < 0) { 2281 if (status < 0) {
2125 mlog_errno(status); 2282 mlog_errno(status);
2126 goto leave; 2283 goto leave;
2127 } 2284 }
2128 2285
2129 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name); 2286 inode->i_nlink = 0;
2287 /* do the real work now. */
2288 status = __ocfs2_mknod_locked(dir, inode,
2289 0, &new_di_bh, parent_di_bh, handle,
2290 inode_ac, di_blkno, suballoc_loc,
2291 suballoc_bit);
2130 if (status < 0) { 2292 if (status < 0) {
2131 mlog_errno(status); 2293 mlog_errno(status);
2132 goto leave; 2294 goto leave;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c67003b6b5a2..65739b3b3276 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
150struct ocfs2_lock_res { 150struct ocfs2_lock_res {
151 void *l_priv; 151 void *l_priv;
152 struct ocfs2_lock_res_ops *l_ops; 152 struct ocfs2_lock_res_ops *l_ops;
153 spinlock_t l_lock; 153
154 154
155 struct list_head l_blocked_list; 155 struct list_head l_blocked_list;
156 struct list_head l_mask_waiters; 156 struct list_head l_mask_waiters;
157 157
158 enum ocfs2_lock_type l_type;
159 unsigned long l_flags; 158 unsigned long l_flags;
160 char l_name[OCFS2_LOCK_ID_MAX_LEN]; 159 char l_name[OCFS2_LOCK_ID_MAX_LEN];
161 int l_level;
162 unsigned int l_ro_holders; 160 unsigned int l_ro_holders;
163 unsigned int l_ex_holders; 161 unsigned int l_ex_holders;
164 struct ocfs2_dlm_lksb l_lksb; 162 unsigned char l_level;
163
164 /* Data packed - type enum ocfs2_lock_type */
165 unsigned char l_type;
165 166
166 /* used from AST/BAST funcs. */ 167 /* used from AST/BAST funcs. */
167 enum ocfs2_ast_action l_action; 168 /* Data packed - enum type ocfs2_ast_action */
168 enum ocfs2_unlock_action l_unlock_action; 169 unsigned char l_action;
169 int l_requested; 170 /* Data packed - enum type ocfs2_unlock_action */
170 int l_blocking; 171 unsigned char l_unlock_action;
172 unsigned char l_requested;
173 unsigned char l_blocking;
171 unsigned int l_pending_gen; 174 unsigned int l_pending_gen;
172 175
176 spinlock_t l_lock;
177
178 struct ocfs2_dlm_lksb l_lksb;
179
173 wait_queue_head_t l_event; 180 wait_queue_head_t l_event;
174 181
175 struct list_head l_debug_list; 182 struct list_head l_debug_list;
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 2d3420af1a83..9bc535499868 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -76,4 +76,99 @@ struct reflink_arguments {
76}; 76};
77#define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) 77#define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments)
78 78
79/* Following definitions dedicated for ocfs2_info_request ioctls. */
80#define OCFS2_INFO_MAX_REQUEST (50)
81#define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2)
82
83/* Magic number of all requests */
84#define OCFS2_INFO_MAGIC (0x4F32494E)
85
86/*
87 * Always try to separate info request into small pieces to
88 * guarantee the backward&forward compatibility.
89 */
90struct ocfs2_info {
91 __u64 oi_requests; /* Array of __u64 pointers to requests */
92 __u32 oi_count; /* Number of requests in info_requests */
93 __u32 oi_pad;
94};
95
96struct ocfs2_info_request {
97/*00*/ __u32 ir_magic; /* Magic number */
98 __u32 ir_code; /* Info request code */
99 __u32 ir_size; /* Size of request */
100 __u32 ir_flags; /* Request flags */
101/*10*/ /* Request specific fields */
102};
103
104struct ocfs2_info_clustersize {
105 struct ocfs2_info_request ic_req;
106 __u32 ic_clustersize;
107 __u32 ic_pad;
108};
109
110struct ocfs2_info_blocksize {
111 struct ocfs2_info_request ib_req;
112 __u32 ib_blocksize;
113 __u32 ib_pad;
114};
115
116struct ocfs2_info_maxslots {
117 struct ocfs2_info_request im_req;
118 __u32 im_max_slots;
119 __u32 im_pad;
120};
121
122struct ocfs2_info_label {
123 struct ocfs2_info_request il_req;
124 __u8 il_label[OCFS2_MAX_VOL_LABEL_LEN];
125} __attribute__ ((packed));
126
127struct ocfs2_info_uuid {
128 struct ocfs2_info_request iu_req;
129 __u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1];
130} __attribute__ ((packed));
131
132struct ocfs2_info_fs_features {
133 struct ocfs2_info_request if_req;
134 __u32 if_compat_features;
135 __u32 if_incompat_features;
136 __u32 if_ro_compat_features;
137 __u32 if_pad;
138};
139
140struct ocfs2_info_journal_size {
141 struct ocfs2_info_request ij_req;
142 __u64 ij_journal_size;
143};
144
145/* Codes for ocfs2_info_request */
146enum ocfs2_info_type {
147 OCFS2_INFO_CLUSTERSIZE = 1,
148 OCFS2_INFO_BLOCKSIZE,
149 OCFS2_INFO_MAXSLOTS,
150 OCFS2_INFO_LABEL,
151 OCFS2_INFO_UUID,
152 OCFS2_INFO_FS_FEATURES,
153 OCFS2_INFO_JOURNAL_SIZE,
154 OCFS2_INFO_NUM_TYPES
155};
156
157/* Flags for struct ocfs2_info_request */
158/* Filled by the caller */
159#define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not
160 required. This is a hint.
161 It is up to ocfs2 whether
162 the request can be fulfilled
163 without locking. */
164/* Filled by ocfs2 */
165#define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood
166 this request and
167 filled in the answer */
168
169#define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during
170 request handling. */
171
172#define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info)
173
79#endif /* OCFS2_IOCTL_H */ 174#endif /* OCFS2_IOCTL_H */
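
The new OCFS2_IOC_INFO ioctl takes a struct ocfs2_info whose oi_requests field is a user pointer to an array of __u64 values, each of which is itself a pointer to one request that begins with the common ocfs2_info_request header. Below is a hedged userspace sketch of a single-request block-size query; it assumes the definitions above are visible to the program (the "ocfs2_ioctl.h" include path is illustrative) and that /mnt/ocfs2 is a file or directory on a mounted ocfs2 volume.

	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/types.h>
	#include "ocfs2_ioctl.h"	/* illustrative copy of the header above */

	int main(void)
	{
		struct ocfs2_info_blocksize bsz;
		struct ocfs2_info info;
		uint64_t reqs[1];
		int fd, ret;

		fd = open("/mnt/ocfs2", O_RDONLY);
		if (fd < 0)
			return 1;

		memset(&bsz, 0, sizeof(bsz));
		bsz.ib_req.ir_magic = OCFS2_INFO_MAGIC;
		bsz.ib_req.ir_code  = OCFS2_INFO_BLOCKSIZE;
		bsz.ib_req.ir_size  = sizeof(bsz);
		bsz.ib_req.ir_flags = 0;	/* or OCFS2_INFO_FL_NON_COHERENT */

		reqs[0] = (uint64_t)(uintptr_t)&bsz;
		memset(&info, 0, sizeof(info));
		info.oi_requests = (uint64_t)(uintptr_t)reqs;
		info.oi_count = 1;

		ret = ioctl(fd, OCFS2_IOC_INFO, &info);
		if (ret == 0 && (bsz.ib_req.ir_flags & OCFS2_INFO_FL_FILLED))
			printf("block size: %u\n", bsz.ib_blocksize);
		else if (bsz.ib_req.ir_flags & OCFS2_INFO_FL_ERROR)
			fprintf(stderr, "request failed inside the filesystem\n");

		close(fd);
		return ret ? 1 : 0;
	}

Because every request carries its own magic, code and size, a kernel can skip request types it does not understand (leaving OCFS2_INFO_FL_FILLED clear) and new types can be added without breaking existing callers, which is what the "small pieces" comment above is getting at.
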
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 47549f64224c..a120cfcf69bf 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2437,16 +2437,26 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2437 len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + 2437 len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
2438 le32_to_cpu(rec.r_clusters)) - cpos; 2438 le32_to_cpu(rec.r_clusters)) - cpos;
2439 /* 2439 /*
2440 * If the refcount rec already exist, cool. We just need
2441 * to check whether there is a split. Otherwise we just need
2442 * to increase the refcount.
2443 * If we will insert one, increases recs_add.
2444 *
2445 * We record all the records which will be inserted to the 2440 * We record all the records which will be inserted to the
2446 * same refcount block, so that we can tell exactly whether 2441 * same refcount block, so that we can tell exactly whether
2447 * we need a new refcount block or not. 2442 * we need a new refcount block or not.
2443 *
2444 * If we insert a new one, this is easy and only happens
2445 * while adding the refcounted flag to the extent, so we don't
2446 * have a chance of splitting. We just need one record.
2447 *
2448 * If the refcount rec already exists, that would be a little
2449 * complicated. We may have to:
2450 * 1) split at the beginning if the start pos isn't aligned.
2451 * we need 1 more record in this case.
2452 * 2) split at the end if the end pos isn't aligned.
2453 * we need 1 more record in this case.
2454 * 3) split in the middle because of file system fragmentation.
2455 * we need 2 more records in this case (we can't detect this
2456 * beforehand, so we always assume the worst case).
2448 */ 2457 */
2449 if (rec.r_refcount) { 2458 if (rec.r_refcount) {
2459 recs_add += 2;
2450 /* Check whether we need a split at the beginning. */ 2460 /* Check whether we need a split at the beginning. */
2451 if (cpos == start_cpos && 2461 if (cpos == start_cpos &&
2452 cpos != le64_to_cpu(rec.r_cpos)) 2462 cpos != le64_to_cpu(rec.r_cpos))
@@ -2954,7 +2964,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2954 if (map_end & (PAGE_CACHE_SIZE - 1)) 2964 if (map_end & (PAGE_CACHE_SIZE - 1))
2955 to = map_end & (PAGE_CACHE_SIZE - 1); 2965 to = map_end & (PAGE_CACHE_SIZE - 1);
2956 2966
2957 page = grab_cache_page(mapping, page_index); 2967 page = find_or_create_page(mapping, page_index, GFP_NOFS);
2958 2968
2959 /* 2969 /*
2960 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page 2970 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
@@ -3181,7 +3191,8 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
3181 if (map_end > end) 3191 if (map_end > end)
3182 map_end = end; 3192 map_end = end;
3183 3193
3184 page = grab_cache_page(context->inode->i_mapping, page_index); 3194 page = find_or_create_page(context->inode->i_mapping,
3195 page_index, GFP_NOFS);
3185 BUG_ON(!page); 3196 BUG_ON(!page);
3186 3197
3187 wait_on_page_writeback(page); 3198 wait_on_page_writeback(page);
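
Both page-cache hunks in refcounttree.c swap grab_cache_page() for find_or_create_page() so the allocation mask can be stated explicitly: grab_cache_page() uses the mapping's default mask, which normally includes __GFP_FS and therefore allows the allocator to recurse into filesystem reclaim while ocfs2 may already hold cluster locks or an open journal transaction. A minimal sketch of the pattern, with the surrounding loop and error handling simplified relative to the real code:

	/* GFP_NOFS: we may hold cluster locks / a running transaction,
	 * so page allocation must not re-enter the filesystem. */
	page = find_or_create_page(mapping, page_index, GFP_NOFS);
	if (!page) {
		ret = -ENOMEM;
		mlog_errno(ret);
		break;			/* abandon this copy/writeback pass */
	}

	/* ... operate on the locked page ... */

	unlock_page(page);
	page_cache_release(page);

find_or_create_page() returns the page locked (or NULL on allocation failure), so the unlock/release pair mirrors what the callers above already do.
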
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 29cba0eaa927..c8ce46f7d8e3 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -21,14 +21,14 @@ struct ocfs2_refcount_tree {
21 struct rb_node rf_node; 21 struct rb_node rf_node;
22 u64 rf_blkno; 22 u64 rf_blkno;
23 u32 rf_generation; 23 u32 rf_generation;
24 struct kref rf_getcnt;
24 struct rw_semaphore rf_sem; 25 struct rw_semaphore rf_sem;
25 struct ocfs2_lock_res rf_lockres; 26 struct ocfs2_lock_res rf_lockres;
26 struct kref rf_getcnt;
27 int rf_removed; 27 int rf_removed;
28 28
29 /* the following 4 fields are used by caching_info. */ 29 /* the following 4 fields are used by caching_info. */
30 struct ocfs2_caching_info rf_ci;
31 spinlock_t rf_lock; 30 spinlock_t rf_lock;
31 struct ocfs2_caching_info rf_ci;
32 struct mutex rf_io_mutex; 32 struct mutex rf_io_mutex;
33 struct super_block *rf_sb; 33 struct super_block *rf_sb;
34}; 34};
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index a8e6a95a353f..8a286f54dca1 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -57,11 +57,28 @@ struct ocfs2_suballoc_result {
57 u64 sr_bg_blkno; /* The bg we allocated from. Set 57 u64 sr_bg_blkno; /* The bg we allocated from. Set
58 to 0 when a block group is 58 to 0 when a block group is
59 contiguous. */ 59 contiguous. */
60 u64 sr_bg_stable_blkno; /*
61 * Doesn't change, always
62 * set to target block
63 * group descriptor
64 * block.
65 */
60 u64 sr_blkno; /* The first allocated block */ 66 u64 sr_blkno; /* The first allocated block */
61 unsigned int sr_bit_offset; /* The bit in the bg */ 67 unsigned int sr_bit_offset; /* The bit in the bg */
62 unsigned int sr_bits; /* How many bits we claimed */ 68 unsigned int sr_bits; /* How many bits we claimed */
63}; 69};
64 70
71static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
72{
73 if (res->sr_blkno == 0)
74 return 0;
75
76 if (res->sr_bg_blkno)
77 return res->sr_bg_blkno;
78
79 return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset);
80}
81
65static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 82static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
66static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 83static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
67static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 84static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -138,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
138 brelse(ac->ac_bh); 155 brelse(ac->ac_bh);
139 ac->ac_bh = NULL; 156 ac->ac_bh = NULL;
140 ac->ac_resv = NULL; 157 ac->ac_resv = NULL;
158 if (ac->ac_find_loc_priv) {
159 kfree(ac->ac_find_loc_priv);
160 ac->ac_find_loc_priv = NULL;
161 }
141} 162}
142 163
143void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 164void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -1678,6 +1699,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1678 if (!ret) 1699 if (!ret)
1679 ocfs2_bg_discontig_fix_result(ac, gd, res); 1700 ocfs2_bg_discontig_fix_result(ac, gd, res);
1680 1701
1702 /*
1703 * sr_bg_blkno might have been changed by
1704 * ocfs2_bg_discontig_fix_result
1705 */
1706 res->sr_bg_stable_blkno = group_bh->b_blocknr;
1707
1708 if (ac->ac_find_loc_only)
1709 goto out_loc_only;
1710
1681 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, 1711 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1682 res->sr_bits, 1712 res->sr_bits,
1683 le16_to_cpu(gd->bg_chain)); 1713 le16_to_cpu(gd->bg_chain));
@@ -1691,6 +1721,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1691 if (ret < 0) 1721 if (ret < 0)
1692 mlog_errno(ret); 1722 mlog_errno(ret);
1693 1723
1724out_loc_only:
1694 *bits_left = le16_to_cpu(gd->bg_free_bits_count); 1725 *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1695 1726
1696out: 1727out:
@@ -1708,7 +1739,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1708{ 1739{
1709 int status; 1740 int status;
1710 u16 chain; 1741 u16 chain;
1711 u32 tmp_used;
1712 u64 next_group; 1742 u64 next_group;
1713 struct inode *alloc_inode = ac->ac_inode; 1743 struct inode *alloc_inode = ac->ac_inode;
1714 struct buffer_head *group_bh = NULL; 1744 struct buffer_head *group_bh = NULL;
@@ -1770,6 +1800,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1770 if (!status) 1800 if (!status)
1771 ocfs2_bg_discontig_fix_result(ac, bg, res); 1801 ocfs2_bg_discontig_fix_result(ac, bg, res);
1772 1802
1803 /*
1804 * sr_bg_blkno might have been changed by
1805 * ocfs2_bg_discontig_fix_result
1806 */
1807 res->sr_bg_stable_blkno = group_bh->b_blocknr;
1773 1808
1774 /* 1809 /*
1775 * Keep track of previous block descriptor read. When 1810 * Keep track of previous block descriptor read. When
@@ -1796,22 +1831,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1796 } 1831 }
1797 } 1832 }
1798 1833
1799 /* Ok, claim our bits now: set the info on dinode, chainlist 1834 if (ac->ac_find_loc_only)
1800 * and then the group */ 1835 goto out_loc_only;
1801 status = ocfs2_journal_access_di(handle, 1836
1802 INODE_CACHE(alloc_inode), 1837 status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
1803 ac->ac_bh, 1838 ac->ac_bh, res->sr_bits,
1804 OCFS2_JOURNAL_ACCESS_WRITE); 1839 chain);
1805 if (status < 0) { 1840 if (status) {
1806 mlog_errno(status); 1841 mlog_errno(status);
1807 goto bail; 1842 goto bail;
1808 } 1843 }
1809 1844
1810 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1811 fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used);
1812 le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits);
1813 ocfs2_journal_dirty(handle, ac->ac_bh);
1814
1815 status = ocfs2_block_group_set_bits(handle, 1845 status = ocfs2_block_group_set_bits(handle,
1816 alloc_inode, 1846 alloc_inode,
1817 bg, 1847 bg,
@@ -1826,6 +1856,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1826 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, 1856 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
1827 (unsigned long long)le64_to_cpu(fe->i_blkno)); 1857 (unsigned long long)le64_to_cpu(fe->i_blkno));
1828 1858
1859out_loc_only:
1829 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1860 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1830bail: 1861bail:
1831 brelse(group_bh); 1862 brelse(group_bh);
@@ -1845,6 +1876,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1845 int status; 1876 int status;
1846 u16 victim, i; 1877 u16 victim, i;
1847 u16 bits_left = 0; 1878 u16 bits_left = 0;
1879 u64 hint = ac->ac_last_group;
1848 struct ocfs2_chain_list *cl; 1880 struct ocfs2_chain_list *cl;
1849 struct ocfs2_dinode *fe; 1881 struct ocfs2_dinode *fe;
1850 1882
@@ -1872,7 +1904,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1872 goto bail; 1904 goto bail;
1873 } 1905 }
1874 1906
1875 res->sr_bg_blkno = ac->ac_last_group; 1907 res->sr_bg_blkno = hint;
1876 if (res->sr_bg_blkno) { 1908 if (res->sr_bg_blkno) {
1877 /* Attempt to short-circuit the usual search mechanism 1909 /* Attempt to short-circuit the usual search mechanism
1878 * by jumping straight to the most recently used 1910 * by jumping straight to the most recently used
@@ -1896,8 +1928,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1896 1928
1897 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1929 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1898 res, &bits_left); 1930 res, &bits_left);
1899 if (!status) 1931 if (!status) {
1932 hint = ocfs2_group_from_res(res);
1900 goto set_hint; 1933 goto set_hint;
1934 }
1901 if (status < 0 && status != -ENOSPC) { 1935 if (status < 0 && status != -ENOSPC) {
1902 mlog_errno(status); 1936 mlog_errno(status);
1903 goto bail; 1937 goto bail;
@@ -1920,8 +1954,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1920 ac->ac_chain = i; 1954 ac->ac_chain = i;
1921 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1955 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1922 res, &bits_left); 1956 res, &bits_left);
1923 if (!status) 1957 if (!status) {
1958 hint = ocfs2_group_from_res(res);
1924 break; 1959 break;
1960 }
1925 if (status < 0 && status != -ENOSPC) { 1961 if (status < 0 && status != -ENOSPC) {
1926 mlog_errno(status); 1962 mlog_errno(status);
1927 goto bail; 1963 goto bail;
@@ -1936,7 +1972,7 @@ set_hint:
1936 if (bits_left < min_bits) 1972 if (bits_left < min_bits)
1937 ac->ac_last_group = 0; 1973 ac->ac_last_group = 0;
1938 else 1974 else
1939 ac->ac_last_group = res->sr_bg_blkno; 1975 ac->ac_last_group = hint;
1940 } 1976 }
1941 1977
1942bail: 1978bail:
@@ -2016,6 +2052,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir,
2016 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; 2052 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
2017} 2053}
2018 2054
2055int ocfs2_find_new_inode_loc(struct inode *dir,
2056 struct buffer_head *parent_fe_bh,
2057 struct ocfs2_alloc_context *ac,
2058 u64 *fe_blkno)
2059{
2060 int ret;
2061 handle_t *handle = NULL;
2062 struct ocfs2_suballoc_result *res;
2063
2064 BUG_ON(!ac);
2065 BUG_ON(ac->ac_bits_given != 0);
2066 BUG_ON(ac->ac_bits_wanted != 1);
2067 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
2068
2069 res = kzalloc(sizeof(*res), GFP_NOFS);
2070 if (res == NULL) {
2071 ret = -ENOMEM;
2072 mlog_errno(ret);
2073 goto out;
2074 }
2075
2076 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
2077
2078 /*
2079 * The handle started here is for chain relink. Alternatively,
2080 * we could just disable relink for these calls.
2081 */
2082 handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC);
2083 if (IS_ERR(handle)) {
2084 ret = PTR_ERR(handle);
2085 handle = NULL;
2086 mlog_errno(ret);
2087 goto out;
2088 }
2089
2090 /*
2091 * This will instruct ocfs2_claim_suballoc_bits and
2092 * ocfs2_search_one_group to search but save actual allocation
2093 * for later.
2094 */
2095 ac->ac_find_loc_only = 1;
2096
2097 ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res);
2098 if (ret < 0) {
2099 mlog_errno(ret);
2100 goto out;
2101 }
2102
2103 ac->ac_find_loc_priv = res;
2104 *fe_blkno = res->sr_blkno;
2105
2106out:
2107 if (handle)
2108 ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
2109
2110 if (ret)
2111 kfree(res);
2112
2113 return ret;
2114}
2115
2116int ocfs2_claim_new_inode_at_loc(handle_t *handle,
2117 struct inode *dir,
2118 struct ocfs2_alloc_context *ac,
2119 u64 *suballoc_loc,
2120 u16 *suballoc_bit,
2121 u64 di_blkno)
2122{
2123 int ret;
2124 u16 chain;
2125 struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv;
2126 struct buffer_head *bg_bh = NULL;
2127 struct ocfs2_group_desc *bg;
2128 struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data;
2129
2130 /*
2131 * Since di_blkno is being passed back in, we check for any
2132 * inconsistencies which may have happened between
2133 * calls. These are code bugs as di_blkno is not expected to
2134 * change once returned from ocfs2_find_new_inode_loc().
2135 */
2136 BUG_ON(res->sr_blkno != di_blkno);
2137
2138 ret = ocfs2_read_group_descriptor(ac->ac_inode, di,
2139 res->sr_bg_stable_blkno, &bg_bh);
2140 if (ret) {
2141 mlog_errno(ret);
2142 goto out;
2143 }
2144
2145 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
2146 chain = le16_to_cpu(bg->bg_chain);
2147
2148 ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle,
2149 ac->ac_bh, res->sr_bits,
2150 chain);
2151 if (ret) {
2152 mlog_errno(ret);
2153 goto out;
2154 }
2155
2156 ret = ocfs2_block_group_set_bits(handle,
2157 ac->ac_inode,
2158 bg,
2159 bg_bh,
2160 res->sr_bit_offset,
2161 res->sr_bits);
2162 if (ret < 0) {
2163 mlog_errno(ret);
2164 goto out;
2165 }
2166
2167 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
2168 (unsigned long long)di_blkno);
2169
2170 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
2171
2172 BUG_ON(res->sr_bits != 1);
2173
2174 *suballoc_loc = res->sr_bg_blkno;
2175 *suballoc_bit = res->sr_bit_offset;
2176 ac->ac_bits_given++;
2177 ocfs2_save_inode_ac_group(dir, ac);
2178
2179out:
2180 brelse(bg_bh);
2181
2182 return ret;
2183}
2184
2019int ocfs2_claim_new_inode(handle_t *handle, 2185int ocfs2_claim_new_inode(handle_t *handle,
2020 struct inode *dir, 2186 struct inode *dir,
2021 struct buffer_head *parent_fe_bh, 2187 struct buffer_head *parent_fe_bh,
@@ -2567,7 +2733,8 @@ out:
2567 * suballoc_bit. 2733 * suballoc_bit.
2568 */ 2734 */
2569static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, 2735static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2570 u16 *suballoc_slot, u16 *suballoc_bit) 2736 u16 *suballoc_slot, u64 *group_blkno,
2737 u16 *suballoc_bit)
2571{ 2738{
2572 int status; 2739 int status;
2573 struct buffer_head *inode_bh = NULL; 2740 struct buffer_head *inode_bh = NULL;
@@ -2604,6 +2771,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2604 *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); 2771 *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
2605 if (suballoc_bit) 2772 if (suballoc_bit)
2606 *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); 2773 *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
2774 if (group_blkno)
2775 *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc);
2607 2776
2608bail: 2777bail:
2609 brelse(inode_bh); 2778 brelse(inode_bh);
@@ -2621,7 +2790,8 @@ bail:
2621 */ 2790 */
2622static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, 2791static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2623 struct inode *suballoc, 2792 struct inode *suballoc,
2624 struct buffer_head *alloc_bh, u64 blkno, 2793 struct buffer_head *alloc_bh,
2794 u64 group_blkno, u64 blkno,
2625 u16 bit, int *res) 2795 u16 bit, int *res)
2626{ 2796{
2627 struct ocfs2_dinode *alloc_di; 2797 struct ocfs2_dinode *alloc_di;
@@ -2642,10 +2812,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2642 goto bail; 2812 goto bail;
2643 } 2813 }
2644 2814
2645 if (alloc_di->i_suballoc_loc) 2815 bg_blkno = group_blkno ? group_blkno :
2646 bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc); 2816 ocfs2_which_suballoc_group(blkno, bit);
2647 else
2648 bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
2649 status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, 2817 status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
2650 &group_bh); 2818 &group_bh);
2651 if (status < 0) { 2819 if (status < 0) {
@@ -2680,6 +2848,7 @@ bail:
2680int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) 2848int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2681{ 2849{
2682 int status; 2850 int status;
2851 u64 group_blkno = 0;
2683 u16 suballoc_bit = 0, suballoc_slot = 0; 2852 u16 suballoc_bit = 0, suballoc_slot = 0;
2684 struct inode *inode_alloc_inode; 2853 struct inode *inode_alloc_inode;
2685 struct buffer_head *alloc_bh = NULL; 2854 struct buffer_head *alloc_bh = NULL;
@@ -2687,7 +2856,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2687 mlog_entry("blkno: %llu", (unsigned long long)blkno); 2856 mlog_entry("blkno: %llu", (unsigned long long)blkno);
2688 2857
2689 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, 2858 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2690 &suballoc_bit); 2859 &group_blkno, &suballoc_bit);
2691 if (status < 0) { 2860 if (status < 0) {
2692 mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); 2861 mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
2693 goto bail; 2862 goto bail;
@@ -2715,7 +2884,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2715 } 2884 }
2716 2885
2717 status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, 2886 status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
2718 blkno, suballoc_bit, res); 2887 group_blkno, blkno, suballoc_bit, res);
2719 if (status < 0) 2888 if (status < 0)
2720 mlog(ML_ERROR, "test suballoc bit failed %d\n", status); 2889 mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
2721 2890
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a017dd3ee7d9..b8afabfeede4 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -56,6 +56,9 @@ struct ocfs2_alloc_context {
56 u64 ac_max_block; /* Highest block number to allocate. 0 is 56 u64 ac_max_block; /* Highest block number to allocate. 0 is
57 is the same as ~0 - unlimited */ 57 is the same as ~0 - unlimited */
58 58
59 int ac_find_loc_only; /* hack for reflink operation ordering */
60 struct ocfs2_suballoc_result *ac_find_loc_priv; /* */
61
59 struct ocfs2_alloc_reservation *ac_resv; 62 struct ocfs2_alloc_reservation *ac_resv;
60}; 63};
61 64
@@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
197 struct ocfs2_alloc_context **meta_ac); 200 struct ocfs2_alloc_context **meta_ac);
198 201
199int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); 202int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
203
204
205
206/*
207 * The following two interfaces are for ocfs2_create_inode_in_orphan().
208 */
209int ocfs2_find_new_inode_loc(struct inode *dir,
210 struct buffer_head *parent_fe_bh,
211 struct ocfs2_alloc_context *ac,
212 u64 *fe_blkno);
213
214int ocfs2_claim_new_inode_at_loc(handle_t *handle,
215 struct inode *dir,
216 struct ocfs2_alloc_context *ac,
217 u64 *suballoc_loc,
218 u16 *suballoc_bit,
219 u64 di_blkno);
220
200#endif /* _CHAINALLOC_H_ */ 221#endif /* _CHAINALLOC_H_ */
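
Read together with the suballoc.c hunk, these two prototypes split inode allocation for ocfs2_create_inode_in_orphan() into a "find" step, which only reserves a location, and a later "claim" step, which actually sets the bits inside the caller's transaction. A rough sketch of the expected ordering, pieced together from the namei.c and suballoc.c hunks in this patch (error handling, locking and the orphan-dir bookkeeping are omitted):

	u64 di_blkno, suballoc_loc;
	u16 suballoc_bit;

	/* Phase 1: locate a free inode slot; nothing is written yet and
	 * the helper starts (and commits) its own small transaction. */
	status = ocfs2_find_new_inode_loc(dir, parent_di_bh, inode_ac,
					  &di_blkno);
	if (status < 0)
		goto leave;

	/* ... start the real transaction for the orphan create ... */

	/* Phase 2: commit to that location. di_blkno must be the value
	 * returned above; the claim path BUG()s on any mismatch. */
	status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac,
					      &suballoc_loc, &suballoc_bit,
					      di_blkno);
	if (status < 0)
		goto leave;

	status = __ocfs2_mknod_locked(dir, inode, 0, &new_di_bh,
				      parent_di_bh, handle, inode_ac,
				      di_blkno, suballoc_loc, suballoc_bit);

The intermediate state lives in ac->ac_find_loc_priv (freed by ocfs2_free_ac_resource() as shown above), and ac->ac_find_loc_only is the flag that tells ocfs2_claim_suballoc_bits() and the group-search helpers to stop short of updating the allocator.
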