diff options
author | Joel Becker <jlbec@evilplan.org> | 2011-05-26 00:51:55 -0400 |
---|---|---|
committer | Joel Becker <jlbec@evilplan.org> | 2011-05-26 00:51:55 -0400 |
commit | ece928df16494becd43f999aff9bd530182e7e81 (patch) | |
tree | 905042764ea5d8ab6eda63666406e19f607bcf4c /fs/ocfs2 | |
parent | 3d1c1829ebe7e8bb48a997b39b4865abc9197e5e (diff) | |
parent | dda54e76d7dba0532ebdd72e0b4f492a03f83225 (diff) |
Merge branch 'move_extents' of git://oss.oracle.com/git/tye/linux-2.6 into ocfs2-merge-window
Conflicts:
fs/ocfs2/ioctl.c
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/Makefile | 1 | ||||
-rw-r--r-- | fs/ocfs2/ioctl.c | 468 | ||||
-rw-r--r-- | fs/ocfs2/move_extents.c | 1153 | ||||
-rw-r--r-- | fs/ocfs2/move_extents.h | 22 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_ioctl.h | 68 | ||||
-rw-r--r-- | fs/ocfs2/refcounttree.c | 58 | ||||
-rw-r--r-- | fs/ocfs2/refcounttree.h | 11 |
7 files changed, 1725 insertions, 56 deletions
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index d8a0313e99e6..f17e58b32989 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -30,6 +30,7 @@ ocfs2-objs := \ | |||
30 | namei.o \ | 30 | namei.o \ |
31 | refcounttree.o \ | 31 | refcounttree.o \ |
32 | reservations.o \ | 32 | reservations.o \ |
33 | move_extents.o \ | ||
33 | resize.o \ | 34 | resize.o \ |
34 | slot_map.o \ | 35 | slot_map.o \ |
35 | suballoc.o \ | 36 | suballoc.o \ |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 312a28f433a4..bc91072b7219 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -22,6 +22,11 @@ | |||
22 | #include "ioctl.h" | 22 | #include "ioctl.h" |
23 | #include "resize.h" | 23 | #include "resize.h" |
24 | #include "refcounttree.h" | 24 | #include "refcounttree.h" |
25 | #include "sysfile.h" | ||
26 | #include "dir.h" | ||
27 | #include "buffer_head_io.h" | ||
28 | #include "suballoc.h" | ||
29 | #include "move_extents.h" | ||
25 | 30 | ||
26 | #include <linux/ext2_fs.h> | 31 | #include <linux/ext2_fs.h> |
27 | 32 | ||
@@ -35,31 +40,27 @@ | |||
35 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's | 40 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's |
36 | * just a best-effort to tell userspace that this request caused the error. | 41 | * just a best-effort to tell userspace that this request caused the error. |
37 | */ | 42 | */ |
38 | static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, | 43 | static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, |
39 | struct ocfs2_info_request __user *req) | 44 | struct ocfs2_info_request __user *req) |
40 | { | 45 | { |
41 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; | 46 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; |
42 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); | 47 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); |
43 | } | 48 | } |
44 | 49 | ||
45 | #define o2info_set_request_error(a, b) \ | 50 | static inline void o2info_set_request_filled(struct ocfs2_info_request *req) |
46 | __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) | ||
47 | |||
48 | static inline void __o2info_set_request_filled(struct ocfs2_info_request *req) | ||
49 | { | 51 | { |
50 | req->ir_flags |= OCFS2_INFO_FL_FILLED; | 52 | req->ir_flags |= OCFS2_INFO_FL_FILLED; |
51 | } | 53 | } |
52 | 54 | ||
53 | #define o2info_set_request_filled(a) \ | 55 | static inline void o2info_clear_request_filled(struct ocfs2_info_request *req) |
54 | __o2info_set_request_filled((struct ocfs2_info_request *)&(a)) | ||
55 | |||
56 | static inline void __o2info_clear_request_filled(struct ocfs2_info_request *req) | ||
57 | { | 56 | { |
58 | req->ir_flags &= ~OCFS2_INFO_FL_FILLED; | 57 | req->ir_flags &= ~OCFS2_INFO_FL_FILLED; |
59 | } | 58 | } |
60 | 59 | ||
61 | #define o2info_clear_request_filled(a) \ | 60 | static inline int o2info_coherent(struct ocfs2_info_request *req) |
62 | __o2info_clear_request_filled((struct ocfs2_info_request *)&(a)) | 61 | { |
62 | return (!(req->ir_flags & OCFS2_INFO_FL_NON_COHERENT)); | ||
63 | } | ||
63 | 64 | ||
64 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | 65 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) |
65 | { | 66 | { |
@@ -153,7 +154,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode, | |||
153 | 154 | ||
154 | oib.ib_blocksize = inode->i_sb->s_blocksize; | 155 | oib.ib_blocksize = inode->i_sb->s_blocksize; |
155 | 156 | ||
156 | o2info_set_request_filled(oib); | 157 | o2info_set_request_filled(&oib.ib_req); |
157 | 158 | ||
158 | if (o2info_to_user(oib, req)) | 159 | if (o2info_to_user(oib, req)) |
159 | goto bail; | 160 | goto bail; |
@@ -161,7 +162,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode, | |||
161 | status = 0; | 162 | status = 0; |
162 | bail: | 163 | bail: |
163 | if (status) | 164 | if (status) |
164 | o2info_set_request_error(oib, req); | 165 | o2info_set_request_error(&oib.ib_req, req); |
165 | 166 | ||
166 | return status; | 167 | return status; |
167 | } | 168 | } |
@@ -178,7 +179,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode, | |||
178 | 179 | ||
179 | oic.ic_clustersize = osb->s_clustersize; | 180 | oic.ic_clustersize = osb->s_clustersize; |
180 | 181 | ||
181 | o2info_set_request_filled(oic); | 182 | o2info_set_request_filled(&oic.ic_req); |
182 | 183 | ||
183 | if (o2info_to_user(oic, req)) | 184 | if (o2info_to_user(oic, req)) |
184 | goto bail; | 185 | goto bail; |
@@ -186,7 +187,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode, | |||
186 | status = 0; | 187 | status = 0; |
187 | bail: | 188 | bail: |
188 | if (status) | 189 | if (status) |
189 | o2info_set_request_error(oic, req); | 190 | o2info_set_request_error(&oic.ic_req, req); |
190 | 191 | ||
191 | return status; | 192 | return status; |
192 | } | 193 | } |
@@ -203,7 +204,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode, | |||
203 | 204 | ||
204 | oim.im_max_slots = osb->max_slots; | 205 | oim.im_max_slots = osb->max_slots; |
205 | 206 | ||
206 | o2info_set_request_filled(oim); | 207 | o2info_set_request_filled(&oim.im_req); |
207 | 208 | ||
208 | if (o2info_to_user(oim, req)) | 209 | if (o2info_to_user(oim, req)) |
209 | goto bail; | 210 | goto bail; |
@@ -211,7 +212,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode, | |||
211 | status = 0; | 212 | status = 0; |
212 | bail: | 213 | bail: |
213 | if (status) | 214 | if (status) |
214 | o2info_set_request_error(oim, req); | 215 | o2info_set_request_error(&oim.im_req, req); |
215 | 216 | ||
216 | return status; | 217 | return status; |
217 | } | 218 | } |
@@ -228,7 +229,7 @@ int ocfs2_info_handle_label(struct inode *inode, | |||
228 | 229 | ||
229 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); | 230 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); |
230 | 231 | ||
231 | o2info_set_request_filled(oil); | 232 | o2info_set_request_filled(&oil.il_req); |
232 | 233 | ||
233 | if (o2info_to_user(oil, req)) | 234 | if (o2info_to_user(oil, req)) |
234 | goto bail; | 235 | goto bail; |
@@ -236,7 +237,7 @@ int ocfs2_info_handle_label(struct inode *inode, | |||
236 | status = 0; | 237 | status = 0; |
237 | bail: | 238 | bail: |
238 | if (status) | 239 | if (status) |
239 | o2info_set_request_error(oil, req); | 240 | o2info_set_request_error(&oil.il_req, req); |
240 | 241 | ||
241 | return status; | 242 | return status; |
242 | } | 243 | } |
@@ -253,7 +254,7 @@ int ocfs2_info_handle_uuid(struct inode *inode, | |||
253 | 254 | ||
254 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); | 255 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); |
255 | 256 | ||
256 | o2info_set_request_filled(oiu); | 257 | o2info_set_request_filled(&oiu.iu_req); |
257 | 258 | ||
258 | if (o2info_to_user(oiu, req)) | 259 | if (o2info_to_user(oiu, req)) |
259 | goto bail; | 260 | goto bail; |
@@ -261,7 +262,7 @@ int ocfs2_info_handle_uuid(struct inode *inode, | |||
261 | status = 0; | 262 | status = 0; |
262 | bail: | 263 | bail: |
263 | if (status) | 264 | if (status) |
264 | o2info_set_request_error(oiu, req); | 265 | o2info_set_request_error(&oiu.iu_req, req); |
265 | 266 | ||
266 | return status; | 267 | return status; |
267 | } | 268 | } |
@@ -280,7 +281,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode, | |||
280 | oif.if_incompat_features = osb->s_feature_incompat; | 281 | oif.if_incompat_features = osb->s_feature_incompat; |
281 | oif.if_ro_compat_features = osb->s_feature_ro_compat; | 282 | oif.if_ro_compat_features = osb->s_feature_ro_compat; |
282 | 283 | ||
283 | o2info_set_request_filled(oif); | 284 | o2info_set_request_filled(&oif.if_req); |
284 | 285 | ||
285 | if (o2info_to_user(oif, req)) | 286 | if (o2info_to_user(oif, req)) |
286 | goto bail; | 287 | goto bail; |
@@ -288,7 +289,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode, | |||
288 | status = 0; | 289 | status = 0; |
289 | bail: | 290 | bail: |
290 | if (status) | 291 | if (status) |
291 | o2info_set_request_error(oif, req); | 292 | o2info_set_request_error(&oif.if_req, req); |
292 | 293 | ||
293 | return status; | 294 | return status; |
294 | } | 295 | } |
@@ -305,7 +306,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode, | |||
305 | 306 | ||
306 | oij.ij_journal_size = osb->journal->j_inode->i_size; | 307 | oij.ij_journal_size = osb->journal->j_inode->i_size; |
307 | 308 | ||
308 | o2info_set_request_filled(oij); | 309 | o2info_set_request_filled(&oij.ij_req); |
309 | 310 | ||
310 | if (o2info_to_user(oij, req)) | 311 | if (o2info_to_user(oij, req)) |
311 | goto bail; | 312 | goto bail; |
@@ -313,7 +314,408 @@ int ocfs2_info_handle_journal_size(struct inode *inode, | |||
313 | status = 0; | 314 | status = 0; |
314 | bail: | 315 | bail: |
315 | if (status) | 316 | if (status) |
316 | o2info_set_request_error(oij, req); | 317 | o2info_set_request_error(&oij.ij_req, req); |
318 | |||
319 | return status; | ||
320 | } | ||
321 | |||
322 | int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, | ||
323 | struct inode *inode_alloc, u64 blkno, | ||
324 | struct ocfs2_info_freeinode *fi, u32 slot) | ||
325 | { | ||
326 | int status = 0, unlock = 0; | ||
327 | |||
328 | struct buffer_head *bh = NULL; | ||
329 | struct ocfs2_dinode *dinode_alloc = NULL; | ||
330 | |||
331 | if (inode_alloc) | ||
332 | mutex_lock(&inode_alloc->i_mutex); | ||
333 | |||
334 | if (o2info_coherent(&fi->ifi_req)) { | ||
335 | status = ocfs2_inode_lock(inode_alloc, &bh, 0); | ||
336 | if (status < 0) { | ||
337 | mlog_errno(status); | ||
338 | goto bail; | ||
339 | } | ||
340 | unlock = 1; | ||
341 | } else { | ||
342 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); | ||
343 | if (status < 0) { | ||
344 | mlog_errno(status); | ||
345 | goto bail; | ||
346 | } | ||
347 | } | ||
348 | |||
349 | dinode_alloc = (struct ocfs2_dinode *)bh->b_data; | ||
350 | |||
351 | fi->ifi_stat[slot].lfi_total = | ||
352 | le32_to_cpu(dinode_alloc->id1.bitmap1.i_total); | ||
353 | fi->ifi_stat[slot].lfi_free = | ||
354 | le32_to_cpu(dinode_alloc->id1.bitmap1.i_total) - | ||
355 | le32_to_cpu(dinode_alloc->id1.bitmap1.i_used); | ||
356 | |||
357 | bail: | ||
358 | if (unlock) | ||
359 | ocfs2_inode_unlock(inode_alloc, 0); | ||
360 | |||
361 | if (inode_alloc) | ||
362 | mutex_unlock(&inode_alloc->i_mutex); | ||
363 | |||
364 | brelse(bh); | ||
365 | |||
366 | return status; | ||
367 | } | ||
368 | |||
369 | int ocfs2_info_handle_freeinode(struct inode *inode, | ||
370 | struct ocfs2_info_request __user *req) | ||
371 | { | ||
372 | u32 i; | ||
373 | u64 blkno = -1; | ||
374 | char namebuf[40]; | ||
375 | int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE; | ||
376 | struct ocfs2_info_freeinode *oifi = NULL; | ||
377 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
378 | struct inode *inode_alloc = NULL; | ||
379 | |||
380 | oifi = kzalloc(sizeof(struct ocfs2_info_freeinode), GFP_KERNEL); | ||
381 | if (!oifi) { | ||
382 | status = -ENOMEM; | ||
383 | mlog_errno(status); | ||
384 | goto bail; | ||
385 | } | ||
386 | |||
387 | if (o2info_from_user(*oifi, req)) | ||
388 | goto bail; | ||
389 | |||
390 | oifi->ifi_slotnum = osb->max_slots; | ||
391 | |||
392 | for (i = 0; i < oifi->ifi_slotnum; i++) { | ||
393 | if (o2info_coherent(&oifi->ifi_req)) { | ||
394 | inode_alloc = ocfs2_get_system_file_inode(osb, type, i); | ||
395 | if (!inode_alloc) { | ||
396 | mlog(ML_ERROR, "unable to get alloc inode in " | ||
397 | "slot %u\n", i); | ||
398 | status = -EIO; | ||
399 | goto bail; | ||
400 | } | ||
401 | } else { | ||
402 | ocfs2_sprintf_system_inode_name(namebuf, | ||
403 | sizeof(namebuf), | ||
404 | type, i); | ||
405 | status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, | ||
406 | namebuf, | ||
407 | strlen(namebuf), | ||
408 | &blkno); | ||
409 | if (status < 0) { | ||
410 | status = -ENOENT; | ||
411 | goto bail; | ||
412 | } | ||
413 | } | ||
414 | |||
415 | status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i); | ||
416 | if (status < 0) | ||
417 | goto bail; | ||
418 | |||
419 | iput(inode_alloc); | ||
420 | inode_alloc = NULL; | ||
421 | } | ||
422 | |||
423 | o2info_set_request_filled(&oifi->ifi_req); | ||
424 | |||
425 | if (o2info_to_user(*oifi, req)) | ||
426 | goto bail; | ||
427 | |||
428 | status = 0; | ||
429 | bail: | ||
430 | if (status) | ||
431 | o2info_set_request_error(&oifi->ifi_req, req); | ||
432 | |||
433 | kfree(oifi); | ||
434 | |||
435 | return status; | ||
436 | } | ||
437 | |||
438 | static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist, | ||
439 | unsigned int chunksize) | ||
440 | { | ||
441 | int index; | ||
442 | |||
443 | index = __ilog2_u32(chunksize); | ||
444 | if (index >= OCFS2_INFO_MAX_HIST) | ||
445 | index = OCFS2_INFO_MAX_HIST - 1; | ||
446 | |||
447 | hist->fc_chunks[index]++; | ||
448 | hist->fc_clusters[index] += chunksize; | ||
449 | } | ||
450 | |||
451 | static void o2ffg_update_stats(struct ocfs2_info_freefrag_stats *stats, | ||
452 | unsigned int chunksize) | ||
453 | { | ||
454 | if (chunksize > stats->ffs_max) | ||
455 | stats->ffs_max = chunksize; | ||
456 | |||
457 | if (chunksize < stats->ffs_min) | ||
458 | stats->ffs_min = chunksize; | ||
459 | |||
460 | stats->ffs_avg += chunksize; | ||
461 | stats->ffs_free_chunks_real++; | ||
462 | } | ||
463 | |||
464 | void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg, | ||
465 | unsigned int chunksize) | ||
466 | { | ||
467 | o2ffg_update_histogram(&(ffg->iff_ffs.ffs_fc_hist), chunksize); | ||
468 | o2ffg_update_stats(&(ffg->iff_ffs), chunksize); | ||
469 | } | ||
470 | |||
471 | int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb, | ||
472 | struct inode *gb_inode, | ||
473 | struct ocfs2_dinode *gb_dinode, | ||
474 | struct ocfs2_chain_rec *rec, | ||
475 | struct ocfs2_info_freefrag *ffg, | ||
476 | u32 chunks_in_group) | ||
477 | { | ||
478 | int status = 0, used; | ||
479 | u64 blkno; | ||
480 | |||
481 | struct buffer_head *bh = NULL; | ||
482 | struct ocfs2_group_desc *bg = NULL; | ||
483 | |||
484 | unsigned int max_bits, num_clusters; | ||
485 | unsigned int offset = 0, cluster, chunk; | ||
486 | unsigned int chunk_free, last_chunksize = 0; | ||
487 | |||
488 | if (!le32_to_cpu(rec->c_free)) | ||
489 | goto bail; | ||
490 | |||
491 | do { | ||
492 | if (!bg) | ||
493 | blkno = le64_to_cpu(rec->c_blkno); | ||
494 | else | ||
495 | blkno = le64_to_cpu(bg->bg_next_group); | ||
496 | |||
497 | if (bh) { | ||
498 | brelse(bh); | ||
499 | bh = NULL; | ||
500 | } | ||
501 | |||
502 | if (o2info_coherent(&ffg->iff_req)) | ||
503 | status = ocfs2_read_group_descriptor(gb_inode, | ||
504 | gb_dinode, | ||
505 | blkno, &bh); | ||
506 | else | ||
507 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); | ||
508 | |||
509 | if (status < 0) { | ||
510 | mlog(ML_ERROR, "Can't read the group descriptor # " | ||
511 | "%llu from device.", (unsigned long long)blkno); | ||
512 | status = -EIO; | ||
513 | goto bail; | ||
514 | } | ||
515 | |||
516 | bg = (struct ocfs2_group_desc *)bh->b_data; | ||
517 | |||
518 | if (!le16_to_cpu(bg->bg_free_bits_count)) | ||
519 | continue; | ||
520 | |||
521 | max_bits = le16_to_cpu(bg->bg_bits); | ||
522 | offset = 0; | ||
523 | |||
524 | for (chunk = 0; chunk < chunks_in_group; chunk++) { | ||
525 | /* | ||
526 | * last chunk may be not an entire one. | ||
527 | */ | ||
528 | if ((offset + ffg->iff_chunksize) > max_bits) | ||
529 | num_clusters = max_bits - offset; | ||
530 | else | ||
531 | num_clusters = ffg->iff_chunksize; | ||
532 | |||
533 | chunk_free = 0; | ||
534 | for (cluster = 0; cluster < num_clusters; cluster++) { | ||
535 | used = ocfs2_test_bit(offset, | ||
536 | (unsigned long *)bg->bg_bitmap); | ||
537 | /* | ||
538 | * - chunk_free counts free clusters in #N chunk. | ||
539 | * - last_chunksize records the size(in) clusters | ||
540 | * for the last real free chunk being counted. | ||
541 | */ | ||
542 | if (!used) { | ||
543 | last_chunksize++; | ||
544 | chunk_free++; | ||
545 | } | ||
546 | |||
547 | if (used && last_chunksize) { | ||
548 | ocfs2_info_update_ffg(ffg, | ||
549 | last_chunksize); | ||
550 | last_chunksize = 0; | ||
551 | } | ||
552 | |||
553 | offset++; | ||
554 | } | ||
555 | |||
556 | if (chunk_free == ffg->iff_chunksize) | ||
557 | ffg->iff_ffs.ffs_free_chunks++; | ||
558 | } | ||
559 | |||
560 | /* | ||
561 | * need to update the info for last free chunk. | ||
562 | */ | ||
563 | if (last_chunksize) | ||
564 | ocfs2_info_update_ffg(ffg, last_chunksize); | ||
565 | |||
566 | } while (le64_to_cpu(bg->bg_next_group)); | ||
567 | |||
568 | bail: | ||
569 | brelse(bh); | ||
570 | |||
571 | return status; | ||
572 | } | ||
573 | |||
574 | int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb, | ||
575 | struct inode *gb_inode, u64 blkno, | ||
576 | struct ocfs2_info_freefrag *ffg) | ||
577 | { | ||
578 | u32 chunks_in_group; | ||
579 | int status = 0, unlock = 0, i; | ||
580 | |||
581 | struct buffer_head *bh = NULL; | ||
582 | struct ocfs2_chain_list *cl = NULL; | ||
583 | struct ocfs2_chain_rec *rec = NULL; | ||
584 | struct ocfs2_dinode *gb_dinode = NULL; | ||
585 | |||
586 | if (gb_inode) | ||
587 | mutex_lock(&gb_inode->i_mutex); | ||
588 | |||
589 | if (o2info_coherent(&ffg->iff_req)) { | ||
590 | status = ocfs2_inode_lock(gb_inode, &bh, 0); | ||
591 | if (status < 0) { | ||
592 | mlog_errno(status); | ||
593 | goto bail; | ||
594 | } | ||
595 | unlock = 1; | ||
596 | } else { | ||
597 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); | ||
598 | if (status < 0) { | ||
599 | mlog_errno(status); | ||
600 | goto bail; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | gb_dinode = (struct ocfs2_dinode *)bh->b_data; | ||
605 | cl = &(gb_dinode->id2.i_chain); | ||
606 | |||
607 | /* | ||
608 | * Chunksize(in) clusters from userspace should be | ||
609 | * less than clusters in a group. | ||
610 | */ | ||
611 | if (ffg->iff_chunksize > le16_to_cpu(cl->cl_cpg)) { | ||
612 | status = -EINVAL; | ||
613 | goto bail; | ||
614 | } | ||
615 | |||
616 | memset(&ffg->iff_ffs, 0, sizeof(struct ocfs2_info_freefrag_stats)); | ||
617 | |||
618 | ffg->iff_ffs.ffs_min = ~0U; | ||
619 | ffg->iff_ffs.ffs_clusters = | ||
620 | le32_to_cpu(gb_dinode->id1.bitmap1.i_total); | ||
621 | ffg->iff_ffs.ffs_free_clusters = ffg->iff_ffs.ffs_clusters - | ||
622 | le32_to_cpu(gb_dinode->id1.bitmap1.i_used); | ||
623 | |||
624 | chunks_in_group = le16_to_cpu(cl->cl_cpg) / ffg->iff_chunksize + 1; | ||
625 | |||
626 | for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { | ||
627 | rec = &(cl->cl_recs[i]); | ||
628 | status = ocfs2_info_freefrag_scan_chain(osb, gb_inode, | ||
629 | gb_dinode, | ||
630 | rec, ffg, | ||
631 | chunks_in_group); | ||
632 | if (status) | ||
633 | goto bail; | ||
634 | } | ||
635 | |||
636 | if (ffg->iff_ffs.ffs_free_chunks_real) | ||
637 | ffg->iff_ffs.ffs_avg = (ffg->iff_ffs.ffs_avg / | ||
638 | ffg->iff_ffs.ffs_free_chunks_real); | ||
639 | bail: | ||
640 | if (unlock) | ||
641 | ocfs2_inode_unlock(gb_inode, 0); | ||
642 | |||
643 | if (gb_inode) | ||
644 | mutex_unlock(&gb_inode->i_mutex); | ||
645 | |||
646 | if (gb_inode) | ||
647 | iput(gb_inode); | ||
648 | |||
649 | brelse(bh); | ||
650 | |||
651 | return status; | ||
652 | } | ||
653 | |||
654 | int ocfs2_info_handle_freefrag(struct inode *inode, | ||
655 | struct ocfs2_info_request __user *req) | ||
656 | { | ||
657 | u64 blkno = -1; | ||
658 | char namebuf[40]; | ||
659 | int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE; | ||
660 | |||
661 | struct ocfs2_info_freefrag *oiff; | ||
662 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
663 | struct inode *gb_inode = NULL; | ||
664 | |||
665 | oiff = kzalloc(sizeof(struct ocfs2_info_freefrag), GFP_KERNEL); | ||
666 | if (!oiff) { | ||
667 | status = -ENOMEM; | ||
668 | mlog_errno(status); | ||
669 | goto bail; | ||
670 | } | ||
671 | |||
672 | if (o2info_from_user(*oiff, req)) | ||
673 | goto bail; | ||
674 | /* | ||
675 | * chunksize from userspace should be power of 2. | ||
676 | */ | ||
677 | if ((oiff->iff_chunksize & (oiff->iff_chunksize - 1)) || | ||
678 | (!oiff->iff_chunksize)) { | ||
679 | status = -EINVAL; | ||
680 | goto bail; | ||
681 | } | ||
682 | |||
683 | if (o2info_coherent(&oiff->iff_req)) { | ||
684 | gb_inode = ocfs2_get_system_file_inode(osb, type, | ||
685 | OCFS2_INVALID_SLOT); | ||
686 | if (!gb_inode) { | ||
687 | mlog(ML_ERROR, "unable to get global_bitmap inode\n"); | ||
688 | status = -EIO; | ||
689 | goto bail; | ||
690 | } | ||
691 | } else { | ||
692 | ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, | ||
693 | OCFS2_INVALID_SLOT); | ||
694 | status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, | ||
695 | namebuf, | ||
696 | strlen(namebuf), | ||
697 | &blkno); | ||
698 | if (status < 0) { | ||
699 | status = -ENOENT; | ||
700 | goto bail; | ||
701 | } | ||
702 | } | ||
703 | |||
704 | status = ocfs2_info_freefrag_scan_bitmap(osb, gb_inode, blkno, oiff); | ||
705 | if (status < 0) | ||
706 | goto bail; | ||
707 | |||
708 | o2info_set_request_filled(&oiff->iff_req); | ||
709 | |||
710 | if (o2info_to_user(*oiff, req)) | ||
711 | goto bail; | ||
712 | |||
713 | status = 0; | ||
714 | bail: | ||
715 | if (status) | ||
716 | o2info_set_request_error(&oiff->iff_req, req); | ||
717 | |||
718 | kfree(oiff); | ||
317 | 719 | ||
318 | return status; | 720 | return status; |
319 | } | 721 | } |
@@ -327,7 +729,7 @@ int ocfs2_info_handle_unknown(struct inode *inode, | |||
327 | if (o2info_from_user(oir, req)) | 729 | if (o2info_from_user(oir, req)) |
328 | goto bail; | 730 | goto bail; |
329 | 731 | ||
330 | o2info_clear_request_filled(oir); | 732 | o2info_clear_request_filled(&oir); |
331 | 733 | ||
332 | if (o2info_to_user(oir, req)) | 734 | if (o2info_to_user(oir, req)) |
333 | goto bail; | 735 | goto bail; |
@@ -335,7 +737,7 @@ int ocfs2_info_handle_unknown(struct inode *inode, | |||
335 | status = 0; | 737 | status = 0; |
336 | bail: | 738 | bail: |
337 | if (status) | 739 | if (status) |
338 | o2info_set_request_error(oir, req); | 740 | o2info_set_request_error(&oir, req); |
339 | 741 | ||
340 | return status; | 742 | return status; |
341 | } | 743 | } |
@@ -389,6 +791,14 @@ int ocfs2_info_handle_request(struct inode *inode, | |||
389 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) | 791 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) |
390 | status = ocfs2_info_handle_journal_size(inode, req); | 792 | status = ocfs2_info_handle_journal_size(inode, req); |
391 | break; | 793 | break; |
794 | case OCFS2_INFO_FREEINODE: | ||
795 | if (oir.ir_size == sizeof(struct ocfs2_info_freeinode)) | ||
796 | status = ocfs2_info_handle_freeinode(inode, req); | ||
797 | break; | ||
798 | case OCFS2_INFO_FREEFRAG: | ||
799 | if (oir.ir_size == sizeof(struct ocfs2_info_freefrag)) | ||
800 | status = ocfs2_info_handle_freefrag(inode, req); | ||
801 | break; | ||
392 | default: | 802 | default: |
393 | status = ocfs2_info_handle_unknown(inode, req); | 803 | status = ocfs2_info_handle_unknown(inode, req); |
394 | break; | 804 | break; |
@@ -565,6 +975,8 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
565 | 975 | ||
566 | return 0; | 976 | return 0; |
567 | } | 977 | } |
978 | case OCFS2_IOC_MOVE_EXT: | ||
979 | return ocfs2_ioctl_move_extents(filp, (void __user *)arg); | ||
568 | default: | 980 | default: |
569 | return -ENOTTY; | 981 | return -ENOTTY; |
570 | } | 982 | } |
@@ -608,6 +1020,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
608 | return -EFAULT; | 1020 | return -EFAULT; |
609 | 1021 | ||
610 | return ocfs2_info_handle(inode, &info, 1); | 1022 | return ocfs2_info_handle(inode, &info, 1); |
1023 | case OCFS2_IOC_MOVE_EXT: | ||
1024 | break; | ||
611 | default: | 1025 | default: |
612 | return -ENOIOCTLCMD; | 1026 | return -ENOIOCTLCMD; |
613 | } | 1027 | } |
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c new file mode 100644 index 000000000000..4c5488468c14 --- /dev/null +++ b/fs/ocfs2/move_extents.c | |||
@@ -0,0 +1,1153 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * move_extents.c | ||
5 | * | ||
6 | * Copyright (C) 2011 Oracle. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public | ||
10 | * License version 2 as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * General Public License for more details. | ||
16 | */ | ||
17 | #include <linux/fs.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <linux/mount.h> | ||
20 | #include <linux/swap.h> | ||
21 | |||
22 | #include <cluster/masklog.h> | ||
23 | |||
24 | #include "ocfs2.h" | ||
25 | #include "ocfs2_ioctl.h" | ||
26 | |||
27 | #include "alloc.h" | ||
28 | #include "aops.h" | ||
29 | #include "dlmglue.h" | ||
30 | #include "extent_map.h" | ||
31 | #include "inode.h" | ||
32 | #include "journal.h" | ||
33 | #include "suballoc.h" | ||
34 | #include "uptodate.h" | ||
35 | #include "super.h" | ||
36 | #include "dir.h" | ||
37 | #include "buffer_head_io.h" | ||
38 | #include "sysfile.h" | ||
39 | #include "suballoc.h" | ||
40 | #include "refcounttree.h" | ||
41 | #include "move_extents.h" | ||
42 | |||
/*
 * State carried through one move/defrag extents operation.
 *
 * Grounded in the code below this definition: @inode is the file being
 * operated on, @file is handed to ocfs2_duplicate_clusters_by_page(), @et is
 * the extent tree being rewritten, @meta_ac and @dealloc are passed to
 * ocfs2_split_extent()/ocfs2_decrease_refcount(), @new_phys_cpos records the
 * physical cluster an extent was last moved to, @partial is read by
 * ocfs2_defrag_extent(), and @refcount_loc is the refcount tree location
 * given to ocfs2_lock_refcount_tree().
 *
 * NOTE(review): @auto_defrag, @credits, @clusters_moved, @range and @data_ac
 * are not used in the visible part of this file; presumably a defrag flag,
 * journal credits, a progress counter, the userspace request and the data
 * cluster reservation respectively -- confirm against the callers.
 */
43 | struct ocfs2_move_extents_context { | ||
44 | struct inode *inode; | ||
45 | struct file *file; | ||
46 | int auto_defrag; | ||
47 | int partial; | ||
48 | int credits; | ||
49 | u32 new_phys_cpos; | ||
50 | u32 clusters_moved; | ||
51 | u64 refcount_loc; | ||
52 | struct ocfs2_move_extents *range; | ||
53 | struct ocfs2_extent_tree et; | ||
54 | struct ocfs2_alloc_context *meta_ac; | ||
55 | struct ocfs2_alloc_context *data_ac; | ||
56 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
57 | }; | ||
58 | |||
59 | static int __ocfs2_move_extent(handle_t *handle, | ||
60 | struct ocfs2_move_extents_context *context, | ||
61 | u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos, | ||
62 | int ext_flags) | ||
63 | { | ||
64 | int ret = 0, index; | ||
65 | struct inode *inode = context->inode; | ||
66 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
67 | struct ocfs2_extent_rec *rec, replace_rec; | ||
68 | struct ocfs2_path *path = NULL; | ||
69 | struct ocfs2_extent_list *el; | ||
70 | u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); | ||
71 | u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); | ||
72 | |||
73 | ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, | ||
74 | p_cpos, new_p_cpos, len); | ||
75 | if (ret) { | ||
76 | mlog_errno(ret); | ||
77 | goto out; | ||
78 | } | ||
79 | |||
80 | memset(&replace_rec, 0, sizeof(replace_rec)); | ||
81 | replace_rec.e_cpos = cpu_to_le32(cpos); | ||
82 | replace_rec.e_leaf_clusters = cpu_to_le16(len); | ||
83 | replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, | ||
84 | new_p_cpos)); | ||
85 | |||
86 | path = ocfs2_new_path_from_et(&context->et); | ||
87 | if (!path) { | ||
88 | ret = -ENOMEM; | ||
89 | mlog_errno(ret); | ||
90 | goto out; | ||
91 | } | ||
92 | |||
93 | ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos); | ||
94 | if (ret) { | ||
95 | mlog_errno(ret); | ||
96 | goto out; | ||
97 | } | ||
98 | |||
99 | el = path_leaf_el(path); | ||
100 | |||
101 | index = ocfs2_search_extent_list(el, cpos); | ||
102 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
103 | ocfs2_error(inode->i_sb, | ||
104 | "Inode %llu has an extent at cpos %u which can no " | ||
105 | "longer be found.\n", | ||
106 | (unsigned long long)ino, cpos); | ||
107 | ret = -EROFS; | ||
108 | goto out; | ||
109 | } | ||
110 | |||
111 | rec = &el->l_recs[index]; | ||
112 | |||
113 | BUG_ON(ext_flags != rec->e_flags); | ||
114 | /* | ||
115 | * after moving/defraging to new location, the extent is not going | ||
116 | * to be refcounted anymore. | ||
117 | */ | ||
118 | replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED; | ||
119 | |||
120 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), | ||
121 | context->et.et_root_bh, | ||
122 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
123 | if (ret) { | ||
124 | mlog_errno(ret); | ||
125 | goto out; | ||
126 | } | ||
127 | |||
128 | ret = ocfs2_split_extent(handle, &context->et, path, index, | ||
129 | &replace_rec, context->meta_ac, | ||
130 | &context->dealloc); | ||
131 | if (ret) { | ||
132 | mlog_errno(ret); | ||
133 | goto out; | ||
134 | } | ||
135 | |||
136 | ocfs2_journal_dirty(handle, context->et.et_root_bh); | ||
137 | |||
138 | context->new_phys_cpos = new_p_cpos; | ||
139 | |||
140 | /* | ||
141 | * need I to append truncate log for old clusters? | ||
142 | */ | ||
143 | if (old_blkno) { | ||
144 | if (ext_flags & OCFS2_EXT_REFCOUNTED) | ||
145 | ret = ocfs2_decrease_refcount(inode, handle, | ||
146 | ocfs2_blocks_to_clusters(osb->sb, | ||
147 | old_blkno), | ||
148 | len, context->meta_ac, | ||
149 | &context->dealloc, 1); | ||
150 | else | ||
151 | ret = ocfs2_truncate_log_append(osb, handle, | ||
152 | old_blkno, len); | ||
153 | } | ||
154 | |||
155 | out: | ||
156 | return ret; | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * lock allocators, and reserving appropriate number of bits for | ||
161 | * meta blocks and data clusters. | ||
162 | * | ||
163 | * in some cases, we don't need to reserve clusters, just let data_ac | ||
164 | * be NULL. | ||
165 | */ | ||
166 | static int ocfs2_lock_allocators_move_extents(struct inode *inode, | ||
167 | struct ocfs2_extent_tree *et, | ||
168 | u32 clusters_to_move, | ||
169 | u32 extents_to_split, | ||
170 | struct ocfs2_alloc_context **meta_ac, | ||
171 | struct ocfs2_alloc_context **data_ac, | ||
172 | int extra_blocks, | ||
173 | int *credits) | ||
174 | { | ||
175 | int ret, num_free_extents; | ||
176 | unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move; | ||
177 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
178 | |||
179 | num_free_extents = ocfs2_num_free_extents(osb, et); | ||
180 | if (num_free_extents < 0) { | ||
181 | ret = num_free_extents; | ||
182 | mlog_errno(ret); | ||
183 | goto out; | ||
184 | } | ||
185 | |||
186 | if (!num_free_extents || | ||
187 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) | ||
188 | extra_blocks += ocfs2_extend_meta_needed(et->et_root_el); | ||
189 | |||
190 | ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, meta_ac); | ||
191 | if (ret) { | ||
192 | mlog_errno(ret); | ||
193 | goto out; | ||
194 | } | ||
195 | |||
196 | if (data_ac) { | ||
197 | ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); | ||
198 | if (ret) { | ||
199 | mlog_errno(ret); | ||
200 | goto out; | ||
201 | } | ||
202 | } | ||
203 | |||
204 | *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el, | ||
205 | clusters_to_move + 2); | ||
206 | |||
207 | mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n", | ||
208 | extra_blocks, clusters_to_move, *credits); | ||
209 | out: | ||
210 | if (ret) { | ||
211 | if (*meta_ac) { | ||
212 | ocfs2_free_alloc_context(*meta_ac); | ||
213 | *meta_ac = NULL; | ||
214 | } | ||
215 | } | ||
216 | |||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * Using one journal handle to guarantee the data consistency in case | ||
222 | * crash happens anywhere. | ||
223 | * | ||
224 | * XXX: defrag can end up with finishing partial extent as requested, | ||
225 | * due to not enough contiguous clusters can be found in allocator. | ||
226 | */ | ||
227 | static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | ||
228 | u32 cpos, u32 phys_cpos, u32 *len, int ext_flags) | ||
229 | { | ||
230 | int ret, credits = 0, extra_blocks = 0, partial = context->partial; | ||
231 | handle_t *handle; | ||
232 | struct inode *inode = context->inode; | ||
233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
234 | struct inode *tl_inode = osb->osb_tl_inode; | ||
235 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
236 | u32 new_phys_cpos, new_len; | ||
237 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
238 | |||
239 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { | ||
240 | |||
241 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & | ||
242 | OCFS2_HAS_REFCOUNT_FL)); | ||
243 | |||
244 | BUG_ON(!context->refcount_loc); | ||
245 | |||
246 | ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, | ||
247 | &ref_tree, NULL); | ||
248 | if (ret) { | ||
249 | mlog_errno(ret); | ||
250 | return ret; | ||
251 | } | ||
252 | |||
253 | ret = ocfs2_prepare_refcount_change_for_del(inode, | ||
254 | context->refcount_loc, | ||
255 | phys_blkno, | ||
256 | *len, | ||
257 | &credits, | ||
258 | &extra_blocks); | ||
259 | if (ret) { | ||
260 | mlog_errno(ret); | ||
261 | goto out; | ||
262 | } | ||
263 | } | ||
264 | |||
265 | ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, | ||
266 | &context->meta_ac, | ||
267 | &context->data_ac, | ||
268 | extra_blocks, &credits); | ||
269 | if (ret) { | ||
270 | mlog_errno(ret); | ||
271 | goto out; | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * should be using allocation reservation strategy there? | ||
276 | * | ||
277 | * if (context->data_ac) | ||
278 | * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; | ||
279 | */ | ||
280 | |||
281 | mutex_lock(&tl_inode->i_mutex); | ||
282 | |||
283 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
284 | ret = __ocfs2_flush_truncate_log(osb); | ||
285 | if (ret < 0) { | ||
286 | mlog_errno(ret); | ||
287 | goto out_unlock_mutex; | ||
288 | } | ||
289 | } | ||
290 | |||
291 | handle = ocfs2_start_trans(osb, credits); | ||
292 | if (IS_ERR(handle)) { | ||
293 | ret = PTR_ERR(handle); | ||
294 | mlog_errno(ret); | ||
295 | goto out_unlock_mutex; | ||
296 | } | ||
297 | |||
298 | ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len, | ||
299 | &new_phys_cpos, &new_len); | ||
300 | if (ret) { | ||
301 | mlog_errno(ret); | ||
302 | goto out_commit; | ||
303 | } | ||
304 | |||
305 | /* | ||
306 | * allowing partial extent moving is kind of 'pros and cons', it makes | ||
307 | * whole defragmentation less likely to fail, on the contrary, the bad | ||
308 | * thing is it may make the fs even more fragmented after moving, let | ||
309 | * userspace make a good decision here. | ||
310 | */ | ||
311 | if (new_len != *len) { | ||
312 | mlog(0, "len_claimed: %u, len: %u\n", new_len, *len); | ||
313 | if (!partial) { | ||
314 | context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; | ||
315 | ret = -ENOSPC; | ||
316 | goto out_commit; | ||
317 | } | ||
318 | } | ||
319 | |||
320 | mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, | ||
321 | phys_cpos, new_phys_cpos); | ||
322 | |||
323 | ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos, | ||
324 | new_phys_cpos, ext_flags); | ||
325 | if (ret) | ||
326 | mlog_errno(ret); | ||
327 | |||
328 | if (partial && (new_len != *len)) | ||
329 | *len = new_len; | ||
330 | |||
331 | /* | ||
332 | * Here we should write the new page out first if we are | ||
333 | * in write-back mode. | ||
334 | */ | ||
335 | ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len); | ||
336 | if (ret) | ||
337 | mlog_errno(ret); | ||
338 | |||
339 | out_commit: | ||
340 | ocfs2_commit_trans(osb, handle); | ||
341 | |||
342 | out_unlock_mutex: | ||
343 | mutex_unlock(&tl_inode->i_mutex); | ||
344 | |||
345 | if (context->data_ac) { | ||
346 | ocfs2_free_alloc_context(context->data_ac); | ||
347 | context->data_ac = NULL; | ||
348 | } | ||
349 | |||
350 | if (context->meta_ac) { | ||
351 | ocfs2_free_alloc_context(context->meta_ac); | ||
352 | context->meta_ac = NULL; | ||
353 | } | ||
354 | |||
355 | out: | ||
356 | if (ref_tree) | ||
357 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
358 | |||
359 | return ret; | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * find the victim alloc group, where #blkno fits. | ||
364 | */ | ||
365 | static int ocfs2_find_victim_alloc_group(struct inode *inode, | ||
366 | u64 vict_blkno, | ||
367 | int type, int slot, | ||
368 | int *vict_bit, | ||
369 | struct buffer_head **ret_bh) | ||
370 | { | ||
371 | int ret, i, blocks_per_unit = 1; | ||
372 | u64 blkno; | ||
373 | char namebuf[40]; | ||
374 | |||
375 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
376 | struct buffer_head *ac_bh = NULL, *gd_bh = NULL; | ||
377 | struct ocfs2_chain_list *cl; | ||
378 | struct ocfs2_chain_rec *rec; | ||
379 | struct ocfs2_dinode *ac_dinode; | ||
380 | struct ocfs2_group_desc *bg; | ||
381 | |||
382 | ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); | ||
383 | ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, | ||
384 | strlen(namebuf), &blkno); | ||
385 | if (ret) { | ||
386 | ret = -ENOENT; | ||
387 | goto out; | ||
388 | } | ||
389 | |||
390 | ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh); | ||
391 | if (ret) { | ||
392 | mlog_errno(ret); | ||
393 | goto out; | ||
394 | } | ||
395 | |||
396 | ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data; | ||
397 | cl = &(ac_dinode->id2.i_chain); | ||
398 | rec = &(cl->cl_recs[0]); | ||
399 | |||
400 | if (type == GLOBAL_BITMAP_SYSTEM_INODE) | ||
401 | blocks_per_unit <<= (osb->s_clustersize_bits - | ||
402 | inode->i_sb->s_blocksize_bits); | ||
403 | /* | ||
404 | * 'vict_blkno' was out of the valid range. | ||
405 | */ | ||
406 | if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || | ||
407 | (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) * | ||
408 | blocks_per_unit))) { | ||
409 | ret = -EINVAL; | ||
410 | goto out; | ||
411 | } | ||
412 | |||
413 | for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { | ||
414 | |||
415 | rec = &(cl->cl_recs[i]); | ||
416 | if (!rec) | ||
417 | continue; | ||
418 | |||
419 | bg = NULL; | ||
420 | |||
421 | do { | ||
422 | if (!bg) | ||
423 | blkno = le64_to_cpu(rec->c_blkno); | ||
424 | else | ||
425 | blkno = le64_to_cpu(bg->bg_next_group); | ||
426 | |||
427 | if (gd_bh) { | ||
428 | brelse(gd_bh); | ||
429 | gd_bh = NULL; | ||
430 | } | ||
431 | |||
432 | ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh); | ||
433 | if (ret) { | ||
434 | mlog_errno(ret); | ||
435 | goto out; | ||
436 | } | ||
437 | |||
438 | bg = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
439 | |||
440 | if (vict_blkno < (le64_to_cpu(bg->bg_blkno) + | ||
441 | le16_to_cpu(bg->bg_bits))) { | ||
442 | |||
443 | *ret_bh = gd_bh; | ||
444 | *vict_bit = (vict_blkno - blkno) / | ||
445 | blocks_per_unit; | ||
446 | mlog(0, "find the victim group: #%llu, " | ||
447 | "total_bits: %u, vict_bit: %u\n", | ||
448 | blkno, le16_to_cpu(bg->bg_bits), | ||
449 | *vict_bit); | ||
450 | goto out; | ||
451 | } | ||
452 | |||
453 | } while (le64_to_cpu(bg->bg_next_group)); | ||
454 | } | ||
455 | |||
456 | ret = -EINVAL; | ||
457 | out: | ||
458 | brelse(ac_bh); | ||
459 | |||
460 | /* | ||
461 | * caller has to release the gd_bh properly. | ||
462 | */ | ||
463 | return ret; | ||
464 | } | ||
465 | |||
466 | /* | ||
467 | * XXX: helper to validate and adjust moving goal. | ||
468 | */ | ||
469 | static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, | ||
470 | struct ocfs2_move_extents *range) | ||
471 | { | ||
472 | int ret, goal_bit = 0; | ||
473 | |||
474 | struct buffer_head *gd_bh = NULL; | ||
475 | struct ocfs2_group_desc *bg; | ||
476 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
477 | int c_to_b = 1 << (osb->s_clustersize_bits - | ||
478 | inode->i_sb->s_blocksize_bits); | ||
479 | |||
480 | /* | ||
481 | * validate goal sits within global_bitmap, and return the victim | ||
482 | * group desc | ||
483 | */ | ||
484 | ret = ocfs2_find_victim_alloc_group(inode, range->me_goal, | ||
485 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
486 | OCFS2_INVALID_SLOT, | ||
487 | &goal_bit, &gd_bh); | ||
488 | if (ret) | ||
489 | goto out; | ||
490 | |||
491 | bg = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
492 | |||
493 | /* | ||
494 | * make goal become cluster aligned. | ||
495 | */ | ||
496 | if (range->me_goal % c_to_b) | ||
497 | range->me_goal = range->me_goal / c_to_b * c_to_b; | ||
498 | |||
499 | /* | ||
500 | * moving goal is not allowd to start with a group desc blok(#0 blk) | ||
501 | * let's compromise to the latter cluster. | ||
502 | */ | ||
503 | if (range->me_goal == le64_to_cpu(bg->bg_blkno)) | ||
504 | range->me_goal += c_to_b; | ||
505 | |||
506 | /* | ||
507 | * movement is not gonna cross two groups. | ||
508 | */ | ||
509 | if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize < | ||
510 | range->me_len) { | ||
511 | ret = -EINVAL; | ||
512 | goto out; | ||
513 | } | ||
514 | /* | ||
515 | * more exact validations/adjustments will be performed later during | ||
516 | * moving operation for each extent range. | ||
517 | */ | ||
518 | mlog(0, "extents get ready to be moved to #%llu block\n", | ||
519 | range->me_goal); | ||
520 | |||
521 | out: | ||
522 | brelse(gd_bh); | ||
523 | |||
524 | return ret; | ||
525 | } | ||
526 | |||
527 | static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, | ||
528 | int *goal_bit, u32 move_len, u32 max_hop, | ||
529 | u32 *phys_cpos) | ||
530 | { | ||
531 | int i, used, last_free_bits = 0, base_bit = *goal_bit; | ||
532 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; | ||
533 | u32 base_cpos = ocfs2_blocks_to_clusters(inode->i_sb, | ||
534 | le64_to_cpu(gd->bg_blkno)); | ||
535 | |||
536 | for (i = base_bit; i < le16_to_cpu(gd->bg_bits); i++) { | ||
537 | |||
538 | used = ocfs2_test_bit(i, (unsigned long *)gd->bg_bitmap); | ||
539 | if (used) { | ||
540 | /* | ||
541 | * we even tried searching the free chunk by jumping | ||
542 | * a 'max_hop' distance, but still failed. | ||
543 | */ | ||
544 | if ((i - base_bit) > max_hop) { | ||
545 | *phys_cpos = 0; | ||
546 | break; | ||
547 | } | ||
548 | |||
549 | if (last_free_bits) | ||
550 | last_free_bits = 0; | ||
551 | |||
552 | continue; | ||
553 | } else | ||
554 | last_free_bits++; | ||
555 | |||
556 | if (last_free_bits == move_len) { | ||
557 | *goal_bit = i; | ||
558 | *phys_cpos = base_cpos + i; | ||
559 | break; | ||
560 | } | ||
561 | } | ||
562 | |||
563 | mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos); | ||
564 | } | ||
565 | |||
566 | static int ocfs2_alloc_dinode_update_counts(struct inode *inode, | ||
567 | handle_t *handle, | ||
568 | struct buffer_head *di_bh, | ||
569 | u32 num_bits, | ||
570 | u16 chain) | ||
571 | { | ||
572 | int ret; | ||
573 | u32 tmp_used; | ||
574 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
575 | struct ocfs2_chain_list *cl = | ||
576 | (struct ocfs2_chain_list *) &di->id2.i_chain; | ||
577 | |||
578 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
579 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
580 | if (ret < 0) { | ||
581 | mlog_errno(ret); | ||
582 | goto out; | ||
583 | } | ||
584 | |||
585 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); | ||
586 | di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); | ||
587 | le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); | ||
588 | ocfs2_journal_dirty(handle, di_bh); | ||
589 | |||
590 | out: | ||
591 | return ret; | ||
592 | } | ||
593 | |||
594 | static inline int ocfs2_block_group_set_bits(handle_t *handle, | ||
595 | struct inode *alloc_inode, | ||
596 | struct ocfs2_group_desc *bg, | ||
597 | struct buffer_head *group_bh, | ||
598 | unsigned int bit_off, | ||
599 | unsigned int num_bits) | ||
600 | { | ||
601 | int status; | ||
602 | void *bitmap = bg->bg_bitmap; | ||
603 | int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; | ||
604 | |||
605 | /* All callers get the descriptor via | ||
606 | * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ | ||
607 | BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); | ||
608 | BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); | ||
609 | |||
610 | mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, | ||
611 | num_bits); | ||
612 | |||
613 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | ||
614 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | ||
615 | |||
616 | status = ocfs2_journal_access_gd(handle, | ||
617 | INODE_CACHE(alloc_inode), | ||
618 | group_bh, | ||
619 | journal_type); | ||
620 | if (status < 0) { | ||
621 | mlog_errno(status); | ||
622 | goto bail; | ||
623 | } | ||
624 | |||
625 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); | ||
626 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
627 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
628 | " count %u but claims %u are freed. num_bits %d", | ||
629 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
630 | le16_to_cpu(bg->bg_bits), | ||
631 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
632 | return -EROFS; | ||
633 | } | ||
634 | while (num_bits--) | ||
635 | ocfs2_set_bit(bit_off++, bitmap); | ||
636 | |||
637 | ocfs2_journal_dirty(handle, group_bh); | ||
638 | |||
639 | bail: | ||
640 | return status; | ||
641 | } | ||
642 | |||
643 | static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, | ||
644 | u32 cpos, u32 phys_cpos, u32 *new_phys_cpos, | ||
645 | u32 len, int ext_flags) | ||
646 | { | ||
647 | int ret, credits = 0, extra_blocks = 0, goal_bit = 0; | ||
648 | handle_t *handle; | ||
649 | struct inode *inode = context->inode; | ||
650 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
651 | struct inode *tl_inode = osb->osb_tl_inode; | ||
652 | struct inode *gb_inode = NULL; | ||
653 | struct buffer_head *gb_bh = NULL; | ||
654 | struct buffer_head *gd_bh = NULL; | ||
655 | struct ocfs2_group_desc *gd; | ||
656 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
657 | u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb, | ||
658 | context->range->me_threshold); | ||
659 | u64 phys_blkno, new_phys_blkno; | ||
660 | |||
661 | phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
662 | |||
663 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { | ||
664 | |||
665 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & | ||
666 | OCFS2_HAS_REFCOUNT_FL)); | ||
667 | |||
668 | BUG_ON(!context->refcount_loc); | ||
669 | |||
670 | ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, | ||
671 | &ref_tree, NULL); | ||
672 | if (ret) { | ||
673 | mlog_errno(ret); | ||
674 | return ret; | ||
675 | } | ||
676 | |||
677 | ret = ocfs2_prepare_refcount_change_for_del(inode, | ||
678 | context->refcount_loc, | ||
679 | phys_blkno, | ||
680 | len, | ||
681 | &credits, | ||
682 | &extra_blocks); | ||
683 | if (ret) { | ||
684 | mlog_errno(ret); | ||
685 | goto out; | ||
686 | } | ||
687 | } | ||
688 | |||
689 | ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, | ||
690 | &context->meta_ac, | ||
691 | NULL, extra_blocks, &credits); | ||
692 | if (ret) { | ||
693 | mlog_errno(ret); | ||
694 | goto out; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * need to count 2 extra credits for global_bitmap inode and | ||
699 | * group descriptor. | ||
700 | */ | ||
701 | credits += OCFS2_INODE_UPDATE_CREDITS + 1; | ||
702 | |||
703 | /* | ||
704 | * ocfs2_move_extent() didn't reserve any clusters in lock_allocators() | ||
705 | * logic, while we still need to lock the global_bitmap. | ||
706 | */ | ||
707 | gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, | ||
708 | OCFS2_INVALID_SLOT); | ||
709 | if (!gb_inode) { | ||
710 | mlog(ML_ERROR, "unable to get global_bitmap inode\n"); | ||
711 | ret = -EIO; | ||
712 | goto out; | ||
713 | } | ||
714 | |||
715 | mutex_lock(&gb_inode->i_mutex); | ||
716 | |||
717 | ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); | ||
718 | if (ret) { | ||
719 | mlog_errno(ret); | ||
720 | goto out_unlock_gb_mutex; | ||
721 | } | ||
722 | |||
723 | mutex_lock(&tl_inode->i_mutex); | ||
724 | |||
725 | handle = ocfs2_start_trans(osb, credits); | ||
726 | if (IS_ERR(handle)) { | ||
727 | ret = PTR_ERR(handle); | ||
728 | mlog_errno(ret); | ||
729 | goto out_unlock_tl_inode; | ||
730 | } | ||
731 | |||
732 | new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); | ||
733 | ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno, | ||
734 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
735 | OCFS2_INVALID_SLOT, | ||
736 | &goal_bit, &gd_bh); | ||
737 | if (ret) { | ||
738 | mlog_errno(ret); | ||
739 | goto out_commit; | ||
740 | } | ||
741 | |||
742 | /* | ||
743 | * probe the victim cluster group to find a proper | ||
744 | * region to fit wanted movement, it even will perfrom | ||
745 | * a best-effort attempt by compromising to a threshold | ||
746 | * around the goal. | ||
747 | */ | ||
748 | ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop, | ||
749 | new_phys_cpos); | ||
750 | if (!new_phys_cpos) { | ||
751 | ret = -ENOSPC; | ||
752 | goto out_commit; | ||
753 | } | ||
754 | |||
755 | ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos, | ||
756 | *new_phys_cpos, ext_flags); | ||
757 | if (ret) { | ||
758 | mlog_errno(ret); | ||
759 | goto out_commit; | ||
760 | } | ||
761 | |||
762 | gd = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
763 | ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len, | ||
764 | le16_to_cpu(gd->bg_chain)); | ||
765 | if (ret) { | ||
766 | mlog_errno(ret); | ||
767 | goto out_commit; | ||
768 | } | ||
769 | |||
770 | ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, | ||
771 | goal_bit, len); | ||
772 | if (ret) | ||
773 | mlog_errno(ret); | ||
774 | |||
775 | /* | ||
776 | * Here we should write the new page out first if we are | ||
777 | * in write-back mode. | ||
778 | */ | ||
779 | ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len); | ||
780 | if (ret) | ||
781 | mlog_errno(ret); | ||
782 | |||
783 | out_commit: | ||
784 | ocfs2_commit_trans(osb, handle); | ||
785 | brelse(gd_bh); | ||
786 | |||
787 | out_unlock_tl_inode: | ||
788 | mutex_unlock(&tl_inode->i_mutex); | ||
789 | |||
790 | ocfs2_inode_unlock(gb_inode, 1); | ||
791 | out_unlock_gb_mutex: | ||
792 | mutex_unlock(&gb_inode->i_mutex); | ||
793 | brelse(gb_bh); | ||
794 | iput(gb_inode); | ||
795 | |||
796 | out: | ||
797 | if (context->meta_ac) { | ||
798 | ocfs2_free_alloc_context(context->meta_ac); | ||
799 | context->meta_ac = NULL; | ||
800 | } | ||
801 | |||
802 | if (ref_tree) | ||
803 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
804 | |||
805 | return ret; | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * Helper to calculate the defraging length in one run according to threshold. | ||
810 | */ | ||
811 | static void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged, | ||
812 | u32 threshold, int *skip) | ||
813 | { | ||
814 | if ((*alloc_size + *len_defraged) < threshold) { | ||
815 | /* | ||
816 | * proceed defragmentation until we meet the thresh | ||
817 | */ | ||
818 | *len_defraged += *alloc_size; | ||
819 | } else if (*len_defraged == 0) { | ||
820 | /* | ||
821 | * XXX: skip a large extent. | ||
822 | */ | ||
823 | *skip = 1; | ||
824 | } else { | ||
825 | /* | ||
826 | * split this extent to coalesce with former pieces as | ||
827 | * to reach the threshold. | ||
828 | * | ||
829 | * we're done here with one cycle of defragmentation | ||
830 | * in a size of 'thresh', resetting 'len_defraged' | ||
831 | * forces a new defragmentation. | ||
832 | */ | ||
833 | *alloc_size = threshold - *len_defraged; | ||
834 | *len_defraged = 0; | ||
835 | } | ||
836 | } | ||
837 | |||
838 | static int __ocfs2_move_extents_range(struct buffer_head *di_bh, | ||
839 | struct ocfs2_move_extents_context *context) | ||
840 | { | ||
841 | int ret = 0, flags, do_defrag, skip = 0; | ||
842 | u32 cpos, phys_cpos, move_start, len_to_move, alloc_size; | ||
843 | u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0; | ||
844 | |||
845 | struct inode *inode = context->inode; | ||
846 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
847 | struct ocfs2_move_extents *range = context->range; | ||
848 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
849 | |||
850 | if ((inode->i_size == 0) || (range->me_len == 0)) | ||
851 | return 0; | ||
852 | |||
853 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
854 | return 0; | ||
855 | |||
856 | context->refcount_loc = le64_to_cpu(di->i_refcount_loc); | ||
857 | |||
858 | ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh); | ||
859 | ocfs2_init_dealloc_ctxt(&context->dealloc); | ||
860 | |||
861 | /* | ||
862 | * TO-DO XXX: | ||
863 | * | ||
864 | * - xattr extents. | ||
865 | */ | ||
866 | |||
867 | do_defrag = context->auto_defrag; | ||
868 | |||
869 | /* | ||
870 | * extents moving happens in unit of clusters, for the sake | ||
871 | * of simplicity, we may ignore two clusters where 'byte_start' | ||
872 | * and 'byte_start + len' were within. | ||
873 | */ | ||
874 | move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start); | ||
875 | len_to_move = (range->me_start + range->me_len) >> | ||
876 | osb->s_clustersize_bits; | ||
877 | if (len_to_move >= move_start) | ||
878 | len_to_move -= move_start; | ||
879 | else | ||
880 | len_to_move = 0; | ||
881 | |||
882 | if (do_defrag) { | ||
883 | defrag_thresh = range->me_threshold >> osb->s_clustersize_bits; | ||
884 | if (defrag_thresh <= 1) | ||
885 | goto done; | ||
886 | } else | ||
887 | new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, | ||
888 | range->me_goal); | ||
889 | |||
890 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, " | ||
891 | "thresh: %u\n", | ||
892 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
893 | (unsigned long long)range->me_start, | ||
894 | (unsigned long long)range->me_len, | ||
895 | move_start, len_to_move, defrag_thresh); | ||
896 | |||
897 | cpos = move_start; | ||
898 | while (len_to_move) { | ||
899 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size, | ||
900 | &flags); | ||
901 | if (ret) { | ||
902 | mlog_errno(ret); | ||
903 | goto out; | ||
904 | } | ||
905 | |||
906 | if (alloc_size > len_to_move) | ||
907 | alloc_size = len_to_move; | ||
908 | |||
909 | /* | ||
910 | * XXX: how to deal with a hole: | ||
911 | * | ||
912 | * - skip the hole of course | ||
913 | * - force a new defragmentation | ||
914 | */ | ||
915 | if (!phys_cpos) { | ||
916 | if (do_defrag) | ||
917 | len_defraged = 0; | ||
918 | |||
919 | goto next; | ||
920 | } | ||
921 | |||
922 | if (do_defrag) { | ||
923 | ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged, | ||
924 | defrag_thresh, &skip); | ||
925 | /* | ||
926 | * skip large extents | ||
927 | */ | ||
928 | if (skip) { | ||
929 | skip = 0; | ||
930 | goto next; | ||
931 | } | ||
932 | |||
933 | mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, " | ||
934 | "alloc_size: %u, len_defraged: %u\n", | ||
935 | cpos, phys_cpos, alloc_size, len_defraged); | ||
936 | |||
937 | ret = ocfs2_defrag_extent(context, cpos, phys_cpos, | ||
938 | &alloc_size, flags); | ||
939 | } else { | ||
940 | ret = ocfs2_move_extent(context, cpos, phys_cpos, | ||
941 | &new_phys_cpos, alloc_size, | ||
942 | flags); | ||
943 | |||
944 | new_phys_cpos += alloc_size; | ||
945 | } | ||
946 | |||
947 | if (ret < 0) { | ||
948 | mlog_errno(ret); | ||
949 | goto out; | ||
950 | } | ||
951 | |||
952 | context->clusters_moved += alloc_size; | ||
953 | next: | ||
954 | cpos += alloc_size; | ||
955 | len_to_move -= alloc_size; | ||
956 | } | ||
957 | |||
958 | done: | ||
959 | range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE; | ||
960 | |||
961 | out: | ||
962 | range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb, | ||
963 | context->clusters_moved); | ||
964 | range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb, | ||
965 | context->new_phys_cpos); | ||
966 | |||
967 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
968 | ocfs2_run_deallocs(osb, &context->dealloc); | ||
969 | |||
970 | return ret; | ||
971 | } | ||
972 | |||
973 | static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) | ||
974 | { | ||
975 | int status; | ||
976 | handle_t *handle; | ||
977 | struct inode *inode = context->inode; | ||
978 | struct ocfs2_dinode *di; | ||
979 | struct buffer_head *di_bh = NULL; | ||
980 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
981 | |||
982 | if (!inode) | ||
983 | return -ENOENT; | ||
984 | |||
985 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
986 | return -EROFS; | ||
987 | |||
988 | mutex_lock(&inode->i_mutex); | ||
989 | |||
990 | /* | ||
991 | * This prevents concurrent writes from other nodes | ||
992 | */ | ||
993 | status = ocfs2_rw_lock(inode, 1); | ||
994 | if (status) { | ||
995 | mlog_errno(status); | ||
996 | goto out; | ||
997 | } | ||
998 | |||
999 | status = ocfs2_inode_lock(inode, &di_bh, 1); | ||
1000 | if (status) { | ||
1001 | mlog_errno(status); | ||
1002 | goto out_rw_unlock; | ||
1003 | } | ||
1004 | |||
1005 | /* | ||
1006 | * rememer ip_xattr_sem also needs to be held if necessary | ||
1007 | */ | ||
1008 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
1009 | |||
1010 | status = __ocfs2_move_extents_range(di_bh, context); | ||
1011 | |||
1012 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
1013 | if (status) { | ||
1014 | mlog_errno(status); | ||
1015 | goto out_inode_unlock; | ||
1016 | } | ||
1017 | |||
1018 | /* | ||
1019 | * We update ctime for these changes | ||
1020 | */ | ||
1021 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
1022 | if (IS_ERR(handle)) { | ||
1023 | status = PTR_ERR(handle); | ||
1024 | mlog_errno(status); | ||
1025 | goto out_inode_unlock; | ||
1026 | } | ||
1027 | |||
1028 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
1029 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1030 | if (status) { | ||
1031 | mlog_errno(status); | ||
1032 | goto out_commit; | ||
1033 | } | ||
1034 | |||
1035 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1036 | inode->i_ctime = CURRENT_TIME; | ||
1037 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
1038 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
1039 | |||
1040 | ocfs2_journal_dirty(handle, di_bh); | ||
1041 | |||
1042 | out_commit: | ||
1043 | ocfs2_commit_trans(osb, handle); | ||
1044 | |||
1045 | out_inode_unlock: | ||
1046 | brelse(di_bh); | ||
1047 | ocfs2_inode_unlock(inode, 1); | ||
1048 | out_rw_unlock: | ||
1049 | ocfs2_rw_unlock(inode, 1); | ||
1050 | out: | ||
1051 | mutex_unlock(&inode->i_mutex); | ||
1052 | |||
1053 | return status; | ||
1054 | } | ||
1055 | |||
1056 | int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) | ||
1057 | { | ||
1058 | int status; | ||
1059 | |||
1060 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
1061 | struct ocfs2_move_extents range; | ||
1062 | struct ocfs2_move_extents_context *context = NULL; | ||
1063 | |||
1064 | status = mnt_want_write(filp->f_path.mnt); | ||
1065 | if (status) | ||
1066 | return status; | ||
1067 | |||
1068 | if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) | ||
1069 | goto out; | ||
1070 | |||
1071 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | ||
1072 | status = -EPERM; | ||
1073 | goto out; | ||
1074 | } | ||
1075 | |||
1076 | context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS); | ||
1077 | if (!context) { | ||
1078 | status = -ENOMEM; | ||
1079 | mlog_errno(status); | ||
1080 | goto out; | ||
1081 | } | ||
1082 | |||
1083 | context->inode = inode; | ||
1084 | context->file = filp; | ||
1085 | |||
1086 | if (argp) { | ||
1087 | if (copy_from_user(&range, (struct ocfs2_move_extents *)argp, | ||
1088 | sizeof(range))) { | ||
1089 | status = -EFAULT; | ||
1090 | goto out; | ||
1091 | } | ||
1092 | } else { | ||
1093 | status = -EINVAL; | ||
1094 | goto out; | ||
1095 | } | ||
1096 | |||
1097 | if (range.me_start > i_size_read(inode)) | ||
1098 | goto out; | ||
1099 | |||
1100 | if (range.me_start + range.me_len > i_size_read(inode)) | ||
1101 | range.me_len = i_size_read(inode) - range.me_start; | ||
1102 | |||
1103 | context->range = ⦥ | ||
1104 | |||
1105 | if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { | ||
1106 | context->auto_defrag = 1; | ||
1107 | /* | ||
1108 | * ok, the default theshold for the defragmentation | ||
1109 | * is 1M, since our maximum clustersize was 1M also. | ||
1110 | * any thought? | ||
1111 | */ | ||
1112 | if (!range.me_threshold) | ||
1113 | range.me_threshold = 1024 * 1024; | ||
1114 | |||
1115 | if (range.me_threshold > i_size_read(inode)) | ||
1116 | range.me_threshold = i_size_read(inode); | ||
1117 | |||
1118 | if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) | ||
1119 | context->partial = 1; | ||
1120 | } else { | ||
1121 | /* | ||
1122 | * first best-effort attempt to validate and adjust the goal | ||
1123 | * (physical address in block), while it can't guarantee later | ||
1124 | * operation can succeed all the time since global_bitmap may | ||
1125 | * change a bit over time. | ||
1126 | */ | ||
1127 | |||
1128 | status = ocfs2_validate_and_adjust_move_goal(inode, &range); | ||
1129 | if (status) | ||
1130 | goto out; | ||
1131 | } | ||
1132 | |||
1133 | status = ocfs2_move_extents(context); | ||
1134 | if (status) | ||
1135 | mlog_errno(status); | ||
1136 | out: | ||
1137 | /* | ||
1138 | * movement/defragmentation may end up being partially completed, | ||
1139 | * that's the reason why we need to return userspace the finished | ||
1140 | * length and new_offset even if failure happens somewhere. | ||
1141 | */ | ||
1142 | if (argp) { | ||
1143 | if (copy_to_user((struct ocfs2_move_extents *)argp, &range, | ||
1144 | sizeof(range))) | ||
1145 | status = -EFAULT; | ||
1146 | } | ||
1147 | |||
1148 | kfree(context); | ||
1149 | |||
1150 | mnt_drop_write(filp->f_path.mnt); | ||
1151 | |||
1152 | return status; | ||
1153 | } | ||
diff --git a/fs/ocfs2/move_extents.h b/fs/ocfs2/move_extents.h new file mode 100644 index 000000000000..4e143e811441 --- /dev/null +++ b/fs/ocfs2/move_extents.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * move_extents.h | ||
5 | * | ||
6 | * Copyright (C) 2011 Oracle. All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public | ||
10 | * License version 2 as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * General Public License for more details. | ||
16 | */ | ||
17 | #ifndef OCFS2_MOVE_EXTENTS_H | ||
18 | #define OCFS2_MOVE_EXTENTS_H | ||
19 | |||
20 | int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp); | ||
21 | |||
22 | #endif /* OCFS2_MOVE_EXTENTS_H */ | ||
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index b46f39bf7438..5b27ff1fa577 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
@@ -142,6 +142,38 @@ struct ocfs2_info_journal_size { | |||
142 | __u64 ij_journal_size; | 142 | __u64 ij_journal_size; |
143 | }; | 143 | }; |
144 | 144 | ||
145 | struct ocfs2_info_freeinode { | ||
146 | struct ocfs2_info_request ifi_req; | ||
147 | struct ocfs2_info_local_freeinode { | ||
148 | __u64 lfi_total; | ||
149 | __u64 lfi_free; | ||
150 | } ifi_stat[OCFS2_MAX_SLOTS]; | ||
151 | __u32 ifi_slotnum; /* out */ | ||
152 | __u32 ifi_pad; | ||
153 | }; | ||
154 | |||
155 | #define OCFS2_INFO_MAX_HIST (32) | ||
156 | |||
157 | struct ocfs2_info_freefrag { | ||
158 | struct ocfs2_info_request iff_req; | ||
159 | struct ocfs2_info_freefrag_stats { /* (out) */ | ||
160 | struct ocfs2_info_free_chunk_list { | ||
161 | __u32 fc_chunks[OCFS2_INFO_MAX_HIST]; | ||
162 | __u32 fc_clusters[OCFS2_INFO_MAX_HIST]; | ||
163 | } ffs_fc_hist; | ||
164 | __u32 ffs_clusters; | ||
165 | __u32 ffs_free_clusters; | ||
166 | __u32 ffs_free_chunks; | ||
167 | __u32 ffs_free_chunks_real; | ||
168 | __u32 ffs_min; /* Minimum free chunksize in clusters */ | ||
169 | __u32 ffs_max; | ||
170 | __u32 ffs_avg; | ||
171 | __u32 ffs_pad; | ||
172 | } iff_ffs; | ||
173 | __u32 iff_chunksize; /* chunksize in clusters(in) */ | ||
174 | __u32 iff_pad; | ||
175 | }; | ||
176 | |||
145 | /* Codes for ocfs2_info_request */ | 177 | /* Codes for ocfs2_info_request */ |
146 | enum ocfs2_info_type { | 178 | enum ocfs2_info_type { |
147 | OCFS2_INFO_CLUSTERSIZE = 1, | 179 | OCFS2_INFO_CLUSTERSIZE = 1, |
@@ -151,6 +183,8 @@ enum ocfs2_info_type { | |||
151 | OCFS2_INFO_UUID, | 183 | OCFS2_INFO_UUID, |
152 | OCFS2_INFO_FS_FEATURES, | 184 | OCFS2_INFO_FS_FEATURES, |
153 | OCFS2_INFO_JOURNAL_SIZE, | 185 | OCFS2_INFO_JOURNAL_SIZE, |
186 | OCFS2_INFO_FREEINODE, | ||
187 | OCFS2_INFO_FREEFRAG, | ||
154 | OCFS2_INFO_NUM_TYPES | 188 | OCFS2_INFO_NUM_TYPES |
155 | }; | 189 | }; |
156 | 190 | ||
@@ -171,4 +205,38 @@ enum ocfs2_info_type { | |||
171 | 205 | ||
172 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) | 206 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) |
173 | 207 | ||
208 | struct ocfs2_move_extents { | ||
209 | /* All values are in bytes unless noted otherwise */ | ||
210 | /* in */ | ||
211 | __u64 me_start; /* Virtual start in the file to move */ | ||
212 | __u64 me_len; /* Length of the extents to be moved */ | ||
213 | __u64 me_goal; /* Physical offset of the goal, | ||
214 | in block units */ | ||
215 | __u64 me_threshold; /* Maximum distance from goal or threshold | ||
216 | for auto defragmentation */ | ||
217 | __u64 me_flags; /* Flags for the operation: | ||
218 | * - auto defragmentation. | ||
219 | * - refcount,xattr cases. | ||
220 | */ | ||
221 | /* out */ | ||
222 | __u64 me_moved_len; /* Moved/defragmented length */ | ||
223 | __u64 me_new_offset; /* Resulting physical location */ | ||
224 | __u32 me_reserved[2]; /* Reserved for future use */ | ||
225 | }; | ||
226 | |||
227 | #define OCFS2_MOVE_EXT_FL_AUTO_DEFRAG (0x00000001) /* Kernel manages to | ||
228 | claim new clusters | ||
229 | as the goal place | ||
230 | for extents moving */ | ||
231 | #define OCFS2_MOVE_EXT_FL_PART_DEFRAG (0x00000002) /* Allow partial extent | ||
232 | moving, is to make | ||
233 | movement less likely | ||
234 | to fail, may make fs | ||
235 | even more fragmented */ | ||
236 | #define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmentation | ||
237 | completely gets done. | ||
238 | */ | ||
239 | |||
240 | #define OCFS2_IOC_MOVE_EXT _IOW('o', 6, struct ocfs2_move_extents) | ||
241 | |||
174 | #endif /* OCFS2_IOCTL_H */ | 242 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3c7606cff1ab..ebfd3825f12a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -66,7 +66,7 @@ struct ocfs2_cow_context { | |||
66 | u32 *num_clusters, | 66 | u32 *num_clusters, |
67 | unsigned int *extent_flags); | 67 | unsigned int *extent_flags); |
68 | int (*cow_duplicate_clusters)(handle_t *handle, | 68 | int (*cow_duplicate_clusters)(handle_t *handle, |
69 | struct ocfs2_cow_context *context, | 69 | struct file *file, |
70 | u32 cpos, u32 old_cluster, | 70 | u32 cpos, u32 old_cluster, |
71 | u32 new_cluster, u32 new_len); | 71 | u32 new_cluster, u32 new_len); |
72 | }; | 72 | }; |
@@ -2921,20 +2921,21 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) | |||
2921 | return 0; | 2921 | return 0; |
2922 | } | 2922 | } |
2923 | 2923 | ||
2924 | static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | 2924 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, |
2925 | struct ocfs2_cow_context *context, | 2925 | struct file *file, |
2926 | u32 cpos, u32 old_cluster, | 2926 | u32 cpos, u32 old_cluster, |
2927 | u32 new_cluster, u32 new_len) | 2927 | u32 new_cluster, u32 new_len) |
2928 | { | 2928 | { |
2929 | int ret = 0, partial; | 2929 | int ret = 0, partial; |
2930 | struct ocfs2_caching_info *ci = context->data_et.et_ci; | 2930 | struct inode *inode = file->f_path.dentry->d_inode; |
2931 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | ||
2931 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | 2932 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); |
2932 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 2933 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
2933 | struct page *page; | 2934 | struct page *page; |
2934 | pgoff_t page_index; | 2935 | pgoff_t page_index; |
2935 | unsigned int from, to, readahead_pages; | 2936 | unsigned int from, to, readahead_pages; |
2936 | loff_t offset, end, map_end; | 2937 | loff_t offset, end, map_end; |
2937 | struct address_space *mapping = context->inode->i_mapping; | 2938 | struct address_space *mapping = inode->i_mapping; |
2938 | 2939 | ||
2939 | trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, | 2940 | trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, |
2940 | new_cluster, new_len); | 2941 | new_cluster, new_len); |
@@ -2948,8 +2949,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2948 | * We only duplicate pages until we reach the page that contains i_size - 1. | 2949 | * We only duplicate pages until we reach the page that contains i_size - 1. |
2949 | * So trim 'end' to i_size. | 2950 | * So trim 'end' to i_size. |
2950 | */ | 2951 | */ |
2951 | if (end > i_size_read(context->inode)) | 2952 | if (end > i_size_read(inode)) |
2952 | end = i_size_read(context->inode); | 2953 | end = i_size_read(inode); |
2953 | 2954 | ||
2954 | while (offset < end) { | 2955 | while (offset < end) { |
2955 | page_index = offset >> PAGE_CACHE_SHIFT; | 2956 | page_index = offset >> PAGE_CACHE_SHIFT; |
@@ -2972,10 +2973,9 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2972 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | 2973 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) |
2973 | BUG_ON(PageDirty(page)); | 2974 | BUG_ON(PageDirty(page)); |
2974 | 2975 | ||
2975 | if (PageReadahead(page) && context->file) { | 2976 | if (PageReadahead(page)) { |
2976 | page_cache_async_readahead(mapping, | 2977 | page_cache_async_readahead(mapping, |
2977 | &context->file->f_ra, | 2978 | &file->f_ra, file, |
2978 | context->file, | ||
2979 | page, page_index, | 2979 | page, page_index, |
2980 | readahead_pages); | 2980 | readahead_pages); |
2981 | } | 2981 | } |
@@ -2999,8 +2999,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2999 | } | 2999 | } |
3000 | } | 3000 | } |
3001 | 3001 | ||
3002 | ocfs2_map_and_dirty_page(context->inode, | 3002 | ocfs2_map_and_dirty_page(inode, handle, from, to, |
3003 | handle, from, to, | ||
3004 | page, 0, &new_block); | 3003 | page, 0, &new_block); |
3005 | mark_page_accessed(page); | 3004 | mark_page_accessed(page); |
3006 | unlock: | 3005 | unlock: |
@@ -3015,14 +3014,15 @@ unlock: | |||
3015 | return ret; | 3014 | return ret; |
3016 | } | 3015 | } |
3017 | 3016 | ||
3018 | static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | 3017 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, |
3019 | struct ocfs2_cow_context *context, | 3018 | struct file *file, |
3020 | u32 cpos, u32 old_cluster, | 3019 | u32 cpos, u32 old_cluster, |
3021 | u32 new_cluster, u32 new_len) | 3020 | u32 new_cluster, u32 new_len) |
3022 | { | 3021 | { |
3023 | int ret = 0; | 3022 | int ret = 0; |
3024 | struct super_block *sb = context->inode->i_sb; | 3023 | struct inode *inode = file->f_path.dentry->d_inode; |
3025 | struct ocfs2_caching_info *ci = context->data_et.et_ci; | 3024 | struct super_block *sb = inode->i_sb; |
3025 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | ||
3026 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); | 3026 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); |
3027 | u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); | 3027 | u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); |
3028 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 3028 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
@@ -3145,8 +3145,8 @@ static int ocfs2_replace_clusters(handle_t *handle, | |||
3145 | 3145 | ||
3146 | /* If the old clusters are unwritten, no need to duplicate. */ | 3146 | /* If the old clusters are unwritten, no need to duplicate. */ |
3147 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { | 3147 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { |
3148 | ret = context->cow_duplicate_clusters(handle, context, cpos, | 3148 | ret = context->cow_duplicate_clusters(handle, context->file, |
3149 | old, new, len); | 3149 | cpos, old, new, len); |
3150 | if (ret) { | 3150 | if (ret) { |
3151 | mlog_errno(ret); | 3151 | mlog_errno(ret); |
3152 | goto out; | 3152 | goto out; |
@@ -3162,22 +3162,22 @@ out: | |||
3162 | return ret; | 3162 | return ret; |
3163 | } | 3163 | } |
3164 | 3164 | ||
3165 | static int ocfs2_cow_sync_writeback(struct super_block *sb, | 3165 | int ocfs2_cow_sync_writeback(struct super_block *sb, |
3166 | struct ocfs2_cow_context *context, | 3166 | struct inode *inode, |
3167 | u32 cpos, u32 num_clusters) | 3167 | u32 cpos, u32 num_clusters) |
3168 | { | 3168 | { |
3169 | int ret = 0; | 3169 | int ret = 0; |
3170 | loff_t offset, end, map_end; | 3170 | loff_t offset, end, map_end; |
3171 | pgoff_t page_index; | 3171 | pgoff_t page_index; |
3172 | struct page *page; | 3172 | struct page *page; |
3173 | 3173 | ||
3174 | if (ocfs2_should_order_data(context->inode)) | 3174 | if (ocfs2_should_order_data(inode)) |
3175 | return 0; | 3175 | return 0; |
3176 | 3176 | ||
3177 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | 3177 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; |
3178 | end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); | 3178 | end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); |
3179 | 3179 | ||
3180 | ret = filemap_fdatawrite_range(context->inode->i_mapping, | 3180 | ret = filemap_fdatawrite_range(inode->i_mapping, |
3181 | offset, end - 1); | 3181 | offset, end - 1); |
3182 | if (ret < 0) { | 3182 | if (ret < 0) { |
3183 | mlog_errno(ret); | 3183 | mlog_errno(ret); |
@@ -3190,7 +3190,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, | |||
3190 | if (map_end > end) | 3190 | if (map_end > end) |
3191 | map_end = end; | 3191 | map_end = end; |
3192 | 3192 | ||
3193 | page = find_or_create_page(context->inode->i_mapping, | 3193 | page = find_or_create_page(inode->i_mapping, |
3194 | page_index, GFP_NOFS); | 3194 | page_index, GFP_NOFS); |
3195 | BUG_ON(!page); | 3195 | BUG_ON(!page); |
3196 | 3196 | ||
@@ -3349,7 +3349,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, | |||
3349 | * in write-back mode. | 3349 | * in write-back mode. |
3350 | */ | 3350 | */ |
3351 | if (context->get_clusters == ocfs2_di_get_clusters) { | 3351 | if (context->get_clusters == ocfs2_di_get_clusters) { |
3352 | ret = ocfs2_cow_sync_writeback(sb, context, cpos, | 3352 | ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos, |
3353 | orig_num_clusters); | 3353 | orig_num_clusters); |
3354 | if (ret) | 3354 | if (ret) |
3355 | mlog_errno(ret); | 3355 | mlog_errno(ret); |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index c8ce46f7d8e3..7754608c83a4 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
@@ -84,6 +84,17 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, | |||
84 | struct buffer_head *ref_root_bh, | 84 | struct buffer_head *ref_root_bh, |
85 | u32 cpos, u32 write_len, | 85 | u32 cpos, u32 write_len, |
86 | struct ocfs2_post_refcount *post); | 86 | struct ocfs2_post_refcount *post); |
87 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, | ||
88 | struct file *file, | ||
89 | u32 cpos, u32 old_cluster, | ||
90 | u32 new_cluster, u32 new_len); | ||
91 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | ||
92 | struct file *file, | ||
93 | u32 cpos, u32 old_cluster, | ||
94 | u32 new_cluster, u32 new_len); | ||
95 | int ocfs2_cow_sync_writeback(struct super_block *sb, | ||
96 | struct inode *inode, | ||
97 | u32 cpos, u32 num_clusters); | ||
87 | int ocfs2_add_refcount_flag(struct inode *inode, | 98 | int ocfs2_add_refcount_flag(struct inode *inode, |
88 | struct ocfs2_extent_tree *data_et, | 99 | struct ocfs2_extent_tree *data_et, |
89 | struct ocfs2_caching_info *ref_ci, | 100 | struct ocfs2_caching_info *ref_ci, |