aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTao Ma <tao.ma@oracle.com>2007-12-18 02:47:25 -0500
committerMark Fasheh <mark.fasheh@oracle.com>2008-01-25 18:04:24 -0500
commit7909f2bf835376a20d6dbf853eb459a27566eba2 (patch)
treed256570ad048e27741970517929bc3c6d4c2b656
parentd659072f736837e56b6433d58e5315ad1d4d5ccf (diff)
[PATCH 2/2] ocfs2: Implement group add for online resize
This patch adds the ability for a userspace program to request that a properly formatted cluster group be added to the main allocation bitmap for an Ocfs2 file system. The request is made via an ioctl, OCFS2_IOC_GROUP_ADD. At a high level, this is similar to ext3, but we use a different ioctl as the structure which has to be passed through is different. During an online resize, tunefs.ocfs2 will format any new cluster groups which must be added to complete the resize, and call OCFS2_IOC_GROUP_ADD on each one. The kernel verifies that the core cluster group information is valid and then does the work of linking it into the global allocation bitmap. Signed-off-by: Tao Ma <tao.ma@oracle.com> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r--fs/ocfs2/ioctl.c9
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/ocfs2_fs.h12
-rw-r--r--fs/ocfs2/resize.c245
-rw-r--r--fs/ocfs2/resize.h1
5 files changed, 269 insertions, 1 deletions
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index b74b24ecf0e4..7003d5820d79 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -119,6 +119,7 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
119 int new_clusters; 119 int new_clusters;
120 int status; 120 int status;
121 struct ocfs2_space_resv sr; 121 struct ocfs2_space_resv sr;
122 struct ocfs2_new_group_input input;
122 123
123 switch (cmd) { 124 switch (cmd) {
124 case OCFS2_IOC_GETFLAGS: 125 case OCFS2_IOC_GETFLAGS:
@@ -147,6 +148,12 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
147 return -EFAULT; 148 return -EFAULT;
148 149
149 return ocfs2_group_extend(inode, new_clusters); 150 return ocfs2_group_extend(inode, new_clusters);
151 case OCFS2_IOC_GROUP_ADD:
152 case OCFS2_IOC_GROUP_ADD64:
153 if (copy_from_user(&input, (int __user *) arg, sizeof(input)))
154 return -EFAULT;
155
156 return ocfs2_group_add(inode, &input);
150 default: 157 default:
151 return -ENOTTY; 158 return -ENOTTY;
152 } 159 }
@@ -170,6 +177,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
170 case OCFS2_IOC_UNRESVSP: 177 case OCFS2_IOC_UNRESVSP:
171 case OCFS2_IOC_UNRESVSP64: 178 case OCFS2_IOC_UNRESVSP64:
172 case OCFS2_IOC_GROUP_EXTEND: 179 case OCFS2_IOC_GROUP_EXTEND:
180 case OCFS2_IOC_GROUP_ADD:
181 case OCFS2_IOC_GROUP_ADD64:
173 break; 182 break;
174 default: 183 default:
175 return -ENOIOCTLCMD; 184 return -ENOIOCTLCMD;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 0ba3a421ccf2..220f3e818e78 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -281,6 +281,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
281/* group extend. inode update and last group update. */ 281/* group extend. inode update and last group update. */
282#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 282#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
283 283
284/* group add. inode update and the new group update. */
285#define OCFS2_GROUP_ADD_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
286
284/* get one bit out of a suballocator: dinode + group descriptor + 287/* get one bit out of a suballocator: dinode + group descriptor +
285 * prev. group desc. if we relink. */ 288 * prev. group desc. if we relink. */
286#define OCFS2_SUBALLOC_ALLOC (3) 289#define OCFS2_SUBALLOC_ALLOC (3)
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 19ac421b613b..425551737f1f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -231,7 +231,19 @@ struct ocfs2_space_resv {
231#define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) 231#define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv)
232#define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) 232#define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv)
233 233
234/* Used to pass group descriptor data when online resize is done */
235struct ocfs2_new_group_input {
236 __u64 group; /* Group descriptor's blkno. */
237 __u32 clusters; /* Total number of clusters in this group */
238 __u32 frees; /* Total free clusters in this group */
239 __u16 chain; /* Chain for this group */
240 __u16 reserved1;
241 __u32 reserved2;
242};
243
234#define OCFS2_IOC_GROUP_EXTEND _IOW('o', 1, int) 244#define OCFS2_IOC_GROUP_EXTEND _IOW('o', 1, int)
245#define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input)
246#define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input)
235 247
236/* 248/*
237 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) 249 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 848f7293f4fc..7791309bb258 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -356,7 +356,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
356 } 356 }
357 357
358 mlog(0, "extend the last group at %llu, new clusters = %d\n", 358 mlog(0, "extend the last group at %llu, new clusters = %d\n",
359 le64_to_cpu(group->bg_blkno), new_clusters); 359 (unsigned long long)le64_to_cpu(group->bg_blkno), new_clusters);
360 360
361 handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS); 361 handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS);
362 if (IS_ERR(handle)) { 362 if (IS_ERR(handle)) {
@@ -396,3 +396,246 @@ out:
396 mlog_exit_void(); 396 mlog_exit_void();
397 return ret; 397 return ret;
398} 398}
399
400static int ocfs2_check_new_group(struct inode *inode,
401 struct ocfs2_dinode *di,
402 struct ocfs2_new_group_input *input,
403 struct buffer_head *group_bh)
404{
405 int ret;
406 struct ocfs2_group_desc *gd;
407 u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
408 unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) *
409 le16_to_cpu(di->id2.i_chain.cl_bpc);
410
411
412 gd = (struct ocfs2_group_desc *)group_bh->b_data;
413
414 ret = -EIO;
415 if (!OCFS2_IS_VALID_GROUP_DESC(gd))
416 mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n",
417 (unsigned long long)le64_to_cpu(gd->bg_blkno));
418 else if (di->i_blkno != gd->bg_parent_dinode)
419 mlog(ML_ERROR, "Group descriptor # %llu has bad parent "
420 "pointer (%llu, expected %llu)\n",
421 (unsigned long long)le64_to_cpu(gd->bg_blkno),
422 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
423 (unsigned long long)le64_to_cpu(di->i_blkno));
424 else if (le16_to_cpu(gd->bg_bits) > max_bits)
425 mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n",
426 (unsigned long long)le64_to_cpu(gd->bg_blkno),
427 le16_to_cpu(gd->bg_bits));
428 else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits))
429 mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
430 "claims that %u are free\n",
431 (unsigned long long)le64_to_cpu(gd->bg_blkno),
432 le16_to_cpu(gd->bg_bits),
433 le16_to_cpu(gd->bg_free_bits_count));
434 else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size)))
435 mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
436 "max bitmap bits of %u\n",
437 (unsigned long long)le64_to_cpu(gd->bg_blkno),
438 le16_to_cpu(gd->bg_bits),
439 8 * le16_to_cpu(gd->bg_size));
440 else if (le16_to_cpu(gd->bg_chain) != input->chain)
441 mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
442 "while input has %u set.\n",
443 (unsigned long long)le64_to_cpu(gd->bg_blkno),
444 le16_to_cpu(gd->bg_chain), input->chain);
445 else if (le16_to_cpu(gd->bg_bits) != input->clusters * cl_bpc)
446 mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
447 "input has %u clusters set\n",
448 (unsigned long long)le64_to_cpu(gd->bg_blkno),
449 le16_to_cpu(gd->bg_bits), input->clusters);
450 else if (le16_to_cpu(gd->bg_free_bits_count) != input->frees * cl_bpc)
451 mlog(ML_ERROR, "Group descriptor # %llu has free bit count %u "
452 "but it should have %u set\n",
453 (unsigned long long)le64_to_cpu(gd->bg_blkno),
454 le16_to_cpu(gd->bg_bits),
455 input->frees * cl_bpc);
456 else
457 ret = 0;
458
459 return ret;
460}
461
462static int ocfs2_verify_group_and_input(struct inode *inode,
463 struct ocfs2_dinode *di,
464 struct ocfs2_new_group_input *input,
465 struct buffer_head *group_bh)
466{
467 u16 cl_count = le16_to_cpu(di->id2.i_chain.cl_count);
468 u16 cl_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
469 u16 next_free = le16_to_cpu(di->id2.i_chain.cl_next_free_rec);
470 u32 cluster = ocfs2_blocks_to_clusters(inode->i_sb, input->group);
471 u32 total_clusters = le32_to_cpu(di->i_clusters);
472 int ret = -EINVAL;
473
474 if (cluster < total_clusters)
475 mlog(ML_ERROR, "add a group which is in the current volume.\n");
476 else if (input->chain >= cl_count)
477 mlog(ML_ERROR, "input chain exceeds the limit.\n");
478 else if (next_free != cl_count && next_free != input->chain)
479 mlog(ML_ERROR,
480 "the add group should be in chain %u\n", next_free);
481 else if (total_clusters + input->clusters < total_clusters)
482 mlog(ML_ERROR, "add group's clusters overflow.\n");
483 else if (input->clusters > cl_cpg)
484 mlog(ML_ERROR, "the cluster exceeds the maximum of a group\n");
485 else if (input->frees > input->clusters)
486 mlog(ML_ERROR, "the free cluster exceeds the total clusters\n");
487 else if (total_clusters % cl_cpg != 0)
488 mlog(ML_ERROR,
489 "the last group isn't full. Use group extend first.\n");
490 else if (input->group != ocfs2_which_cluster_group(inode, cluster))
491 mlog(ML_ERROR, "group blkno is invalid\n");
492 else if ((ret = ocfs2_check_new_group(inode, di, input, group_bh)))
493 mlog(ML_ERROR, "group descriptor check failed.\n");
494 else
495 ret = 0;
496
497 return ret;
498}
499
500/* Add a new group descriptor to global_bitmap. */
501int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
502{
503 int ret;
504 handle_t *handle;
505 struct buffer_head *main_bm_bh = NULL;
506 struct inode *main_bm_inode = NULL;
507 struct ocfs2_dinode *fe = NULL;
508 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
509 struct buffer_head *group_bh = NULL;
510 struct ocfs2_group_desc *group = NULL;
511 struct ocfs2_chain_list *cl;
512 struct ocfs2_chain_rec *cr;
513 u16 cl_bpc;
514
515 mlog_entry_void();
516
517 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
518 return -EROFS;
519
520 main_bm_inode = ocfs2_get_system_file_inode(osb,
521 GLOBAL_BITMAP_SYSTEM_INODE,
522 OCFS2_INVALID_SLOT);
523 if (!main_bm_inode) {
524 ret = -EINVAL;
525 mlog_errno(ret);
526 goto out;
527 }
528
529 mutex_lock(&main_bm_inode->i_mutex);
530
531 ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
532 if (ret < 0) {
533 mlog_errno(ret);
534 goto out_mutex;
535 }
536
537 fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
538
539 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
540 ocfs2_group_bitmap_size(osb->sb) * 8) {
541 mlog(ML_ERROR, "The disk is too old and small."
542 " Force to do offline resize.");
543 ret = -EINVAL;
544 goto out_unlock;
545 }
546
547 ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL);
548 if (ret < 0) {
549 mlog(ML_ERROR, "Can't read the group descriptor # %llu "
550 "from the device.", (unsigned long long)input->group);
551 goto out_unlock;
552 }
553
554 ocfs2_set_new_buffer_uptodate(inode, group_bh);
555
556 ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
557 if (ret) {
558 mlog_errno(ret);
559 goto out_unlock;
560 }
561
562 mlog(0, "Add a new group %llu in chain = %u, length = %u\n",
563 (unsigned long long)input->group, input->chain, input->clusters);
564
565 handle = ocfs2_start_trans(osb, OCFS2_GROUP_ADD_CREDITS);
566 if (IS_ERR(handle)) {
567 mlog_errno(PTR_ERR(handle));
568 ret = -EINVAL;
569 goto out_unlock;
570 }
571
572 cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
573 cl = &fe->id2.i_chain;
574 cr = &cl->cl_recs[input->chain];
575
576 ret = ocfs2_journal_access(handle, main_bm_inode, group_bh,
577 OCFS2_JOURNAL_ACCESS_WRITE);
578 if (ret < 0) {
579 mlog_errno(ret);
580 goto out_commit;
581 }
582
583 group = (struct ocfs2_group_desc *)group_bh->b_data;
584 group->bg_next_group = cr->c_blkno;
585
586 ret = ocfs2_journal_dirty(handle, group_bh);
587 if (ret < 0) {
588 mlog_errno(ret);
589 goto out_commit;
590 }
591
592 ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh,
593 OCFS2_JOURNAL_ACCESS_WRITE);
594 if (ret < 0) {
595 mlog_errno(ret);
596 goto out_commit;
597 }
598
599 if (input->chain == le16_to_cpu(cl->cl_next_free_rec)) {
600 le16_add_cpu(&cl->cl_next_free_rec, 1);
601 memset(cr, 0, sizeof(struct ocfs2_chain_rec));
602 }
603
604 cr->c_blkno = le64_to_cpu(input->group);
605 le32_add_cpu(&cr->c_total, input->clusters * cl_bpc);
606 le32_add_cpu(&cr->c_free, input->frees * cl_bpc);
607
608 le32_add_cpu(&fe->id1.bitmap1.i_total, input->clusters *cl_bpc);
609 le32_add_cpu(&fe->id1.bitmap1.i_used,
610 (input->clusters - input->frees) * cl_bpc);
611 le32_add_cpu(&fe->i_clusters, input->clusters);
612
613 ocfs2_journal_dirty(handle, main_bm_bh);
614
615 spin_lock(&OCFS2_I(main_bm_inode)->ip_lock);
616 OCFS2_I(main_bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
617 le64_add_cpu(&fe->i_size, input->clusters << osb->s_clustersize_bits);
618 spin_unlock(&OCFS2_I(main_bm_inode)->ip_lock);
619 i_size_write(main_bm_inode, le64_to_cpu(fe->i_size));
620
621 ocfs2_update_super_and_backups(main_bm_inode, input->clusters);
622
623out_commit:
624 ocfs2_commit_trans(osb, handle);
625out_unlock:
626 if (group_bh)
627 brelse(group_bh);
628
629 if (main_bm_bh)
630 brelse(main_bm_bh);
631
632 ocfs2_inode_unlock(main_bm_inode, 1);
633
634out_mutex:
635 mutex_unlock(&main_bm_inode->i_mutex);
636 iput(main_bm_inode);
637
638out:
639 mlog_exit_void();
640 return ret;
641}
diff --git a/fs/ocfs2/resize.h b/fs/ocfs2/resize.h
index 3acb79af451b..f38841abf10b 100644
--- a/fs/ocfs2/resize.h
+++ b/fs/ocfs2/resize.h
@@ -27,5 +27,6 @@
27#define OCFS2_RESIZE_H 27#define OCFS2_RESIZE_H
28 28
29int ocfs2_group_extend(struct inode * inode, int new_clusters); 29int ocfs2_group_extend(struct inode * inode, int new_clusters);
30int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input);
30 31
31#endif /* OCFS2_RESIZE_H */ 32#endif /* OCFS2_RESIZE_H */