diff options
author | Vivek Goyal <vgoyal@redhat.com> | 2012-08-01 06:24:18 -0400 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2012-08-01 06:24:18 -0400 |
commit | c83f6bf98dc1f1a194118b3830706cebbebda8c4 (patch) | |
tree | ea8fbd925584f784164617964a9f025bda16ed15 /include/linux | |
parent | 4638a83e8615de9c16c39dfed234951d0f468cf1 (diff) |
block: add partition resize function to blkpg ioctl
Add a new operation code (BLKPG_RESIZE_PARTITION) to the BLKPG ioctl that
allows altering the size of an existing partition, even if it is currently
in use.
This patch protects hd_struct->nr_sects with a sequence counter because
one might extend a partition while IO is happening to it, and the update of
nr_sects can be non-atomic on 32bit machines with a 64bit sector_t. This
can lead to issues like reading an inconsistent size of a partition. A sequence
counter has been used so that readers don't have to take the bdev mutex lock,
as we call sector_in_part() very frequently.
Now all accesses to hd_struct->nr_sects should happen using the sequence
counter read/update helper functions part_nr_sects_read/part_nr_sects_write.
There is one exception though, set_capacity()/get_capacity(). I think
theoretically a race should exist there too, but this patch does not
modify set_capacity()/get_capacity() due to the sheer number of call sites,
and I am afraid that change might break something. I have left that as a
TODO item. We can handle it later if need be. This patch does not introduce
any new races as such w.r.t set_capacity()/get_capacity().
v2: Add CONFIG_LBDAF test to UP preempt case as suggested by Phillip.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Phillip Susi <psusi@ubuntu.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/blkpg.h | 1 | ||||
-rw-r--r-- | include/linux/genhd.h | 57 |
2 files changed, 58 insertions, 0 deletions
diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h index faf8a45af210..a8519446c111 100644 --- a/include/linux/blkpg.h +++ b/include/linux/blkpg.h | |||
@@ -40,6 +40,7 @@ struct blkpg_ioctl_arg { | |||
40 | /* The subfunctions (for the op field) */ | 40 | /* The subfunctions (for the op field) */ |
41 | #define BLKPG_ADD_PARTITION 1 | 41 | #define BLKPG_ADD_PARTITION 1 |
42 | #define BLKPG_DEL_PARTITION 2 | 42 | #define BLKPG_DEL_PARTITION 2 |
43 | #define BLKPG_RESIZE_PARTITION 3 | ||
43 | 44 | ||
44 | /* Sizes of name fields. Unused at present. */ | 45 | /* Sizes of name fields. Unused at present. */ |
45 | #define BLKPG_DEVNAMELTH 64 | 46 | #define BLKPG_DEVNAMELTH 64 |
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 017a7fb5a1fc..b88723b81b3d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h | |||
@@ -98,7 +98,13 @@ struct partition_meta_info { | |||
98 | 98 | ||
99 | struct hd_struct { | 99 | struct hd_struct { |
100 | sector_t start_sect; | 100 | sector_t start_sect; |
101 | /* | ||
102 | * nr_sects is protected by sequence counter. One might extend a | ||
103 | * partition while IO is happening to it and update of nr_sects | ||
104 | * can be non-atomic on 32bit machines with 64bit sector_t. | ||
105 | */ | ||
101 | sector_t nr_sects; | 106 | sector_t nr_sects; |
107 | seqcount_t nr_sects_seq; | ||
102 | sector_t alignment_offset; | 108 | sector_t alignment_offset; |
103 | unsigned int discard_alignment; | 109 | unsigned int discard_alignment; |
104 | struct device __dev; | 110 | struct device __dev; |
@@ -648,6 +654,57 @@ static inline void hd_struct_put(struct hd_struct *part) | |||
648 | __delete_partition(part); | 654 | __delete_partition(part); |
649 | } | 655 | } |
650 | 656 | ||
657 | /* | ||
658 | * Any access of part->nr_sects which is not protected by partition | ||
659 | * bd_mutex or gendisk bdev bd_mutex, should be done using this | ||
660 | * accessor function. | ||
661 | * | ||
662 | * Code written along the lines of i_size_read() and i_size_write(). | ||
663 | * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption | ||
664 | * on. | ||
665 | */ | ||
666 | static inline sector_t part_nr_sects_read(struct hd_struct *part) | ||
667 | { | ||
668 | #if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) | ||
669 | sector_t nr_sects; | ||
670 | unsigned seq; | ||
671 | do { | ||
672 | seq = read_seqcount_begin(&part->nr_sects_seq); | ||
673 | nr_sects = part->nr_sects; | ||
674 | } while (read_seqcount_retry(&part->nr_sects_seq, seq)); | ||
675 | return nr_sects; | ||
676 | #elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) | ||
677 | sector_t nr_sects; | ||
678 | |||
679 | preempt_disable(); | ||
680 | nr_sects = part->nr_sects; | ||
681 | preempt_enable(); | ||
682 | return nr_sects; | ||
683 | #else | ||
684 | return part->nr_sects; | ||
685 | #endif | ||
686 | } | ||
687 | |||
688 | /* | ||
689 | * Should be called with mutex lock held (typically bd_mutex) of partition | ||
690 | * to provide mutual exclusion among writers otherwise seqcount might be | ||
691 | * left in wrong state leaving the readers spinning infinitely. | ||
692 | */ | ||
693 | static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) | ||
694 | { | ||
695 | #if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) | ||
696 | write_seqcount_begin(&part->nr_sects_seq); | ||
697 | part->nr_sects = size; | ||
698 | write_seqcount_end(&part->nr_sects_seq); | ||
699 | #elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) | ||
700 | preempt_disable(); | ||
701 | part->nr_sects = size; | ||
702 | preempt_enable(); | ||
703 | #else | ||
704 | part->nr_sects = size; | ||
705 | #endif | ||
706 | } | ||
707 | |||
651 | #else /* CONFIG_BLOCK */ | 708 | #else /* CONFIG_BLOCK */ |
652 | 709 | ||
653 | static inline void printk_all_partitions(void) { } | 710 | static inline void printk_all_partitions(void) { } |