diff options
author | Andi Kleen <ak@suse.de> | 2008-07-24 00:27:43 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-24 13:47:17 -0400 |
commit | a137e1cc6d6e7d315fef03962a2a5a113348b13b (patch) | |
tree | b47e195c392abaa3640cc2f9187d99d58cee664a | |
parent | e5ff215941d59f8ae6bf58f6428dc5c26745a612 (diff) |
hugetlbfs: per mount huge page sizes
Add the ability to configure the hugetlb hstate used on a per mount basis.
- Add a new pagesize= option to the hugetlbfs mount that allows setting
the page size
- This option causes the mount code to find the hstate corresponding to the
specified size and to set up a pointer to that hstate in the mount's
superblock.
- Change the hstate accessors to use this information rather than the
default_hstate they were using (requires a slight change in mm/memory.c
so we don't NULL deref in the error-unmap path -- see comments).
[np: take hstate out of hugetlbfs inode and vma->vm_private_data]
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: Nishanth Aravamudan <nacc@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/hugetlbfs/inode.c | 45 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 14 | ||||
-rw-r--r-- | mm/hugetlb.c | 16 | ||||
-rw-r--r-- | mm/memory.c | 18 |
4 files changed, 64 insertions, 29 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 516c581b5371..dbd01d262ca4 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group; | |||
53 | enum { | 53 | enum { |
54 | Opt_size, Opt_nr_inodes, | 54 | Opt_size, Opt_nr_inodes, |
55 | Opt_mode, Opt_uid, Opt_gid, | 55 | Opt_mode, Opt_uid, Opt_gid, |
56 | Opt_pagesize, | ||
56 | Opt_err, | 57 | Opt_err, |
57 | }; | 58 | }; |
58 | 59 | ||
@@ -62,6 +63,7 @@ static match_table_t tokens = { | |||
62 | {Opt_mode, "mode=%o"}, | 63 | {Opt_mode, "mode=%o"}, |
63 | {Opt_uid, "uid=%u"}, | 64 | {Opt_uid, "uid=%u"}, |
64 | {Opt_gid, "gid=%u"}, | 65 | {Opt_gid, "gid=%u"}, |
66 | {Opt_pagesize, "pagesize=%s"}, | ||
65 | {Opt_err, NULL}, | 67 | {Opt_err, NULL}, |
66 | }; | 68 | }; |
67 | 69 | ||
@@ -750,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | |||
750 | char *p, *rest; | 752 | char *p, *rest; |
751 | substring_t args[MAX_OPT_ARGS]; | 753 | substring_t args[MAX_OPT_ARGS]; |
752 | int option; | 754 | int option; |
755 | unsigned long long size = 0; | ||
756 | enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE; | ||
753 | 757 | ||
754 | if (!options) | 758 | if (!options) |
755 | return 0; | 759 | return 0; |
@@ -780,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | |||
780 | break; | 784 | break; |
781 | 785 | ||
782 | case Opt_size: { | 786 | case Opt_size: { |
783 | unsigned long long size; | ||
784 | /* memparse() will accept a K/M/G without a digit */ | 787 | /* memparse() will accept a K/M/G without a digit */ |
785 | if (!isdigit(*args[0].from)) | 788 | if (!isdigit(*args[0].from)) |
786 | goto bad_val; | 789 | goto bad_val; |
787 | size = memparse(args[0].from, &rest); | 790 | size = memparse(args[0].from, &rest); |
788 | if (*rest == '%') { | 791 | setsize = SIZE_STD; |
789 | size <<= HPAGE_SHIFT; | 792 | if (*rest == '%') |
790 | size *= max_huge_pages; | 793 | setsize = SIZE_PERCENT; |
791 | do_div(size, 100); | ||
792 | } | ||
793 | pconfig->nr_blocks = (size >> HPAGE_SHIFT); | ||
794 | break; | 794 | break; |
795 | } | 795 | } |
796 | 796 | ||
@@ -801,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | |||
801 | pconfig->nr_inodes = memparse(args[0].from, &rest); | 801 | pconfig->nr_inodes = memparse(args[0].from, &rest); |
802 | break; | 802 | break; |
803 | 803 | ||
804 | case Opt_pagesize: { | ||
805 | unsigned long ps; | ||
806 | ps = memparse(args[0].from, &rest); | ||
807 | pconfig->hstate = size_to_hstate(ps); | ||
808 | if (!pconfig->hstate) { | ||
809 | printk(KERN_ERR | ||
810 | "hugetlbfs: Unsupported page size %lu MB\n", | ||
811 | ps >> 20); | ||
812 | return -EINVAL; | ||
813 | } | ||
814 | break; | ||
815 | } | ||
816 | |||
804 | default: | 817 | default: |
805 | printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", | 818 | printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", |
806 | p); | 819 | p); |
@@ -808,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | |||
808 | break; | 821 | break; |
809 | } | 822 | } |
810 | } | 823 | } |
824 | |||
825 | /* Do size after hstate is set up */ | ||
826 | if (setsize > NO_SIZE) { | ||
827 | struct hstate *h = pconfig->hstate; | ||
828 | if (setsize == SIZE_PERCENT) { | ||
829 | size <<= huge_page_shift(h); | ||
830 | size *= h->max_huge_pages; | ||
831 | do_div(size, 100); | ||
832 | } | ||
833 | pconfig->nr_blocks = (size >> huge_page_shift(h)); | ||
834 | } | ||
835 | |||
811 | return 0; | 836 | return 0; |
812 | 837 | ||
813 | bad_val: | 838 | bad_val: |
@@ -832,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) | |||
832 | config.uid = current->fsuid; | 857 | config.uid = current->fsuid; |
833 | config.gid = current->fsgid; | 858 | config.gid = current->fsgid; |
834 | config.mode = 0755; | 859 | config.mode = 0755; |
860 | config.hstate = &default_hstate; | ||
835 | ret = hugetlbfs_parse_options(data, &config); | 861 | ret = hugetlbfs_parse_options(data, &config); |
836 | if (ret) | 862 | if (ret) |
837 | return ret; | 863 | return ret; |
@@ -840,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) | |||
840 | if (!sbinfo) | 866 | if (!sbinfo) |
841 | return -ENOMEM; | 867 | return -ENOMEM; |
842 | sb->s_fs_info = sbinfo; | 868 | sb->s_fs_info = sbinfo; |
869 | sbinfo->hstate = config.hstate; | ||
843 | spin_lock_init(&sbinfo->stat_lock); | 870 | spin_lock_init(&sbinfo->stat_lock); |
844 | sbinfo->max_blocks = config.nr_blocks; | 871 | sbinfo->max_blocks = config.nr_blocks; |
845 | sbinfo->free_blocks = config.nr_blocks; | 872 | sbinfo->free_blocks = config.nr_blocks; |
846 | sbinfo->max_inodes = config.nr_inodes; | 873 | sbinfo->max_inodes = config.nr_inodes; |
847 | sbinfo->free_inodes = config.nr_inodes; | 874 | sbinfo->free_inodes = config.nr_inodes; |
848 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 875 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
849 | sb->s_blocksize = HPAGE_SIZE; | 876 | sb->s_blocksize = huge_page_size(config.hstate); |
850 | sb->s_blocksize_bits = HPAGE_SHIFT; | 877 | sb->s_blocksize_bits = huge_page_shift(config.hstate); |
851 | sb->s_magic = HUGETLBFS_MAGIC; | 878 | sb->s_magic = HUGETLBFS_MAGIC; |
852 | sb->s_op = &hugetlbfs_ops; | 879 | sb->s_op = &hugetlbfs_ops; |
853 | sb->s_time_gran = 1; | 880 | sb->s_time_gran = 1; |
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b75bdb4deba3..ba9263e631b9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -100,6 +100,7 @@ struct hugetlbfs_config { | |||
100 | umode_t mode; | 100 | umode_t mode; |
101 | long nr_blocks; | 101 | long nr_blocks; |
102 | long nr_inodes; | 102 | long nr_inodes; |
103 | struct hstate *hstate; | ||
103 | }; | 104 | }; |
104 | 105 | ||
105 | struct hugetlbfs_sb_info { | 106 | struct hugetlbfs_sb_info { |
@@ -108,6 +109,7 @@ struct hugetlbfs_sb_info { | |||
108 | long max_inodes; /* inodes allowed */ | 109 | long max_inodes; /* inodes allowed */ |
109 | long free_inodes; /* inodes free */ | 110 | long free_inodes; /* inodes free */ |
110 | spinlock_t stat_lock; | 111 | spinlock_t stat_lock; |
112 | struct hstate *hstate; | ||
111 | }; | 113 | }; |
112 | 114 | ||
113 | 115 | ||
@@ -191,19 +193,21 @@ extern unsigned int default_hstate_idx; | |||
191 | 193 | ||
192 | #define default_hstate (hstates[default_hstate_idx]) | 194 | #define default_hstate (hstates[default_hstate_idx]) |
193 | 195 | ||
194 | static inline struct hstate *hstate_vma(struct vm_area_struct *vma) | 196 | static inline struct hstate *hstate_inode(struct inode *i) |
195 | { | 197 | { |
196 | return &default_hstate; | 198 | struct hugetlbfs_sb_info *hsb; |
199 | hsb = HUGETLBFS_SB(i->i_sb); | ||
200 | return hsb->hstate; | ||
197 | } | 201 | } |
198 | 202 | ||
199 | static inline struct hstate *hstate_file(struct file *f) | 203 | static inline struct hstate *hstate_file(struct file *f) |
200 | { | 204 | { |
201 | return &default_hstate; | 205 | return hstate_inode(f->f_dentry->d_inode); |
202 | } | 206 | } |
203 | 207 | ||
204 | static inline struct hstate *hstate_inode(struct inode *i) | 208 | static inline struct hstate *hstate_vma(struct vm_area_struct *vma) |
205 | { | 209 | { |
206 | return &default_hstate; | 210 | return hstate_file(vma->vm_file); |
207 | } | 211 | } |
208 | 212 | ||
209 | static inline unsigned long huge_page_size(struct hstate *h) | 213 | static inline unsigned long huge_page_size(struct hstate *h) |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 82378d44a0c5..4cf7a90e9140 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -1439,19 +1439,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
1439 | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | 1439 | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, |
1440 | unsigned long end, struct page *ref_page) | 1440 | unsigned long end, struct page *ref_page) |
1441 | { | 1441 | { |
1442 | /* | 1442 | spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); |
1443 | * It is undesirable to test vma->vm_file as it should be non-null | 1443 | __unmap_hugepage_range(vma, start, end, ref_page); |
1444 | * for valid hugetlb area. However, vm_file will be NULL in the error | 1444 | spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); |
1445 | * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails, | ||
1446 | * do_mmap_pgoff() nullifies vma->vm_file before calling this function | ||
1447 | * to clean up. Since no pte has actually been setup, it is safe to | ||
1448 | * do nothing in this case. | ||
1449 | */ | ||
1450 | if (vma->vm_file) { | ||
1451 | spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); | ||
1452 | __unmap_hugepage_range(vma, start, end, ref_page); | ||
1453 | spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); | ||
1454 | } | ||
1455 | } | 1445 | } |
1456 | 1446 | ||
1457 | /* | 1447 | /* |
diff --git a/mm/memory.c b/mm/memory.c index c1c1d6d8c22b..02fc6b1047b0 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -901,9 +901,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, | |||
901 | } | 901 | } |
902 | 902 | ||
903 | if (unlikely(is_vm_hugetlb_page(vma))) { | 903 | if (unlikely(is_vm_hugetlb_page(vma))) { |
904 | unmap_hugepage_range(vma, start, end, NULL); | 904 | /* |
905 | zap_work -= (end - start) / | 905 | * It is undesirable to test vma->vm_file as it |
906 | * should be non-null for valid hugetlb area. | ||
907 | * However, vm_file will be NULL in the error | ||
908 | * cleanup path of do_mmap_pgoff. When | ||
909 | * hugetlbfs ->mmap method fails, | ||
910 | * do_mmap_pgoff() nullifies vma->vm_file | ||
911 | * before calling this function to clean up. | ||
912 | * Since no pte has actually been setup, it is | ||
913 | * safe to do nothing in this case. | ||
914 | */ | ||
915 | if (vma->vm_file) { | ||
916 | unmap_hugepage_range(vma, start, end, NULL); | ||
917 | zap_work -= (end - start) / | ||
906 | pages_per_huge_page(hstate_vma(vma)); | 918 | pages_per_huge_page(hstate_vma(vma)); |
919 | } | ||
920 | |||
907 | start = end; | 921 | start = end; |
908 | } else | 922 | } else |
909 | start = unmap_page_range(*tlbp, vma, | 923 | start = unmap_page_range(*tlbp, vma, |