 fs/hugetlbfs/inode.c    |  54 +++++++----------
 include/linux/hugetlb.h |  14 ++++-
 mm/hugetlb.c            | 135 ++++++++++++++++++++++++++-------
 3 files changed, 139 insertions(+), 64 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4fbd9fccd550..7913e3252167 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -626,9 +626,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	spin_lock(&sbinfo->stat_lock);
 	/* If no limits set, just report 0 for max/free/used
 	 * blocks, like simple_statfs() */
-	if (sbinfo->max_blocks >= 0) {
-		buf->f_blocks = sbinfo->max_blocks;
-		buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+	if (sbinfo->spool) {
+		long free_pages;
+
+		spin_lock(&sbinfo->spool->lock);
+		buf->f_blocks = sbinfo->spool->max_hpages;
+		free_pages = sbinfo->spool->max_hpages
+			- sbinfo->spool->used_hpages;
+		buf->f_bavail = buf->f_bfree = free_pages;
+		spin_unlock(&sbinfo->spool->lock);
 		buf->f_files = sbinfo->max_inodes;
 		buf->f_ffree = sbinfo->free_inodes;
 	}
@@ -644,6 +650,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
 
 	if (sbi) {
 		sb->s_fs_info = NULL;
+
+		if (sbi->spool)
+			hugepage_put_subpool(sbi->spool);
+
 		kfree(sbi);
 	}
 }
@@ -874,10 +884,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_fs_info = sbinfo;
 	sbinfo->hstate = config.hstate;
 	spin_lock_init(&sbinfo->stat_lock);
-	sbinfo->max_blocks = config.nr_blocks;
-	sbinfo->free_blocks = config.nr_blocks;
 	sbinfo->max_inodes = config.nr_inodes;
 	sbinfo->free_inodes = config.nr_inodes;
+	sbinfo->spool = NULL;
+	if (config.nr_blocks != -1) {
+		sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
+		if (!sbinfo->spool)
+			goto out_free;
+	}
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_blocksize = huge_page_size(config.hstate);
 	sb->s_blocksize_bits = huge_page_shift(config.hstate);
@@ -896,38 +910,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_root = root;
 	return 0;
 out_free:
+	if (sbinfo->spool)
+		kfree(sbinfo->spool);
 	kfree(sbinfo);
 	return -ENOMEM;
 }
 
-int hugetlb_get_quota(struct address_space *mapping, long delta)
-{
-	int ret = 0;
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
-	if (sbinfo->free_blocks > -1) {
-		spin_lock(&sbinfo->stat_lock);
-		if (sbinfo->free_blocks - delta >= 0)
-			sbinfo->free_blocks -= delta;
-		else
-			ret = -ENOMEM;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-
-	return ret;
-}
-
-void hugetlb_put_quota(struct address_space *mapping, long delta)
-{
-	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
-	if (sbinfo->free_blocks > -1) {
-		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_blocks += delta;
-		spin_unlock(&sbinfo->stat_lock);
-	}
-}
-
 static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
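
The inode.c hunks above are the filesystem half of the fix: the mount's
block limit moves out of hugetlbfs_sb_info, which is freed at umount, and
into a reference-counted subpool that in-flight pages can safely pin. A
compressed before/after sketch of the lifetime hazard, paraphrased from
the surrounding code rather than quoted from it:

	/* Old scheme: a faulted-in page stored a raw pointer whose
	 * backing superblock info could be freed at umount. */
	set_page_private(page, (unsigned long)mapping);	/* at fault */
	kfree(sbi);					/* at umount */
	hugetlb_put_quota(mapping, 1);	/* walks to freed sbinfo: UAF */

	/* New scheme: the page pins the refcounted subpool instead. */
	set_page_private(page, (unsigned long)spool);	/* at fault */
	hugepage_put_subpool(sbi->spool); /* umount drops one reference */
	hugepage_subpool_put_pages(spool, 1); /* last page frees spool */
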
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7adc4923e7ac..cf0181738c9e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -14,6 +14,15 @@ struct user_struct;
 #include <linux/shm.h>
 #include <asm/tlbflush.h>
 
+struct hugepage_subpool {
+	spinlock_t lock;
+	long count;
+	long max_hpages, used_hpages;
+};
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
+void hugepage_put_subpool(struct hugepage_subpool *spool);
+
 int PageHuge(struct page *page);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
@@ -129,12 +138,11 @@ enum {
 
 #ifdef CONFIG_HUGETLBFS
 struct hugetlbfs_sb_info {
-	long	max_blocks;	/* blocks allowed */
-	long	free_blocks;	/* blocks free */
 	long	max_inodes;	/* inodes allowed */
 	long	free_inodes;	/* inodes free */
 	spinlock_t	stat_lock;
 	struct hstate *hstate;
+	struct hugepage_subpool *spool;
 };
 
 static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
@@ -146,8 +154,6 @@ extern const struct file_operations hugetlbfs_file_operations;
 extern const struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
 				struct user_struct **user, int creat_flags);
-int hugetlb_get_quota(struct address_space *mapping, long delta);
-void hugetlb_put_quota(struct address_space *mapping, long delta);
 
 static inline int is_file_hugepages(struct file *file)
 {
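
The header exports only the subpool constructor and the reference-dropping
destructor; the per-page charge/uncharge helpers stay private to
mm/hugetlb.c. Roughly, a size-limited mount is expected to use the API as
below (a minimal sketch following the declarations above, not a quoted
caller):

	struct hugepage_subpool *spool;

	spool = hugepage_new_subpool(nr_blocks); /* one per limited mount */
	if (!spool)
		return -ENOMEM;			/* fail the mount */
	/* ... pages are charged against spool by mm/hugetlb.c ... */
	hugepage_put_subpool(spool);	/* umount: drop the reference */
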
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b1c314877334..afa057a1d3fe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -53,6 +53,84 @@ static unsigned long __initdata default_hstate_size;
  */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
+static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+{
+	bool free = (spool->count == 0) && (spool->used_hpages == 0);
+
+	spin_unlock(&spool->lock);
+
+	/* If no pages are used, and no other handles to the subpool
+	 * remain, free the subpool */
+	if (free)
+		kfree(spool);
+}
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
+{
+	struct hugepage_subpool *spool;
+
+	spool = kmalloc(sizeof(*spool), GFP_KERNEL);
+	if (!spool)
+		return NULL;
+
+	spin_lock_init(&spool->lock);
+	spool->count = 1;
+	spool->max_hpages = nr_blocks;
+	spool->used_hpages = 0;
+
+	return spool;
+}
+
+void hugepage_put_subpool(struct hugepage_subpool *spool)
+{
+	spin_lock(&spool->lock);
+	BUG_ON(!spool->count);
+	spool->count--;
+	unlock_or_release_subpool(spool);
+}
+
+static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
+				      long delta)
+{
+	int ret = 0;
+
+	if (!spool)
+		return 0;
+
+	spin_lock(&spool->lock);
+	if ((spool->used_hpages + delta) <= spool->max_hpages) {
+		spool->used_hpages += delta;
+	} else {
+		ret = -ENOMEM;
+	}
+	spin_unlock(&spool->lock);
+
+	return ret;
+}
+
+static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
+				       long delta)
+{
+	if (!spool)
+		return;
+
+	spin_lock(&spool->lock);
+	spool->used_hpages -= delta;
+	/* If hugetlbfs_put_super couldn't free spool due to
+	 * an outstanding quota reference, free it now. */
+	unlock_or_release_subpool(spool);
+}
+
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
+{
+	return HUGETLBFS_SB(inode->i_sb)->spool;
+}
+
+static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
+{
+	return subpool_inode(vma->vm_file->f_dentry->d_inode);
+}
+
 /*
  * Region tracking -- allows tracking of reservations and instantiated pages
  *                    across the pages in a mapping.
@@ -540,9 +618,9 @@ static void free_huge_page(struct page *page)
 	 */
 	struct hstate *h = page_hstate(page);
 	int nid = page_to_nid(page);
-	struct address_space *mapping;
+	struct hugepage_subpool *spool =
+		(struct hugepage_subpool *)page_private(page);
 
-	mapping = (struct address_space *) page_private(page);
 	set_page_private(page, 0);
 	page->mapping = NULL;
 	BUG_ON(page_count(page));
@@ -558,8 +636,7 @@ static void free_huge_page(struct page *page)
 		enqueue_huge_page(h, page);
 	}
 	spin_unlock(&hugetlb_lock);
-	if (mapping)
-		hugetlb_put_quota(mapping, 1);
+	hugepage_subpool_put_pages(spool, 1);
 }
 
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -977,11 +1054,12 @@ static void return_unused_surplus_pages(struct hstate *h,
 /*
  * Determine if the huge page at addr within the vma has an associated
  * reservation.  Where it does not we will need to logically increase
- * reservation and actually increase quota before an allocation can occur.
- * Where any new reservation would be required the reservation change is
- * prepared, but not committed.  Once the page has been quota'd allocated
- * an instantiated the change should be committed via vma_commit_reservation.
- * No action is required on failure.
+ * reservation and actually increase subpool usage before an allocation
+ * can occur.  Where any new reservation would be required the
+ * reservation change is prepared, but not committed.  Once the page
+ * has been allocated from the subpool and instantiated the change should
+ * be committed via vma_commit_reservation.  No action is required on
+ * failure.
  */
 static long vma_needs_reservation(struct hstate *h,
 			struct vm_area_struct *vma, unsigned long addr)
@@ -1030,24 +1108,24 @@ static void vma_commit_reservation(struct hstate *h,
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr, int avoid_reserve)
 {
+	struct hugepage_subpool *spool = subpool_vma(vma);
 	struct hstate *h = hstate_vma(vma);
 	struct page *page;
-	struct address_space *mapping = vma->vm_file->f_mapping;
-	struct inode *inode = mapping->host;
 	long chg;
 
 	/*
-	 * Processes that did not create the mapping will have no reserves and
-	 * will not have accounted against quota. Check that the quota can be
-	 * made before satisfying the allocation
-	 * MAP_NORESERVE mappings may also need pages and quota allocated
-	 * if no reserve mapping overlaps.
+	 * Processes that did not create the mapping will have no
+	 * reserves and will not have accounted against the subpool
+	 * limit. Check that the subpool limit can be made before
+	 * satisfying the allocation.  MAP_NORESERVE mappings may also
+	 * need pages and subpool limit allocated if no reserve
+	 * mapping overlaps.
 	 */
 	chg = vma_needs_reservation(h, vma, addr);
 	if (chg < 0)
 		return ERR_PTR(-VM_FAULT_OOM);
 	if (chg)
-		if (hugetlb_get_quota(inode->i_mapping, chg))
+		if (hugepage_subpool_get_pages(spool, chg))
 			return ERR_PTR(-VM_FAULT_SIGBUS);
 
 	spin_lock(&hugetlb_lock);
@@ -1057,12 +1135,12 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	if (!page) {
 		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
-			hugetlb_put_quota(inode->i_mapping, chg);
+			hugepage_subpool_put_pages(spool, chg);
 			return ERR_PTR(-VM_FAULT_SIGBUS);
 		}
 	}
 
-	set_page_private(page, (unsigned long) mapping);
+	set_page_private(page, (unsigned long)spool);
 
 	vma_commit_reservation(h, vma, addr);
 
@@ -2083,6 +2161,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 {
 	struct hstate *h = hstate_vma(vma);
 	struct resv_map *reservations = vma_resv_map(vma);
+	struct hugepage_subpool *spool = subpool_vma(vma);
 	unsigned long reserve;
 	unsigned long start;
 	unsigned long end;
@@ -2098,7 +2177,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 
 		if (reserve) {
 			hugetlb_acct_memory(h, -reserve);
-			hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
+			hugepage_subpool_put_pages(spool, reserve);
 		}
 	}
 }
@@ -2331,7 +2410,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	address = address & huge_page_mask(h);
 	pgoff = vma_hugecache_offset(h, vma, address);
-	mapping = (struct address_space *)page_private(page);
+	mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
 
 	/*
 	 * Take the mapping lock for the duration of the table walk. As
@@ -2884,11 +2963,12 @@ int hugetlb_reserve_pages(struct inode *inode,
 {
 	long ret, chg;
 	struct hstate *h = hstate_inode(inode);
+	struct hugepage_subpool *spool = subpool_inode(inode);
 
 	/*
 	 * Only apply hugepage reservation if asked. At fault time, an
 	 * attempt will be made for VM_NORESERVE to allocate a page
-	 * and filesystem quota without using reserves
+	 * without using reserves
 	 */
 	if (vm_flags & VM_NORESERVE)
 		return 0;
@@ -2915,17 +2995,17 @@ int hugetlb_reserve_pages(struct inode *inode,
 	if (chg < 0)
 		return chg;
 
-	/* There must be enough filesystem quota for the mapping */
-	if (hugetlb_get_quota(inode->i_mapping, chg))
+	/* There must be enough pages in the subpool for the mapping */
+	if (hugepage_subpool_get_pages(spool, chg))
 		return -ENOSPC;
 
 	/*
 	 * Check enough hugepages are available for the reservation.
-	 * Hand back the quota if there are not
+	 * Hand the pages back to the subpool if there are not
 	 */
 	ret = hugetlb_acct_memory(h, chg);
 	if (ret < 0) {
-		hugetlb_put_quota(inode->i_mapping, chg);
+		hugepage_subpool_put_pages(spool, chg);
 		return ret;
 	}
 
2931 3011
@@ -2949,12 +3029,13 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
2949{ 3029{
2950 struct hstate *h = hstate_inode(inode); 3030 struct hstate *h = hstate_inode(inode);
2951 long chg = region_truncate(&inode->i_mapping->private_list, offset); 3031 long chg = region_truncate(&inode->i_mapping->private_list, offset);
3032 struct hugepage_subpool *spool = subpool_inode(inode);
2952 3033
2953 spin_lock(&inode->i_lock); 3034 spin_lock(&inode->i_lock);
2954 inode->i_blocks -= (blocks_per_huge_page(h) * freed); 3035 inode->i_blocks -= (blocks_per_huge_page(h) * freed);
2955 spin_unlock(&inode->i_lock); 3036 spin_unlock(&inode->i_lock);
2956 3037
2957 hugetlb_put_quota(inode->i_mapping, (chg - freed)); 3038 hugepage_subpool_put_pages(spool, (chg - freed));
2958 hugetlb_acct_memory(h, -(chg - freed)); 3039 hugetlb_acct_memory(h, -(chg - freed));
2959} 3040}
2960 3041
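
Taken together, the subpool is freed by whichever side finishes last: the
umount path dropping its handle, or the final accounted page being
returned. The standalone userspace analogue below (hypothetical demo
code, not part of the patch) compiles with `cc -pthread demo.c` and
exercises exactly that ordering:

	/* Userspace sketch of the unlock_or_release_subpool() pattern. */
	#include <assert.h>
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct subpool {
		pthread_mutex_t lock;
		long count;			/* handles held (the mount) */
		long max_pages, used_pages;	/* mount limit / in-flight pages */
	};

	/* Free only when no handles and no accounted pages remain;
	 * the decision is made under the lock, the free after it. */
	static void unlock_or_release(struct subpool *sp)
	{
		bool free_it = (sp->count == 0) && (sp->used_pages == 0);

		pthread_mutex_unlock(&sp->lock);
		if (free_it) {
			printf("subpool freed\n");
			pthread_mutex_destroy(&sp->lock);
			free(sp);
		}
	}

	static struct subpool *subpool_new(long max_pages)
	{
		struct subpool *sp = malloc(sizeof(*sp));

		if (!sp)
			return NULL;
		pthread_mutex_init(&sp->lock, NULL);
		sp->count = 1;
		sp->max_pages = max_pages;
		sp->used_pages = 0;
		return sp;
	}

	static int subpool_get_pages(struct subpool *sp, long delta)
	{
		int ret = 0;

		pthread_mutex_lock(&sp->lock);
		if (sp->used_pages + delta <= sp->max_pages)
			sp->used_pages += delta;
		else
			ret = -1;	/* over the mount's size limit */
		pthread_mutex_unlock(&sp->lock);
		return ret;
	}

	static void subpool_put_pages(struct subpool *sp, long delta)
	{
		pthread_mutex_lock(&sp->lock);
		sp->used_pages -= delta;
		unlock_or_release(sp);	/* frees sp if count already hit 0 */
	}

	static void subpool_put(struct subpool *sp)
	{
		pthread_mutex_lock(&sp->lock);
		assert(sp->count > 0);
		sp->count--;
		unlock_or_release(sp);	/* frees sp if no pages in use */
	}

	int main(void)
	{
		struct subpool *sp = subpool_new(8);

		assert(subpool_get_pages(sp, 3) == 0);	/* pages faulted in */
		subpool_put(sp);	/* "umount": 3 pages still in flight */
		subpool_put_pages(sp, 3);	/* last page back: freed here */
		return 0;
	}

Running it prints "subpool freed" only at the final subpool_put_pages()
call, matching the rule encoded in unlock_or_release_subpool() above.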