aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric B Munson <ebmunson@us.ibm.com>2009-09-21 20:03:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-22 10:17:41 -0400
commit6bfde05bf5c9682e255c6a2c669dc80f91af6296 (patch)
tree3ff62bb87fc12c1ce808a54f789c42c35be7c049
parentf8dbf0a7a4c5d98e8b70da9f7f4f6a89f3b7a7bb (diff)
hugetlbfs: allow the creation of files suitable for MAP_PRIVATE on the vfs internal mount
This patchset adds a flag to mmap that allows the user to request that an anonymous mapping be backed with huge pages. This mapping will borrow functionality from the huge page shm code to create a file on the kernel internal mount and use it to approximate an anonymous mapping. The MAP_HUGETLB flag is a modifier to MAP_ANONYMOUS and will not work without both flags being present. A new flag is necessary because there is no other way to hook into huge pages without creating a file on a hugetlbfs mount which wouldn't be MAP_ANONYMOUS. To userspace, this mapping will behave just like an anonymous mapping because the file is not accessible outside of the kernel. This patchset is meant to simplify the programming model. Presently there is a large chunk of boilerplate code, contained in libhugetlbfs, required to create private, hugepage backed mappings. This patch set would allow use of hugepages without linking to libhugetlbfs or having hugetlbfs mounted. Unification of the VM code would provide these same benefits, but it has been resisted each time that it has been suggested for several reasons: it would break PAGE_SIZE assumptions across the kernel, it makes page-table abstractions really expensive, and it does not provide any benefit on architectures that do not support huge pages, incurring fast path penalties without providing any benefit on these architectures. This patch: There are two means of creating mappings backed by huge pages: 1. mmap() a file created on hugetlbfs 2. Use shm which creates a file on an internal mount which essentially maps it MAP_SHARED The internal mount is only used for shared mappings but there is very little that stops it being used for private mappings. This patch extends hugetlb_file_setup() to deal with the creation of files that will be mapped MAP_PRIVATE on the internal hugetlbfs mount. This extended API is used in a subsequent patch to implement the MAP_HUGETLB mmap() flag.
Signed-off-by: Eric Munson <ebmunson@us.ibm.com> Acked-by: David Rientjes <rientjes@google.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Adam Litke <agl@us.ibm.com> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/hugetlbfs/inode.c21
-rw-r--r--include/linux/hugetlb.h12
-rw-r--r--ipc/shm.c2
3 files changed, 28 insertions, 7 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a93b885311d8..06b7c2623f99 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -507,6 +507,13 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
507 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 507 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
508 INIT_LIST_HEAD(&inode->i_mapping->private_list); 508 INIT_LIST_HEAD(&inode->i_mapping->private_list);
509 info = HUGETLBFS_I(inode); 509 info = HUGETLBFS_I(inode);
510 /*
511 * The policy is initialized here even if we are creating a
512 * private inode because initialization simply creates an
513 * empty rb tree and calls spin_lock_init(), later when we
514 * call mpol_free_shared_policy() it will just return because
515 * the rb tree will still be empty.
516 */
510 mpol_shared_policy_init(&info->policy, NULL); 517 mpol_shared_policy_init(&info->policy, NULL);
511 switch (mode & S_IFMT) { 518 switch (mode & S_IFMT) {
512 default: 519 default:
@@ -931,13 +938,19 @@ static struct file_system_type hugetlbfs_fs_type = {
931 938
932static struct vfsmount *hugetlbfs_vfsmount; 939static struct vfsmount *hugetlbfs_vfsmount;
933 940
934static int can_do_hugetlb_shm(void) 941static int can_do_hugetlb_shm(int creat_flags)
935{ 942{
936 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 943 if (creat_flags != HUGETLB_SHMFS_INODE)
944 return 0;
945 if (capable(CAP_IPC_LOCK))
946 return 1;
947 if (in_group_p(sysctl_hugetlb_shm_group))
948 return 1;
949 return 0;
937} 950}
938 951
939struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, 952struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
940 struct user_struct **user) 953 struct user_struct **user, int creat_flags)
941{ 954{
942 int error = -ENOMEM; 955 int error = -ENOMEM;
943 struct file *file; 956 struct file *file;
@@ -949,7 +962,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
949 if (!hugetlbfs_vfsmount) 962 if (!hugetlbfs_vfsmount)
950 return ERR_PTR(-ENOENT); 963 return ERR_PTR(-ENOENT);
951 964
952 if (!can_do_hugetlb_shm()) { 965 if (!can_do_hugetlb_shm(creat_flags)) {
953 *user = current_user(); 966 *user = current_user();
954 if (user_shm_lock(size, *user)) { 967 if (user_shm_lock(size, *user)) {
955 WARN_ONCE(1, 968 WARN_ONCE(1,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e7f0fabfa1c2..f6505ad86657 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -112,6 +112,14 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
112 112
113#endif /* !CONFIG_HUGETLB_PAGE */ 113#endif /* !CONFIG_HUGETLB_PAGE */
114 114
115enum {
116 /*
117 * The file will be used as an shm file so shmfs accounting rules
118 * apply
119 */
120 HUGETLB_SHMFS_INODE = 1,
121};
122
115#ifdef CONFIG_HUGETLBFS 123#ifdef CONFIG_HUGETLBFS
116struct hugetlbfs_config { 124struct hugetlbfs_config {
117 uid_t uid; 125 uid_t uid;
@@ -150,7 +158,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
150extern const struct file_operations hugetlbfs_file_operations; 158extern const struct file_operations hugetlbfs_file_operations;
151extern struct vm_operations_struct hugetlb_vm_ops; 159extern struct vm_operations_struct hugetlb_vm_ops;
152struct file *hugetlb_file_setup(const char *name, size_t size, int acct, 160struct file *hugetlb_file_setup(const char *name, size_t size, int acct,
153 struct user_struct **user); 161 struct user_struct **user, int creat_flags);
154int hugetlb_get_quota(struct address_space *mapping, long delta); 162int hugetlb_get_quota(struct address_space *mapping, long delta);
155void hugetlb_put_quota(struct address_space *mapping, long delta); 163void hugetlb_put_quota(struct address_space *mapping, long delta);
156 164
@@ -172,7 +180,7 @@ static inline void set_file_hugepages(struct file *file)
172 180
173#define is_file_hugepages(file) 0 181#define is_file_hugepages(file) 0
174#define set_file_hugepages(file) BUG() 182#define set_file_hugepages(file) BUG()
175#define hugetlb_file_setup(name,size,acct,user) ERR_PTR(-ENOSYS) 183#define hugetlb_file_setup(name,size,acct,user,creat) ERR_PTR(-ENOSYS)
176 184
177#endif /* !CONFIG_HUGETLBFS */ 185#endif /* !CONFIG_HUGETLBFS */
178 186
diff --git a/ipc/shm.c b/ipc/shm.c
index 30162a59621a..9eb1488b543b 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -370,7 +370,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
370 if (shmflg & SHM_NORESERVE) 370 if (shmflg & SHM_NORESERVE)
371 acctflag = VM_NORESERVE; 371 acctflag = VM_NORESERVE;
372 file = hugetlb_file_setup(name, size, acctflag, 372 file = hugetlb_file_setup(name, size, acctflag,
373 &shp->mlock_user); 373 &shp->mlock_user, HUGETLB_SHMFS_INODE);
374 } else { 374 } else {
375 /* 375 /*
376 * Do not allow no accounting for OVERCOMMIT_NEVER, even 376 * Do not allow no accounting for OVERCOMMIT_NEVER, even