aboutsummaryrefslogtreecommitdiffstats
path: root/fs/userfaultfd.c
diff options
context:
space:
mode:
authorMike Kravetz <mike.kravetz@oracle.com>2017-02-22 18:43:04 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-22 19:41:28 -0500
commitcab350afcbc9c8a744e0d164d1c26560568f770b (patch)
tree57eb4021a19e2cd735454a85de03f271ebcb826d /fs/userfaultfd.c
parent1a1aad8a9b7bd34f60cdf98cd7915f00ae892c45 (diff)
userfaultfd: hugetlbfs: allow registration of ranges containing huge pages
Expand the userfaultfd_register/unregister routines to allow VM_HUGETLB vmas. Huge page alignment checking is performed after a VM_HUGETLB vma is encountered. Also, since there is no UFFDIO_ZEROPAGE support for huge pages, do not return that as a valid ioctl method for huge page ranges. Link: http://lkml.kernel.org/r/20161216144821.5183-22-aarcange@redhat.com Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com> Cc: Hillf Danton <hillf.zj@alibaba-inc.com> Cc: Michael Rapoport <RAPOPORT@il.ibm.com> Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Pavel Emelyanov <xemul@parallels.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r--fs/userfaultfd.c55
1 files changed, 50 insertions, 5 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 26e1ef00b63c..5139d05f80e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -27,6 +27,7 @@
27#include <linux/mempolicy.h> 27#include <linux/mempolicy.h>
28#include <linux/ioctl.h> 28#include <linux/ioctl.h>
29#include <linux/security.h> 29#include <linux/security.h>
30#include <linux/hugetlb.h>
30 31
31static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; 32static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
32 33
@@ -1058,6 +1059,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
1058 struct uffdio_register __user *user_uffdio_register; 1059 struct uffdio_register __user *user_uffdio_register;
1059 unsigned long vm_flags, new_flags; 1060 unsigned long vm_flags, new_flags;
1060 bool found; 1061 bool found;
1062 bool huge_pages;
1061 unsigned long start, end, vma_end; 1063 unsigned long start, end, vma_end;
1062 1064
1063 user_uffdio_register = (struct uffdio_register __user *) arg; 1065 user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1109,6 +1111,17 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
1109 goto out_unlock; 1111 goto out_unlock;
1110 1112
1111 /* 1113 /*
1114 * If the first vma contains huge pages, make sure start address
1115 * is aligned to huge page size.
1116 */
1117 if (is_vm_hugetlb_page(vma)) {
1118 unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
1119
1120 if (start & (vma_hpagesize - 1))
1121 goto out_unlock;
1122 }
1123
1124 /*
1112 * Search for not compatible vmas. 1125 * Search for not compatible vmas.
1113 * 1126 *
1114 * FIXME: this shall be relaxed later so that it doesn't fail 1127 * FIXME: this shall be relaxed later so that it doesn't fail
@@ -1116,6 +1129,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
1116 * on anonymous vmas). 1129 * on anonymous vmas).
1117 */ 1130 */
1118 found = false; 1131 found = false;
1132 huge_pages = false;
1119 for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) { 1133 for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
1120 cond_resched(); 1134 cond_resched();
1121 1135
@@ -1124,8 +1138,21 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
1124 1138
1125 /* check not compatible vmas */ 1139 /* check not compatible vmas */
1126 ret = -EINVAL; 1140 ret = -EINVAL;
1127 if (!vma_is_anonymous(cur)) 1141 if (!vma_is_anonymous(cur) && !is_vm_hugetlb_page(cur))
1128 goto out_unlock; 1142 goto out_unlock;
1143 /*
1144 * If this vma contains ending address, and huge pages
1145 * check alignment.
1146 */
1147 if (is_vm_hugetlb_page(cur) && end <= cur->vm_end &&
1148 end > cur->vm_start) {
1149 unsigned long vma_hpagesize = vma_kernel_pagesize(cur);
1150
1151 ret = -EINVAL;
1152
1153 if (end & (vma_hpagesize - 1))
1154 goto out_unlock;
1155 }
1129 1156
1130 /* 1157 /*
1131 * Check that this vma isn't already owned by a 1158 * Check that this vma isn't already owned by a
@@ -1138,6 +1165,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
1138 cur->vm_userfaultfd_ctx.ctx != ctx) 1165 cur->vm_userfaultfd_ctx.ctx != ctx)
1139 goto out_unlock; 1166 goto out_unlock;
1140 1167
1168 /*
1169 * Note vmas containing huge pages
1170 */
1171 if (is_vm_hugetlb_page(cur))
1172 huge_pages = true;
1173
1141 found = true; 1174 found = true;
1142 } 1175 }
1143 BUG_ON(!found); 1176 BUG_ON(!found);
@@ -1149,7 +1182,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
1149 do { 1182 do {
1150 cond_resched(); 1183 cond_resched();
1151 1184
1152 BUG_ON(!vma_is_anonymous(vma)); 1185 BUG_ON(!vma_is_anonymous(vma) && !is_vm_hugetlb_page(vma));
1153 BUG_ON(vma->vm_userfaultfd_ctx.ctx && 1186 BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
1154 vma->vm_userfaultfd_ctx.ctx != ctx); 1187 vma->vm_userfaultfd_ctx.ctx != ctx);
1155 1188
@@ -1207,7 +1240,8 @@ out_unlock:
1207 * userland which ioctls methods are guaranteed to 1240 * userland which ioctls methods are guaranteed to
1208 * succeed on this range. 1241 * succeed on this range.
1209 */ 1242 */
1210 if (put_user(UFFD_API_RANGE_IOCTLS, 1243 if (put_user(huge_pages ? UFFD_API_RANGE_IOCTLS_HPAGE :
1244 UFFD_API_RANGE_IOCTLS,
1211 &user_uffdio_register->ioctls)) 1245 &user_uffdio_register->ioctls))
1212 ret = -EFAULT; 1246 ret = -EFAULT;
1213 } 1247 }
@@ -1254,6 +1288,17 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
1254 goto out_unlock; 1288 goto out_unlock;
1255 1289
1256 /* 1290 /*
1291 * If the first vma contains huge pages, make sure start address
1292 * is aligned to huge page size.
1293 */
1294 if (is_vm_hugetlb_page(vma)) {
1295 unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
1296
1297 if (start & (vma_hpagesize - 1))
1298 goto out_unlock;
1299 }
1300
1301 /*
1257 * Search for not compatible vmas. 1302 * Search for not compatible vmas.
1258 * 1303 *
1259 * FIXME: this shall be relaxed later so that it doesn't fail 1304 * FIXME: this shall be relaxed later so that it doesn't fail
@@ -1275,7 +1320,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
1275 * provides for more strict behavior to notice 1320 * provides for more strict behavior to notice
1276 * unregistration errors. 1321 * unregistration errors.
1277 */ 1322 */
1278 if (!vma_is_anonymous(cur)) 1323 if (!vma_is_anonymous(cur) && !is_vm_hugetlb_page(cur))
1279 goto out_unlock; 1324 goto out_unlock;
1280 1325
1281 found = true; 1326 found = true;
@@ -1289,7 +1334,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
1289 do { 1334 do {
1290 cond_resched(); 1335 cond_resched();
1291 1336
1292 BUG_ON(!vma_is_anonymous(vma)); 1337 BUG_ON(!vma_is_anonymous(vma) && !is_vm_hugetlb_page(vma));
1293 1338
1294 /* 1339 /*
1295 * Nothing to do: this vma is already registered into this 1340 * Nothing to do: this vma is already registered into this