Diffstat (limited to 'fs')
-rw-r--r--  fs/block_dev.c       |   3
-rw-r--r--  fs/btrfs/disk-io.c   |   5
-rw-r--r--  fs/direct-io.c       |  57
-rw-r--r--  fs/eventpoll.c       | 234
-rw-r--r--  fs/hugetlbfs/inode.c |   3
-rw-r--r--  fs/nfs/internal.h    |   2
-rw-r--r--  fs/nfs/write.c       |   4
-rw-r--r--  fs/pipe.c            |   2
-rw-r--r--  fs/proc/array.c      |   7
-rw-r--r--  fs/proc/base.c       |   2
10 files changed, 269 insertions(+), 50 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index afe74dda632b..0e575d1304b4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1139,6 +1139,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
+		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
 		if (!partno) {
 			struct backing_dev_info *bdi;
@@ -1159,6 +1160,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				disk_put_part(bdev->bd_part);
 				bdev->bd_part = NULL;
 				bdev->bd_disk = NULL;
+				bdev->bd_queue = NULL;
 				mutex_unlock(&bdev->bd_mutex);
 				disk_unblock_events(disk);
 				put_disk(disk);
@@ -1232,6 +1234,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	disk_put_part(bdev->bd_part);
 	bdev->bd_disk = NULL;
 	bdev->bd_part = NULL;
+	bdev->bd_queue = NULL;
 	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
 	if (bdev != bdev->bd_contains)
 		__blkdev_put(bdev->bd_contains, mode, 1);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f99a099a7747..d8525662ca7a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -872,7 +872,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page)
+			struct page *newpage, struct page *page,
+			enum migrate_mode mode)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -887,7 +888,7 @@ static int btree_migratepage(struct address_space *mapping,
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 
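The enum migrate_mode argument gained by btree_migratepage() here (and by the hugetlbfs and NFS ->migratepage implementations further down) is defined on the mm side of this series and is not visible in this fs-only diffstat. As a reading aid, a sketch of the definition as it stood around this time follows; the exact member names and comments are an assumption, not part of this patch:

    /* Sketch of the mm-side definition (assumption, not shown in this diff):
     * the mode tells a ->migratepage() implementation how much blocking it
     * is allowed to do on behalf of the migration. */
    enum migrate_mode {
        MIGRATE_ASYNC,          /* never block */
        MIGRATE_SYNC_LIGHT,     /* may block on most operations, but not writeback */
        MIGRATE_SYNC,           /* may block and wait for writeback to finish */
    };
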
diff --git a/fs/direct-io.c b/fs/direct-io.c
index d740ab67ff6e..4a588dbd11bf 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -36,6 +36,7 @@
 #include <linux/rwsem.h>
 #include <linux/uio.h>
 #include <linux/atomic.h>
+#include <linux/prefetch.h>
 
 /*
  * How many user pages to map in one call to get_user_pages(). This determines
@@ -580,9 +581,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
 {
 	int ret;
 	sector_t fs_startblk;	/* Into file, in filesystem-sized blocks */
+	sector_t fs_endblk;	/* Into file, in filesystem-sized blocks */
 	unsigned long fs_count;	/* Number of filesystem-sized blocks */
-	unsigned long dio_count;/* Number of dio_block-sized blocks */
-	unsigned long blkmask;
 	int create;
 
 	/*
@@ -593,11 +593,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
 	if (ret == 0) {
 		BUG_ON(sdio->block_in_file >= sdio->final_block_in_request);
 		fs_startblk = sdio->block_in_file >> sdio->blkfactor;
-		dio_count = sdio->final_block_in_request - sdio->block_in_file;
-		fs_count = dio_count >> sdio->blkfactor;
-		blkmask = (1 << sdio->blkfactor) - 1;
-		if (dio_count & blkmask)
-			fs_count++;
+		fs_endblk = (sdio->final_block_in_request - 1) >>
+					sdio->blkfactor;
+		fs_count = fs_endblk - fs_startblk + 1;
 
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
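A worked example of what the rewritten fs_count calculation fixes (the numbers are illustrative, not taken from the patch): with sdio->blkfactor = 3, i.e. eight dio-sized blocks per filesystem block, block_in_file = 7 and final_block_in_request = 9, the request touches dio blocks 7 and 8, which straddle filesystem blocks 0 and 1, so the right answer is two blocks:

    /* Illustration only: compare the old and new fs_count computations. */
    #include <stdio.h>

    int main(void)
    {
        unsigned blkfactor = 3;
        unsigned long block_in_file = 7, final_block_in_request = 9;

        /* Old form: round the dio-block count up; this misses a block when
         * the request does not start on a filesystem-block boundary. */
        unsigned long dio_count = final_block_in_request - block_in_file;
        unsigned long fs_count_old = dio_count >> blkfactor;
        if (dio_count & ((1UL << blkfactor) - 1))
            fs_count_old++;

        /* New form: count the filesystem blocks actually spanned. */
        unsigned long fs_startblk = block_in_file >> blkfactor;
        unsigned long fs_endblk = (final_block_in_request - 1) >> blkfactor;
        unsigned long fs_count_new = fs_endblk - fs_startblk + 1;

        printf("old=%lu new=%lu\n", fs_count_old, fs_count_new); /* old=1 new=2 */
        return 0;
    }
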
@@ -1090,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio)
  * individual fields and will generate much worse code. This is important
  * for the whole file.
  */
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+static inline ssize_t
+do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io, int flags)
@@ -1100,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	size_t size;
 	unsigned long addr;
 	unsigned blkbits = inode->i_blkbits;
-	unsigned bdev_blkbits = 0;
 	unsigned blocksize_mask = (1 << blkbits) - 1;
 	ssize_t retval = -EINVAL;
 	loff_t end = offset;
@@ -1113,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT;
 
-	if (bdev)
-		bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
+	/*
+	 * Avoid references to bdev if not absolutely needed to give
+	 * the early prefetch in the caller enough time.
+	 */
 
 	if (offset & blocksize_mask) {
 		if (bdev)
-			blkbits = bdev_blkbits;
+			blkbits = blksize_bits(bdev_logical_block_size(bdev));
 		blocksize_mask = (1 << blkbits) - 1;
 		if (offset & blocksize_mask)
 			goto out;
@@ -1129,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 		addr = (unsigned long)iov[seg].iov_base;
 		size = iov[seg].iov_len;
 		end += size;
-		if ((addr & blocksize_mask) || (size & blocksize_mask)) {
+		if (unlikely((addr & blocksize_mask) ||
+			     (size & blocksize_mask))) {
 			if (bdev)
-				blkbits = bdev_blkbits;
+				blkbits = blksize_bits(
+					 bdev_logical_block_size(bdev));
 			blocksize_mask = (1 << blkbits) - 1;
 			if ((addr & blocksize_mask) || (size & blocksize_mask))
 				goto out;
 		}
 	}
@@ -1316,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 out:
 	return retval;
 }
+
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+	struct block_device *bdev, const struct iovec *iov, loff_t offset,
+	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	dio_submit_t submit_io, int flags)
+{
+	/*
+	 * The block device state is needed in the end to finally
+	 * submit everything.  Since it's likely to be cache cold
+	 * prefetch it here as first thing to hide some of the
+	 * latency.
+	 *
+	 * Attempt to prefetch the pieces we likely need later.
+	 */
+	prefetch(&bdev->bd_disk->part_tbl);
+	prefetch(bdev->bd_queue);
+	prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
+
+	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+				     nr_segs, get_block, end_io,
+				     submit_io, flags);
+}
+
 EXPORT_SYMBOL(__blockdev_direct_IO);
 
 static __init int dio_init(void)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 828e750af23a..aabdfc38cf24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -197,6 +197,12 @@ struct eventpoll {
 
 	/* The user that created the eventpoll descriptor */
 	struct user_struct *user;
+
+	struct file *file;
+
+	/* used to optimize loop detection check */
+	int visited;
+	struct list_head visited_list_link;
 };
 
 /* Wait structure used by the poll hooks */
@@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly;
 /* Slab cache used to allocate "struct eppoll_entry" */
 static struct kmem_cache *pwq_cache __read_mostly;
 
+/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
+static LIST_HEAD(visited_list);
+
+/*
+ * List of files with newly added links, where we may need to limit the number
+ * of emanating paths. Protected by the epmutex.
+ */
+static LIST_HEAD(tfile_check_list);
+
 #ifdef CONFIG_SYSCTL
 
 #include <linux/sysctl.h>
@@ -276,6 +291,12 @@ ctl_table epoll_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
+static const struct file_operations eventpoll_fops;
+
+static inline int is_file_epoll(struct file *f)
+{
+	return f->f_op == &eventpoll_fops;
+}
 
 /* Setup the structure that is used as key for the RB tree */
 static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -711,12 +732,6 @@ static const struct file_operations eventpoll_fops = {
 	.llseek		= noop_llseek,
 };
 
-/* Fast test to see if the file is an eventpoll file */
-static inline int is_file_epoll(struct file *f)
-{
-	return f->f_op == &eventpoll_fops;
-}
-
 /*
  * This is called from eventpoll_release() to unlink files from the eventpoll
  * interface. We need to have this facility to cleanup correctly files that are
@@ -926,6 +941,99 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
 	rb_insert_color(&epi->rbn, &ep->rbr);
 }
 
+
+
+#define PATH_ARR_SIZE 5
+/*
+ * These are the number of paths of length 1 to 5 that we are allowing to emanate
+ * from a single file of interest. For example, we allow 1000 paths of length
+ * 1 to emanate from each file of interest. This essentially represents the
+ * potential wakeup paths, which need to be limited in order to avoid massive
+ * uncontrolled wakeup storms. The common use case should be a single ep which
+ * is connected to n file sources. In this case each file source has 1 path
+ * of length 1. Thus, the numbers below should be more than sufficient. These
+ * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
+ * and delete can't add additional paths. Protected by the epmutex.
+ */
+static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
+static int path_count[PATH_ARR_SIZE];
+
+static int path_count_inc(int nests)
+{
+	if (++path_count[nests] > path_limits[nests])
+		return -1;
+	return 0;
+}
+
+static void path_count_init(void)
+{
+	int i;
+
+	for (i = 0; i < PATH_ARR_SIZE; i++)
+		path_count[i] = 0;
+}
+
+static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
+{
+	int error = 0;
+	struct file *file = priv;
+	struct file *child_file;
+	struct epitem *epi;
+
+	list_for_each_entry(epi, &file->f_ep_links, fllink) {
+		child_file = epi->ep->file;
+		if (is_file_epoll(child_file)) {
+			if (list_empty(&child_file->f_ep_links)) {
+				if (path_count_inc(call_nests)) {
+					error = -1;
+					break;
+				}
+			} else {
+				error = ep_call_nested(&poll_loop_ncalls,
+							EP_MAX_NESTS,
+							reverse_path_check_proc,
+							child_file, child_file,
+							current);
+			}
+			if (error != 0)
+				break;
+		} else {
+			printk(KERN_ERR "reverse_path_check_proc: "
+				"file is not an ep!\n");
+		}
+	}
+	return error;
+}
+
+/**
+ * reverse_path_check - The tfile_check_list is a list of file *, which have
+ *                      links that are proposed to be newly added. We need to
+ *                      make sure that those added links don't add too many
+ *                      paths such that we will spend all our time waking up
+ *                      eventpoll objects.
+ *
+ * Returns: Returns zero if the proposed links don't create too many paths,
+ *	    -1 otherwise.
+ */
+static int reverse_path_check(void)
+{
+	int length = 0;
+	int error = 0;
+	struct file *current_file;
+
+	/* let's call this for all tfiles */
+	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
+		length++;
+		path_count_init();
+		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+					reverse_path_check_proc, current_file,
+					current_file, current);
+		if (error)
+			break;
+	}
+	return error;
+}
+
 /*
  * Must be called with "mtx" held.
  */
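To make the accounting above concrete: path_count[n] tallies wakeup paths of length n + 1 found while walking back from one target file, and exceeding path_limits[n] fails the whole EPOLL_CTL_ADD. In the common case described in the comment, each watched file contributes exactly one path of length 1, far below the 1000-path limit. A userspace sketch of that case follows; the helper name and the use of pipes are illustrative assumptions, not part of the patch:

    /* Illustrative userspace sketch, not kernel code. */
    #include <sys/epoll.h>

    /* Add n pipe read ends to one epoll instance: each file gains a single
     * wakeup path of length 1, so the new reverse-path limits never trip.
     * They only matter when files are shared among many nested epoll sets,
     * in which case EPOLL_CTL_ADD starts failing (the -EINVAL set by
     * ep_insert() when reverse_path_check() fails shows up as errno). */
    static int watch_pipes(int epfd, int (*pipes)[2], int n)
    {
        struct epoll_event ev = { .events = EPOLLIN };
        int i;

        for (i = 0; i < n; i++) {
            ev.data.fd = pipes[i][0];
            if (epoll_ctl(epfd, EPOLL_CTL_ADD, pipes[i][0], &ev) < 0)
                return -1;
        }
        return 0;
    }
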
@@ -987,6 +1095,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	 */
 	ep_rbtree_insert(ep, epi);
 
+	/* now check if we've created too many backpaths */
+	error = -EINVAL;
+	if (reverse_path_check())
+		goto error_remove_epi;
+
 	/* We have to drop the new item inside our item list to keep track of it */
 	spin_lock_irqsave(&ep->lock, flags);
 
@@ -1011,6 +1124,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	return 0;
 
+error_remove_epi:
+	spin_lock(&tfile->f_lock);
+	if (ep_is_linked(&epi->fllink))
+		list_del_init(&epi->fllink);
+	spin_unlock(&tfile->f_lock);
+
+	rb_erase(&epi->rbn, &ep->rbr);
+
 error_unregister:
 	ep_unregister_pollwait(ep, epi);
 
@@ -1275,18 +1396,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	int error = 0;
 	struct file *file = priv;
 	struct eventpoll *ep = file->private_data;
+	struct eventpoll *ep_tovisit;
 	struct rb_node *rbp;
 	struct epitem *epi;
 
 	mutex_lock_nested(&ep->mtx, call_nests + 1);
+	ep->visited = 1;
+	list_add(&ep->visited_list_link, &visited_list);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
+			ep_tovisit = epi->ffd.file->private_data;
+			if (ep_tovisit->visited)
+				continue;
 			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
 					ep_loop_check_proc, epi->ffd.file,
-					epi->ffd.file->private_data, current);
+					ep_tovisit, current);
 			if (error != 0)
 				break;
+		} else {
+			/*
+			 * If we've reached a file that is not associated with
+			 * an ep, then we need to check if the newly added
+			 * links are going to add too many wakeup paths. We do
+			 * this by adding it to the tfile_check_list, if it's
+			 * not already there, and calling reverse_path_check()
+			 * during ep_insert().
+			 */
+			if (list_empty(&epi->ffd.file->f_tfile_llink))
+				list_add(&epi->ffd.file->f_tfile_llink,
+					 &tfile_check_list);
 		}
 	}
 	mutex_unlock(&ep->mtx);
@@ -1307,8 +1446,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
  */
 static int ep_loop_check(struct eventpoll *ep, struct file *file)
 {
-	return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+	int ret;
+	struct eventpoll *ep_cur, *ep_next;
+
+	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
 			ep_loop_check_proc, file, ep, current);
+	/* clear visited list */
+	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
+					visited_list_link) {
+		ep_cur->visited = 0;
+		list_del(&ep_cur->visited_list_link);
+	}
+	return ret;
+}
+
+static void clear_tfile_check_list(void)
+{
+	struct file *file;
+
+	/* first clear the tfile_check_list */
+	while (!list_empty(&tfile_check_list)) {
+		file = list_first_entry(&tfile_check_list, struct file,
+					f_tfile_llink);
+		list_del_init(&file->f_tfile_llink);
+	}
+	INIT_LIST_HEAD(&tfile_check_list);
 }
 
 /*
@@ -1316,8 +1478,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
  */
 SYSCALL_DEFINE1(epoll_create1, int, flags)
 {
-	int error;
+	int error, fd;
 	struct eventpoll *ep = NULL;
+	struct file *file;
 
 	/* Check the EPOLL_* constant for consistency. */
 	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1334,11 +1497,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+	fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
+	if (fd < 0) {
+		error = fd;
+		goto out_free_ep;
+	}
+	file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
 				 O_RDWR | (flags & O_CLOEXEC));
-	if (error < 0)
-		ep_free(ep);
-
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto out_free_fd;
+	}
+	fd_install(fd, file);
+	ep->file = file;
+	return fd;
+
+out_free_fd:
+	put_unused_fd(fd);
+out_free_ep:
+	ep_free(ep);
 	return error;
 }
 
@@ -1404,21 +1581,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	/*
 	 * When we insert an epoll file descriptor, inside another epoll file
	 * descriptor, there is the chance of creating closed loops, which are
-	 * better be handled here, than in more critical paths.
+	 * better be handled here, than in more critical paths. While we are
+	 * checking for loops we also determine the list of files reachable
+	 * and hang them on the tfile_check_list, so we can check that we
+	 * haven't created too many possible wakeup paths.
 	 *
-	 * We hold epmutex across the loop check and the insert in this case, in
-	 * order to prevent two separate inserts from racing and each doing the
-	 * insert "at the same time" such that ep_loop_check passes on both
-	 * before either one does the insert, thereby creating a cycle.
+	 * We need to hold the epmutex across both ep_insert and ep_remove
+	 * b/c we want to make sure we are looking at a coherent view of
+	 * epoll network.
 	 */
-	if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+	if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
 		mutex_lock(&epmutex);
 		did_lock_epmutex = 1;
-		error = -ELOOP;
-		if (ep_loop_check(ep, tfile) != 0)
-			goto error_tgt_fput;
 	}
-
+	if (op == EPOLL_CTL_ADD) {
+		if (is_file_epoll(tfile)) {
+			error = -ELOOP;
+			if (ep_loop_check(ep, tfile) != 0)
+				goto error_tgt_fput;
+		} else
+			list_add(&tfile->f_tfile_llink, &tfile_check_list);
+	}
 
 	mutex_lock_nested(&ep->mtx, 0);
 
@@ -1437,6 +1620,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 			error = ep_insert(ep, &epds, tfile, fd);
 		} else
 			error = -EEXIST;
+		clear_tfile_check_list();
 		break;
 	case EPOLL_CTL_DEL:
 		if (epi)
@@ -1455,7 +1639,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
-	if (unlikely(did_lock_epmutex))
+	if (did_lock_epmutex)
 		mutex_unlock(&epmutex);
 
 	fput(tfile);
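For readers following the epoll_ctl() changes above: cycles between epoll instances were already rejected with ELOOP before this patch and still are, while the new reverse-path accounting makes an EPOLL_CTL_ADD fail with EINVAL when the proposed links would create too many wakeup paths. A minimal userspace illustration of the loop case follows (an illustrative program, not part of the patch):

    /* Illustrative userspace sketch, not kernel code. */
    #include <stdio.h>
    #include <sys/epoll.h>

    int main(void)
    {
        int a = epoll_create1(0);
        int b = epoll_create1(0);
        struct epoll_event ev = { .events = EPOLLIN };

        ev.data.fd = b;
        if (epoll_ctl(a, EPOLL_CTL_ADD, b, &ev) < 0)    /* a watches b: allowed */
            perror("add b to a");

        ev.data.fd = a;
        if (epoll_ctl(b, EPOLL_CTL_ADD, a, &ev) < 0)    /* would close the cycle */
            perror("add a to b");                       /* expected failure: ELOOP */

        return 0;
    }
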
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e425ad9d0490..1e85a7ac0217 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -583,7 +583,8 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 }
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
-				struct page *newpage, struct page *page)
+				struct page *newpage, struct page *page,
+				enum migrate_mode mode)
 {
 	int rc;
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5ee92538b063..8102db9b926c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -332,7 +332,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-		struct page *, struct page *);
+		struct page *, struct page *, enum migrate_mode);
 #else
 #define nfs_migrate_page NULL
 #endif
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0c3885255f97..834f0fe96f89 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1688,7 +1688,7 @@ out_error:
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page)
+		struct page *page, enum migrate_mode mode)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1703,7 +1703,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 
diff --git a/fs/pipe.c b/fs/pipe.c
index f0e485d54e64..a932ced92a16 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1137,7 +1137,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
 	if (nr_pages < pipe->nrbufs)
 		return -EBUSY;
 
-	bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
+	bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN);
 	if (unlikely(!bufs))
 		return -ENOMEM;
 
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 8c344f037bd0..9252ee3b71e3 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -464,7 +464,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 	seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n",
 		pid_nr_ns(pid, ns),
 		tcomm,
 		state,
@@ -511,7 +511,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		task->policy,
 		(unsigned long long)delayacct_blkio_ticks(task),
 		cputime_to_clock_t(gtime),
-		cputime_to_clock_t(cgtime));
+		cputime_to_clock_t(cgtime),
+		(mm && permitted) ? mm->start_data : 0,
+		(mm && permitted) ? mm->end_data : 0,
+		(mm && permitted) ? mm->start_brk : 0);
 	if (mm)
 		mmput(mm);
 	return 0;
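With the do_task_stat() change above, /proc/<pid>/stat grows from 44 to 47 space-separated fields: start_data, end_data and start_brk are appended as fields 45 to 47 and are printed as 0 unless the reader passes the (mm && permitted) check visible in the hunk. A sketch of pulling the new fields out from userspace follows; the function name and the parsing approach are illustrative, not part of the patch:

    /* Illustrative userspace sketch, not kernel code. */
    #include <stdio.h>
    #include <string.h>

    /* Read fields 45-47 (start_data, end_data, start_brk) from a stat file
     * such as "/proc/self/stat" on a kernel that carries this change.
     * Scanning from the last ')' skips the comm field, which may itself
     * contain spaces and parentheses. */
    static int read_brk_fields(const char *path, unsigned long out[3])
    {
        char buf[4096];
        char *p;
        int field;
        FILE *f = fopen(path, "r");

        if (!f)
            return -1;
        if (!fgets(buf, sizeof(buf), f)) {
            fclose(f);
            return -1;
        }
        fclose(f);

        p = strrchr(buf, ')');              /* ')' ends field 2 (comm) */
        for (field = 3; p && field <= 45; field++)
            p = strchr(p + 1, ' ');
        if (!p)
            return -1;
        /* p now precedes field 45; read fields 45, 46 and 47. */
        return sscanf(p, " %lu %lu %lu", &out[0], &out[1], &out[2]) == 3 ? 0 : -1;
    }
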
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8173dfd89cb2..5485a5388ecb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -654,6 +654,8 @@ static int proc_pid_permission(struct inode *inode, int mask)
 	bool has_perms;
 
 	task = get_proc_task(inode);
+	if (!task)
+		return -ESRCH;
 	has_perms = has_pid_permissions(pid, task, 1);
 	put_task_struct(task);
 