Diffstat (limited to 'fs')
-rw-r--r--  fs/block_dev.c       |   3
-rw-r--r--  fs/btrfs/disk-io.c   |   5
-rw-r--r--  fs/direct-io.c       |  57
-rw-r--r--  fs/eventpoll.c       | 234
-rw-r--r--  fs/hugetlbfs/inode.c |   3
-rw-r--r--  fs/nfs/internal.h    |   2
-rw-r--r--  fs/nfs/write.c       |   4
-rw-r--r--  fs/pipe.c            |   2
-rw-r--r--  fs/proc/array.c      |   7
-rw-r--r--  fs/proc/base.c       |   2
10 files changed, 269 insertions, 50 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index afe74dda632b..0e575d1304b4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1139,6 +1139,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
+		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
 		if (!partno) {
 			struct backing_dev_info *bdi;
@@ -1159,6 +1160,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			disk_put_part(bdev->bd_part);
 			bdev->bd_part = NULL;
 			bdev->bd_disk = NULL;
+			bdev->bd_queue = NULL;
 			mutex_unlock(&bdev->bd_mutex);
 			disk_unblock_events(disk);
 			put_disk(disk);
@@ -1232,6 +1234,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	disk_put_part(bdev->bd_part);
 	bdev->bd_disk = NULL;
 	bdev->bd_part = NULL;
+	bdev->bd_queue = NULL;
 	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
 	if (bdev != bdev->bd_contains)
 		__blkdev_put(bdev->bd_contains, mode, 1);
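The new bd_queue field simply caches disk->queue so readers can reach the request queue without first chasing the likely-cold bd_disk pointer; the two teardown paths above clear it alongside bd_disk so no stale pointer survives. A compilable stand-in for the dependency chain being shortened (mock types, not the kernel's real definitions):

/* Mock structs mirroring the kernel field names; illustration only. */
struct request_queue { int id; };
struct gendisk { struct request_queue *queue; };
struct block_device {
	struct gendisk *bd_disk;
	struct request_queue *bd_queue;	/* cached copy of bd_disk->queue */
};

/* Pre-patch path: two dependent loads, the second often cache cold. */
static struct request_queue *via_disk(struct block_device *b)
{
	return b->bd_disk->queue;
}

/* Post-patch path: one load, and the pointer can be prefetched early. */
static struct request_queue *via_cache(struct block_device *b)
{
	return b->bd_queue;
}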
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f99a099a7747..d8525662ca7a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -872,7 +872,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page)
+			struct page *newpage, struct page *page,
+			enum migrate_mode mode)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -887,7 +888,7 @@ static int btree_migratepage(struct address_space *mapping,
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 
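The extra mode argument threaded through this and the following migration hooks is supplied by the migration core and tells a ->migratepage() implementation how much blocking is tolerable. The sketch below shows the expected shape of that enum, assumed from the companion mm patches in this series rather than defined in this diff:

/* Assumed definition from the companion mm patches in this series. */
enum migrate_mode {
	MIGRATE_ASYNC,		/* never block */
	MIGRATE_SYNC_LIGHT,	/* block on most operations, but not writeback */
	MIGRATE_SYNC,		/* may block and may write pages out */
};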
diff --git a/fs/direct-io.c b/fs/direct-io.c
index d740ab67ff6e..4a588dbd11bf 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -36,6 +36,7 @@
 #include <linux/rwsem.h>
 #include <linux/uio.h>
 #include <linux/atomic.h>
+#include <linux/prefetch.h>
 
 /*
  * How many user pages to map in one call to get_user_pages(). This determines
@@ -580,9 +581,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
 {
	int ret;
	sector_t fs_startblk;	/* Into file, in filesystem-sized blocks */
+	sector_t fs_endblk;	/* Into file, in filesystem-sized blocks */
	unsigned long fs_count;	/* Number of filesystem-sized blocks */
-	unsigned long dio_count;/* Number of dio_block-sized blocks */
-	unsigned long blkmask;
	int create;
 
	/*
@@ -593,11 +593,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
 	if (ret == 0) {
 		BUG_ON(sdio->block_in_file >= sdio->final_block_in_request);
 		fs_startblk = sdio->block_in_file >> sdio->blkfactor;
-		dio_count = sdio->final_block_in_request - sdio->block_in_file;
-		fs_count = dio_count >> sdio->blkfactor;
-		blkmask = (1 << sdio->blkfactor) - 1;
-		if (dio_count & blkmask)
-			fs_count++;
+		fs_endblk = (sdio->final_block_in_request - 1) >>
+					sdio->blkfactor;
+		fs_count = fs_endblk - fs_startblk + 1;
 
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
@@ -1090,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio)
  * individual fields and will generate much worse code. This is important
  * for the whole file.
  */
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+static inline ssize_t
+do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io, int flags)
@@ -1100,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	size_t size;
 	unsigned long addr;
 	unsigned blkbits = inode->i_blkbits;
-	unsigned bdev_blkbits = 0;
 	unsigned blocksize_mask = (1 << blkbits) - 1;
 	ssize_t retval = -EINVAL;
 	loff_t end = offset;
@@ -1113,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT;
 
-	if (bdev)
-		bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
+	/*
+	 * Avoid references to bdev if not absolutely needed to give
+	 * the early prefetch in the caller enough time.
+	 */
 
 	if (offset & blocksize_mask) {
 		if (bdev)
-			blkbits = bdev_blkbits;
+			blkbits = blksize_bits(bdev_logical_block_size(bdev));
 		blocksize_mask = (1 << blkbits) - 1;
 		if (offset & blocksize_mask)
 			goto out;
@@ -1129,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 		addr = (unsigned long)iov[seg].iov_base;
 		size = iov[seg].iov_len;
 		end += size;
-		if ((addr & blocksize_mask) || (size & blocksize_mask)) {
+		if (unlikely((addr & blocksize_mask) ||
+			     (size & blocksize_mask))) {
 			if (bdev)
-				blkbits = bdev_blkbits;
+				blkbits = blksize_bits(
+					 bdev_logical_block_size(bdev));
 			blocksize_mask = (1 << blkbits) - 1;
 			if ((addr & blocksize_mask) || (size & blocksize_mask))
 				goto out;
 		}
 	}
@@ -1316,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 out:
 	return retval;
 }
+
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+	struct block_device *bdev, const struct iovec *iov, loff_t offset,
+	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	dio_submit_t submit_io, int flags)
+{
+	/*
+	 * The block device state is needed in the end to finally
+	 * submit everything. Since it's likely to be cache cold
+	 * prefetch it here as first thing to hide some of the
+	 * latency.
+	 *
+	 * Attempt to prefetch the pieces we likely need later.
+	 */
+	prefetch(&bdev->bd_disk->part_tbl);
+	prefetch(bdev->bd_queue);
+	prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
+
+	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+				     nr_segs, get_block, end_io,
+				     submit_io, flags);
+}
+
 EXPORT_SYMBOL(__blockdev_direct_IO);
 
 static __init int dio_init(void)
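The get_more_blocks() change above is more than a cleanup: the old code rounded the request *length* up to filesystem blocks, which undercounts when a request starts and ends in the middle of different fs blocks. The new code rounds the *endpoints* instead. A small self-contained check of both computations (values illustrative):

#include <assert.h>

/* With blkfactor = 3 (one fs block = 8 dio blocks), a request covering
 * dio blocks 7..8 straddles an fs-block boundary. */
int main(void)
{
	unsigned blkfactor = 3;
	unsigned long block_in_file = 7, final_block_in_request = 9;

	/* old computation: rounds the length, losing the start offset */
	unsigned long dio_count = final_block_in_request - block_in_file;
	unsigned long old_count = dio_count >> blkfactor;
	if (dio_count & ((1 << blkfactor) - 1))
		old_count++;

	/* new computation: rounds the endpoints */
	unsigned long fs_startblk = block_in_file >> blkfactor;
	unsigned long fs_endblk = (final_block_in_request - 1) >> blkfactor;
	unsigned long new_count = fs_endblk - fs_startblk + 1;

	assert(old_count == 1);	/* maps one fs block: too few */
	assert(new_count == 2);	/* maps both straddled fs blocks */
	return 0;
}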
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 828e750af23a..aabdfc38cf24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -197,6 +197,12 @@ struct eventpoll {
 
 	/* The user that created the eventpoll descriptor */
 	struct user_struct *user;
+
+	struct file *file;
+
+	/* used to optimize loop detection check */
+	int visited;
+	struct list_head visited_list_link;
 };
 
 /* Wait structure used by the poll hooks */
@@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly;
 /* Slab cache used to allocate "struct eppoll_entry" */
 static struct kmem_cache *pwq_cache __read_mostly;
 
+/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
+static LIST_HEAD(visited_list);
+
+/*
+ * List of files with newly added links, where we may need to limit the number
+ * of emanating paths. Protected by the epmutex.
+ */
+static LIST_HEAD(tfile_check_list);
+
 #ifdef CONFIG_SYSCTL
 
 #include <linux/sysctl.h>
@@ -276,6 +291,12 @@ ctl_table epoll_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
+static const struct file_operations eventpoll_fops;
+
+static inline int is_file_epoll(struct file *f)
+{
+	return f->f_op == &eventpoll_fops;
+}
 
 /* Setup the structure that is used as key for the RB tree */
 static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -711,12 +732,6 @@ static const struct file_operations eventpoll_fops = {
 	.llseek		= noop_llseek,
 };
 
-/* Fast test to see if the file is an eventpoll file */
-static inline int is_file_epoll(struct file *f)
-{
-	return f->f_op == &eventpoll_fops;
-}
-
 /*
  * This is called from eventpoll_release() to unlink files from the eventpoll
  * interface. We need to have this facility to cleanup correctly files that are
@@ -926,6 +941,99 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
 	rb_insert_color(&epi->rbn, &ep->rbr);
 }
 
+
+
+#define PATH_ARR_SIZE 5
+/*
+ * These are the number paths of length 1 to 5, that we are allowing to emanate
+ * from a single file of interest. For example, we allow 1000 paths of length
+ * 1, to emanate from each file of interest. This essentially represents the
+ * potential wakeup paths, which need to be limited in order to avoid massive
+ * uncontrolled wakeup storms. The common use case should be a single ep which
+ * is connected to n file sources. In this case each file source has 1 path
+ * of length 1. Thus, the numbers below should be more than sufficient. These
+ * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
+ * and delete can't add additional paths. Protected by the epmutex.
+ */
+static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
+static int path_count[PATH_ARR_SIZE];
+
+static int path_count_inc(int nests)
+{
+	if (++path_count[nests] > path_limits[nests])
+		return -1;
+	return 0;
+}
+
+static void path_count_init(void)
+{
+	int i;
+
+	for (i = 0; i < PATH_ARR_SIZE; i++)
+		path_count[i] = 0;
+}
+
+static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
+{
+	int error = 0;
+	struct file *file = priv;
+	struct file *child_file;
+	struct epitem *epi;
+
+	list_for_each_entry(epi, &file->f_ep_links, fllink) {
+		child_file = epi->ep->file;
+		if (is_file_epoll(child_file)) {
+			if (list_empty(&child_file->f_ep_links)) {
+				if (path_count_inc(call_nests)) {
+					error = -1;
+					break;
+				}
+			} else {
+				error = ep_call_nested(&poll_loop_ncalls,
+							EP_MAX_NESTS,
+							reverse_path_check_proc,
+							child_file, child_file,
+							current);
+			}
+			if (error != 0)
+				break;
+		} else {
+			printk(KERN_ERR "reverse_path_check_proc: "
+				"file is not an ep!\n");
+		}
+	}
+	return error;
+}
+
+/**
+ * reverse_path_check - The tfile_check_list is list of file *, which have
+ *                      links that are proposed to be newly added. We need to
+ *                      make sure that those added links don't add too many
+ *                      paths such that we will spend all our time waking up
+ *                      eventpoll objects.
+ *
+ * Returns: Returns zero if the proposed links don't create too many paths,
+ *	    -1 otherwise.
+ */
+static int reverse_path_check(void)
+{
+	int length = 0;
+	int error = 0;
+	struct file *current_file;
+
+	/* let's call this for all tfiles */
+	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
+		length++;
+		path_count_init();
+		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+					reverse_path_check_proc, current_file,
+					current_file, current);
+		if (error)
+			break;
+	}
+	return error;
+}
+
 /*
  * Must be called with "mtx" held.
  */
@@ -987,6 +1095,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	 */
 	ep_rbtree_insert(ep, epi);
 
+	/* now check if we've created too many backpaths */
+	error = -EINVAL;
+	if (reverse_path_check())
+		goto error_remove_epi;
+
 	/* We have to drop the new item inside our item list to keep track of it */
 	spin_lock_irqsave(&ep->lock, flags);
 
@@ -1011,6 +1124,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	return 0;
 
+error_remove_epi:
+	spin_lock(&tfile->f_lock);
+	if (ep_is_linked(&epi->fllink))
+		list_del_init(&epi->fllink);
+	spin_unlock(&tfile->f_lock);
+
+	rb_erase(&epi->rbn, &ep->rbr);
+
 error_unregister:
 	ep_unregister_pollwait(ep, epi);
 
@@ -1275,18 +1396,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	int error = 0;
 	struct file *file = priv;
 	struct eventpoll *ep = file->private_data;
+	struct eventpoll *ep_tovisit;
 	struct rb_node *rbp;
 	struct epitem *epi;
 
 	mutex_lock_nested(&ep->mtx, call_nests + 1);
+	ep->visited = 1;
+	list_add(&ep->visited_list_link, &visited_list);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
+			ep_tovisit = epi->ffd.file->private_data;
+			if (ep_tovisit->visited)
+				continue;
 			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
 					ep_loop_check_proc, epi->ffd.file,
-					epi->ffd.file->private_data, current);
+					ep_tovisit, current);
 			if (error != 0)
 				break;
+		} else {
+			/*
+			 * If we've reached a file that is not associated with
+			 * an ep, then we need to check if the newly added
+			 * links are going to add too many wakeup paths. We do
+			 * this by adding it to the tfile_check_list, if it's
+			 * not already there, and calling reverse_path_check()
+			 * during ep_insert().
+			 */
+			if (list_empty(&epi->ffd.file->f_tfile_llink))
+				list_add(&epi->ffd.file->f_tfile_llink,
+					 &tfile_check_list);
 		}
 	}
 	mutex_unlock(&ep->mtx);
@@ -1307,8 +1446,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
  */
 static int ep_loop_check(struct eventpoll *ep, struct file *file)
 {
-	return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+	int ret;
+	struct eventpoll *ep_cur, *ep_next;
+
+	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
 			      ep_loop_check_proc, file, ep, current);
+	/* clear visited list */
+	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
+							visited_list_link) {
+		ep_cur->visited = 0;
+		list_del(&ep_cur->visited_list_link);
+	}
+	return ret;
+}
+
+static void clear_tfile_check_list(void)
+{
+	struct file *file;
+
+	/* first clear the tfile_check_list */
+	while (!list_empty(&tfile_check_list)) {
+		file = list_first_entry(&tfile_check_list, struct file,
+					f_tfile_llink);
+		list_del_init(&file->f_tfile_llink);
+	}
+	INIT_LIST_HEAD(&tfile_check_list);
 }
 
 /*
@@ -1316,8 +1478,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
  */
 SYSCALL_DEFINE1(epoll_create1, int, flags)
 {
-	int error;
+	int error, fd;
 	struct eventpoll *ep = NULL;
+	struct file *file;
 
 	/* Check the EPOLL_* constant for consistency. */
 	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1334,11 +1497,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+	fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
+	if (fd < 0) {
+		error = fd;
+		goto out_free_ep;
+	}
+	file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
 				 O_RDWR | (flags & O_CLOEXEC));
-	if (error < 0)
-		ep_free(ep);
-
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto out_free_fd;
+	}
+	fd_install(fd, file);
+	ep->file = file;
+	return fd;
+
+out_free_fd:
+	put_unused_fd(fd);
+out_free_ep:
+	ep_free(ep);
 	return error;
 }
 
@@ -1404,21 +1581,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	/*
 	 * When we insert an epoll file descriptor, inside another epoll file
 	 * descriptor, there is the change of creating closed loops, which are
-	 * better be handled here, than in more critical paths.
+	 * better be handled here, than in more critical paths. While we are
+	 * checking for loops we also determine the list of files reachable
+	 * and hang them on the tfile_check_list, so we can check that we
+	 * haven't created too many possible wakeup paths.
 	 *
-	 * We hold epmutex across the loop check and the insert in this case, in
-	 * order to prevent two separate inserts from racing and each doing the
-	 * insert "at the same time" such that ep_loop_check passes on both
-	 * before either one does the insert, thereby creating a cycle.
+	 * We need to hold the epmutex across both ep_insert and ep_remove
+	 * b/c we want to make sure we are looking at a coherent view of
+	 * epoll network.
 	 */
-	if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+	if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
 		mutex_lock(&epmutex);
 		did_lock_epmutex = 1;
-		error = -ELOOP;
-		if (ep_loop_check(ep, tfile) != 0)
-			goto error_tgt_fput;
 	}
-
+	if (op == EPOLL_CTL_ADD) {
+		if (is_file_epoll(tfile)) {
+			error = -ELOOP;
+			if (ep_loop_check(ep, tfile) != 0)
+				goto error_tgt_fput;
+		} else
+			list_add(&tfile->f_tfile_llink, &tfile_check_list);
+	}
 
 	mutex_lock_nested(&ep->mtx, 0);
 
@@ -1437,6 +1620,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 			error = ep_insert(ep, &epds, tfile, fd);
 		} else
 			error = -EEXIST;
+		clear_tfile_check_list();
 		break;
 	case EPOLL_CTL_DEL:
 		if (epi)
@@ -1455,7 +1639,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
-	if (unlikely(did_lock_epmutex))
+	if (did_lock_epmutex)
 		mutex_unlock(&epmutex);
 
 	fput(tfile);
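Taken together, these hunks make the loop check linear in the number of epoll instances (the visited list prunes re-visits) and bound the number of wakeup paths any watched file can accumulate. A minimal userspace demonstration of the cycle that still gets rejected (error handling trimmed):

#include <stdio.h>
#include <sys/epoll.h>

int main(void)
{
	int ep1 = epoll_create1(0);
	int ep2 = epoll_create1(0);
	struct epoll_event ev = { .events = EPOLLIN };

	ev.data.fd = ep2;
	epoll_ctl(ep1, EPOLL_CTL_ADD, ep2, &ev);	/* ep1 -> ep2: fine */
	ev.data.fd = ep1;
	if (epoll_ctl(ep2, EPOLL_CTL_ADD, ep1, &ev) < 0)
		perror("ep2 -> ep1");	/* fails with ELOOP: would close the cycle */
	return 0;
}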
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e425ad9d0490..1e85a7ac0217 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -583,7 +583,8 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 }
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
-				struct page *newpage, struct page *page)
+				struct page *newpage, struct page *page,
+				enum migrate_mode mode)
 {
 	int rc;
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5ee92538b063..8102db9b926c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -332,7 +332,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-		struct page *, struct page *);
+		struct page *, struct page *, enum migrate_mode);
 #else
 #define nfs_migrate_page NULL
 #endif
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0c3885255f97..834f0fe96f89 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1688,7 +1688,7 @@ out_error:
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page)
+		struct page *page, enum migrate_mode mode)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1703,7 +1703,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 
diff --git a/fs/pipe.c b/fs/pipe.c
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1137,7 +1137,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
 	if (nr_pages < pipe->nrbufs)
 		return -EBUSY;
 
-	bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
+	bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN);
 	if (unlikely(!bufs))
 		return -ENOMEM;
 
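Both tweaks matter because nr_pages here is user-controlled: fcntl(F_SETPIPE_SZ) reaches pipe_set_size() directly, so an over-large request should fail quietly with an error rather than trip the page-allocation warning. A sketch of the trigger from userspace (sizes illustrative):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fds[2];

	pipe(fds);
	/* an absurdly large ring simply fails; no kernel warning spew */
	if (fcntl(fds[0], F_SETPIPE_SZ, 1 << 30) < 0)
		perror("F_SETPIPE_SZ");
	return 0;
}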
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 8c344f037bd0..9252ee3b71e3 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -464,7 +464,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 	seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n",
 		pid_nr_ns(pid, ns),
 		tcomm,
 		state,
@@ -511,7 +511,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		task->policy,
 		(unsigned long long)delayacct_blkio_ticks(task),
 		cputime_to_clock_t(gtime),
-		cputime_to_clock_t(cgtime));
+		cputime_to_clock_t(cgtime),
+		(mm && permitted) ? mm->start_data : 0,
+		(mm && permitted) ? mm->end_data : 0,
+		(mm && permitted) ? mm->start_brk : 0);
 	if (mm)
 		mmput(mm);
 	return 0;
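The three new values land at the very end of the /proc/[pid]/stat line and read back as 0 when the caller lacks permission (or the task has no mm). A small sketch that fetches them by taking the last three fields, so the space-containing comm field can't skew the split:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[4096], *tok, *fields[64];
	int n = 0;
	FILE *f = fopen("/proc/self/stat", "r");

	if (!f || !fgets(buf, sizeof(buf), f))
		return 1;
	fclose(f);
	for (tok = strtok(buf, " \n"); tok && n < 64; tok = strtok(NULL, " \n"))
		fields[n++] = tok;
	if (n >= 3)	/* the new fields are the last three on the line */
		printf("start_data=%s end_data=%s start_brk=%s\n",
		       fields[n - 3], fields[n - 2], fields[n - 1]);
	return 0;
}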
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8173dfd89cb2..5485a5388ecb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -654,6 +654,8 @@ static int proc_pid_permission(struct inode *inode, int mask)
 	bool has_perms;
 
 	task = get_proc_task(inode);
+	if (!task)
+		return -ESRCH;
 	has_perms = has_pid_permissions(pid, task, 1);
 	put_task_struct(task);
 