diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-06-04 18:37:44 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-06-04 18:37:44 -0400 |
| commit | d2dd328b7f7bc6cebe167648289337755944ad2a (patch) | |
| tree | 5d664a2db1ac209f7537452ddc02597972f7aa37 /fs | |
| parent | c1518f12bab97a6d409a25aaccb02dc8895800f3 (diff) | |
| parent | 1abec4fdbb142e3ccb6ce99832fae42129134a96 (diff) | |
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (27 commits)
block: make blk_init_free_list and elevator_init idempotent
block: avoid unconditionally freeing previously allocated request_queue
pipe: change /proc/sys/fs/pipe-max-pages to byte sized interface
pipe: change the privilege required for growing a pipe beyond system max
pipe: adjust minimum pipe size to 1 page
block: disable preemption before using sched_clock()
cciss: call BUG() earlier
Preparing 8.3.8rc2
drbd: Reduce verbosity
drbd: use drbd specific ratelimit instead of global printk_ratelimit
drbd: fix hang on local read errors while disconnected
drbd: Removed the now empty w_io_error() function
drbd: removed duplicated #includes
drbd: improve usage of MSG_MORE
drbd: need to set socket bufsize early to take effect
drbd: improve network latency, TCP_QUICKACK
drbd: Revert "drbd: Create new current UUID as late as possible"
brd: support discard
Revert "writeback: fix WB_SYNC_NONE writeback from umount"
Revert "writeback: ensure that WB_SYNC_NONE writeback with sb pinned is sync"
...
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/fs-writeback.c | 64 | ||||
| -rw-r--r-- | fs/pipe.c | 77 | ||||
| -rw-r--r-- | fs/splice.c | 2 | ||||
| -rw-r--r-- | fs/sync.c | 2 |
4 files changed, 71 insertions, 74 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ea8592b9069..1d1088f48bc 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -45,7 +45,6 @@ struct wb_writeback_args { | |||
| 45 | unsigned int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
| 46 | unsigned int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
| 47 | unsigned int for_background:1; | 47 | unsigned int for_background:1; |
| 48 | unsigned int sb_pinned:1; | ||
| 49 | }; | 48 | }; |
| 50 | 49 | ||
| 51 | /* | 50 | /* |
| @@ -193,8 +192,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) | |||
| 193 | } | 192 | } |
| 194 | 193 | ||
| 195 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | 194 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, |
| 196 | struct wb_writeback_args *args, | 195 | struct wb_writeback_args *args) |
| 197 | int wait) | ||
| 198 | { | 196 | { |
| 199 | struct bdi_work *work; | 197 | struct bdi_work *work; |
| 200 | 198 | ||
| @@ -206,8 +204,6 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | |||
| 206 | if (work) { | 204 | if (work) { |
| 207 | bdi_work_init(work, args); | 205 | bdi_work_init(work, args); |
| 208 | bdi_queue_work(bdi, work); | 206 | bdi_queue_work(bdi, work); |
| 209 | if (wait) | ||
| 210 | bdi_wait_on_work_clear(work); | ||
| 211 | } else { | 207 | } else { |
| 212 | struct bdi_writeback *wb = &bdi->wb; | 208 | struct bdi_writeback *wb = &bdi->wb; |
| 213 | 209 | ||
| @@ -234,11 +230,6 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
| 234 | .sync_mode = WB_SYNC_ALL, | 230 | .sync_mode = WB_SYNC_ALL, |
| 235 | .nr_pages = LONG_MAX, | 231 | .nr_pages = LONG_MAX, |
| 236 | .range_cyclic = 0, | 232 | .range_cyclic = 0, |
| 237 | /* | ||
| 238 | * Setting sb_pinned is not necessary for WB_SYNC_ALL, but | ||
| 239 | * lets make it explicitly clear. | ||
| 240 | */ | ||
| 241 | .sb_pinned = 1, | ||
| 242 | }; | 233 | }; |
| 243 | struct bdi_work work; | 234 | struct bdi_work work; |
| 244 | 235 | ||
| @@ -254,23 +245,21 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, | |||
| 254 | * @bdi: the backing device to write from | 245 | * @bdi: the backing device to write from |
| 255 | * @sb: write inodes from this super_block | 246 | * @sb: write inodes from this super_block |
| 256 | * @nr_pages: the number of pages to write | 247 | * @nr_pages: the number of pages to write |
| 257 | * @sb_locked: caller already holds sb umount sem. | ||
| 258 | * | 248 | * |
| 259 | * Description: | 249 | * Description: |
| 260 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | 250 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only |
| 261 | * started when this function returns, we make no guarentees on | 251 | * started when this function returns, we make no guarentees on |
| 262 | * completion. Caller specifies whether sb umount sem is held already or not. | 252 | * completion. Caller need not hold sb s_umount semaphore. |
| 263 | * | 253 | * |
| 264 | */ | 254 | */ |
| 265 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 255 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, |
| 266 | long nr_pages, int sb_locked) | 256 | long nr_pages) |
| 267 | { | 257 | { |
| 268 | struct wb_writeback_args args = { | 258 | struct wb_writeback_args args = { |
| 269 | .sb = sb, | 259 | .sb = sb, |
| 270 | .sync_mode = WB_SYNC_NONE, | 260 | .sync_mode = WB_SYNC_NONE, |
| 271 | .nr_pages = nr_pages, | 261 | .nr_pages = nr_pages, |
| 272 | .range_cyclic = 1, | 262 | .range_cyclic = 1, |
| 273 | .sb_pinned = sb_locked, | ||
| 274 | }; | 263 | }; |
| 275 | 264 | ||
| 276 | /* | 265 | /* |
| @@ -282,7 +271,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | |||
| 282 | args.for_background = 1; | 271 | args.for_background = 1; |
| 283 | } | 272 | } |
| 284 | 273 | ||
| 285 | bdi_alloc_queue_work(bdi, &args, sb_locked); | 274 | bdi_alloc_queue_work(bdi, &args); |
| 286 | } | 275 | } |
| 287 | 276 | ||
| 288 | /* | 277 | /* |
| @@ -595,7 +584,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, | |||
| 595 | /* | 584 | /* |
| 596 | * Caller must already hold the ref for this | 585 | * Caller must already hold the ref for this |
| 597 | */ | 586 | */ |
| 598 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { | 587 | if (wbc->sync_mode == WB_SYNC_ALL) { |
| 599 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 588 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
| 600 | return SB_NOT_PINNED; | 589 | return SB_NOT_PINNED; |
| 601 | } | 590 | } |
| @@ -769,7 +758,6 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
| 769 | .for_kupdate = args->for_kupdate, | 758 | .for_kupdate = args->for_kupdate, |
| 770 | .for_background = args->for_background, | 759 | .for_background = args->for_background, |
| 771 | .range_cyclic = args->range_cyclic, | 760 | .range_cyclic = args->range_cyclic, |
| 772 | .sb_pinned = args->sb_pinned, | ||
| 773 | }; | 761 | }; |
| 774 | unsigned long oldest_jif; | 762 | unsigned long oldest_jif; |
| 775 | long wrote = 0; | 763 | long wrote = 0; |
| @@ -912,7 +900,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
| 912 | 900 | ||
| 913 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 901 | while ((work = get_next_work_item(bdi, wb)) != NULL) { |
| 914 | struct wb_writeback_args args = work->args; | 902 | struct wb_writeback_args args = work->args; |
| 915 | int post_clear; | ||
| 916 | 903 | ||
| 917 | /* | 904 | /* |
| 918 | * Override sync mode, in case we must wait for completion | 905 | * Override sync mode, in case we must wait for completion |
| @@ -920,13 +907,11 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
| 920 | if (force_wait) | 907 | if (force_wait) |
| 921 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | 908 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; |
| 922 | 909 | ||
| 923 | post_clear = WB_SYNC_ALL || args.sb_pinned; | ||
| 924 | |||
| 925 | /* | 910 | /* |
| 926 | * If this isn't a data integrity operation, just notify | 911 | * If this isn't a data integrity operation, just notify |
| 927 | * that we have seen this work and we are now starting it. | 912 | * that we have seen this work and we are now starting it. |
| 928 | */ | 913 | */ |
| 929 | if (!post_clear) | 914 | if (args.sync_mode == WB_SYNC_NONE) |
| 930 | wb_clear_pending(wb, work); | 915 | wb_clear_pending(wb, work); |
| 931 | 916 | ||
| 932 | wrote += wb_writeback(wb, &args); | 917 | wrote += wb_writeback(wb, &args); |
| @@ -935,7 +920,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
| 935 | * This is a data integrity writeback, so only do the | 920 | * This is a data integrity writeback, so only do the |
| 936 | * notification when we have completed the work. | 921 | * notification when we have completed the work. |
| 937 | */ | 922 | */ |
| 938 | if (post_clear) | 923 | if (args.sync_mode == WB_SYNC_ALL) |
| 939 | wb_clear_pending(wb, work); | 924 | wb_clear_pending(wb, work); |
| 940 | } | 925 | } |
| 941 | 926 | ||
| @@ -1011,7 +996,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages) | |||
| 1011 | if (!bdi_has_dirty_io(bdi)) | 996 | if (!bdi_has_dirty_io(bdi)) |
| 1012 | continue; | 997 | continue; |
| 1013 | 998 | ||
| 1014 | bdi_alloc_queue_work(bdi, &args, 0); | 999 | bdi_alloc_queue_work(bdi, &args); |
| 1015 | } | 1000 | } |
| 1016 | 1001 | ||
| 1017 | rcu_read_unlock(); | 1002 | rcu_read_unlock(); |
| @@ -1220,18 +1205,6 @@ static void wait_sb_inodes(struct super_block *sb) | |||
| 1220 | iput(old_inode); | 1205 | iput(old_inode); |
| 1221 | } | 1206 | } |
| 1222 | 1207 | ||
| 1223 | static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) | ||
| 1224 | { | ||
| 1225 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
| 1226 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
| 1227 | long nr_to_write; | ||
| 1228 | |||
| 1229 | nr_to_write = nr_dirty + nr_unstable + | ||
| 1230 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
| 1231 | |||
| 1232 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | /** | 1208 | /** |
| 1236 | * writeback_inodes_sb - writeback dirty inodes from given super_block | 1209 | * writeback_inodes_sb - writeback dirty inodes from given super_block |
| 1237 | * @sb: the superblock | 1210 | * @sb: the superblock |
| @@ -1243,21 +1216,16 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) | |||
| 1243 | */ | 1216 | */ |
| 1244 | void writeback_inodes_sb(struct super_block *sb) | 1217 | void writeback_inodes_sb(struct super_block *sb) |
| 1245 | { | 1218 | { |
| 1246 | __writeback_inodes_sb(sb, 0); | 1219 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); |
| 1247 | } | 1220 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); |
| 1248 | EXPORT_SYMBOL(writeback_inodes_sb); | 1221 | long nr_to_write; |
| 1249 | 1222 | ||
| 1250 | /** | 1223 | nr_to_write = nr_dirty + nr_unstable + |
| 1251 | * writeback_inodes_sb_locked - writeback dirty inodes from given super_block | 1224 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
| 1252 | * @sb: the superblock | 1225 | |
| 1253 | * | 1226 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write); |
| 1254 | * Like writeback_inodes_sb(), except the caller already holds the | ||
| 1255 | * sb umount sem. | ||
| 1256 | */ | ||
| 1257 | void writeback_inodes_sb_locked(struct super_block *sb) | ||
| 1258 | { | ||
| 1259 | __writeback_inodes_sb(sb, 1); | ||
| 1260 | } | 1227 | } |
| 1228 | EXPORT_SYMBOL(writeback_inodes_sb); | ||
| 1261 | 1229 | ||
| 1262 | /** | 1230 | /** |
| 1263 | * writeback_inodes_sb_if_idle - start writeback if none underway | 1231 | * writeback_inodes_sb_if_idle - start writeback if none underway |
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
| @@ -26,9 +26,14 @@ | |||
| 26 | 26 | ||
| 27 | /* | 27 | /* |
| 28 | * The max size that a non-root user is allowed to grow the pipe. Can | 28 | * The max size that a non-root user is allowed to grow the pipe. Can |
| 29 | * be set by root in /proc/sys/fs/pipe-max-pages | 29 | * be set by root in /proc/sys/fs/pipe-max-size |
| 30 | */ | 30 | */ |
| 31 | unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; | 31 | unsigned int pipe_max_size = 1048576; |
| 32 | |||
| 33 | /* | ||
| 34 | * Minimum pipe size, as required by POSIX | ||
| 35 | */ | ||
| 36 | unsigned int pipe_min_size = PAGE_SIZE; | ||
| 32 | 37 | ||
| 33 | /* | 38 | /* |
| 34 | * We use a start+len construction, which provides full use of the | 39 | * We use a start+len construction, which provides full use of the |
| @@ -1118,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) | |||
| 1118 | * Allocate a new array of pipe buffers and copy the info over. Returns the | 1123 | * Allocate a new array of pipe buffers and copy the info over. Returns the |
| 1119 | * pipe size if successful, or return -ERROR on error. | 1124 | * pipe size if successful, or return -ERROR on error. |
| 1120 | */ | 1125 | */ |
| 1121 | static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) | 1126 | static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages) |
| 1122 | { | 1127 | { |
| 1123 | struct pipe_buffer *bufs; | 1128 | struct pipe_buffer *bufs; |
| 1124 | 1129 | ||
| 1125 | /* | 1130 | /* |
| 1126 | * Must be a power-of-2 currently | ||
| 1127 | */ | ||
| 1128 | if (!is_power_of_2(arg)) | ||
| 1129 | return -EINVAL; | ||
| 1130 | |||
| 1131 | /* | ||
| 1132 | * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't | 1131 | * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't |
| 1133 | * expect a lot of shrink+grow operations, just free and allocate | 1132 | * expect a lot of shrink+grow operations, just free and allocate |
| 1134 | * again like we would do for growing. If the pipe currently | 1133 | * again like we would do for growing. If the pipe currently |
| 1135 | * contains more buffers than arg, then return busy. | 1134 | * contains more buffers than arg, then return busy. |
| 1136 | */ | 1135 | */ |
| 1137 | if (arg < pipe->nrbufs) | 1136 | if (nr_pages < pipe->nrbufs) |
| 1138 | return -EBUSY; | 1137 | return -EBUSY; |
| 1139 | 1138 | ||
| 1140 | bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); | 1139 | bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL); |
| 1141 | if (unlikely(!bufs)) | 1140 | if (unlikely(!bufs)) |
| 1142 | return -ENOMEM; | 1141 | return -ENOMEM; |
| 1143 | 1142 | ||
| @@ -1158,8 +1157,37 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) | |||
| 1158 | pipe->curbuf = 0; | 1157 | pipe->curbuf = 0; |
| 1159 | kfree(pipe->bufs); | 1158 | kfree(pipe->bufs); |
| 1160 | pipe->bufs = bufs; | 1159 | pipe->bufs = bufs; |
| 1161 | pipe->buffers = arg; | 1160 | pipe->buffers = nr_pages; |
| 1162 | return arg; | 1161 | return nr_pages * PAGE_SIZE; |
| 1162 | } | ||
| 1163 | |||
| 1164 | /* | ||
| 1165 | * Currently we rely on the pipe array holding a power-of-2 number | ||
| 1166 | * of pages. | ||
| 1167 | */ | ||
| 1168 | static inline unsigned int round_pipe_size(unsigned int size) | ||
| 1169 | { | ||
| 1170 | unsigned long nr_pages; | ||
| 1171 | |||
| 1172 | nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 1173 | return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; | ||
| 1174 | } | ||
| 1175 | |||
| 1176 | /* | ||
| 1177 | * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax | ||
| 1178 | * will return an error. | ||
| 1179 | */ | ||
| 1180 | int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, | ||
| 1181 | size_t *lenp, loff_t *ppos) | ||
| 1182 | { | ||
| 1183 | int ret; | ||
| 1184 | |||
| 1185 | ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); | ||
| 1186 | if (ret < 0 || !write) | ||
| 1187 | return ret; | ||
| 1188 | |||
| 1189 | pipe_max_size = round_pipe_size(pipe_max_size); | ||
| 1190 | return ret; | ||
| 1163 | } | 1191 | } |
| 1164 | 1192 | ||
| 1165 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | 1193 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) |
| @@ -1174,23 +1202,24 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 1174 | mutex_lock(&pipe->inode->i_mutex); | 1202 | mutex_lock(&pipe->inode->i_mutex); |
| 1175 | 1203 | ||
| 1176 | switch (cmd) { | 1204 | switch (cmd) { |
| 1177 | case F_SETPIPE_SZ: | 1205 | case F_SETPIPE_SZ: { |
| 1178 | if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) { | 1206 | unsigned int size, nr_pages; |
| 1179 | ret = -EINVAL; | 1207 | |
| 1208 | size = round_pipe_size(arg); | ||
| 1209 | nr_pages = size >> PAGE_SHIFT; | ||
| 1210 | |||
| 1211 | if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) { | ||
| 1212 | ret = -EPERM; | ||
| 1180 | goto out; | 1213 | goto out; |
| 1181 | } | 1214 | } else if (nr_pages < PAGE_SIZE) { |
| 1182 | /* | ||
| 1183 | * The pipe needs to be at least 2 pages large to | ||
| 1184 | * guarantee POSIX behaviour. | ||
| 1185 | */ | ||
| 1186 | if (arg < 2) { | ||
| 1187 | ret = -EINVAL; | 1215 | ret = -EINVAL; |
| 1188 | goto out; | 1216 | goto out; |
| 1189 | } | 1217 | } |
| 1190 | ret = pipe_set_size(pipe, arg); | 1218 | ret = pipe_set_size(pipe, nr_pages); |
| 1191 | break; | 1219 | break; |
| 1220 | } | ||
| 1192 | case F_GETPIPE_SZ: | 1221 | case F_GETPIPE_SZ: |
| 1193 | ret = pipe->buffers; | 1222 | ret = pipe->buffers * PAGE_SIZE; |
| 1194 | break; | 1223 | break; |
| 1195 | default: | 1224 | default: |
| 1196 | ret = -EINVAL; | 1225 | ret = -EINVAL; |
diff --git a/fs/splice.c b/fs/splice.c index ac22b00d86c..740e6b9faf7 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
| 354 | break; | 354 | break; |
| 355 | 355 | ||
| 356 | error = add_to_page_cache_lru(page, mapping, index, | 356 | error = add_to_page_cache_lru(page, mapping, index, |
| 357 | mapping_gfp_mask(mapping)); | 357 | GFP_KERNEL); |
| 358 | if (unlikely(error)) { | 358 | if (unlikely(error)) { |
| 359 | page_cache_release(page); | 359 | page_cache_release(page); |
| 360 | if (error == -EEXIST) | 360 | if (error == -EEXIST) |
diff --git a/fs/sync.c b/fs/sync.c --- a/fs/sync.c +++ b/fs/sync.c | |||
| @@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) | |||
| 42 | if (wait) | 42 | if (wait) |
| 43 | sync_inodes_sb(sb); | 43 | sync_inodes_sb(sb); |
| 44 | else | 44 | else |
| 45 | writeback_inodes_sb_locked(sb); | 45 | writeback_inodes_sb(sb); |
| 46 | 46 | ||
| 47 | if (sb->s_op->sync_fs) | 47 | if (sb->s_op->sync_fs) |
| 48 | sb->s_op->sync_fs(sb, wait); | 48 | sb->s_op->sync_fs(sb, wait); |
