aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-06-04 18:37:44 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-06-04 18:37:44 -0400
commitd2dd328b7f7bc6cebe167648289337755944ad2a (patch)
tree5d664a2db1ac209f7537452ddc02597972f7aa37 /fs
parentc1518f12bab97a6d409a25aaccb02dc8895800f3 (diff)
parent1abec4fdbb142e3ccb6ce99832fae42129134a96 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (27 commits) block: make blk_init_free_list and elevator_init idempotent block: avoid unconditionally freeing previously allocated request_queue pipe: change /proc/sys/fs/pipe-max-pages to byte sized interface pipe: change the privilege required for growing a pipe beyond system max pipe: adjust minimum pipe size to 1 page block: disable preemption before using sched_clock() cciss: call BUG() earlier Preparing 8.3.8rc2 drbd: Reduce verbosity drbd: use drbd specific ratelimit instead of global printk_ratelimit drbd: fix hang on local read errors while disconnected drbd: Removed the now empty w_io_error() function drbd: removed duplicated #includes drbd: improve usage of MSG_MORE drbd: need to set socket bufsize early to take effect drbd: improve network latency, TCP_QUICKACK drbd: Revert "drbd: Create new current UUID as late as possible" brd: support discard Revert "writeback: fix WB_SYNC_NONE writeback from umount" Revert "writeback: ensure that WB_SYNC_NONE writeback with sb pinned is sync" ...
Diffstat (limited to 'fs')
-rw-r--r--fs/fs-writeback.c64
-rw-r--r--fs/pipe.c77
-rw-r--r--fs/splice.c2
-rw-r--r--fs/sync.c2
4 files changed, 71 insertions, 74 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ea8592b90696..1d1088f48bc2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,7 +45,6 @@ struct wb_writeback_args {
45 unsigned int for_kupdate:1; 45 unsigned int for_kupdate:1;
46 unsigned int range_cyclic:1; 46 unsigned int range_cyclic:1;
47 unsigned int for_background:1; 47 unsigned int for_background:1;
48 unsigned int sb_pinned:1;
49}; 48};
50 49
51/* 50/*
@@ -193,8 +192,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
193} 192}
194 193
195static void bdi_alloc_queue_work(struct backing_dev_info *bdi, 194static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
196 struct wb_writeback_args *args, 195 struct wb_writeback_args *args)
197 int wait)
198{ 196{
199 struct bdi_work *work; 197 struct bdi_work *work;
200 198
@@ -206,8 +204,6 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
206 if (work) { 204 if (work) {
207 bdi_work_init(work, args); 205 bdi_work_init(work, args);
208 bdi_queue_work(bdi, work); 206 bdi_queue_work(bdi, work);
209 if (wait)
210 bdi_wait_on_work_clear(work);
211 } else { 207 } else {
212 struct bdi_writeback *wb = &bdi->wb; 208 struct bdi_writeback *wb = &bdi->wb;
213 209
@@ -234,11 +230,6 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
234 .sync_mode = WB_SYNC_ALL, 230 .sync_mode = WB_SYNC_ALL,
235 .nr_pages = LONG_MAX, 231 .nr_pages = LONG_MAX,
236 .range_cyclic = 0, 232 .range_cyclic = 0,
237 /*
238 * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
239 * lets make it explicitly clear.
240 */
241 .sb_pinned = 1,
242 }; 233 };
243 struct bdi_work work; 234 struct bdi_work work;
244 235
@@ -254,23 +245,21 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
254 * @bdi: the backing device to write from 245 * @bdi: the backing device to write from
255 * @sb: write inodes from this super_block 246 * @sb: write inodes from this super_block
256 * @nr_pages: the number of pages to write 247 * @nr_pages: the number of pages to write
257 * @sb_locked: caller already holds sb umount sem.
258 * 248 *
259 * Description: 249 * Description:
260 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 250 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
261 * started when this function returns, we make no guarentees on 251 * started when this function returns, we make no guarentees on
262 * completion. Caller specifies whether sb umount sem is held already or not. 252 * completion. Caller need not hold sb s_umount semaphore.
263 * 253 *
264 */ 254 */
265void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, 255void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
266 long nr_pages, int sb_locked) 256 long nr_pages)
267{ 257{
268 struct wb_writeback_args args = { 258 struct wb_writeback_args args = {
269 .sb = sb, 259 .sb = sb,
270 .sync_mode = WB_SYNC_NONE, 260 .sync_mode = WB_SYNC_NONE,
271 .nr_pages = nr_pages, 261 .nr_pages = nr_pages,
272 .range_cyclic = 1, 262 .range_cyclic = 1,
273 .sb_pinned = sb_locked,
274 }; 263 };
275 264
276 /* 265 /*
@@ -282,7 +271,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
282 args.for_background = 1; 271 args.for_background = 1;
283 } 272 }
284 273
285 bdi_alloc_queue_work(bdi, &args, sb_locked); 274 bdi_alloc_queue_work(bdi, &args);
286} 275}
287 276
288/* 277/*
@@ -595,7 +584,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
595 /* 584 /*
596 * Caller must already hold the ref for this 585 * Caller must already hold the ref for this
597 */ 586 */
598 if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { 587 if (wbc->sync_mode == WB_SYNC_ALL) {
599 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 588 WARN_ON(!rwsem_is_locked(&sb->s_umount));
600 return SB_NOT_PINNED; 589 return SB_NOT_PINNED;
601 } 590 }
@@ -769,7 +758,6 @@ static long wb_writeback(struct bdi_writeback *wb,
769 .for_kupdate = args->for_kupdate, 758 .for_kupdate = args->for_kupdate,
770 .for_background = args->for_background, 759 .for_background = args->for_background,
771 .range_cyclic = args->range_cyclic, 760 .range_cyclic = args->range_cyclic,
772 .sb_pinned = args->sb_pinned,
773 }; 761 };
774 unsigned long oldest_jif; 762 unsigned long oldest_jif;
775 long wrote = 0; 763 long wrote = 0;
@@ -912,7 +900,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
912 900
913 while ((work = get_next_work_item(bdi, wb)) != NULL) { 901 while ((work = get_next_work_item(bdi, wb)) != NULL) {
914 struct wb_writeback_args args = work->args; 902 struct wb_writeback_args args = work->args;
915 int post_clear;
916 903
917 /* 904 /*
918 * Override sync mode, in case we must wait for completion 905 * Override sync mode, in case we must wait for completion
@@ -920,13 +907,11 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
920 if (force_wait) 907 if (force_wait)
921 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; 908 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
922 909
923 post_clear = WB_SYNC_ALL || args.sb_pinned;
924
925 /* 910 /*
926 * If this isn't a data integrity operation, just notify 911 * If this isn't a data integrity operation, just notify
927 * that we have seen this work and we are now starting it. 912 * that we have seen this work and we are now starting it.
928 */ 913 */
929 if (!post_clear) 914 if (args.sync_mode == WB_SYNC_NONE)
930 wb_clear_pending(wb, work); 915 wb_clear_pending(wb, work);
931 916
932 wrote += wb_writeback(wb, &args); 917 wrote += wb_writeback(wb, &args);
@@ -935,7 +920,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
935 * This is a data integrity writeback, so only do the 920 * This is a data integrity writeback, so only do the
936 * notification when we have completed the work. 921 * notification when we have completed the work.
937 */ 922 */
938 if (post_clear) 923 if (args.sync_mode == WB_SYNC_ALL)
939 wb_clear_pending(wb, work); 924 wb_clear_pending(wb, work);
940 } 925 }
941 926
@@ -1011,7 +996,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages)
1011 if (!bdi_has_dirty_io(bdi)) 996 if (!bdi_has_dirty_io(bdi))
1012 continue; 997 continue;
1013 998
1014 bdi_alloc_queue_work(bdi, &args, 0); 999 bdi_alloc_queue_work(bdi, &args);
1015 } 1000 }
1016 1001
1017 rcu_read_unlock(); 1002 rcu_read_unlock();
@@ -1220,18 +1205,6 @@ static void wait_sb_inodes(struct super_block *sb)
1220 iput(old_inode); 1205 iput(old_inode);
1221} 1206}
1222 1207
1223static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
1224{
1225 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1226 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1227 long nr_to_write;
1228
1229 nr_to_write = nr_dirty + nr_unstable +
1230 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1231
1232 bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
1233}
1234
1235/** 1208/**
1236 * writeback_inodes_sb - writeback dirty inodes from given super_block 1209 * writeback_inodes_sb - writeback dirty inodes from given super_block
1237 * @sb: the superblock 1210 * @sb: the superblock
@@ -1243,21 +1216,16 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
1243 */ 1216 */
1244void writeback_inodes_sb(struct super_block *sb) 1217void writeback_inodes_sb(struct super_block *sb)
1245{ 1218{
1246 __writeback_inodes_sb(sb, 0); 1219 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1247} 1220 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1248EXPORT_SYMBOL(writeback_inodes_sb); 1221 long nr_to_write;
1249 1222
1250/** 1223 nr_to_write = nr_dirty + nr_unstable +
1251 * writeback_inodes_sb_locked - writeback dirty inodes from given super_block 1224 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1252 * @sb: the superblock 1225
1253 * 1226 bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
1254 * Like writeback_inodes_sb(), except the caller already holds the
1255 * sb umount sem.
1256 */
1257void writeback_inodes_sb_locked(struct super_block *sb)
1258{
1259 __writeback_inodes_sb(sb, 1);
1260} 1227}
1228EXPORT_SYMBOL(writeback_inodes_sb);
1261 1229
1262/** 1230/**
1263 * writeback_inodes_sb_if_idle - start writeback if none underway 1231 * writeback_inodes_sb_if_idle - start writeback if none underway
diff --git a/fs/pipe.c b/fs/pipe.c
index db6eaaba0dd8..69c4c7c13ea9 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,9 +26,14 @@
26 26
27/* 27/*
28 * The max size that a non-root user is allowed to grow the pipe. Can 28 * The max size that a non-root user is allowed to grow the pipe. Can
29 * be set by root in /proc/sys/fs/pipe-max-pages 29 * be set by root in /proc/sys/fs/pipe-max-size
30 */ 30 */
31unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; 31unsigned int pipe_max_size = 1048576;
32
33/*
34 * Minimum pipe size, as required by POSIX
35 */
36unsigned int pipe_min_size = PAGE_SIZE;
32 37
33/* 38/*
34 * We use a start+len construction, which provides full use of the 39 * We use a start+len construction, which provides full use of the
@@ -1118,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
1118 * Allocate a new array of pipe buffers and copy the info over. Returns the 1123 * Allocate a new array of pipe buffers and copy the info over. Returns the
1119 * pipe size if successful, or return -ERROR on error. 1124 * pipe size if successful, or return -ERROR on error.
1120 */ 1125 */
1121static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) 1126static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
1122{ 1127{
1123 struct pipe_buffer *bufs; 1128 struct pipe_buffer *bufs;
1124 1129
1125 /* 1130 /*
1126 * Must be a power-of-2 currently
1127 */
1128 if (!is_power_of_2(arg))
1129 return -EINVAL;
1130
1131 /*
1132 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't 1131 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
1133 * expect a lot of shrink+grow operations, just free and allocate 1132 * expect a lot of shrink+grow operations, just free and allocate
1134 * again like we would do for growing. If the pipe currently 1133 * again like we would do for growing. If the pipe currently
1135 * contains more buffers than arg, then return busy. 1134 * contains more buffers than arg, then return busy.
1136 */ 1135 */
1137 if (arg < pipe->nrbufs) 1136 if (nr_pages < pipe->nrbufs)
1138 return -EBUSY; 1137 return -EBUSY;
1139 1138
1140 bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); 1139 bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
1141 if (unlikely(!bufs)) 1140 if (unlikely(!bufs))
1142 return -ENOMEM; 1141 return -ENOMEM;
1143 1142
@@ -1158,8 +1157,37 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
1158 pipe->curbuf = 0; 1157 pipe->curbuf = 0;
1159 kfree(pipe->bufs); 1158 kfree(pipe->bufs);
1160 pipe->bufs = bufs; 1159 pipe->bufs = bufs;
1161 pipe->buffers = arg; 1160 pipe->buffers = nr_pages;
1162 return arg; 1161 return nr_pages * PAGE_SIZE;
1162}
1163
1164/*
1165 * Currently we rely on the pipe array holding a power-of-2 number
1166 * of pages.
1167 */
1168static inline unsigned int round_pipe_size(unsigned int size)
1169{
1170 unsigned long nr_pages;
1171
1172 nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
1173 return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
1174}
1175
1176/*
1177 * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
1178 * will return an error.
1179 */
1180int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
1181 size_t *lenp, loff_t *ppos)
1182{
1183 int ret;
1184
1185 ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
1186 if (ret < 0 || !write)
1187 return ret;
1188
1189 pipe_max_size = round_pipe_size(pipe_max_size);
1190 return ret;
1163} 1191}
1164 1192
1165long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) 1193long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1174,23 +1202,24 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1174 mutex_lock(&pipe->inode->i_mutex); 1202 mutex_lock(&pipe->inode->i_mutex);
1175 1203
1176 switch (cmd) { 1204 switch (cmd) {
1177 case F_SETPIPE_SZ: 1205 case F_SETPIPE_SZ: {
1178 if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) { 1206 unsigned int size, nr_pages;
1179 ret = -EINVAL; 1207
1208 size = round_pipe_size(arg);
1209 nr_pages = size >> PAGE_SHIFT;
1210
1211 if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
1212 ret = -EPERM;
1180 goto out; 1213 goto out;
1181 } 1214 } else if (nr_pages < PAGE_SIZE) {
1182 /*
1183 * The pipe needs to be at least 2 pages large to
1184 * guarantee POSIX behaviour.
1185 */
1186 if (arg < 2) {
1187 ret = -EINVAL; 1215 ret = -EINVAL;
1188 goto out; 1216 goto out;
1189 } 1217 }
1190 ret = pipe_set_size(pipe, arg); 1218 ret = pipe_set_size(pipe, nr_pages);
1191 break; 1219 break;
1220 }
1192 case F_GETPIPE_SZ: 1221 case F_GETPIPE_SZ:
1193 ret = pipe->buffers; 1222 ret = pipe->buffers * PAGE_SIZE;
1194 break; 1223 break;
1195 default: 1224 default:
1196 ret = -EINVAL; 1225 ret = -EINVAL;
diff --git a/fs/splice.c b/fs/splice.c
index ac22b00d86c3..740e6b9faf7a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
354 break; 354 break;
355 355
356 error = add_to_page_cache_lru(page, mapping, index, 356 error = add_to_page_cache_lru(page, mapping, index,
357 mapping_gfp_mask(mapping)); 357 GFP_KERNEL);
358 if (unlikely(error)) { 358 if (unlikely(error)) {
359 page_cache_release(page); 359 page_cache_release(page);
360 if (error == -EEXIST) 360 if (error == -EEXIST)
diff --git a/fs/sync.c b/fs/sync.c
index c9f83f480ec5..15aa6f03b2da 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
42 if (wait) 42 if (wait)
43 sync_inodes_sb(sb); 43 sync_inodes_sb(sb);
44 else 44 else
45 writeback_inodes_sb_locked(sb); 45 writeback_inodes_sb(sb);
46 46
47 if (sb->s_op->sync_fs) 47 if (sb->s_op->sync_fs)
48 sb->s_op->sync_fs(sb, wait); 48 sb->s_op->sync_fs(sb, wait);