author    Justin TerAvest <teravest@google.com>    2011-07-12 02:31:45 -0400
committer Jens Axboe <jaxboe@fusionio.com>         2011-07-12 02:35:10 -0400
commit    4aede84b33d6beb401136a3deca0651ae07c5e99 (patch)
tree      831266dbb15227584da5011ae4bb1e5038a69677
parent    a07405b7802691d29ab3b23bdc76ee6d006aad0b (diff)
fixlet: Remove fs_excl from struct task.
fs_excl is a poor man's priority inheritance for filesystems to hint to the
block layer that an operation is important. It was never clearly specified,
not widely adopted, and will not prevent starvation in many cases (like
across cgroups).

fs_excl was introduced with the time sliced CFQ IO scheduler, to indicate
when a process held FS exclusive resources and thus needed a boost. It
doesn't cover all file systems, and it was never fully complete. Let's kill
it.

Signed-off-by: Justin TerAvest <teravest@google.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r--  block/cfq-iosched.c        28
-rw-r--r--  fs/reiserfs/journal.c      13
-rw-r--r--  fs/super.c                  4
-rw-r--r--  include/linux/fs.h          4
-rw-r--r--  include/linux/init_task.h   1
-rw-r--r--  include/linux/sched.h       1
-rw-r--r--  kernel/exit.c               1
-rw-r--r--  kernel/fork.c               1
8 files changed, 1 insertion(+), 52 deletions(-)
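For readers skimming the diff below: the mechanism being removed is small. Filesystems bumped an atomic counter on the current task around "exclusive" work (get_fs_excl()/put_fs_excl()), and CFQ's cfq_prio_boost() temporarily raised such a task's queue to normal best-effort priority while the counter was non-zero. What follows is a minimal userspace sketch of that behaviour, not kernel code: the struct names task_model and cfq_queue_model, the prio_boost() wrapper, and the main() driver are illustrative stand-ins; only the boost/unboost logic and the IOPRIO constants mirror the removed kernel source.

/* Userspace model of the removed fs_excl hint -- illustrative only. */
#include <stdatomic.h>
#include <stdio.h>

#define IOPRIO_CLASS_BE   2     /* best-effort class (values as in ioprio.h) */
#define IOPRIO_CLASS_IDLE 3
#define IOPRIO_NORM       4     /* default best-effort priority level */

struct task_model {             /* stand-in for struct task_struct */
        atomic_int fs_excl;     /* was: atomic_t fs_excl */
};

struct cfq_queue_model {        /* stand-in for struct cfq_queue */
        int ioprio, org_ioprio;
        int ioprio_class, org_ioprio_class;
};

static struct task_model current_task;

/* The three macros removed from include/linux/fs.h, written as functions. */
static void get_fs_excl(void) { atomic_fetch_add(&current_task.fs_excl, 1); }
static void put_fs_excl(void) { atomic_fetch_sub(&current_task.fs_excl, 1); }
static int  has_fs_excl(void) { return atomic_load(&current_task.fs_excl); }

/* Mirrors the removed cfq_prio_boost(): while the hint is held, lift idle
 * queues to best-effort and cap the priority at normal; once the hint is
 * dropped, restore the original class and priority. */
static void prio_boost(struct cfq_queue_model *cfqq)
{
        if (has_fs_excl()) {
                if (cfqq->ioprio_class == IOPRIO_CLASS_IDLE)
                        cfqq->ioprio_class = IOPRIO_CLASS_BE;
                if (cfqq->ioprio > IOPRIO_NORM)
                        cfqq->ioprio = IOPRIO_NORM;
        } else {
                cfqq->ioprio_class = cfqq->org_ioprio_class;
                cfqq->ioprio = cfqq->org_ioprio;
        }
}

int main(void)
{
        struct cfq_queue_model q = {
                .ioprio = 7, .org_ioprio = 7,
                .ioprio_class = IOPRIO_CLASS_IDLE,
                .org_ioprio_class = IOPRIO_CLASS_IDLE,
        };

        get_fs_excl();          /* filesystem enters "exclusive" work */
        prio_boost(&q);
        printf("boosted:  class=%d prio=%d\n", q.ioprio_class, q.ioprio);

        put_fs_excl();          /* exclusive work done */
        prio_boost(&q);
        printf("restored: class=%d prio=%d\n", q.ioprio_class, q.ioprio);
        return 0;
}

The patch removes the counter and the boost outright rather than fixing them, since, as the message notes, the hint never delivered real priority inheritance.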
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 762bd509b71b..d8b108737b72 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -134,7 +134,7 @@ struct cfq_queue {
 
         /* io prio of this group */
         unsigned short ioprio, org_ioprio;
-        unsigned short ioprio_class, org_ioprio_class;
+        unsigned short ioprio_class;
 
         pid_t pid;
 
@@ -2869,7 +2869,6 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
          * elevate the priority of this queue
          */
         cfqq->org_ioprio = cfqq->ioprio;
-        cfqq->org_ioprio_class = cfqq->ioprio_class;
         cfq_clear_cfqq_prio_changed(cfqq);
 }
 
@@ -3593,30 +3592,6 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                 cfq_schedule_dispatch(cfqd);
 }
 
-/*
- * we temporarily boost lower priority queues if they are holding fs exclusive
- * resources. they are boosted to normal prio (CLASS_BE/4)
- */
-static void cfq_prio_boost(struct cfq_queue *cfqq)
-{
-        if (has_fs_excl()) {
-                /*
-                 * boost idle prio on transactions that would lock out other
-                 * users of the filesystem
-                 */
-                if (cfq_class_idle(cfqq))
-                        cfqq->ioprio_class = IOPRIO_CLASS_BE;
-                if (cfqq->ioprio > IOPRIO_NORM)
-                        cfqq->ioprio = IOPRIO_NORM;
-        } else {
-                /*
-                 * unboost the queue (if needed)
-                 */
-                cfqq->ioprio_class = cfqq->org_ioprio_class;
-                cfqq->ioprio = cfqq->org_ioprio;
-        }
-}
-
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
         if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -3647,7 +3622,6 @@ static int cfq_may_queue(struct request_queue *q, int rw)
         cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
         if (cfqq) {
                 cfq_init_prio_data(cfqq, cic->ioc);
-                cfq_prio_boost(cfqq);
 
                 return __cfq_may_queue(cfqq);
         }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c5e82ece7c6c..a159ba5a35e7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -678,23 +678,19 @@ struct buffer_chunk {
 static void write_chunk(struct buffer_chunk *chunk)
 {
         int i;
-        get_fs_excl();
         for (i = 0; i < chunk->nr; i++) {
                 submit_logged_buffer(chunk->bh[i]);
         }
         chunk->nr = 0;
-        put_fs_excl();
 }
 
 static void write_ordered_chunk(struct buffer_chunk *chunk)
 {
         int i;
-        get_fs_excl();
         for (i = 0; i < chunk->nr; i++) {
                 submit_ordered_buffer(chunk->bh[i]);
         }
         chunk->nr = 0;
-        put_fs_excl();
 }
 
 static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
@@ -986,8 +982,6 @@ static int flush_commit_list(struct super_block *s,
                 return 0;
         }
 
-        get_fs_excl();
-
         /* before we can put our commit blocks on disk, we have to make sure everyone older than
         ** us is on disk too
         */
@@ -1145,7 +1139,6 @@ static int flush_commit_list(struct super_block *s,
         if (retval)
                 reiserfs_abort(s, retval, "Journal write error in %s",
                                __func__);
-        put_fs_excl();
         return retval;
 }
 
@@ -1374,8 +1367,6 @@ static int flush_journal_list(struct super_block *s,
                 return 0;
         }
 
-        get_fs_excl();
-
         /* if all the work is already done, get out of here */
         if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
             atomic_read(&(jl->j_commit_left)) <= 0) {
@@ -1597,7 +1588,6 @@ static int flush_journal_list(struct super_block *s,
         put_journal_list(s, jl);
         if (flushall)
                 mutex_unlock(&journal->j_flush_mutex);
-        put_fs_excl();
         return err;
 }
 
@@ -3108,7 +3098,6 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
         th->t_trans_id = journal->j_trans_id;
         unlock_journal(sb);
         INIT_LIST_HEAD(&th->t_list);
-        get_fs_excl();
         return 0;
 
 out_fail:
@@ -3964,7 +3953,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
         flush = flags & FLUSH_ALL;
         wait_on_commit = flags & WAIT;
 
-        put_fs_excl();
         current->journal_info = th->t_handle_save;
         reiserfs_check_lock_depth(sb, "journal end");
         if (journal->j_len == 0) {
@@ -4316,4 +4304,3 @@ void reiserfs_abort_journal(struct super_block *sb, int errno)
         dump_stack();
 #endif
 }
-
diff --git a/fs/super.c b/fs/super.c
index ab3d672db0de..cf12ba50973b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -245,13 +245,11 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
  */
 void lock_super(struct super_block * sb)
 {
-        get_fs_excl();
         mutex_lock(&sb->s_lock);
 }
 
 void unlock_super(struct super_block * sb)
 {
-        put_fs_excl();
         mutex_unlock(&sb->s_lock);
 }
 
@@ -280,7 +278,6 @@ void generic_shutdown_super(struct super_block *sb)
         if (sb->s_root) {
                 shrink_dcache_for_umount(sb);
                 sync_filesystem(sb);
-                get_fs_excl();
                 sb->s_flags &= ~MS_ACTIVE;
 
                 fsnotify_unmount_inodes(&sb->s_inodes);
@@ -295,7 +292,6 @@ void generic_shutdown_super(struct super_block *sb)
295 "Self-destruct in 5 seconds. Have a nice day...\n", 292 "Self-destruct in 5 seconds. Have a nice day...\n",
296 sb->s_id); 293 sb->s_id);
297 } 294 }
298 put_fs_excl();
299 } 295 }
300 spin_lock(&sb_lock); 296 spin_lock(&sb_lock);
301 /* should be initialized for __put_super_and_need_restart() */ 297 /* should be initialized for __put_super_and_need_restart() */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e73e2e9ae33..f6c866c287b5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1453,10 +1453,6 @@ enum {
 #define vfs_check_frozen(sb, level) \
         wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
 
-#define get_fs_excl() atomic_inc(&current->fs_excl)
-#define put_fs_excl() atomic_dec(&current->fs_excl)
-#define has_fs_excl() atomic_read(&current->fs_excl)
-
 /*
  * until VFS tracks user namespaces for inodes, just make all files
  * belong to init_user_ns
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 580f70c02391..d14e058aaeed 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -176,7 +176,6 @@ extern struct cred init_cred;
         .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
         .journal_info = NULL, \
         .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
-        .fs_excl = ATOMIC_INIT(0), \
         .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
         .timer_slack_ns = 50000, /* 50 usec default slack */ \
         .pids = { \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a837b20ba190..22f54249cde1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1503,7 +1503,6 @@ struct task_struct {
         short il_next;
         short pref_node_fork;
 #endif
-        atomic_t fs_excl;       /* holding fs exclusive resources */
         struct rcu_head rcu;
 
         /*
diff --git a/kernel/exit.c b/kernel/exit.c
index f2b321bae440..b412df45ea6c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -906,7 +906,6 @@ NORET_TYPE void do_exit(long code)
 
         profile_task_exit(tsk);
 
-        WARN_ON(atomic_read(&tsk->fs_excl));
         WARN_ON(blk_needs_flush_plug(tsk));
 
         if (unlikely(in_interrupt()))
diff --git a/kernel/fork.c b/kernel/fork.c
index 0276c30401a0..30a0e8607223 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -291,7 +291,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 
         /* One for us, one for whoever does the "release_task()" (usually parent) */
         atomic_set(&tsk->usage,2);
-        atomic_set(&tsk->fs_excl, 0);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
         tsk->btrace_seq = 0;
 #endif