57 files changed, 588 insertions, 560 deletions
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt
index bee2a5f93d60..a1c052cbba35 100644
--- a/Documentation/filesystems/caching/operations.txt
+++ b/Documentation/filesystems/caching/operations.txt
@@ -90,7 +90,7 @@ operations:
      to be cleared before proceeding:
 
 	wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
-		    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+		    TASK_UNINTERRUPTIBLE);
 
 
  (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 2479b2a0c77c..4da42616939f 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1515,7 +1515,7 @@ Doing the same with chrt -r 5 and function-trace set.
 <idle>-0 3d.h4 1us+: 0:120:R + [003] 2448: 94:R sleep
 <idle>-0 3d.h4 2us : ttwu_do_activate.constprop.87 <-try_to_wake_up
 <idle>-0 3d.h3 3us : check_preempt_curr <-ttwu_do_wakeup
-<idle>-0 3d.h3 3us : resched_task <-check_preempt_curr
+<idle>-0 3d.h3 3us : resched_curr <-check_preempt_curr
 <idle>-0 3dNh3 4us : task_woken_rt <-ttwu_do_wakeup
 <idle>-0 3dNh3 4us : _raw_spin_unlock <-try_to_wake_up
 <idle>-0 3dNh3 4us : sub_preempt_count <-_raw_spin_unlock
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index d724459860d9..ab472c557d18 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -615,16 +615,6 @@ static void write_endio(struct bio *bio, int error)
 }
 
 /*
- * This function is called when wait_on_bit is actually waiting.
- */
-static int do_io_schedule(void *word)
-{
-        io_schedule();
-
-        return 0;
-}
-
-/*
  * Initiate a write on a dirty buffer, but don't wait for it.
  *
  * - If the buffer is not dirty, exit.
@@ -640,8 +630,7 @@ static void __write_dirty_buffer(struct dm_buffer *b,
                 return;
 
         clear_bit(B_DIRTY, &b->state);
-        wait_on_bit_lock(&b->state, B_WRITING,
-                         do_io_schedule, TASK_UNINTERRUPTIBLE);
+        wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
 
         if (!write_list)
                 submit_io(b, WRITE, b->block, write_endio);
@@ -675,9 +664,9 @@ static void __make_buffer_clean(struct dm_buffer *b)
         if (!b->state)  /* fast case */
                 return;
 
-        wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+        wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
         __write_dirty_buffer(b, NULL);
-        wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+        wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
 }
 
 /*
@@ -1030,7 +1019,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
         if (need_submit)
                 submit_io(b, READ, b->block, read_endio);
 
-        wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+        wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
         if (b->read_error) {
                 int error = b->read_error;
@@ -1209,15 +1198,13 @@ again:
                         dropped_lock = 1;
                         b->hold_count++;
                         dm_bufio_unlock(c);
-                        wait_on_bit(&b->state, B_WRITING,
-                                    do_io_schedule,
-                                    TASK_UNINTERRUPTIBLE);
+                        wait_on_bit_io(&b->state, B_WRITING,
+                                       TASK_UNINTERRUPTIBLE);
                         dm_bufio_lock(c);
                         b->hold_count--;
                 } else
-                        wait_on_bit(&b->state, B_WRITING,
-                                    do_io_schedule,
-                                    TASK_UNINTERRUPTIBLE);
+                        wait_on_bit_io(&b->state, B_WRITING,
+                                       TASK_UNINTERRUPTIBLE);
         }
 
         if (!test_bit(B_DIRTY, &b->state) &&
@@ -1321,15 +1308,15 @@ retry:
 
         __write_dirty_buffer(b, NULL);
         if (b->hold_count == 1) {
-                wait_on_bit(&b->state, B_WRITING,
-                            do_io_schedule, TASK_UNINTERRUPTIBLE);
+                wait_on_bit_io(&b->state, B_WRITING,
+                               TASK_UNINTERRUPTIBLE);
                 set_bit(B_DIRTY, &b->state);
                 __unlink_buffer(b);
                 __link_buffer(b, new_block, LIST_DIRTY);
         } else {
                 sector_t old_block;
-                wait_on_bit_lock(&b->state, B_WRITING,
-                                 do_io_schedule, TASK_UNINTERRUPTIBLE);
+                wait_on_bit_lock_io(&b->state, B_WRITING,
+                                    TASK_UNINTERRUPTIBLE);
                 /*
                  * Relink buffer to "new_block" so that write_callback
                  * sees "new_block" as a block number.
@@ -1341,8 +1328,8 @@ retry:
                 __unlink_buffer(b);
                 __link_buffer(b, new_block, b->list_mode);
                 submit_io(b, WRITE, new_block, write_endio);
-                wait_on_bit(&b->state, B_WRITING,
-                            do_io_schedule, TASK_UNINTERRUPTIBLE);
+                wait_on_bit_io(&b->state, B_WRITING,
+                               TASK_UNINTERRUPTIBLE);
                 __unlink_buffer(b);
                 __link_buffer(b, old_block, b->list_mode);
         }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 5bd2290cfb1e..864b03f47727 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1032,21 +1032,13 @@ static void start_merge(struct dm_snapshot *s)
         snapshot_merge_next_chunks(s);
 }
 
-static int wait_schedule(void *ptr)
-{
-        schedule();
-
-        return 0;
-}
-
 /*
  * Stop the merging process and wait until it finishes.
  */
 static void stop_merge(struct dm_snapshot *s)
 {
         set_bit(SHUTDOWN_MERGE, &s->state_bits);
-        wait_on_bit(&s->state_bits, RUNNING_MERGE, wait_schedule,
-                    TASK_UNINTERRUPTIBLE);
+        wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE);
         clear_bit(SHUTDOWN_MERGE, &s->state_bits);
 }
 
diff --git a/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c b/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c
index e35580618936..f296394bb7c5 100644
--- a/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c
+++ b/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c
@@ -253,13 +253,6 @@ static int dvb_usbv2_adapter_stream_exit(struct dvb_usb_adapter *adap)
         return usb_urb_exitv2(&adap->stream);
 }
 
-static int wait_schedule(void *ptr)
-{
-        schedule();
-
-        return 0;
-}
-
 static int dvb_usb_start_feed(struct dvb_demux_feed *dvbdmxfeed)
 {
         struct dvb_usb_adapter *adap = dvbdmxfeed->demux->priv;
@@ -273,8 +266,7 @@ static int dvb_usb_start_feed(struct dvb_demux_feed *dvbdmxfeed)
                         dvbdmxfeed->pid, dvbdmxfeed->index);
 
         /* wait init is done */
-        wait_on_bit(&adap->state_bits, ADAP_INIT, wait_schedule,
-                    TASK_UNINTERRUPTIBLE);
+        wait_on_bit(&adap->state_bits, ADAP_INIT, TASK_UNINTERRUPTIBLE);
 
         if (adap->active_fe == -1)
                 return -EINVAL;
@@ -568,7 +560,7 @@ static int dvb_usb_fe_sleep(struct dvb_frontend *fe)
 
         if (!adap->suspend_resume_active) {
                 set_bit(ADAP_SLEEP, &adap->state_bits);
-                wait_on_bit(&adap->state_bits, ADAP_STREAMING, wait_schedule,
+                wait_on_bit(&adap->state_bits, ADAP_STREAMING,
                             TASK_UNINTERRUPTIBLE);
         }
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a389820d158b..3e11aab9f391 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3437,16 +3437,10 @@ done_unlocked:
         return 0;
 }
 
-static int eb_wait(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 {
-        wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
+        wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
                     TASK_UNINTERRUPTIBLE);
 }
 
 static noinline_for_stack int
diff --git a/fs/buffer.c b/fs/buffer.c
index eba6e4f621ce..8f05111bbb8b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -61,16 +61,9 @@ inline void touch_buffer(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(touch_buffer);
 
-static int sleep_on_buffer(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 void __lock_buffer(struct buffer_head *bh)
 {
-        wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
-                         TASK_UNINTERRUPTIBLE);
+        wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_buffer);
 
@@ -123,7 +116,7 @@ EXPORT_SYMBOL(buffer_check_dirty_writeback);
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
-        wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
+        wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__wait_on_buffer);
 
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 20d75b8ddb26..b98366f21f9e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3934,13 +3934,6 @@ cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
         return tlink_tcon(cifs_sb_master_tlink(cifs_sb));
 }
 
-static int
-cifs_sb_tcon_pending_wait(void *unused)
-{
-        schedule();
-        return signal_pending(current) ? -ERESTARTSYS : 0;
-}
-
 /* find and return a tlink with given uid */
 static struct tcon_link *
 tlink_rb_search(struct rb_root *root, kuid_t uid)
@@ -4039,11 +4032,10 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
         } else {
 wait_for_construction:
                 ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
-                                  cifs_sb_tcon_pending_wait,
                                   TASK_INTERRUPTIBLE);
                 if (ret) {
                         cifs_put_tlink(tlink);
-                        return ERR_PTR(ret);
+                        return ERR_PTR(-ERESTARTSYS);
                 }
 
                 /* if it's good, return it */
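A behavioural detail in the cifs_sb_tlink() hunk above: the removed action function mapped a pending signal to -ERESTARTSYS itself, while plain wait_on_bit(..., TASK_INTERRUPTIBLE) only reports a non-zero result when the sleep is interrupted, so the caller now performs that mapping explicitly. A hedged sketch of the resulting caller-side pattern (names are illustrative, not taken from cifs):

        #include <linux/wait.h>
        #include <linux/sched.h>
        #include <linux/errno.h>

        #define MY_PENDING_BIT 0        /* stands in for a flag such as TCON_LINK_PENDING */

        /* Returns 0 once the bit clears, or -ERESTARTSYS if a signal interrupted the wait. */
        static int wait_for_setup(unsigned long *flags)
        {
                if (wait_on_bit(flags, MY_PENDING_BIT, TASK_INTERRUPTIBLE))
                        return -ERESTARTSYS;    /* raw return is merely "non-zero"; map it here */
                return 0;
        }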
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e90a1e9aa627..b88b1ade4d3d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3618,13 +3618,6 @@ static int cifs_launder_page(struct page *page)
         return rc;
 }
 
-static int
-cifs_pending_writers_wait(void *unused)
-{
-        schedule();
-        return 0;
-}
-
 void cifs_oplock_break(struct work_struct *work)
 {
         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -3636,7 +3629,7 @@ void cifs_oplock_break(struct work_struct *work)
         int rc = 0;
 
         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
-                        cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
+                        TASK_UNINTERRUPTIBLE);
 
         server->ops->downgrade_oplock(server, cinode,
                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a174605f6afa..41de3935caa0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1780,7 +1780,7 @@ cifs_invalidate_mapping(struct inode *inode)
  * @word: long word containing the bit lock
  */
 static int
-cifs_wait_bit_killable(void *word)
+cifs_wait_bit_killable(struct wait_bit_key *key)
 {
         if (fatal_signal_pending(current))
                 return -ERESTARTSYS;
@@ -1794,8 +1794,8 @@ cifs_revalidate_mapping(struct inode *inode)
         int rc;
         unsigned long *flags = &CIFS_I(inode)->flags;
 
-        rc = wait_on_bit_lock(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
+        rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
                               TASK_KILLABLE);
         if (rc)
                 return rc;
 
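Callers that genuinely need a custom sleep behaviour, such as cifs_wait_bit_killable() above, keep it by moving to the *_action variants, but the callback prototype changes from int (*)(void *) to int (*)(struct wait_bit_key *). A simplified sketch of such an action and its use; the body is an assumption modelled on the cifs and nfs helpers in this series, which may do more (e.g. freezer handling):

        #include <linux/wait.h>
        #include <linux/sched.h>

        /* Sleep, but abort the wait with -ERESTARTSYS if a fatal signal is pending. */
        static int my_wait_bit_killable(struct wait_bit_key *key)
        {
                if (fatal_signal_pending(current))
                        return -ERESTARTSYS;
                schedule();
                return 0;
        }

        static int lock_my_flag(unsigned long *flags)
        {
                /* Bit 0 is illustrative; any flag bit owned by the caller works. */
                return wait_on_bit_lock_action(flags, 0, my_wait_bit_killable,
                                               TASK_KILLABLE);
        }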
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3b0c62e622da..6bf55d0ed494 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -582,7 +582,7 @@ int cifs_get_writer(struct cifsInodeInfo *cinode)
 
 start:
         rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
-                         cifs_oplock_break_wait, TASK_KILLABLE);
+                         TASK_KILLABLE);
         if (rc)
                 return rc;
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index be568b7311d6..ef9bef118342 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -342,7 +342,8 @@ static void __inode_wait_for_writeback(struct inode *inode)
         wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
         while (inode->i_state & I_SYNC) {
                 spin_unlock(&inode->i_lock);
-                __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
+                __wait_on_bit(wqh, &wq, bit_wait,
+                              TASK_UNINTERRUPTIBLE);
                 spin_lock(&inode->i_lock);
         }
 }
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index aec01be91b0a..89acec742e0b 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -160,7 +160,7 @@ void __fscache_enable_cookie(struct fscache_cookie *cookie,
         _enter("%p", cookie);
 
         wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
-                         fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+                         TASK_UNINTERRUPTIBLE);
 
         if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
                 goto out_unlock;
@@ -255,7 +255,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
         if (!fscache_defer_lookup) {
                 _debug("non-deferred lookup %p", &cookie->flags);
                 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
-                            fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+                            TASK_UNINTERRUPTIBLE);
                 _debug("complete");
                 if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
                         goto unavailable;
@@ -463,7 +463,6 @@ void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
         _enter("%p", cookie);
 
         wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
-                    fscache_wait_bit_interruptible,
                     TASK_UNINTERRUPTIBLE);
 
         _leave("");
@@ -525,7 +524,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
         }
 
         wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
-                         fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+                         TASK_UNINTERRUPTIBLE);
         if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
                 goto out_unlock_enable;
 
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index bc6c08fcfddd..7872a62ef30c 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void)
         return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
 }
 
-extern int fscache_wait_bit(void *);
-extern int fscache_wait_bit_interruptible(void *);
 extern int fscache_wait_atomic_t(atomic_t *);
 
 /*
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 63f868e869b9..a31b83c5cbd9 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -197,24 +197,6 @@ static void __exit fscache_exit(void)
 module_exit(fscache_exit);
 
 /*
- * wait_on_bit() sleep function for uninterruptible waiting
- */
-int fscache_wait_bit(void *flags)
-{
-        schedule();
-        return 0;
-}
-
-/*
- * wait_on_bit() sleep function for interruptible waiting
- */
-int fscache_wait_bit_interruptible(void *flags)
-{
-        schedule();
-        return signal_pending(current);
-}
-
-/*
  * wait_on_atomic_t() sleep function for uninterruptible waiting
  */
 int fscache_wait_atomic_t(atomic_t *p)
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index ed70714503fa..85332b9d19d1 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -298,7 +298,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
 
         jif = jiffies;
         if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
-                        fscache_wait_bit_interruptible,
                         TASK_INTERRUPTIBLE) != 0) {
                 fscache_stat(&fscache_n_retrievals_intr);
                 _leave(" = -ERESTARTSYS");
@@ -342,7 +341,6 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
                 if (stat_op_waits)
                         fscache_stat(stat_op_waits);
                 if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
-                                fscache_wait_bit_interruptible,
                                 TASK_INTERRUPTIBLE) != 0) {
                         ret = fscache_cancel_op(op, do_cancel);
                         if (ret == 0)
@@ -351,7 +349,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
                 /* it's been removed from the pending queue by another party,
                  * so we should get to run shortly */
                 wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
-                            fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+                            TASK_UNINTERRUPTIBLE);
         }
         _debug("<<< GO");
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ee4e04fe60fc..7f513b1ceb2c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -856,27 +856,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
 }
 
 /**
- * gfs2_glock_holder_wait
- * @word: unused
- *
- * This function and gfs2_glock_demote_wait both show up in the WCHAN
- * field. Thus I've separated these otherwise identical functions in
- * order to be more informative to the user.
- */
-
-static int gfs2_glock_holder_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
-static int gfs2_glock_demote_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
-/**
  * gfs2_glock_wait - wait on a glock acquisition
  * @gh: the glock holder
  *
@@ -888,7 +867,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
         unsigned long time1 = jiffies;
 
         might_sleep();
-        wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
+        wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
         if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
                 /* Lengthen the minimum hold time. */
                 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
@@ -1128,7 +1107,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
         struct gfs2_glock *gl = gh->gh_gl;
         gfs2_glock_dq(gh);
         might_sleep();
-        wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
+        wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
 }
 
 /**
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 4fafea1c9ecf..641383a9c1bb 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -936,12 +936,6 @@ fail:
         return error;
 }
 
-static int dlm_recovery_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
 static int control_first_done(struct gfs2_sbd *sdp)
 {
         struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -976,7 +970,7 @@ restart:
                 fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
 
                 wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
-                            dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
+                            TASK_UNINTERRUPTIBLE);
                 goto restart;
         }
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index bc564c0d6d16..d3eae244076e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1024,20 +1024,13 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
                 lm->lm_unmount(sdp);
 }
 
-static int gfs2_journalid_wait(void *word)
-{
-        if (signal_pending(current))
-                return -EINTR;
-        schedule();
-        return 0;
-}
-
 static int wait_on_journal(struct gfs2_sbd *sdp)
 {
         if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
                 return 0;
 
-        return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE);
+        return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, TASK_INTERRUPTIBLE)
+                ? -EINTR : 0;
 }
 
 void gfs2_online_uevent(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 94555d4c5698..573bd3b758fa 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -591,12 +591,6 @@ done:
         wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
 }
 
-static int gfs2_recovery_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
 int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
 {
         int rv;
@@ -609,7 +603,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
         BUG_ON(!rv);
 
         if (wait)
-                wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait,
+                wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
                             TASK_UNINTERRUPTIBLE);
 
         return wait ? jd->jd_recover_error : 0;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 1319b5c4ec68..2607ff13d486 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -864,12 +864,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
         return error;
 }
 
-static int gfs2_umount_recovery_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
 /**
  * gfs2_put_super - Unmount the filesystem
  * @sb: The VFS superblock
@@ -894,7 +888,7 @@ restart:
                         continue;
                 spin_unlock(&sdp->sd_jindex_spin);
                 wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
-                            gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE);
+                            TASK_UNINTERRUPTIBLE);
                 goto restart;
         }
         spin_unlock(&sdp->sd_jindex_spin);
diff --git a/fs/inode.c b/fs/inode.c
index 6eecb7ff0b9a..5938f3928944 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1695,13 +1695,6 @@ int inode_needs_sync(struct inode *inode)
 }
 EXPORT_SYMBOL(inode_needs_sync);
 
-int inode_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-EXPORT_SYMBOL(inode_wait);
-
 /*
  * If we try to find an inode in the inode hash while it is being
  * deleted, we have to wait until the filesystem completes its
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6f0f590cc5a3..5f09370c90a8 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -763,12 +763,6 @@ static void warn_dirty_buffer(struct buffer_head *bh)
                bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
-static int sleep_on_shadow_bh(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 /*
  * If the buffer is already part of the current transaction, then there
  * is nothing we need to do. If it is already part of a prior
@@ -906,8 +900,8 @@ repeat:
                 if (buffer_shadow(bh)) {
                         JBUFFER_TRACE(jh, "on shadow: sleep");
                         jbd_unlock_bh_state(bh);
-                        wait_on_bit(&bh->b_state, BH_Shadow,
-                                    sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
+                        wait_on_bit_io(&bh->b_state, BH_Shadow,
+                                       TASK_UNINTERRUPTIBLE);
                         goto repeat;
                 }
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4042ff58fe3f..524dd80d1898 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -361,8 +361,8 @@ start:
          * Prevent starvation issues if someone is doing a consistency
          * sync-to-disk
          */
-        ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+        ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
                         nfs_wait_bit_killable, TASK_KILLABLE);
         if (ret)
                 return ret;
 
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 44bf0140a4c7..e2a0361e24c6 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
 static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
 {
         might_sleep();
-        wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
+        wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
                         nfs_wait_bit_killable, TASK_KILLABLE);
 }
 
 static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9927913c97c2..abd37a380535 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
  * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
  * @word: long word containing the bit lock
  */
-int nfs_wait_bit_killable(void *word)
+int nfs_wait_bit_killable(struct wait_bit_key *key)
 {
         if (fatal_signal_pending(current))
                 return -ERESTARTSYS;
@@ -1074,8 +1074,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
          * the bit lock here if it looks like we're going to be doing that.
          */
         for (;;) {
-                ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING,
+                ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
                                   nfs_wait_bit_killable, TASK_KILLABLE);
                 if (ret)
                         goto out;
                 spin_lock(&inode->i_lock);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f415cbf9f6c3..617f36611d4a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -348,7 +348,7 @@ extern int nfs_drop_inode(struct inode *);
 extern void nfs_clear_inode(struct inode *);
 extern void nfs_evict_inode(struct inode *);
 void nfs_zap_acl_cache(struct inode *inode);
-extern int nfs_wait_bit_killable(void *word);
+extern int nfs_wait_bit_killable(struct wait_bit_key *key);
 
 /* super.c */
 extern const struct super_operations nfs_sops;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 848f6853c59e..42f121182167 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1251,8 +1251,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
         might_sleep();
 
         atomic_inc(&clp->cl_count);
-        res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
+        res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
                         nfs_wait_bit_killable, TASK_KILLABLE);
         if (res)
                 goto out;
         if (clp->cl_cons_state < 0)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 17fab89f6358..0be5050638f7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -115,7 +115,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
                 set_bit(NFS_IO_INPROGRESS, &c->flags);
                 if (atomic_read(&c->io_count) == 0)
                         break;
-                ret = nfs_wait_bit_killable(&c->flags);
+                ret = nfs_wait_bit_killable(&q.key);
         } while (atomic_read(&c->io_count) != 0);
         finish_wait(wq, &q.wait);
         return ret;
@@ -136,12 +136,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
         return __nfs_iocounter_wait(c);
 }
 
-static int nfs_wait_bit_uninterruptible(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 /*
  * nfs_page_group_lock - lock the head of the page group
  * @req - request in group that is to be locked
@@ -156,7 +150,6 @@ nfs_page_group_lock(struct nfs_page *req)
         WARN_ON_ONCE(head != head->wb_head);
 
         wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
-                nfs_wait_bit_uninterruptible,
                 TASK_UNINTERRUPTIBLE);
 }
 
@@ -435,9 +428,8 @@ void nfs_release_request(struct nfs_page *req)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
-        return wait_on_bit(&req->wb_flags, PG_BUSY,
-                        nfs_wait_bit_uninterruptible,
-                        TASK_UNINTERRUPTIBLE);
+        return wait_on_bit_io(&req->wb_flags, PG_BUSY,
+                              TASK_UNINTERRUPTIBLE);
 }
 
 /*
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6fdcd233d6f7..a8914b335617 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1885,7 +1885,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
         if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
                 if (!sync)
                         goto out;
-                status = wait_on_bit_lock(&nfsi->flags,
+                status = wait_on_bit_lock_action(&nfsi->flags,
                                           NFS_INO_LAYOUTCOMMITTING,
                                           nfs_wait_bit_killable,
                                           TASK_KILLABLE);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5e2f10304548..962c9ee758be 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -623,7 +623,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
         int err;
 
         /* Stop dirtying of new pages while we sync */
-        err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
+        err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
                         nfs_wait_bit_killable, TASK_KILLABLE);
         if (err)
                 goto out_err;
@@ -1703,7 +1703,7 @@ int nfs_commit_inode(struct inode *inode, int how)
                 return error;
         if (!may_wait)
                 goto out_mark_dirty;
-        error = wait_on_bit(&NFS_I(inode)->flags,
+        error = wait_on_bit_action(&NFS_I(inode)->flags,
                                 NFS_INO_COMMIT,
                                 nfs_wait_bit_killable,
                                 TASK_KILLABLE);
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 19ae05d4b8ec..bf9422c3aefe 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -33,6 +33,11 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), }
 
 bool irq_work_queue(struct irq_work *work);
+
+#ifdef CONFIG_SMP
+bool irq_work_queue_on(struct irq_work *work, int cpu);
+#endif
+
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);
 
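The irq_work.h hunk only adds the irq_work_queue_on() declaration under CONFIG_SMP. As context, a hedged sketch of how a caller might use it; the fallback to irq_work_queue() on non-SMP builds is an assumption made for illustration, not something this patch prescribes:

        #include <linux/irq_work.h>
        #include <linux/smp.h>

        static void my_irq_work_fn(struct irq_work *work)
        {
                /* Runs in IRQ-work context on whichever CPU the work was queued for. */
        }

        static DEFINE_IRQ_WORK(my_work, my_irq_work_fn);

        static void kick_remote_cpu(int cpu)
        {
        #ifdef CONFIG_SMP
                irq_work_queue_on(&my_work, cpu);       /* queue on a specific remote CPU */
        #else
                irq_work_queue(&my_work);               /* uniprocessor: only the local CPU exists */
        #endif
        }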
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b39a671cfd59..42cac4dc2157 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1437,8 +1437,6 @@
         struct rb_node *pi_waiters_leftmost;
         /* Deadlock detection and priority inheritance handling */
         struct rt_mutex_waiter *pi_blocked_on;
-        /* Top pi_waiters task */
-        struct task_struct *pi_top_task;
 #endif
 
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -2782,7 +2780,7 @@ static inline bool __must_check current_set_polling_and_test(void)
 
         /*
          * Polling state must be visible before we test NEED_RESCHED,
-         * paired by resched_task()
+         * paired by resched_curr()
          */
         smp_mb__after_atomic();
 
@@ -2800,7 +2798,7 @@ static inline bool __must_check current_clr_polling_and_test(void)
 
         /*
          * Polling state must be visible before we test NEED_RESCHED,
-         * paired by resched_task()
+         * paired by resched_curr()
          */
         smp_mb__after_atomic();
 
@@ -2832,7 +2830,7 @@ static inline void current_clr_polling(void)
          * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
          * fold.
          */
-        smp_mb(); /* paired with resched_task() */
+        smp_mb(); /* paired with resched_curr() */
 
         preempt_fold_need_resched();
 }
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index ad7dbe2cfecd..1a8959944c5f 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -236,7 +236,7 @@ void * rpc_malloc(struct rpc_task *, size_t);
 void rpc_free(void *);
 int rpciod_up(void);
 void rpciod_down(void);
-int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
+int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *);
 #ifdef RPC_DEBUG
 struct net;
 void rpc_show_tasks(struct net *);
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 06cc093ab7ad..059052306831 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -183,7 +183,13 @@ static inline bool tick_nohz_full_cpu(int cpu)
 
 extern void tick_nohz_init(void);
 extern void __tick_nohz_full_check(void);
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_full_kick_cpu(int cpu);
+
+static inline void tick_nohz_full_kick(void)
+{
+        tick_nohz_full_kick_cpu(smp_processor_id());
+}
+
 extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
@@ -191,6 +197,7 @@ static inline void tick_nohz_init(void) { }
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void __tick_nohz_full_check(void) { }
+static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void tick_nohz_full_kick(void) { }
 static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
diff --git a/include/linux/wait.h b/include/linux/wait.h index bd68819f0815..6fb1ba5f9b2f 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h | |||
@@ -25,6 +25,7 @@ struct wait_bit_key { | |||
25 | void *flags; | 25 | void *flags; |
26 | int bit_nr; | 26 | int bit_nr; |
27 | #define WAIT_ATOMIC_T_BIT_NR -1 | 27 | #define WAIT_ATOMIC_T_BIT_NR -1 |
28 | unsigned long private; | ||
28 | }; | 29 | }; |
29 | 30 | ||
30 | struct wait_bit_queue { | 31 | struct wait_bit_queue { |
@@ -141,18 +142,19 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) | |||
141 | list_del(&old->task_list); | 142 | list_del(&old->task_list); |
142 | } | 143 | } |
143 | 144 | ||
145 | typedef int wait_bit_action_f(struct wait_bit_key *); | ||
144 | void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); | 146 | void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); |
145 | void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); | 147 | void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); |
146 | void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); | 148 | void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); |
147 | void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); | 149 | void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); |
148 | void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); | 150 | void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); |
149 | void __wake_up_bit(wait_queue_head_t *, void *, int); | 151 | void __wake_up_bit(wait_queue_head_t *, void *, int); |
150 | int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); | 152 | int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); |
151 | int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); | 153 | int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); |
152 | void wake_up_bit(void *, int); | 154 | void wake_up_bit(void *, int); |
153 | void wake_up_atomic_t(atomic_t *); | 155 | void wake_up_atomic_t(atomic_t *); |
154 | int out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned); | 156 | int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); |
155 | int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned); | 157 | int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned); |
156 | int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); | 158 | int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); |
157 | wait_queue_head_t *bit_waitqueue(void *, int); | 159 | wait_queue_head_t *bit_waitqueue(void *, int); |
158 | 160 | ||
@@ -854,11 +856,14 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); | |||
854 | (wait)->flags = 0; \ | 856 | (wait)->flags = 0; \ |
855 | } while (0) | 857 | } while (0) |
856 | 858 | ||
859 | |||
860 | extern int bit_wait(struct wait_bit_key *); | ||
861 | extern int bit_wait_io(struct wait_bit_key *); | ||
862 | |||
857 | /** | 863 | /** |
858 | * wait_on_bit - wait for a bit to be cleared | 864 | * wait_on_bit - wait for a bit to be cleared |
859 | * @word: the word being waited on, a kernel virtual address | 865 | * @word: the word being waited on, a kernel virtual address |
860 | * @bit: the bit of the word being waited on | 866 | * @bit: the bit of the word being waited on |
861 | * @action: the function used to sleep, which may take special actions | ||
862 | * @mode: the task state to sleep in | 867 | * @mode: the task state to sleep in |
863 | * | 868 | * |
864 | * There is a standard hashed waitqueue table for generic use. This | 869 | * There is a standard hashed waitqueue table for generic use. This |
@@ -867,9 +872,62 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); | |||
867 | * call wait_on_bit() in threads waiting for the bit to clear. | 872 | * call wait_on_bit() in threads waiting for the bit to clear. |
868 | * One uses wait_on_bit() where one is waiting for the bit to clear, | 873 | * One uses wait_on_bit() where one is waiting for the bit to clear, |
869 | * but has no intention of setting it. | 874 | * but has no intention of setting it. |
875 | * Returned value will be zero if the bit was cleared, or non-zero | ||
876 | * if the process received a signal and the mode permitted wakeup | ||
877 | * on that signal. | ||
878 | */ | ||
879 | static inline int | ||
880 | wait_on_bit(void *word, int bit, unsigned mode) | ||
881 | { | ||
882 | if (!test_bit(bit, word)) | ||
883 | return 0; | ||
884 | return out_of_line_wait_on_bit(word, bit, | ||
885 | bit_wait, | ||
886 | mode); | ||
887 | } | ||
888 | |||
889 | /** | ||
890 | * wait_on_bit_io - wait for a bit to be cleared | ||
891 | * @word: the word being waited on, a kernel virtual address | ||
892 | * @bit: the bit of the word being waited on | ||
893 | * @mode: the task state to sleep in | ||
894 | * | ||
895 | * Use the standard hashed waitqueue table to wait for a bit | ||
896 | * to be cleared. This is similar to wait_on_bit(), but calls | ||
897 | * io_schedule() instead of schedule() for the actual waiting. | ||
898 | * | ||
899 | * Returned value will be zero if the bit was cleared, or non-zero | ||
900 | * if the process received a signal and the mode permitted wakeup | ||
901 | * on that signal. | ||
902 | */ | ||
903 | static inline int | ||
904 | wait_on_bit_io(void *word, int bit, unsigned mode) | ||
905 | { | ||
906 | if (!test_bit(bit, word)) | ||
907 | return 0; | ||
908 | return out_of_line_wait_on_bit(word, bit, | ||
909 | bit_wait_io, | ||
910 | mode); | ||
911 | } | ||
912 | |||
913 | /** | ||
914 | * wait_on_bit_action - wait for a bit to be cleared | ||
915 | * @word: the word being waited on, a kernel virtual address | ||
916 | * @bit: the bit of the word being waited on | ||
917 | * @action: the function used to sleep, which may take special actions | ||
918 | * @mode: the task state to sleep in | ||
919 | * | ||
920 | * Use the standard hashed waitqueue table to wait for a bit | ||
921 | * to be cleared, and allow the waiting action to be specified. | ||
922 | * This is like wait_on_bit() but allows fine control of how the waiting | ||
923 | * is done. | ||
924 | * | ||
925 | * Returned value will be zero if the bit was cleared, or non-zero | ||
926 | * if the process received a signal and the mode permitted wakeup | ||
927 | * on that signal. | ||
870 | */ | 928 | */ |
871 | static inline int | 929 | static inline int |
872 | wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode) | 930 | wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) |
873 | { | 931 | { |
874 | if (!test_bit(bit, word)) | 932 | if (!test_bit(bit, word)) |
875 | return 0; | 933 | return 0; |
@@ -880,7 +938,6 @@ wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode) | |||
880 | * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it | 938 | * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it |
881 | * @word: the word being waited on, a kernel virtual address | 939 | * @word: the word being waited on, a kernel virtual address |
882 | * @bit: the bit of the word being waited on | 940 | * @bit: the bit of the word being waited on |
883 | * @action: the function used to sleep, which may take special actions | ||
884 | * @mode: the task state to sleep in | 941 | * @mode: the task state to sleep in |
885 | * | 942 | * |
886 | * There is a standard hashed waitqueue table for generic use. This | 943 | * There is a standard hashed waitqueue table for generic use. This |
@@ -891,9 +948,61 @@ wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode) | |||
891 | * wait_on_bit() in threads waiting to be able to set the bit. | 948 | * wait_on_bit() in threads waiting to be able to set the bit. |
892 | * One uses wait_on_bit_lock() where one is waiting for the bit to | 949 | * One uses wait_on_bit_lock() where one is waiting for the bit to |
893 | * clear with the intention of setting it, and when done, clearing it. | 950 | * clear with the intention of setting it, and when done, clearing it. |
951 | * | ||
952 | * Returns zero if the bit was (eventually) found to be clear and was | ||
953 | * set. Returns non-zero if a signal was delivered to the process and | ||
954 | * the @mode allows that signal to wake the process. | ||
955 | */ | ||
956 | static inline int | ||
957 | wait_on_bit_lock(void *word, int bit, unsigned mode) | ||
958 | { | ||
959 | if (!test_and_set_bit(bit, word)) | ||
960 | return 0; | ||
961 | return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode); | ||
962 | } | ||
963 | |||
964 | /** | ||
965 | * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it | ||
966 | * @word: the word being waited on, a kernel virtual address | ||
967 | * @bit: the bit of the word being waited on | ||
968 | * @mode: the task state to sleep in | ||
969 | * | ||
970 | * Use the standard hashed waitqueue table to wait for a bit | ||
971 | * to be cleared and then to atomically set it. This is similar | ||
972 | * to wait_on_bit_lock(), but calls io_schedule() instead of schedule() | ||
973 | * for the actual waiting. | ||
974 | * | ||
975 | * Returns zero if the bit was (eventually) found to be clear and was | ||
976 | * set. Returns non-zero if a signal was delivered to the process and | ||
977 | * the @mode allows that signal to wake the process. | ||
978 | */ | ||
979 | static inline int | ||
980 | wait_on_bit_lock_io(void *word, int bit, unsigned mode) | ||
981 | { | ||
982 | if (!test_and_set_bit(bit, word)) | ||
983 | return 0; | ||
984 | return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode); | ||
985 | } | ||
986 | |||
987 | /** | ||
988 | * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it | ||
989 | * @word: the word being waited on, a kernel virtual address | ||
990 | * @bit: the bit of the word being waited on | ||
991 | * @action: the function used to sleep, which may take special actions | ||
992 | * @mode: the task state to sleep in | ||
993 | * | ||
994 | * Use the standard hashed waitqueue table to wait for a bit | ||
995 | * to be cleared and then to set it, and allow the waiting action | ||
996 | * to be specified. | ||
997 | * This is like wait_on_bit_lock() but allows fine control of how the waiting | ||
998 | * is done. | ||
999 | * | ||
1000 | * Returns zero if the bit was (eventually) found to be clear and was | ||
1001 | * set. Returns non-zero if a signal was delivered to the process and | ||
1002 | * the @mode allows that signal to wake the process. | ||
894 | */ | 1003 | */ |
895 | static inline int | 1004 | static inline int |
896 | wait_on_bit_lock(void *word, int bit, int (*action)(void *), unsigned mode) | 1005 | wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode) |
897 | { | 1006 | { |
898 | if (!test_and_set_bit(bit, word)) | 1007 | if (!test_and_set_bit(bit, word)) |
899 | return 0; | 1008 | return 0; |
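
With the wait.h additions above in place, the common callers no longer carry their own action callbacks; the plain helpers sleep through the stock bit_wait()/schedule(), and the _io variants through bit_wait_io()/io_schedule(). A minimal caller-side sketch (EX_BUSY and ex_state are illustrative names, not taken from the patch):

#include <linux/wait.h>
#include <linux/sched.h>

#define EX_BUSY 0                       /* illustrative bit number */
static unsigned long ex_state;          /* illustrative flags word */

/* Wait for EX_BUSY to clear; sleeps through the default bit_wait(). */
static void ex_wait_idle(void)
{
        wait_on_bit(&ex_state, EX_BUSY, TASK_UNINTERRUPTIBLE);
}

/* Same wait, but accounted as I/O wait through bit_wait_io(). */
static void ex_wait_idle_io(void)
{
        wait_on_bit_io(&ex_state, EX_BUSY, TASK_UNINTERRUPTIBLE);
}

/* Wait for EX_BUSY to clear, then atomically set it (I/O flavour). */
static int ex_lock_io(void)
{
        return wait_on_bit_lock_io(&ex_state, EX_BUSY, TASK_UNINTERRUPTIBLE);
}
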
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 5777c13849ba..a219be961c0a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -90,7 +90,6 @@ struct writeback_control { | |||
90 | * fs/fs-writeback.c | 90 | * fs/fs-writeback.c |
91 | */ | 91 | */ |
92 | struct bdi_writeback; | 92 | struct bdi_writeback; |
93 | int inode_wait(void *); | ||
94 | void writeback_inodes_sb(struct super_block *, enum wb_reason reason); | 93 | void writeback_inodes_sb(struct super_block *, enum wb_reason reason); |
95 | void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, | 94 | void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, |
96 | enum wb_reason reason); | 95 | enum wb_reason reason); |
@@ -105,7 +104,7 @@ void inode_wait_for_writeback(struct inode *inode); | |||
105 | static inline void wait_on_inode(struct inode *inode) | 104 | static inline void wait_on_inode(struct inode *inode) |
106 | { | 105 | { |
107 | might_sleep(); | 106 | might_sleep(); |
108 | wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); | 107 | wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); |
109 | } | 108 | } |
110 | 109 | ||
111 | /* | 110 | /* |
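
Callers that still need a bespoke sleep keep the old flexibility through wait_on_bit_action(). A hedged sketch, assuming the wait_bit_action_f prototype introduced alongside these helpers takes a struct wait_bit_key pointer (EX_SYNCING and the helpers are made-up names):

#include <linux/wait.h>
#include <linux/sched.h>

#define EX_SYNCING 0                    /* illustrative bit number */

/* Hypothetical action: behaves like the stock bit_wait(), a bare schedule(). */
static int ex_bit_wait(struct wait_bit_key *key)
{
        schedule();
        return 0;
}

static void ex_wait_sync(unsigned long *flags)
{
        wait_on_bit_action(flags, EX_SYNCING, ex_bit_wait,
                           TASK_UNINTERRUPTIBLE);
}
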
diff --git a/kernel/cpu.c b/kernel/cpu.c index a343bde710b1..81e2a388a0f6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -274,21 +274,28 @@ void clear_tasks_mm_cpumask(int cpu) | |||
274 | rcu_read_unlock(); | 274 | rcu_read_unlock(); |
275 | } | 275 | } |
276 | 276 | ||
277 | static inline void check_for_tasks(int cpu) | 277 | static inline void check_for_tasks(int dead_cpu) |
278 | { | 278 | { |
279 | struct task_struct *p; | 279 | struct task_struct *g, *p; |
280 | cputime_t utime, stime; | ||
281 | 280 | ||
282 | write_lock_irq(&tasklist_lock); | 281 | read_lock_irq(&tasklist_lock); |
283 | for_each_process(p) { | 282 | do_each_thread(g, p) { |
284 | task_cputime(p, &utime, &stime); | 283 | if (!p->on_rq) |
285 | if (task_cpu(p) == cpu && p->state == TASK_RUNNING && | 284 | continue; |
286 | (utime || stime)) | 285 | /* |
287 | pr_warn("Task %s (pid = %d) is on cpu %d (state = %ld, flags = %x)\n", | 286 | * We do the check with unlocked task_rq(p)->lock. |
288 | p->comm, task_pid_nr(p), cpu, | 287 | * Order the reading to do not warn about a task, |
289 | p->state, p->flags); | 288 | * which was running on this cpu in the past, and |
290 | } | 289 | * it's just been woken on another cpu. |
291 | write_unlock_irq(&tasklist_lock); | 290 | */ |
291 | rmb(); | ||
292 | if (task_cpu(p) != dead_cpu) | ||
293 | continue; | ||
294 | |||
295 | pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n", | ||
296 | p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags); | ||
297 | } while_each_thread(g, p); | ||
298 | read_unlock_irq(&tasklist_lock); | ||
292 | } | 299 | } |
293 | 300 | ||
294 | struct take_cpu_down_param { | 301 | struct take_cpu_down_param { |
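
The check_for_tasks() rework also shows the stock pattern for visiting every thread in the system: hold the read side of tasklist_lock and walk with do_each_thread()/while_each_thread(), which covers all threads rather than only group leaders. A stripped-down sketch (the counting predicate is made up, not from the patch):

#include <linux/sched.h>
#include <linux/spinlock.h>

static int ex_count_threads_on_cpu(int cpu)
{
        struct task_struct *g, *p;
        int count = 0;

        read_lock(&tasklist_lock);
        do_each_thread(g, p) {
                if (task_cpu(p) == cpu)
                        count++;
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);

        return count;
}
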
diff --git a/kernel/fork.c b/kernel/fork.c index 6a13c46cd87d..962885edbe53 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1095,7 +1095,6 @@ static void rt_mutex_init_task(struct task_struct *p) | |||
1095 | p->pi_waiters = RB_ROOT; | 1095 | p->pi_waiters = RB_ROOT; |
1096 | p->pi_waiters_leftmost = NULL; | 1096 | p->pi_waiters_leftmost = NULL; |
1097 | p->pi_blocked_on = NULL; | 1097 | p->pi_blocked_on = NULL; |
1098 | p->pi_top_task = NULL; | ||
1099 | #endif | 1098 | #endif |
1100 | } | 1099 | } |
1101 | 1100 | ||
diff --git a/kernel/irq_work.c b/kernel/irq_work.c index a82170e2fa78..e6bcbe756663 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c | |||
@@ -16,11 +16,12 @@ | |||
16 | #include <linux/tick.h> | 16 | #include <linux/tick.h> |
17 | #include <linux/cpu.h> | 17 | #include <linux/cpu.h> |
18 | #include <linux/notifier.h> | 18 | #include <linux/notifier.h> |
19 | #include <linux/smp.h> | ||
19 | #include <asm/processor.h> | 20 | #include <asm/processor.h> |
20 | 21 | ||
21 | 22 | ||
22 | static DEFINE_PER_CPU(struct llist_head, irq_work_list); | 23 | static DEFINE_PER_CPU(struct llist_head, raised_list); |
23 | static DEFINE_PER_CPU(int, irq_work_raised); | 24 | static DEFINE_PER_CPU(struct llist_head, lazy_list); |
24 | 25 | ||
25 | /* | 26 | /* |
26 | * Claim the entry so that no one else will poke at it. | 27 | * Claim the entry so that no one else will poke at it. |
@@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void) | |||
55 | */ | 56 | */ |
56 | } | 57 | } |
57 | 58 | ||
59 | #ifdef CONFIG_SMP | ||
58 | /* | 60 | /* |
59 | * Enqueue the irq_work @entry unless it's already pending | 61 | * Enqueue the irq_work @work on @cpu unless it's already pending |
60 | * somewhere. | 62 | * somewhere. |
61 | * | 63 | * |
62 | * Can be re-enqueued while the callback is still in progress. | 64 | * Can be re-enqueued while the callback is still in progress. |
63 | */ | 65 | */ |
66 | bool irq_work_queue_on(struct irq_work *work, int cpu) | ||
67 | { | ||
68 | /* All work should have been flushed before going offline */ | ||
69 | WARN_ON_ONCE(cpu_is_offline(cpu)); | ||
70 | |||
71 | /* Arch remote IPI send/receive backend aren't NMI safe */ | ||
72 | WARN_ON_ONCE(in_nmi()); | ||
73 | |||
74 | /* Only queue if not already pending */ | ||
75 | if (!irq_work_claim(work)) | ||
76 | return false; | ||
77 | |||
78 | if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) | ||
79 | arch_send_call_function_single_ipi(cpu); | ||
80 | |||
81 | return true; | ||
82 | } | ||
83 | EXPORT_SYMBOL_GPL(irq_work_queue_on); | ||
84 | #endif | ||
85 | |||
86 | /* Enqueue the irq work @work on the current CPU */ | ||
64 | bool irq_work_queue(struct irq_work *work) | 87 | bool irq_work_queue(struct irq_work *work) |
65 | { | 88 | { |
66 | /* Only queue if not already pending */ | 89 | /* Only queue if not already pending */ |
@@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work) | |||
70 | /* Queue the entry and raise the IPI if needed. */ | 93 | /* Queue the entry and raise the IPI if needed. */ |
71 | preempt_disable(); | 94 | preempt_disable(); |
72 | 95 | ||
73 | llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); | 96 | /* If the work is "lazy", handle it from next tick if any */ |
74 | 97 | if (work->flags & IRQ_WORK_LAZY) { | |
75 | /* | 98 | if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) && |
76 | * If the work is not "lazy" or the tick is stopped, raise the irq | 99 | tick_nohz_tick_stopped()) |
77 | * work interrupt (if supported by the arch), otherwise, just wait | 100 | arch_irq_work_raise(); |
78 | * for the next tick. | 101 | } else { |
79 | */ | 102 | if (llist_add(&work->llnode, &__get_cpu_var(raised_list))) |
80 | if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) { | ||
81 | if (!this_cpu_cmpxchg(irq_work_raised, 0, 1)) | ||
82 | arch_irq_work_raise(); | 103 | arch_irq_work_raise(); |
83 | } | 104 | } |
84 | 105 | ||
@@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue); | |||
90 | 111 | ||
91 | bool irq_work_needs_cpu(void) | 112 | bool irq_work_needs_cpu(void) |
92 | { | 113 | { |
93 | struct llist_head *this_list; | 114 | struct llist_head *raised, *lazy; |
94 | 115 | ||
95 | this_list = &__get_cpu_var(irq_work_list); | 116 | raised = &__get_cpu_var(raised_list); |
96 | if (llist_empty(this_list)) | 117 | lazy = &__get_cpu_var(lazy_list); |
118 | if (llist_empty(raised) && llist_empty(lazy)) | ||
97 | return false; | 119 | return false; |
98 | 120 | ||
99 | /* All work should have been flushed before going offline */ | 121 | /* All work should have been flushed before going offline */ |
@@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void) | |||
102 | return true; | 124 | return true; |
103 | } | 125 | } |
104 | 126 | ||
105 | static void __irq_work_run(void) | 127 | static void irq_work_run_list(struct llist_head *list) |
106 | { | 128 | { |
107 | unsigned long flags; | 129 | unsigned long flags; |
108 | struct irq_work *work; | 130 | struct irq_work *work; |
109 | struct llist_head *this_list; | ||
110 | struct llist_node *llnode; | 131 | struct llist_node *llnode; |
111 | 132 | ||
133 | BUG_ON(!irqs_disabled()); | ||
112 | 134 | ||
113 | /* | 135 | if (llist_empty(list)) |
114 | * Reset the "raised" state right before we check the list because | ||
115 | * an NMI may enqueue after we find the list empty from the runner. | ||
116 | */ | ||
117 | __this_cpu_write(irq_work_raised, 0); | ||
118 | barrier(); | ||
119 | |||
120 | this_list = &__get_cpu_var(irq_work_list); | ||
121 | if (llist_empty(this_list)) | ||
122 | return; | 136 | return; |
123 | 137 | ||
124 | BUG_ON(!irqs_disabled()); | 138 | llnode = llist_del_all(list); |
125 | |||
126 | llnode = llist_del_all(this_list); | ||
127 | while (llnode != NULL) { | 139 | while (llnode != NULL) { |
128 | work = llist_entry(llnode, struct irq_work, llnode); | 140 | work = llist_entry(llnode, struct irq_work, llnode); |
129 | 141 | ||
@@ -149,13 +161,13 @@ static void __irq_work_run(void) | |||
149 | } | 161 | } |
150 | 162 | ||
151 | /* | 163 | /* |
152 | * Run the irq_work entries on this cpu. Requires to be ran from hardirq | 164 | * hotplug calls this through: |
153 | * context with local IRQs disabled. | 165 | * hotplug_cfd() -> flush_smp_call_function_queue() |
154 | */ | 166 | */ |
155 | void irq_work_run(void) | 167 | void irq_work_run(void) |
156 | { | 168 | { |
157 | BUG_ON(!in_irq()); | 169 | irq_work_run_list(&__get_cpu_var(raised_list)); |
158 | __irq_work_run(); | 170 | irq_work_run_list(&__get_cpu_var(lazy_list)); |
159 | } | 171 | } |
160 | EXPORT_SYMBOL_GPL(irq_work_run); | 172 | EXPORT_SYMBOL_GPL(irq_work_run); |
161 | 173 | ||
@@ -171,35 +183,3 @@ void irq_work_sync(struct irq_work *work) | |||
171 | cpu_relax(); | 183 | cpu_relax(); |
172 | } | 184 | } |
173 | EXPORT_SYMBOL_GPL(irq_work_sync); | 185 | EXPORT_SYMBOL_GPL(irq_work_sync); |
174 | |||
175 | #ifdef CONFIG_HOTPLUG_CPU | ||
176 | static int irq_work_cpu_notify(struct notifier_block *self, | ||
177 | unsigned long action, void *hcpu) | ||
178 | { | ||
179 | long cpu = (long)hcpu; | ||
180 | |||
181 | switch (action) { | ||
182 | case CPU_DYING: | ||
183 | /* Called from stop_machine */ | ||
184 | if (WARN_ON_ONCE(cpu != smp_processor_id())) | ||
185 | break; | ||
186 | __irq_work_run(); | ||
187 | break; | ||
188 | default: | ||
189 | break; | ||
190 | } | ||
191 | return NOTIFY_OK; | ||
192 | } | ||
193 | |||
194 | static struct notifier_block cpu_notify; | ||
195 | |||
196 | static __init int irq_work_init_cpu_notifier(void) | ||
197 | { | ||
198 | cpu_notify.notifier_call = irq_work_cpu_notify; | ||
199 | cpu_notify.priority = 0; | ||
200 | register_cpu_notifier(&cpu_notify); | ||
201 | return 0; | ||
202 | } | ||
203 | device_initcall(irq_work_init_cpu_notifier); | ||
204 | |||
205 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
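
With the raised/lazy split, the new irq_work_queue_on() (SMP only) lets code run a callback in IRQ context on a chosen remote CPU, reusing the call-function IPI. A hedged sketch with made-up names:

#include <linux/irq_work.h>
#include <linux/smp.h>
#include <linux/printk.h>

static void ex_irq_work_fn(struct irq_work *work)
{
        pr_info("irq_work ran on cpu %d\n", smp_processor_id());
}

static struct irq_work ex_work;

static void ex_kick_cpu(int cpu)
{
        init_irq_work(&ex_work, ex_irq_work_fn);
        /* Queued on @cpu's raised_list; the IPI is sent only if the list was empty. */
        irq_work_queue_on(&ex_work, cpu);
}
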
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index adf98622cb32..54e75226c2c4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -28,12 +28,6 @@ | |||
28 | #include <linux/compat.h> | 28 | #include <linux/compat.h> |
29 | 29 | ||
30 | 30 | ||
31 | static int ptrace_trapping_sleep_fn(void *flags) | ||
32 | { | ||
33 | schedule(); | ||
34 | return 0; | ||
35 | } | ||
36 | |||
37 | /* | 31 | /* |
38 | * ptrace a task: make the debugger its new parent and | 32 | * ptrace a task: make the debugger its new parent and |
39 | * move it to the ptrace list. | 33 | * move it to the ptrace list. |
@@ -371,7 +365,7 @@ unlock_creds: | |||
371 | out: | 365 | out: |
372 | if (!retval) { | 366 | if (!retval) { |
373 | wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, | 367 | wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, |
374 | ptrace_trapping_sleep_fn, TASK_UNINTERRUPTIBLE); | 368 | TASK_UNINTERRUPTIBLE); |
375 | proc_ptrace_connector(task, PTRACE_ATTACH); | 369 | proc_ptrace_connector(task, PTRACE_ATTACH); |
376 | } | 370 | } |
377 | 371 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 126f7e3f04e7..1211575a2208 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -139,6 +139,8 @@ void update_rq_clock(struct rq *rq) | |||
139 | return; | 139 | return; |
140 | 140 | ||
141 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; | 141 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; |
142 | if (delta < 0) | ||
143 | return; | ||
142 | rq->clock += delta; | 144 | rq->clock += delta; |
143 | update_rq_clock_task(rq, delta); | 145 | update_rq_clock_task(rq, delta); |
144 | } | 146 | } |
@@ -243,6 +245,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
243 | char buf[64]; | 245 | char buf[64]; |
244 | char *cmp; | 246 | char *cmp; |
245 | int i; | 247 | int i; |
248 | struct inode *inode; | ||
246 | 249 | ||
247 | if (cnt > 63) | 250 | if (cnt > 63) |
248 | cnt = 63; | 251 | cnt = 63; |
@@ -253,7 +256,11 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
253 | buf[cnt] = 0; | 256 | buf[cnt] = 0; |
254 | cmp = strstrip(buf); | 257 | cmp = strstrip(buf); |
255 | 258 | ||
259 | /* Ensure the static_key remains in a consistent state */ | ||
260 | inode = file_inode(filp); | ||
261 | mutex_lock(&inode->i_mutex); | ||
256 | i = sched_feat_set(cmp); | 262 | i = sched_feat_set(cmp); |
263 | mutex_unlock(&inode->i_mutex); | ||
257 | if (i == __SCHED_FEAT_NR) | 264 | if (i == __SCHED_FEAT_NR) |
258 | return -EINVAL; | 265 | return -EINVAL; |
259 | 266 | ||
@@ -587,30 +594,31 @@ static bool set_nr_if_polling(struct task_struct *p) | |||
587 | #endif | 594 | #endif |
588 | 595 | ||
589 | /* | 596 | /* |
590 | * resched_task - mark a task 'to be rescheduled now'. | 597 | * resched_curr - mark rq's current task 'to be rescheduled now'. |
591 | * | 598 | * |
592 | * On UP this means the setting of the need_resched flag, on SMP it | 599 | * On UP this means the setting of the need_resched flag, on SMP it |
593 | * might also involve a cross-CPU call to trigger the scheduler on | 600 | * might also involve a cross-CPU call to trigger the scheduler on |
594 | * the target CPU. | 601 | * the target CPU. |
595 | */ | 602 | */ |
596 | void resched_task(struct task_struct *p) | 603 | void resched_curr(struct rq *rq) |
597 | { | 604 | { |
605 | struct task_struct *curr = rq->curr; | ||
598 | int cpu; | 606 | int cpu; |
599 | 607 | ||
600 | lockdep_assert_held(&task_rq(p)->lock); | 608 | lockdep_assert_held(&rq->lock); |
601 | 609 | ||
602 | if (test_tsk_need_resched(p)) | 610 | if (test_tsk_need_resched(curr)) |
603 | return; | 611 | return; |
604 | 612 | ||
605 | cpu = task_cpu(p); | 613 | cpu = cpu_of(rq); |
606 | 614 | ||
607 | if (cpu == smp_processor_id()) { | 615 | if (cpu == smp_processor_id()) { |
608 | set_tsk_need_resched(p); | 616 | set_tsk_need_resched(curr); |
609 | set_preempt_need_resched(); | 617 | set_preempt_need_resched(); |
610 | return; | 618 | return; |
611 | } | 619 | } |
612 | 620 | ||
613 | if (set_nr_and_not_polling(p)) | 621 | if (set_nr_and_not_polling(curr)) |
614 | smp_send_reschedule(cpu); | 622 | smp_send_reschedule(cpu); |
615 | else | 623 | else |
616 | trace_sched_wake_idle_without_ipi(cpu); | 624 | trace_sched_wake_idle_without_ipi(cpu); |
@@ -623,7 +631,7 @@ void resched_cpu(int cpu) | |||
623 | 631 | ||
624 | if (!raw_spin_trylock_irqsave(&rq->lock, flags)) | 632 | if (!raw_spin_trylock_irqsave(&rq->lock, flags)) |
625 | return; | 633 | return; |
626 | resched_task(cpu_curr(cpu)); | 634 | resched_curr(rq); |
627 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 635 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
628 | } | 636 | } |
629 | 637 | ||
@@ -684,10 +692,16 @@ static void wake_up_idle_cpu(int cpu) | |||
684 | 692 | ||
685 | static bool wake_up_full_nohz_cpu(int cpu) | 693 | static bool wake_up_full_nohz_cpu(int cpu) |
686 | { | 694 | { |
695 | /* | ||
696 | * We just need the target to call irq_exit() and re-evaluate | ||
697 | * the next tick. The nohz full kick at least implies that. | ||
698 | * If needed we can still optimize that later with an | ||
699 | * empty IRQ. | ||
700 | */ | ||
687 | if (tick_nohz_full_cpu(cpu)) { | 701 | if (tick_nohz_full_cpu(cpu)) { |
688 | if (cpu != smp_processor_id() || | 702 | if (cpu != smp_processor_id() || |
689 | tick_nohz_tick_stopped()) | 703 | tick_nohz_tick_stopped()) |
690 | smp_send_reschedule(cpu); | 704 | tick_nohz_full_kick_cpu(cpu); |
691 | return true; | 705 | return true; |
692 | } | 706 | } |
693 | 707 | ||
@@ -730,18 +744,15 @@ static inline bool got_nohz_idle_kick(void) | |||
730 | #ifdef CONFIG_NO_HZ_FULL | 744 | #ifdef CONFIG_NO_HZ_FULL |
731 | bool sched_can_stop_tick(void) | 745 | bool sched_can_stop_tick(void) |
732 | { | 746 | { |
733 | struct rq *rq; | 747 | /* |
734 | 748 | * More than one running task needs preemption. |
735 | rq = this_rq(); | 749 | * nr_running update is assumed to be visible |
736 | 750 | * after IPI is sent from wakers. | |
737 | /* Make sure rq->nr_running update is visible after the IPI */ | 751 | */ |
738 | smp_rmb(); | 752 | if (this_rq()->nr_running > 1) |
739 | 753 | return false; | |
740 | /* More than one running task need preemption */ | ||
741 | if (rq->nr_running > 1) | ||
742 | return false; | ||
743 | 754 | ||
744 | return true; | 755 | return true; |
745 | } | 756 | } |
746 | #endif /* CONFIG_NO_HZ_FULL */ | 757 | #endif /* CONFIG_NO_HZ_FULL */ |
747 | 758 | ||
@@ -1022,7 +1033,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
1022 | if (class == rq->curr->sched_class) | 1033 | if (class == rq->curr->sched_class) |
1023 | break; | 1034 | break; |
1024 | if (class == p->sched_class) { | 1035 | if (class == p->sched_class) { |
1025 | resched_task(rq->curr); | 1036 | resched_curr(rq); |
1026 | break; | 1037 | break; |
1027 | } | 1038 | } |
1028 | } | 1039 | } |
@@ -1568,9 +1579,7 @@ void scheduler_ipi(void) | |||
1568 | */ | 1579 | */ |
1569 | preempt_fold_need_resched(); | 1580 | preempt_fold_need_resched(); |
1570 | 1581 | ||
1571 | if (llist_empty(&this_rq()->wake_list) | 1582 | if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()) |
1572 | && !tick_nohz_full_cpu(smp_processor_id()) | ||
1573 | && !got_nohz_idle_kick()) | ||
1574 | return; | 1583 | return; |
1575 | 1584 | ||
1576 | /* | 1585 | /* |
@@ -1587,7 +1596,6 @@ void scheduler_ipi(void) | |||
1587 | * somewhat pessimize the simple resched case. | 1596 | * somewhat pessimize the simple resched case. |
1588 | */ | 1597 | */ |
1589 | irq_enter(); | 1598 | irq_enter(); |
1590 | tick_nohz_full_check(); | ||
1591 | sched_ttwu_pending(); | 1599 | sched_ttwu_pending(); |
1592 | 1600 | ||
1593 | /* | 1601 | /* |
@@ -2431,7 +2439,12 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | |||
2431 | { | 2439 | { |
2432 | u64 ns = 0; | 2440 | u64 ns = 0; |
2433 | 2441 | ||
2434 | if (task_current(rq, p)) { | 2442 | /* |
2443 | * Must be ->curr _and_ ->on_rq. If dequeued, we would | ||
2444 | * project cycles that may never be accounted to this | ||
2445 | * thread, breaking clock_gettime(). | ||
2446 | */ | ||
2447 | if (task_current(rq, p) && p->on_rq) { | ||
2435 | update_rq_clock(rq); | 2448 | update_rq_clock(rq); |
2436 | ns = rq_clock_task(rq) - p->se.exec_start; | 2449 | ns = rq_clock_task(rq) - p->se.exec_start; |
2437 | if ((s64)ns < 0) | 2450 | if ((s64)ns < 0) |
@@ -2474,8 +2487,10 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
2474 | * If we race with it leaving cpu, we'll take a lock. So we're correct. | 2487 | * If we race with it leaving cpu, we'll take a lock. So we're correct. |
2475 | * If we race with it entering cpu, unaccounted time is 0. This is | 2488 | * If we race with it entering cpu, unaccounted time is 0. This is |
2476 | * indistinguishable from the read occurring a few cycles earlier. | 2489 | * indistinguishable from the read occurring a few cycles earlier. |
2490 | * If we see ->on_cpu without ->on_rq, the task is leaving, and has | ||
2491 | * been accounted, so we're correct here as well. | ||
2477 | */ | 2492 | */ |
2478 | if (!p->on_cpu) | 2493 | if (!p->on_cpu || !p->on_rq) |
2479 | return p->se.sum_exec_runtime; | 2494 | return p->se.sum_exec_runtime; |
2480 | #endif | 2495 | #endif |
2481 | 2496 | ||
@@ -2971,7 +2986,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
2971 | } | 2986 | } |
2972 | 2987 | ||
2973 | trace_sched_pi_setprio(p, prio); | 2988 | trace_sched_pi_setprio(p, prio); |
2974 | p->pi_top_task = rt_mutex_get_top_task(p); | ||
2975 | oldprio = p->prio; | 2989 | oldprio = p->prio; |
2976 | prev_class = p->sched_class; | 2990 | prev_class = p->sched_class; |
2977 | on_rq = p->on_rq; | 2991 | on_rq = p->on_rq; |
@@ -2991,8 +3005,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
2991 | * running task | 3005 | * running task |
2992 | */ | 3006 | */ |
2993 | if (dl_prio(prio)) { | 3007 | if (dl_prio(prio)) { |
2994 | if (!dl_prio(p->normal_prio) || (p->pi_top_task && | 3008 | struct task_struct *pi_task = rt_mutex_get_top_task(p); |
2995 | dl_entity_preempt(&p->pi_top_task->dl, &p->dl))) { | 3009 | if (!dl_prio(p->normal_prio) || |
3010 | (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { | ||
2996 | p->dl.dl_boosted = 1; | 3011 | p->dl.dl_boosted = 1; |
2997 | p->dl.dl_throttled = 0; | 3012 | p->dl.dl_throttled = 0; |
2998 | enqueue_flag = ENQUEUE_REPLENISH; | 3013 | enqueue_flag = ENQUEUE_REPLENISH; |
@@ -3064,7 +3079,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
3064 | * lowered its priority, then reschedule its CPU: | 3079 | * lowered its priority, then reschedule its CPU: |
3065 | */ | 3080 | */ |
3066 | if (delta < 0 || (delta > 0 && task_running(rq, p))) | 3081 | if (delta < 0 || (delta > 0 && task_running(rq, p))) |
3067 | resched_task(rq->curr); | 3082 | resched_curr(rq); |
3068 | } | 3083 | } |
3069 | out_unlock: | 3084 | out_unlock: |
3070 | task_rq_unlock(rq, p, &flags); | 3085 | task_rq_unlock(rq, p, &flags); |
@@ -3203,12 +3218,18 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) | |||
3203 | dl_se->dl_yielded = 0; | 3218 | dl_se->dl_yielded = 0; |
3204 | } | 3219 | } |
3205 | 3220 | ||
3221 | /* | ||
3222 | * sched_setparam() passes in -1 for its policy, to let the functions | ||
3223 | * it calls know not to change it. | ||
3224 | */ | ||
3225 | #define SETPARAM_POLICY -1 | ||
3226 | |||
3206 | static void __setscheduler_params(struct task_struct *p, | 3227 | static void __setscheduler_params(struct task_struct *p, |
3207 | const struct sched_attr *attr) | 3228 | const struct sched_attr *attr) |
3208 | { | 3229 | { |
3209 | int policy = attr->sched_policy; | 3230 | int policy = attr->sched_policy; |
3210 | 3231 | ||
3211 | if (policy == -1) /* setparam */ | 3232 | if (policy == SETPARAM_POLICY) |
3212 | policy = p->policy; | 3233 | policy = p->policy; |
3213 | 3234 | ||
3214 | p->policy = policy; | 3235 | p->policy = policy; |
@@ -3557,10 +3578,8 @@ static int _sched_setscheduler(struct task_struct *p, int policy, | |||
3557 | .sched_nice = PRIO_TO_NICE(p->static_prio), | 3578 | .sched_nice = PRIO_TO_NICE(p->static_prio), |
3558 | }; | 3579 | }; |
3559 | 3580 | ||
3560 | /* | 3581 | /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ |
3561 | * Fixup the legacy SCHED_RESET_ON_FORK hack | 3582 | if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { |
3562 | */ | ||
3563 | if (policy & SCHED_RESET_ON_FORK) { | ||
3564 | attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; | 3583 | attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; |
3565 | policy &= ~SCHED_RESET_ON_FORK; | 3584 | policy &= ~SCHED_RESET_ON_FORK; |
3566 | attr.sched_policy = policy; | 3585 | attr.sched_policy = policy; |
@@ -3730,7 +3749,7 @@ SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, | |||
3730 | */ | 3749 | */ |
3731 | SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) | 3750 | SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) |
3732 | { | 3751 | { |
3733 | return do_sched_setscheduler(pid, -1, param); | 3752 | return do_sched_setscheduler(pid, SETPARAM_POLICY, param); |
3734 | } | 3753 | } |
3735 | 3754 | ||
3736 | /** | 3755 | /** |
@@ -4285,7 +4304,7 @@ again: | |||
4285 | * fairness. | 4304 | * fairness. |
4286 | */ | 4305 | */ |
4287 | if (preempt && rq != p_rq) | 4306 | if (preempt && rq != p_rq) |
4288 | resched_task(p_rq->curr); | 4307 | resched_curr(p_rq); |
4289 | } | 4308 | } |
4290 | 4309 | ||
4291 | out_unlock: | 4310 | out_unlock: |
@@ -6465,6 +6484,20 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, | |||
6465 | sched_domain_level_max = max(sched_domain_level_max, sd->level); | 6484 | sched_domain_level_max = max(sched_domain_level_max, sd->level); |
6466 | child->parent = sd; | 6485 | child->parent = sd; |
6467 | sd->child = child; | 6486 | sd->child = child; |
6487 | |||
6488 | if (!cpumask_subset(sched_domain_span(child), | ||
6489 | sched_domain_span(sd))) { | ||
6490 | pr_err("BUG: arch topology borken\n"); | ||
6491 | #ifdef CONFIG_SCHED_DEBUG | ||
6492 | pr_err(" the %s domain not a subset of the %s domain\n", | ||
6493 | child->name, sd->name); | ||
6494 | #endif | ||
6495 | /* Fixup, ensure @sd has at least @child cpus. */ | ||
6496 | cpumask_or(sched_domain_span(sd), | ||
6497 | sched_domain_span(sd), | ||
6498 | sched_domain_span(child)); | ||
6499 | } | ||
6500 | |||
6468 | } | 6501 | } |
6469 | set_domain_attribute(sd, attr); | 6502 | set_domain_attribute(sd, attr); |
6470 | 6503 | ||
@@ -7092,7 +7125,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) | |||
7092 | __setscheduler(rq, p, &attr); | 7125 | __setscheduler(rq, p, &attr); |
7093 | if (on_rq) { | 7126 | if (on_rq) { |
7094 | enqueue_task(rq, p, 0); | 7127 | enqueue_task(rq, p, 0); |
7095 | resched_task(rq->curr); | 7128 | resched_curr(rq); |
7096 | } | 7129 | } |
7097 | 7130 | ||
7098 | check_class_changed(rq, p, prev_class, old_prio); | 7131 | check_class_changed(rq, p, prev_class, old_prio); |
@@ -7803,6 +7836,11 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) | |||
7803 | if (period > max_cfs_quota_period) | 7836 | if (period > max_cfs_quota_period) |
7804 | return -EINVAL; | 7837 | return -EINVAL; |
7805 | 7838 | ||
7839 | /* | ||
7840 | * Prevent race between setting of cfs_rq->runtime_enabled and | ||
7841 | * unthrottle_offline_cfs_rqs(). | ||
7842 | */ | ||
7843 | get_online_cpus(); | ||
7806 | mutex_lock(&cfs_constraints_mutex); | 7844 | mutex_lock(&cfs_constraints_mutex); |
7807 | ret = __cfs_schedulable(tg, period, quota); | 7845 | ret = __cfs_schedulable(tg, period, quota); |
7808 | if (ret) | 7846 | if (ret) |
@@ -7828,7 +7866,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) | |||
7828 | } | 7866 | } |
7829 | raw_spin_unlock_irq(&cfs_b->lock); | 7867 | raw_spin_unlock_irq(&cfs_b->lock); |
7830 | 7868 | ||
7831 | for_each_possible_cpu(i) { | 7869 | for_each_online_cpu(i) { |
7832 | struct cfs_rq *cfs_rq = tg->cfs_rq[i]; | 7870 | struct cfs_rq *cfs_rq = tg->cfs_rq[i]; |
7833 | struct rq *rq = cfs_rq->rq; | 7871 | struct rq *rq = cfs_rq->rq; |
7834 | 7872 | ||
@@ -7844,6 +7882,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) | |||
7844 | cfs_bandwidth_usage_dec(); | 7882 | cfs_bandwidth_usage_dec(); |
7845 | out_unlock: | 7883 | out_unlock: |
7846 | mutex_unlock(&cfs_constraints_mutex); | 7884 | mutex_unlock(&cfs_constraints_mutex); |
7885 | put_online_cpus(); | ||
7847 | 7886 | ||
7848 | return ret; | 7887 | return ret; |
7849 | } | 7888 | } |
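
SETPARAM_POLICY never leaves the kernel; what userspace observes is simply that sched_setparam() changes the priority while leaving the task's current policy untouched. A small userspace illustration (not kernel code; the RT policy needs CAP_SYS_NICE):

#include <sched.h>
#include <stdio.h>

int main(void)
{
        struct sched_param sp = { .sched_priority = 10 };

        if (sched_setscheduler(0, SCHED_FIFO, &sp))
                perror("sched_setscheduler");

        sp.sched_priority = 20;
        if (sched_setparam(0, &sp))             /* policy stays SCHED_FIFO */
                perror("sched_setparam");

        printf("policy is %s\n",
               sched_getscheduler(0) == SCHED_FIFO ? "SCHED_FIFO" : "other");
        return 0;
}
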
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fc4f98b1258f..255ce138b652 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
@@ -306,7 +306,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se, | |||
306 | * the overrunning entity can't interfere with other entity in the system and | 306 | * the overrunning entity can't interfere with other entity in the system and |
307 | * can't make them miss their deadlines. Reasons why this kind of overruns | 307 | * can't make them miss their deadlines. Reasons why this kind of overruns |
308 | * could happen are, typically, a entity voluntarily trying to overcome its | 308 | * could happen are, typically, a entity voluntarily trying to overcome its |
309 | * runtime, or it just underestimated it during sched_setscheduler_ex(). | 309 | * runtime, or it just underestimated it during sched_setattr(). |
310 | */ | 310 | */ |
311 | static void replenish_dl_entity(struct sched_dl_entity *dl_se, | 311 | static void replenish_dl_entity(struct sched_dl_entity *dl_se, |
312 | struct sched_dl_entity *pi_se) | 312 | struct sched_dl_entity *pi_se) |
@@ -535,7 +535,7 @@ again: | |||
535 | if (task_has_dl_policy(rq->curr)) | 535 | if (task_has_dl_policy(rq->curr)) |
536 | check_preempt_curr_dl(rq, p, 0); | 536 | check_preempt_curr_dl(rq, p, 0); |
537 | else | 537 | else |
538 | resched_task(rq->curr); | 538 | resched_curr(rq); |
539 | #ifdef CONFIG_SMP | 539 | #ifdef CONFIG_SMP |
540 | /* | 540 | /* |
541 | * Queueing this task back might have overloaded rq, | 541 | * Queueing this task back might have overloaded rq, |
@@ -634,7 +634,7 @@ static void update_curr_dl(struct rq *rq) | |||
634 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); | 634 | enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); |
635 | 635 | ||
636 | if (!is_leftmost(curr, &rq->dl)) | 636 | if (!is_leftmost(curr, &rq->dl)) |
637 | resched_task(curr); | 637 | resched_curr(rq); |
638 | } | 638 | } |
639 | 639 | ||
640 | /* | 640 | /* |
@@ -964,7 +964,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) | |||
964 | cpudl_find(&rq->rd->cpudl, p, NULL) != -1) | 964 | cpudl_find(&rq->rd->cpudl, p, NULL) != -1) |
965 | return; | 965 | return; |
966 | 966 | ||
967 | resched_task(rq->curr); | 967 | resched_curr(rq); |
968 | } | 968 | } |
969 | 969 | ||
970 | static int pull_dl_task(struct rq *this_rq); | 970 | static int pull_dl_task(struct rq *this_rq); |
@@ -979,7 +979,7 @@ static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, | |||
979 | int flags) | 979 | int flags) |
980 | { | 980 | { |
981 | if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { | 981 | if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { |
982 | resched_task(rq->curr); | 982 | resched_curr(rq); |
983 | return; | 983 | return; |
984 | } | 984 | } |
985 | 985 | ||
@@ -1333,7 +1333,7 @@ retry: | |||
1333 | if (dl_task(rq->curr) && | 1333 | if (dl_task(rq->curr) && |
1334 | dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && | 1334 | dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && |
1335 | rq->curr->nr_cpus_allowed > 1) { | 1335 | rq->curr->nr_cpus_allowed > 1) { |
1336 | resched_task(rq->curr); | 1336 | resched_curr(rq); |
1337 | return 0; | 1337 | return 0; |
1338 | } | 1338 | } |
1339 | 1339 | ||
@@ -1373,7 +1373,7 @@ retry: | |||
1373 | set_task_cpu(next_task, later_rq->cpu); | 1373 | set_task_cpu(next_task, later_rq->cpu); |
1374 | activate_task(later_rq, next_task, 0); | 1374 | activate_task(later_rq, next_task, 0); |
1375 | 1375 | ||
1376 | resched_task(later_rq->curr); | 1376 | resched_curr(later_rq); |
1377 | 1377 | ||
1378 | double_unlock_balance(rq, later_rq); | 1378 | double_unlock_balance(rq, later_rq); |
1379 | 1379 | ||
@@ -1632,14 +1632,14 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p, | |||
1632 | */ | 1632 | */ |
1633 | if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) && | 1633 | if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) && |
1634 | rq->curr == p) | 1634 | rq->curr == p) |
1635 | resched_task(p); | 1635 | resched_curr(rq); |
1636 | #else | 1636 | #else |
1637 | /* | 1637 | /* |
1638 | * Again, we don't know if p has a earlier | 1638 | * Again, we don't know if p has a earlier |
1639 | * or later deadline, so let's blindly set a | 1639 | * or later deadline, so let's blindly set a |
1640 | * (maybe not needed) rescheduling point. | 1640 | * (maybe not needed) rescheduling point. |
1641 | */ | 1641 | */ |
1642 | resched_task(p); | 1642 | resched_curr(rq); |
1643 | #endif /* CONFIG_SMP */ | 1643 | #endif /* CONFIG_SMP */ |
1644 | } else | 1644 | } else |
1645 | switched_to_dl(rq, p); | 1645 | switched_to_dl(rq, p); |
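
The corrected comment points at sched_setattr(), the syscall that hands a task its SCHED_DEADLINE runtime/deadline/period. glibc does not wrap it in this era, so callers go through syscall(2); a hedged userspace sketch (struct layout as documented for this kernel series, needs root or CAP_SYS_NICE, and SYS_sched_setattr assumes reasonably new headers):

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE 6
#endif

struct sched_attr {
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;            /* SCHED_NORMAL/SCHED_BATCH */
        uint32_t sched_priority;        /* SCHED_FIFO/SCHED_RR */
        uint64_t sched_runtime;         /* SCHED_DEADLINE, all in ns */
        uint64_t sched_deadline;
        uint64_t sched_period;
};

int main(void)
{
        struct sched_attr attr = {
                .size           = sizeof(attr),
                .sched_policy   = SCHED_DEADLINE,
                .sched_runtime  =  10 * 1000 * 1000,    /* run 10ms...   */
                .sched_deadline =  30 * 1000 * 1000,    /* ...within 30ms */
                .sched_period   = 100 * 1000 * 1000,    /* every 100ms   */
        };

        if (syscall(SYS_sched_setattr, 0, &attr, 0))
                perror("sched_setattr");
        return 0;
}
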
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index fea7d3335e1f..bfa3c86d0d68 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -1062,7 +1062,6 @@ static void update_numa_stats(struct numa_stats *ns, int nid) | |||
1062 | if (!cpus) | 1062 | if (!cpus) |
1063 | return; | 1063 | return; |
1064 | 1064 | ||
1065 | ns->load = (ns->load * SCHED_CAPACITY_SCALE) / ns->compute_capacity; | ||
1066 | ns->task_capacity = | 1065 | ns->task_capacity = |
1067 | DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE); | 1066 | DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE); |
1068 | ns->has_free_capacity = (ns->nr_running < ns->task_capacity); | 1067 | ns->has_free_capacity = (ns->nr_running < ns->task_capacity); |
@@ -1096,18 +1095,30 @@ static void task_numa_assign(struct task_numa_env *env, | |||
1096 | env->best_cpu = env->dst_cpu; | 1095 | env->best_cpu = env->dst_cpu; |
1097 | } | 1096 | } |
1098 | 1097 | ||
1099 | static bool load_too_imbalanced(long orig_src_load, long orig_dst_load, | 1098 | static bool load_too_imbalanced(long src_load, long dst_load, |
1100 | long src_load, long dst_load, | ||
1101 | struct task_numa_env *env) | 1099 | struct task_numa_env *env) |
1102 | { | 1100 | { |
1103 | long imb, old_imb; | 1101 | long imb, old_imb; |
1102 | long orig_src_load, orig_dst_load; | ||
1103 | long src_capacity, dst_capacity; | ||
1104 | |||
1105 | /* | ||
1106 | * The load is corrected for the CPU capacity available on each node. | ||
1107 | * | ||
1108 | * src_load dst_load | ||
1109 | * ------------ vs --------- | ||
1110 | * src_capacity dst_capacity | ||
1111 | */ | ||
1112 | src_capacity = env->src_stats.compute_capacity; | ||
1113 | dst_capacity = env->dst_stats.compute_capacity; | ||
1104 | 1114 | ||
1105 | /* We care about the slope of the imbalance, not the direction. */ | 1115 | /* We care about the slope of the imbalance, not the direction. */ |
1106 | if (dst_load < src_load) | 1116 | if (dst_load < src_load) |
1107 | swap(dst_load, src_load); | 1117 | swap(dst_load, src_load); |
1108 | 1118 | ||
1109 | /* Is the difference below the threshold? */ | 1119 | /* Is the difference below the threshold? */ |
1110 | imb = dst_load * 100 - src_load * env->imbalance_pct; | 1120 | imb = dst_load * src_capacity * 100 - |
1121 | src_load * dst_capacity * env->imbalance_pct; | ||
1111 | if (imb <= 0) | 1122 | if (imb <= 0) |
1112 | return false; | 1123 | return false; |
1113 | 1124 | ||
@@ -1115,10 +1126,14 @@ static bool load_too_imbalanced(long orig_src_load, long orig_dst_load, | |||
1115 | * The imbalance is above the allowed threshold. | 1126 | * The imbalance is above the allowed threshold. |
1116 | * Compare it with the old imbalance. | 1127 | * Compare it with the old imbalance. |
1117 | */ | 1128 | */ |
1129 | orig_src_load = env->src_stats.load; | ||
1130 | orig_dst_load = env->dst_stats.load; | ||
1131 | |||
1118 | if (orig_dst_load < orig_src_load) | 1132 | if (orig_dst_load < orig_src_load) |
1119 | swap(orig_dst_load, orig_src_load); | 1133 | swap(orig_dst_load, orig_src_load); |
1120 | 1134 | ||
1121 | old_imb = orig_dst_load * 100 - orig_src_load * env->imbalance_pct; | 1135 | old_imb = orig_dst_load * src_capacity * 100 - |
1136 | orig_src_load * dst_capacity * env->imbalance_pct; | ||
1122 | 1137 | ||
1123 | /* Would this change make things worse? */ | 1138 | /* Would this change make things worse? */ |
1124 | return (imb > old_imb); | 1139 | return (imb > old_imb); |
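
The rewritten check is the cross-multiplied form of comparing src_load/src_capacity against dst_load/dst_capacity, so a node with more compute capacity is allowed to carry proportionally more load before a move is rejected. A toy, userspace-style illustration of the threshold half of the check (numbers are invented; the real function goes on to compare against the pre-move imbalance):

#include <stdio.h>

static int too_imbalanced(long src_load, long dst_load,
                          long src_capacity, long dst_capacity,
                          int imbalance_pct)
{
        long imb;

        /* Slope, not direction: as in the patch, only the loads are swapped. */
        if (dst_load < src_load) {
                long tmp = dst_load;
                dst_load = src_load;
                src_load = tmp;
        }

        imb = dst_load * src_capacity * 100 -
              src_load * dst_capacity * imbalance_pct;
        return imb > 0;
}

int main(void)
{
        /* Twice the capacity tolerates nearly twice the load: prints 0. */
        printf("%d\n", too_imbalanced(1000, 1900, 1024, 2048, 125));
        /* Equal capacities, 30% over src exceeds the 25% allowance: prints 1. */
        printf("%d\n", too_imbalanced(1000, 1300, 1024, 1024, 125));
        return 0;
}
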
@@ -1136,10 +1151,10 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1136 | struct rq *src_rq = cpu_rq(env->src_cpu); | 1151 | struct rq *src_rq = cpu_rq(env->src_cpu); |
1137 | struct rq *dst_rq = cpu_rq(env->dst_cpu); | 1152 | struct rq *dst_rq = cpu_rq(env->dst_cpu); |
1138 | struct task_struct *cur; | 1153 | struct task_struct *cur; |
1139 | long orig_src_load, src_load; | 1154 | long src_load, dst_load; |
1140 | long orig_dst_load, dst_load; | ||
1141 | long load; | 1155 | long load; |
1142 | long imp = (groupimp > 0) ? groupimp : taskimp; | 1156 | long imp = env->p->numa_group ? groupimp : taskimp; |
1157 | long moveimp = imp; | ||
1143 | 1158 | ||
1144 | rcu_read_lock(); | 1159 | rcu_read_lock(); |
1145 | cur = ACCESS_ONCE(dst_rq->curr); | 1160 | cur = ACCESS_ONCE(dst_rq->curr); |
@@ -1177,11 +1192,6 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1177 | * itself (not part of a group), use the task weight | 1192 | * itself (not part of a group), use the task weight |
1178 | * instead. | 1193 | * instead. |
1179 | */ | 1194 | */ |
1180 | if (env->p->numa_group) | ||
1181 | imp = groupimp; | ||
1182 | else | ||
1183 | imp = taskimp; | ||
1184 | |||
1185 | if (cur->numa_group) | 1195 | if (cur->numa_group) |
1186 | imp += group_weight(cur, env->src_nid) - | 1196 | imp += group_weight(cur, env->src_nid) - |
1187 | group_weight(cur, env->dst_nid); | 1197 | group_weight(cur, env->dst_nid); |
@@ -1191,7 +1201,7 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1191 | } | 1201 | } |
1192 | } | 1202 | } |
1193 | 1203 | ||
1194 | if (imp < env->best_imp) | 1204 | if (imp <= env->best_imp && moveimp <= env->best_imp) |
1195 | goto unlock; | 1205 | goto unlock; |
1196 | 1206 | ||
1197 | if (!cur) { | 1207 | if (!cur) { |
@@ -1204,20 +1214,34 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1204 | } | 1214 | } |
1205 | 1215 | ||
1206 | /* Balance doesn't matter much if we're running a task per cpu */ | 1216 | /* Balance doesn't matter much if we're running a task per cpu */ |
1207 | if (src_rq->nr_running == 1 && dst_rq->nr_running == 1) | 1217 | if (imp > env->best_imp && src_rq->nr_running == 1 && |
1218 | dst_rq->nr_running == 1) | ||
1208 | goto assign; | 1219 | goto assign; |
1209 | 1220 | ||
1210 | /* | 1221 | /* |
1211 | * In the overloaded case, try and keep the load balanced. | 1222 | * In the overloaded case, try and keep the load balanced. |
1212 | */ | 1223 | */ |
1213 | balance: | 1224 | balance: |
1214 | orig_dst_load = env->dst_stats.load; | ||
1215 | orig_src_load = env->src_stats.load; | ||
1216 | |||
1217 | /* XXX missing capacity terms */ | ||
1218 | load = task_h_load(env->p); | 1225 | load = task_h_load(env->p); |
1219 | dst_load = orig_dst_load + load; | 1226 | dst_load = env->dst_stats.load + load; |
1220 | src_load = orig_src_load - load; | 1227 | src_load = env->src_stats.load - load; |
1228 | |||
1229 | if (moveimp > imp && moveimp > env->best_imp) { | ||
1230 | /* | ||
1231 | * If the improvement from just moving env->p in this direction is | ||
1232 | * better than swapping tasks around, check if a move is | ||
1233 | * possible. Store a slightly smaller score than moveimp, | ||
1234 | * so an actually idle CPU will win. | ||
1235 | */ | ||
1236 | if (!load_too_imbalanced(src_load, dst_load, env)) { | ||
1237 | imp = moveimp - 1; | ||
1238 | cur = NULL; | ||
1239 | goto assign; | ||
1240 | } | ||
1241 | } | ||
1242 | |||
1243 | if (imp <= env->best_imp) | ||
1244 | goto unlock; | ||
1221 | 1245 | ||
1222 | if (cur) { | 1246 | if (cur) { |
1223 | load = task_h_load(cur); | 1247 | load = task_h_load(cur); |
@@ -1225,8 +1249,7 @@ balance: | |||
1225 | src_load += load; | 1249 | src_load += load; |
1226 | } | 1250 | } |
1227 | 1251 | ||
1228 | if (load_too_imbalanced(orig_src_load, orig_dst_load, | 1252 | if (load_too_imbalanced(src_load, dst_load, env)) |
1229 | src_load, dst_load, env)) | ||
1230 | goto unlock; | 1253 | goto unlock; |
1231 | 1254 | ||
1232 | assign: | 1255 | assign: |
@@ -1302,9 +1325,8 @@ static int task_numa_migrate(struct task_struct *p) | |||
1302 | groupimp = group_weight(p, env.dst_nid) - groupweight; | 1325 | groupimp = group_weight(p, env.dst_nid) - groupweight; |
1303 | update_numa_stats(&env.dst_stats, env.dst_nid); | 1326 | update_numa_stats(&env.dst_stats, env.dst_nid); |
1304 | 1327 | ||
1305 | /* If the preferred nid has free capacity, try to use it. */ | 1328 | /* Try to find a spot on the preferred nid. */ |
1306 | if (env.dst_stats.has_free_capacity) | 1329 | task_numa_find_cpu(&env, taskimp, groupimp); |
1307 | task_numa_find_cpu(&env, taskimp, groupimp); | ||
1308 | 1330 | ||
1309 | /* No space available on the preferred nid. Look elsewhere. */ | 1331 | /* No space available on the preferred nid. Look elsewhere. */ |
1310 | if (env.best_cpu == -1) { | 1332 | if (env.best_cpu == -1) { |
@@ -1324,10 +1346,6 @@ static int task_numa_migrate(struct task_struct *p) | |||
1324 | } | 1346 | } |
1325 | } | 1347 | } |
1326 | 1348 | ||
1327 | /* No better CPU than the current one was found. */ | ||
1328 | if (env.best_cpu == -1) | ||
1329 | return -EAGAIN; | ||
1330 | |||
1331 | /* | 1349 | /* |
1332 | * If the task is part of a workload that spans multiple NUMA nodes, | 1350 | * If the task is part of a workload that spans multiple NUMA nodes, |
1333 | * and is migrating into one of the workload's active nodes, remember | 1351 | * and is migrating into one of the workload's active nodes, remember |
@@ -1336,8 +1354,19 @@ static int task_numa_migrate(struct task_struct *p) | |||
1336 | * A task that migrated to a second choice node will be better off | 1354 | * A task that migrated to a second choice node will be better off |
1337 | * trying for a better one later. Do not set the preferred node here. | 1355 | * trying for a better one later. Do not set the preferred node here. |
1338 | */ | 1356 | */ |
1339 | if (p->numa_group && node_isset(env.dst_nid, p->numa_group->active_nodes)) | 1357 | if (p->numa_group) { |
1340 | sched_setnuma(p, env.dst_nid); | 1358 | if (env.best_cpu == -1) |
1359 | nid = env.src_nid; | ||
1360 | else | ||
1361 | nid = env.dst_nid; | ||
1362 | |||
1363 | if (node_isset(nid, p->numa_group->active_nodes)) | ||
1364 | sched_setnuma(p, env.dst_nid); | ||
1365 | } | ||
1366 | |||
1367 | /* No better CPU than the current one was found. */ | ||
1368 | if (env.best_cpu == -1) | ||
1369 | return -EAGAIN; | ||
1341 | 1370 | ||
1342 | /* | 1371 | /* |
1343 | * Reset the scan period if the task is being rescheduled on an | 1372 | * Reset the scan period if the task is being rescheduled on an |
@@ -1415,12 +1444,12 @@ static void update_numa_active_node_mask(struct numa_group *numa_group) | |||
1415 | /* | 1444 | /* |
1416 | * When adapting the scan rate, the period is divided into NUMA_PERIOD_SLOTS | 1445 | * When adapting the scan rate, the period is divided into NUMA_PERIOD_SLOTS |
1417 | * increments. The more local the fault statistics are, the higher the scan | 1446 | * increments. The more local the fault statistics are, the higher the scan |
1418 | * period will be for the next scan window. If local/remote ratio is below | 1447 | * period will be for the next scan window. If local/(local+remote) ratio is |
1419 | * NUMA_PERIOD_THRESHOLD (where range of ratio is 1..NUMA_PERIOD_SLOTS) the | 1448 | * below NUMA_PERIOD_THRESHOLD (where range of ratio is 1..NUMA_PERIOD_SLOTS) |
1420 | * scan period will decrease | 1449 | * the scan period will decrease. Aim for 70% local accesses. |
1421 | */ | 1450 | */ |
1422 | #define NUMA_PERIOD_SLOTS 10 | 1451 | #define NUMA_PERIOD_SLOTS 10 |
1423 | #define NUMA_PERIOD_THRESHOLD 3 | 1452 | #define NUMA_PERIOD_THRESHOLD 7 |
1424 | 1453 | ||
1425 | /* | 1454 | /* |
1426 | * Increase the scan period (slow down scanning) if the majority of | 1455 | * Increase the scan period (slow down scanning) if the majority of |
@@ -1595,30 +1624,17 @@ static void task_numa_placement(struct task_struct *p) | |||
1595 | 1624 | ||
1596 | if (p->numa_group) { | 1625 | if (p->numa_group) { |
1597 | update_numa_active_node_mask(p->numa_group); | 1626 | update_numa_active_node_mask(p->numa_group); |
1598 | /* | ||
1599 | * If the preferred task and group nids are different, | ||
1600 | * iterate over the nodes again to find the best place. | ||
1601 | */ | ||
1602 | if (max_nid != max_group_nid) { | ||
1603 | unsigned long weight, max_weight = 0; | ||
1604 | |||
1605 | for_each_online_node(nid) { | ||
1606 | weight = task_weight(p, nid) + group_weight(p, nid); | ||
1607 | if (weight > max_weight) { | ||
1608 | max_weight = weight; | ||
1609 | max_nid = nid; | ||
1610 | } | ||
1611 | } | ||
1612 | } | ||
1613 | |||
1614 | spin_unlock_irq(group_lock); | 1627 | spin_unlock_irq(group_lock); |
1628 | max_nid = max_group_nid; | ||
1615 | } | 1629 | } |
1616 | 1630 | ||
1617 | /* Preferred node as the node with the most faults */ | 1631 | if (max_faults) { |
1618 | if (max_faults && max_nid != p->numa_preferred_nid) { | 1632 | /* Set the new preferred node */ |
1619 | /* Update the preferred nid and migrate task if possible */ | 1633 | if (max_nid != p->numa_preferred_nid) |
1620 | sched_setnuma(p, max_nid); | 1634 | sched_setnuma(p, max_nid); |
1621 | numa_migrate_preferred(p); | 1635 | |
1636 | if (task_node(p) != p->numa_preferred_nid) | ||
1637 | numa_migrate_preferred(p); | ||
1622 | } | 1638 | } |
1623 | } | 1639 | } |
1624 | 1640 | ||
@@ -2899,7 +2915,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
2899 | ideal_runtime = sched_slice(cfs_rq, curr); | 2915 | ideal_runtime = sched_slice(cfs_rq, curr); |
2900 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | 2916 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; |
2901 | if (delta_exec > ideal_runtime) { | 2917 | if (delta_exec > ideal_runtime) { |
2902 | resched_task(rq_of(cfs_rq)->curr); | 2918 | resched_curr(rq_of(cfs_rq)); |
2903 | /* | 2919 | /* |
2904 | * The current task ran long enough, ensure it doesn't get | 2920 | * The current task ran long enough, ensure it doesn't get |
2905 | * re-elected due to buddy favours. | 2921 | * re-elected due to buddy favours. |
@@ -2923,7 +2939,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
2923 | return; | 2939 | return; |
2924 | 2940 | ||
2925 | if (delta > ideal_runtime) | 2941 | if (delta > ideal_runtime) |
2926 | resched_task(rq_of(cfs_rq)->curr); | 2942 | resched_curr(rq_of(cfs_rq)); |
2927 | } | 2943 | } |
2928 | 2944 | ||
2929 | static void | 2945 | static void |
@@ -3063,7 +3079,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |||
3063 | * validating it and just reschedule. | 3079 | * validating it and just reschedule. |
3064 | */ | 3080 | */ |
3065 | if (queued) { | 3081 | if (queued) { |
3066 | resched_task(rq_of(cfs_rq)->curr); | 3082 | resched_curr(rq_of(cfs_rq)); |
3067 | return; | 3083 | return; |
3068 | } | 3084 | } |
3069 | /* | 3085 | /* |
@@ -3254,7 +3270,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) | |||
3254 | * hierarchy can be throttled | 3270 | * hierarchy can be throttled |
3255 | */ | 3271 | */ |
3256 | if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) | 3272 | if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) |
3257 | resched_task(rq_of(cfs_rq)->curr); | 3273 | resched_curr(rq_of(cfs_rq)); |
3258 | } | 3274 | } |
3259 | 3275 | ||
3260 | static __always_inline | 3276 | static __always_inline |
@@ -3360,7 +3376,11 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) | |||
3360 | cfs_rq->throttled = 1; | 3376 | cfs_rq->throttled = 1; |
3361 | cfs_rq->throttled_clock = rq_clock(rq); | 3377 | cfs_rq->throttled_clock = rq_clock(rq); |
3362 | raw_spin_lock(&cfs_b->lock); | 3378 | raw_spin_lock(&cfs_b->lock); |
3363 | list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); | 3379 | /* |
3380 | * Add to the _head_ of the list, so that an already-started | ||
3381 | * distribute_cfs_runtime will not see us | ||
3382 | */ | ||
3383 | list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); | ||
3364 | if (!cfs_b->timer_active) | 3384 | if (!cfs_b->timer_active) |
3365 | __start_cfs_bandwidth(cfs_b, false); | 3385 | __start_cfs_bandwidth(cfs_b, false); |
3366 | raw_spin_unlock(&cfs_b->lock); | 3386 | raw_spin_unlock(&cfs_b->lock); |
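
Head insertion is what backs the new comment: an RCU walker that has already loaded the list head's next pointer can never reach a node linked in ahead of it, whereas a node appended at the tail could still be visited later in the same walk. A generic sketch of that property (illustrative type, not from the patch):

#include <linux/rculist.h>

struct ex_node {
        int id;
        struct list_head link;
};

static LIST_HEAD(ex_list);

/*
 * list_add_rcu() links @n at the head, so a reader already iterating
 * ex_list under rcu_read_lock() finishes its current walk without ever
 * seeing @n; list_add_tail_rcu() would not give that guarantee.
 */
static void ex_publish(struct ex_node *n)
{
        list_add_rcu(&n->link, &ex_list);
}
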
@@ -3410,14 +3430,15 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) | |||
3410 | 3430 | ||
3411 | /* determine whether we need to wake up potentially idle cpu */ | 3431 | /* determine whether we need to wake up potentially idle cpu */ |
3412 | if (rq->curr == rq->idle && rq->cfs.nr_running) | 3432 | if (rq->curr == rq->idle && rq->cfs.nr_running) |
3413 | resched_task(rq->curr); | 3433 | resched_curr(rq); |
3414 | } | 3434 | } |
3415 | 3435 | ||
3416 | static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, | 3436 | static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, |
3417 | u64 remaining, u64 expires) | 3437 | u64 remaining, u64 expires) |
3418 | { | 3438 | { |
3419 | struct cfs_rq *cfs_rq; | 3439 | struct cfs_rq *cfs_rq; |
3420 | u64 runtime = remaining; | 3440 | u64 runtime; |
3441 | u64 starting_runtime = remaining; | ||
3421 | 3442 | ||
3422 | rcu_read_lock(); | 3443 | rcu_read_lock(); |
3423 | list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, | 3444 | list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, |
@@ -3448,7 +3469,7 @@ next: | |||
3448 | } | 3469 | } |
3449 | rcu_read_unlock(); | 3470 | rcu_read_unlock(); |
3450 | 3471 | ||
3451 | return remaining; | 3472 | return starting_runtime - remaining; |
3452 | } | 3473 | } |
3453 | 3474 | ||
3454 | /* | 3475 | /* |
@@ -3494,22 +3515,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) | |||
3494 | /* account preceding periods in which throttling occurred */ | 3515 | /* account preceding periods in which throttling occurred */ |
3495 | cfs_b->nr_throttled += overrun; | 3516 | cfs_b->nr_throttled += overrun; |
3496 | 3517 | ||
3497 | /* | ||
3498 | * There are throttled entities so we must first use the new bandwidth | ||
3499 | * to unthrottle them before making it generally available. This | ||
3500 | * ensures that all existing debts will be paid before a new cfs_rq is | ||
3501 | * allowed to run. | ||
3502 | */ | ||
3503 | runtime = cfs_b->runtime; | ||
3504 | runtime_expires = cfs_b->runtime_expires; | 3518 | runtime_expires = cfs_b->runtime_expires; |
3505 | cfs_b->runtime = 0; | ||
3506 | 3519 | ||
3507 | /* | 3520 | /* |
3508 | * This check is repeated as we are holding onto the new bandwidth | 3521 | * This check is repeated as we are holding onto the new bandwidth while |
3509 | * while we unthrottle. This can potentially race with an unthrottled | 3522 | * we unthrottle. This can potentially race with an unthrottled group |
3510 | * group trying to acquire new bandwidth from the global pool. | 3523 | * trying to acquire new bandwidth from the global pool. This can result |
3524 | * in us over-using our runtime if it is all used during this loop, but | ||
3525 | * only by limited amounts in that extreme case. | ||
3511 | */ | 3526 | */ |
3512 | while (throttled && runtime > 0) { | 3527 | while (throttled && cfs_b->runtime > 0) { |
3528 | runtime = cfs_b->runtime; | ||
3513 | raw_spin_unlock(&cfs_b->lock); | 3529 | raw_spin_unlock(&cfs_b->lock); |
3514 | /* we can't nest cfs_b->lock while distributing bandwidth */ | 3530 | /* we can't nest cfs_b->lock while distributing bandwidth */ |
3515 | runtime = distribute_cfs_runtime(cfs_b, runtime, | 3531 | runtime = distribute_cfs_runtime(cfs_b, runtime, |
@@ -3517,10 +3533,10 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) | |||
3517 | raw_spin_lock(&cfs_b->lock); | 3533 | raw_spin_lock(&cfs_b->lock); |
3518 | 3534 | ||
3519 | throttled = !list_empty(&cfs_b->throttled_cfs_rq); | 3535 | throttled = !list_empty(&cfs_b->throttled_cfs_rq); |
3536 | |||
3537 | cfs_b->runtime -= min(runtime, cfs_b->runtime); | ||
3520 | } | 3538 | } |
3521 | 3539 | ||
3522 | /* return (any) remaining runtime */ | ||
3523 | cfs_b->runtime = runtime; | ||
3524 | /* | 3540 | /* |
3525 | * While we are ensured activity in the period following an | 3541 | * While we are ensured activity in the period following an |
3526 | * unthrottle, this also covers the case in which the new bandwidth is | 3542 | * unthrottle, this also covers the case in which the new bandwidth is |
@@ -3631,10 +3647,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) | |||
3631 | return; | 3647 | return; |
3632 | } | 3648 | } |
3633 | 3649 | ||
3634 | if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) { | 3650 | if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) |
3635 | runtime = cfs_b->runtime; | 3651 | runtime = cfs_b->runtime; |
3636 | cfs_b->runtime = 0; | 3652 | |
3637 | } | ||
3638 | expires = cfs_b->runtime_expires; | 3653 | expires = cfs_b->runtime_expires; |
3639 | raw_spin_unlock(&cfs_b->lock); | 3654 | raw_spin_unlock(&cfs_b->lock); |
3640 | 3655 | ||
@@ -3645,7 +3660,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) | |||
3645 | 3660 | ||
3646 | raw_spin_lock(&cfs_b->lock); | 3661 | raw_spin_lock(&cfs_b->lock); |
3647 | if (expires == cfs_b->runtime_expires) | 3662 | if (expires == cfs_b->runtime_expires) |
3648 | cfs_b->runtime = runtime; | 3663 | cfs_b->runtime -= min(runtime, cfs_b->runtime); |
3649 | raw_spin_unlock(&cfs_b->lock); | 3664 | raw_spin_unlock(&cfs_b->lock); |
3650 | } | 3665 | } |
3651 | 3666 | ||
@@ -3775,6 +3790,19 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | |||
3775 | hrtimer_cancel(&cfs_b->slack_timer); | 3790 | hrtimer_cancel(&cfs_b->slack_timer); |
3776 | } | 3791 | } |
3777 | 3792 | ||
3793 | static void __maybe_unused update_runtime_enabled(struct rq *rq) | ||
3794 | { | ||
3795 | struct cfs_rq *cfs_rq; | ||
3796 | |||
3797 | for_each_leaf_cfs_rq(rq, cfs_rq) { | ||
3798 | struct cfs_bandwidth *cfs_b = &cfs_rq->tg->cfs_bandwidth; | ||
3799 | |||
3800 | raw_spin_lock(&cfs_b->lock); | ||
3801 | cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF; | ||
3802 | raw_spin_unlock(&cfs_b->lock); | ||
3803 | } | ||
3804 | } | ||
3805 | |||
3778 | static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) | 3806 | static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) |
3779 | { | 3807 | { |
3780 | struct cfs_rq *cfs_rq; | 3808 | struct cfs_rq *cfs_rq; |
@@ -3788,6 +3816,12 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) | |||
3788 | * there's some valid quota amount | 3816 | * there's some valid quota amount |
3789 | */ | 3817 | */ |
3790 | cfs_rq->runtime_remaining = 1; | 3818 | cfs_rq->runtime_remaining = 1; |
3819 | /* | ||
3820 | * Offline rq is schedulable till cpu is completely disabled | ||
3821 | * in take_cpu_down(), so we prevent new cfs throttling here. | ||
3822 | */ | ||
3823 | cfs_rq->runtime_enabled = 0; | ||
3824 | |||
3791 | if (cfs_rq_throttled(cfs_rq)) | 3825 | if (cfs_rq_throttled(cfs_rq)) |
3792 | unthrottle_cfs_rq(cfs_rq); | 3826 | unthrottle_cfs_rq(cfs_rq); |
3793 | } | 3827 | } |
@@ -3831,6 +3865,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | |||
3831 | return NULL; | 3865 | return NULL; |
3832 | } | 3866 | } |
3833 | static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | 3867 | static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} |
3868 | static inline void update_runtime_enabled(struct rq *rq) {} | ||
3834 | static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} | 3869 | static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {} |
3835 | 3870 | ||
3836 | #endif /* CONFIG_CFS_BANDWIDTH */ | 3871 | #endif /* CONFIG_CFS_BANDWIDTH */ |
@@ -3854,7 +3889,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) | |||
3854 | 3889 | ||
3855 | if (delta < 0) { | 3890 | if (delta < 0) { |
3856 | if (rq->curr == p) | 3891 | if (rq->curr == p) |
3857 | resched_task(p); | 3892 | resched_curr(rq); |
3858 | return; | 3893 | return; |
3859 | } | 3894 | } |
3860 | 3895 | ||
@@ -4723,7 +4758,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
4723 | return; | 4758 | return; |
4724 | 4759 | ||
4725 | preempt: | 4760 | preempt: |
4726 | resched_task(curr); | 4761 | resched_curr(rq); |
4727 | /* | 4762 | /* |
4728 | * Only set the backward buddy when the current task is still | 4763 | * Only set the backward buddy when the current task is still |
4729 | * on the rq. This can happen when a wakeup gets interleaved | 4764 | * on the rq. This can happen when a wakeup gets interleaved |
@@ -5094,8 +5129,7 @@ static void move_task(struct task_struct *p, struct lb_env *env) | |||
5094 | /* | 5129 | /* |
5095 | * Is this task likely cache-hot: | 5130 | * Is this task likely cache-hot: |
5096 | */ | 5131 | */ |
5097 | static int | 5132 | static int task_hot(struct task_struct *p, struct lb_env *env) |
5098 | task_hot(struct task_struct *p, u64 now) | ||
5099 | { | 5133 | { |
5100 | s64 delta; | 5134 | s64 delta; |
5101 | 5135 | ||
@@ -5108,7 +5142,7 @@ task_hot(struct task_struct *p, u64 now) | |||
5108 | /* | 5142 | /* |
5109 | * Buddy candidates are cache hot: | 5143 | * Buddy candidates are cache hot: |
5110 | */ | 5144 | */ |
5111 | if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running && | 5145 | if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running && |
5112 | (&p->se == cfs_rq_of(&p->se)->next || | 5146 | (&p->se == cfs_rq_of(&p->se)->next || |
5113 | &p->se == cfs_rq_of(&p->se)->last)) | 5147 | &p->se == cfs_rq_of(&p->se)->last)) |
5114 | return 1; | 5148 | return 1; |
@@ -5118,7 +5152,7 @@ task_hot(struct task_struct *p, u64 now) | |||
5118 | if (sysctl_sched_migration_cost == 0) | 5152 | if (sysctl_sched_migration_cost == 0) |
5119 | return 0; | 5153 | return 0; |
5120 | 5154 | ||
5121 | delta = now - p->se.exec_start; | 5155 | delta = rq_clock_task(env->src_rq) - p->se.exec_start; |
5122 | 5156 | ||
5123 | return delta < (s64)sysctl_sched_migration_cost; | 5157 | return delta < (s64)sysctl_sched_migration_cost; |
5124 | } | 5158 | } |
@@ -5272,7 +5306,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) | |||
5272 | * 2) task is cache cold, or | 5306 | * 2) task is cache cold, or |
5273 | * 3) too many balance attempts have failed. | 5307 | * 3) too many balance attempts have failed. |
5274 | */ | 5308 | */ |
5275 | tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq)); | 5309 | tsk_cache_hot = task_hot(p, env); |
5276 | if (!tsk_cache_hot) | 5310 | if (!tsk_cache_hot) |
5277 | tsk_cache_hot = migrate_degrades_locality(p, env); | 5311 | tsk_cache_hot = migrate_degrades_locality(p, env); |
5278 | 5312 | ||
@@ -5864,10 +5898,12 @@ static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *gro | |||
5864 | * @load_idx: Load index of sched_domain of this_cpu for load calc. | 5898 | * @load_idx: Load index of sched_domain of this_cpu for load calc. |
5865 | * @local_group: Does group contain this_cpu. | 5899 | * @local_group: Does group contain this_cpu. |
5866 | * @sgs: variable to hold the statistics for this group. | 5900 | * @sgs: variable to hold the statistics for this group. |
5901 | * @overload: Indicate more than one runnable task for any CPU. | ||
5867 | */ | 5902 | */ |
5868 | static inline void update_sg_lb_stats(struct lb_env *env, | 5903 | static inline void update_sg_lb_stats(struct lb_env *env, |
5869 | struct sched_group *group, int load_idx, | 5904 | struct sched_group *group, int load_idx, |
5870 | int local_group, struct sg_lb_stats *sgs) | 5905 | int local_group, struct sg_lb_stats *sgs, |
5906 | bool *overload) | ||
5871 | { | 5907 | { |
5872 | unsigned long load; | 5908 | unsigned long load; |
5873 | int i; | 5909 | int i; |
@@ -5885,6 +5921,10 @@ static inline void update_sg_lb_stats(struct lb_env *env, | |||
5885 | 5921 | ||
5886 | sgs->group_load += load; | 5922 | sgs->group_load += load; |
5887 | sgs->sum_nr_running += rq->nr_running; | 5923 | sgs->sum_nr_running += rq->nr_running; |
5924 | |||
5925 | if (rq->nr_running > 1) | ||
5926 | *overload = true; | ||
5927 | |||
5888 | #ifdef CONFIG_NUMA_BALANCING | 5928 | #ifdef CONFIG_NUMA_BALANCING |
5889 | sgs->nr_numa_running += rq->nr_numa_running; | 5929 | sgs->nr_numa_running += rq->nr_numa_running; |
5890 | sgs->nr_preferred_running += rq->nr_preferred_running; | 5930 | sgs->nr_preferred_running += rq->nr_preferred_running; |
@@ -5995,6 +6035,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd | |||
5995 | struct sched_group *sg = env->sd->groups; | 6035 | struct sched_group *sg = env->sd->groups; |
5996 | struct sg_lb_stats tmp_sgs; | 6036 | struct sg_lb_stats tmp_sgs; |
5997 | int load_idx, prefer_sibling = 0; | 6037 | int load_idx, prefer_sibling = 0; |
6038 | bool overload = false; | ||
5998 | 6039 | ||
5999 | if (child && child->flags & SD_PREFER_SIBLING) | 6040 | if (child && child->flags & SD_PREFER_SIBLING) |
6000 | prefer_sibling = 1; | 6041 | prefer_sibling = 1; |
@@ -6015,7 +6056,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd | |||
6015 | update_group_capacity(env->sd, env->dst_cpu); | 6056 | update_group_capacity(env->sd, env->dst_cpu); |
6016 | } | 6057 | } |
6017 | 6058 | ||
6018 | update_sg_lb_stats(env, sg, load_idx, local_group, sgs); | 6059 | update_sg_lb_stats(env, sg, load_idx, local_group, sgs, |
6060 | &overload); | ||
6019 | 6061 | ||
6020 | if (local_group) | 6062 | if (local_group) |
6021 | goto next_group; | 6063 | goto next_group; |
@@ -6049,6 +6091,13 @@ next_group: | |||
6049 | 6091 | ||
6050 | if (env->sd->flags & SD_NUMA) | 6092 | if (env->sd->flags & SD_NUMA) |
6051 | env->fbq_type = fbq_classify_group(&sds->busiest_stat); | 6093 | env->fbq_type = fbq_classify_group(&sds->busiest_stat); |
6094 | |||
6095 | if (!env->sd->parent) { | ||
6096 | /* update overload indicator if we are at root domain */ | ||
6097 | if (env->dst_rq->rd->overload != overload) | ||
6098 | env->dst_rq->rd->overload = overload; | ||
6099 | } | ||
6100 | |||
6052 | } | 6101 | } |
6053 | 6102 | ||
6054 | /** | 6103 | /** |
@@ -6767,7 +6816,8 @@ static int idle_balance(struct rq *this_rq) | |||
6767 | */ | 6816 | */ |
6768 | this_rq->idle_stamp = rq_clock(this_rq); | 6817 | this_rq->idle_stamp = rq_clock(this_rq); |
6769 | 6818 | ||
6770 | if (this_rq->avg_idle < sysctl_sched_migration_cost) { | 6819 | if (this_rq->avg_idle < sysctl_sched_migration_cost || |
6820 | !this_rq->rd->overload) { | ||
6771 | rcu_read_lock(); | 6821 | rcu_read_lock(); |
6772 | sd = rcu_dereference_check_sched_domain(this_rq->sd); | 6822 | sd = rcu_dereference_check_sched_domain(this_rq->sd); |
6773 | if (sd) | 6823 | if (sd) |
@@ -7325,6 +7375,8 @@ void trigger_load_balance(struct rq *rq) | |||
7325 | static void rq_online_fair(struct rq *rq) | 7375 | static void rq_online_fair(struct rq *rq) |
7326 | { | 7376 | { |
7327 | update_sysctl(); | 7377 | update_sysctl(); |
7378 | |||
7379 | update_runtime_enabled(rq); | ||
7328 | } | 7380 | } |
7329 | 7381 | ||
7330 | static void rq_offline_fair(struct rq *rq) | 7382 | static void rq_offline_fair(struct rq *rq) |
@@ -7398,7 +7450,7 @@ static void task_fork_fair(struct task_struct *p) | |||
7398 | * 'current' within the tree based on its new key value. | 7450 | * 'current' within the tree based on its new key value. |
7399 | */ | 7451 | */ |
7400 | swap(curr->vruntime, se->vruntime); | 7452 | swap(curr->vruntime, se->vruntime); |
7401 | resched_task(rq->curr); | 7453 | resched_curr(rq); |
7402 | } | 7454 | } |
7403 | 7455 | ||
7404 | se->vruntime -= cfs_rq->min_vruntime; | 7456 | se->vruntime -= cfs_rq->min_vruntime; |
@@ -7423,7 +7475,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) | |||
7423 | */ | 7475 | */ |
7424 | if (rq->curr == p) { | 7476 | if (rq->curr == p) { |
7425 | if (p->prio > oldprio) | 7477 | if (p->prio > oldprio) |
7426 | resched_task(rq->curr); | 7478 | resched_curr(rq); |
7427 | } else | 7479 | } else |
7428 | check_preempt_curr(rq, p, 0); | 7480 | check_preempt_curr(rq, p, 0); |
7429 | } | 7481 | } |
@@ -7486,7 +7538,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p) | |||
7486 | * if we can still preempt the current task. | 7538 | * if we can still preempt the current task. |
7487 | */ | 7539 | */ |
7488 | if (rq->curr == p) | 7540 | if (rq->curr == p) |
7489 | resched_task(rq->curr); | 7541 | resched_curr(rq); |
7490 | else | 7542 | else |
7491 | check_preempt_curr(rq, p, 0); | 7543 | check_preempt_curr(rq, p, 0); |
7492 | } | 7544 | } |
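
The resched_task(rq->curr) calls converted to resched_curr(rq) throughout this file (and the scheduling classes below) shift the interface from "reschedule this task" to "reschedule whatever is current on this runqueue". As a rough guide to what the callers rely on, here is a simplified, local-CPU-only sketch of such a helper; resched_curr_sketch and its body are illustrative, not the kernel's full implementation, which also handles remote CPUs and polling idle loops.

        /* Illustrative sketch: mark the currently running task of @rq for
         * rescheduling.  Assumes cpu_of(rq) is the local CPU and that the
         * caller holds rq->lock, as the converted call sites do. */
        static void resched_curr_sketch(struct rq *rq)
        {
                struct task_struct *curr = rq->curr;

                lockdep_assert_held(&rq->lock);

                if (test_tsk_need_resched(curr))
                        return;                 /* already marked */

                set_tsk_need_resched(curr);     /* TIF_NEED_RESCHED on curr */
                set_preempt_need_resched();     /* fold into preempt_count */
        }
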
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index cf009fb0bc25..9f1608f99819 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c | |||
@@ -79,7 +79,7 @@ static void cpuidle_idle_call(void) | |||
79 | struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); | 79 | struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); |
80 | struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); | 80 | struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); |
81 | int next_state, entered_state; | 81 | int next_state, entered_state; |
82 | bool broadcast; | 82 | unsigned int broadcast; |
83 | 83 | ||
84 | /* | 84 | /* |
85 | * Check if the idle task must be rescheduled. If it is the | 85 | * Check if the idle task must be rescheduled. If it is the |
@@ -135,7 +135,7 @@ use_default: | |||
135 | goto exit_idle; | 135 | goto exit_idle; |
136 | } | 136 | } |
137 | 137 | ||
138 | broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP); | 138 | broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP; |
139 | 139 | ||
140 | /* | 140 | /* |
141 | * Tell the time framework to switch to a broadcast timer | 141 | * Tell the time framework to switch to a broadcast timer |
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 879f2b75266a..67ad4e7f506a 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c | |||
@@ -20,7 +20,7 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags) | |||
20 | */ | 20 | */ |
21 | static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags) | 21 | static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags) |
22 | { | 22 | { |
23 | resched_task(rq->idle); | 23 | resched_curr(rq); |
24 | } | 24 | } |
25 | 25 | ||
26 | static struct task_struct * | 26 | static struct task_struct * |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a49083192c64..5f6edca4fafd 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -463,9 +463,10 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se); | |||
463 | static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | 463 | static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
464 | { | 464 | { |
465 | struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; | 465 | struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; |
466 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
466 | struct sched_rt_entity *rt_se; | 467 | struct sched_rt_entity *rt_se; |
467 | 468 | ||
468 | int cpu = cpu_of(rq_of_rt_rq(rt_rq)); | 469 | int cpu = cpu_of(rq); |
469 | 470 | ||
470 | rt_se = rt_rq->tg->rt_se[cpu]; | 471 | rt_se = rt_rq->tg->rt_se[cpu]; |
471 | 472 | ||
@@ -476,7 +477,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | |||
476 | enqueue_rt_entity(rt_se, false); | 477 | enqueue_rt_entity(rt_se, false); |
477 | 478 | ||
478 | if (rt_rq->highest_prio.curr < curr->prio) | 479 | if (rt_rq->highest_prio.curr < curr->prio) |
479 | resched_task(curr); | 480 | resched_curr(rq); |
480 | } | 481 | } |
481 | } | 482 | } |
482 | 483 | ||
@@ -566,7 +567,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | |||
566 | return; | 567 | return; |
567 | 568 | ||
568 | enqueue_top_rt_rq(rt_rq); | 569 | enqueue_top_rt_rq(rt_rq); |
569 | resched_task(rq->curr); | 570 | resched_curr(rq); |
570 | } | 571 | } |
571 | 572 | ||
572 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) | 573 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
@@ -740,6 +741,9 @@ balanced: | |||
740 | rt_rq->rt_throttled = 0; | 741 | rt_rq->rt_throttled = 0; |
741 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 742 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
742 | raw_spin_unlock(&rt_b->rt_runtime_lock); | 743 | raw_spin_unlock(&rt_b->rt_runtime_lock); |
744 | |||
745 | /* Make rt_rq available for pick_next_task() */ | ||
746 | sched_rt_rq_enqueue(rt_rq); | ||
743 | } | 747 | } |
744 | } | 748 | } |
745 | 749 | ||
@@ -948,7 +952,7 @@ static void update_curr_rt(struct rq *rq) | |||
948 | raw_spin_lock(&rt_rq->rt_runtime_lock); | 952 | raw_spin_lock(&rt_rq->rt_runtime_lock); |
949 | rt_rq->rt_time += delta_exec; | 953 | rt_rq->rt_time += delta_exec; |
950 | if (sched_rt_runtime_exceeded(rt_rq)) | 954 | if (sched_rt_runtime_exceeded(rt_rq)) |
951 | resched_task(curr); | 955 | resched_curr(rq); |
952 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 956 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
953 | } | 957 | } |
954 | } | 958 | } |
@@ -1363,7 +1367,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
1363 | * to try and push current away: | 1367 | * to try and push current away: |
1364 | */ | 1368 | */ |
1365 | requeue_task_rt(rq, p, 1); | 1369 | requeue_task_rt(rq, p, 1); |
1366 | resched_task(rq->curr); | 1370 | resched_curr(rq); |
1367 | } | 1371 | } |
1368 | 1372 | ||
1369 | #endif /* CONFIG_SMP */ | 1373 | #endif /* CONFIG_SMP */ |
@@ -1374,7 +1378,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
1374 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) | 1378 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) |
1375 | { | 1379 | { |
1376 | if (p->prio < rq->curr->prio) { | 1380 | if (p->prio < rq->curr->prio) { |
1377 | resched_task(rq->curr); | 1381 | resched_curr(rq); |
1378 | return; | 1382 | return; |
1379 | } | 1383 | } |
1380 | 1384 | ||
@@ -1690,7 +1694,7 @@ retry: | |||
1690 | * just reschedule current. | 1694 | * just reschedule current. |
1691 | */ | 1695 | */ |
1692 | if (unlikely(next_task->prio < rq->curr->prio)) { | 1696 | if (unlikely(next_task->prio < rq->curr->prio)) { |
1693 | resched_task(rq->curr); | 1697 | resched_curr(rq); |
1694 | return 0; | 1698 | return 0; |
1695 | } | 1699 | } |
1696 | 1700 | ||
@@ -1737,7 +1741,7 @@ retry: | |||
1737 | activate_task(lowest_rq, next_task, 0); | 1741 | activate_task(lowest_rq, next_task, 0); |
1738 | ret = 1; | 1742 | ret = 1; |
1739 | 1743 | ||
1740 | resched_task(lowest_rq->curr); | 1744 | resched_curr(lowest_rq); |
1741 | 1745 | ||
1742 | double_unlock_balance(rq, lowest_rq); | 1746 | double_unlock_balance(rq, lowest_rq); |
1743 | 1747 | ||
@@ -1936,7 +1940,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) | |||
1936 | return; | 1940 | return; |
1937 | 1941 | ||
1938 | if (pull_rt_task(rq)) | 1942 | if (pull_rt_task(rq)) |
1939 | resched_task(rq->curr); | 1943 | resched_curr(rq); |
1940 | } | 1944 | } |
1941 | 1945 | ||
1942 | void __init init_sched_rt_class(void) | 1946 | void __init init_sched_rt_class(void) |
@@ -1974,7 +1978,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) | |||
1974 | check_resched = 0; | 1978 | check_resched = 0; |
1975 | #endif /* CONFIG_SMP */ | 1979 | #endif /* CONFIG_SMP */ |
1976 | if (check_resched && p->prio < rq->curr->prio) | 1980 | if (check_resched && p->prio < rq->curr->prio) |
1977 | resched_task(rq->curr); | 1981 | resched_curr(rq); |
1978 | } | 1982 | } |
1979 | } | 1983 | } |
1980 | 1984 | ||
@@ -2003,11 +2007,11 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) | |||
2003 | * Only reschedule if p is still on the same runqueue. | 2007 | * Only reschedule if p is still on the same runqueue. |
2004 | */ | 2008 | */ |
2005 | if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) | 2009 | if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) |
2006 | resched_task(p); | 2010 | resched_curr(rq); |
2007 | #else | 2011 | #else |
2008 | /* For UP simply resched on drop of prio */ | 2012 | /* For UP simply resched on drop of prio */ |
2009 | if (oldprio < p->prio) | 2013 | if (oldprio < p->prio) |
2010 | resched_task(p); | 2014 | resched_curr(rq); |
2011 | #endif /* CONFIG_SMP */ | 2015 | #endif /* CONFIG_SMP */ |
2012 | } else { | 2016 | } else { |
2013 | /* | 2017 | /* |
@@ -2016,7 +2020,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) | |||
2016 | * then reschedule. | 2020 | * then reschedule. |
2017 | */ | 2021 | */ |
2018 | if (p->prio < rq->curr->prio) | 2022 | if (p->prio < rq->curr->prio) |
2019 | resched_task(rq->curr); | 2023 | resched_curr(rq); |
2020 | } | 2024 | } |
2021 | } | 2025 | } |
2022 | 2026 | ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 31cc02ebc54e..579712f4e9d5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -477,6 +477,9 @@ struct root_domain { | |||
477 | cpumask_var_t span; | 477 | cpumask_var_t span; |
478 | cpumask_var_t online; | 478 | cpumask_var_t online; |
479 | 479 | ||
480 | /* Indicate more than one runnable task for any CPU */ | ||
481 | bool overload; | ||
482 | |||
480 | /* | 483 | /* |
481 | * The bit corresponding to a CPU gets set here if such CPU has more | 484 | * The bit corresponding to a CPU gets set here if such CPU has more |
482 | * than one runnable -deadline task (as it is below for RT tasks). | 485 | * than one runnable -deadline task (as it is below for RT tasks). |
@@ -884,20 +887,10 @@ enum { | |||
884 | #undef SCHED_FEAT | 887 | #undef SCHED_FEAT |
885 | 888 | ||
886 | #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) | 889 | #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) |
887 | static __always_inline bool static_branch__true(struct static_key *key) | ||
888 | { | ||
889 | return static_key_true(key); /* Not out of line branch. */ | ||
890 | } | ||
891 | |||
892 | static __always_inline bool static_branch__false(struct static_key *key) | ||
893 | { | ||
894 | return static_key_false(key); /* Out of line branch. */ | ||
895 | } | ||
896 | |||
897 | #define SCHED_FEAT(name, enabled) \ | 890 | #define SCHED_FEAT(name, enabled) \ |
898 | static __always_inline bool static_branch_##name(struct static_key *key) \ | 891 | static __always_inline bool static_branch_##name(struct static_key *key) \ |
899 | { \ | 892 | { \ |
900 | return static_branch__##enabled(key); \ | 893 | return static_key_##enabled(key); \ |
901 | } | 894 | } |
902 | 895 | ||
903 | #include "features.h" | 896 | #include "features.h" |
@@ -1196,7 +1189,7 @@ extern void init_sched_rt_class(void); | |||
1196 | extern void init_sched_fair_class(void); | 1189 | extern void init_sched_fair_class(void); |
1197 | extern void init_sched_dl_class(void); | 1190 | extern void init_sched_dl_class(void); |
1198 | 1191 | ||
1199 | extern void resched_task(struct task_struct *p); | 1192 | extern void resched_curr(struct rq *rq); |
1200 | extern void resched_cpu(int cpu); | 1193 | extern void resched_cpu(int cpu); |
1201 | 1194 | ||
1202 | extern struct rt_bandwidth def_rt_bandwidth; | 1195 | extern struct rt_bandwidth def_rt_bandwidth; |
@@ -1218,15 +1211,26 @@ static inline void add_nr_running(struct rq *rq, unsigned count) | |||
1218 | 1211 | ||
1219 | rq->nr_running = prev_nr + count; | 1212 | rq->nr_running = prev_nr + count; |
1220 | 1213 | ||
1221 | #ifdef CONFIG_NO_HZ_FULL | ||
1222 | if (prev_nr < 2 && rq->nr_running >= 2) { | 1214 | if (prev_nr < 2 && rq->nr_running >= 2) { |
1215 | #ifdef CONFIG_SMP | ||
1216 | if (!rq->rd->overload) | ||
1217 | rq->rd->overload = true; | ||
1218 | #endif | ||
1219 | |||
1220 | #ifdef CONFIG_NO_HZ_FULL | ||
1223 | if (tick_nohz_full_cpu(rq->cpu)) { | 1221 | if (tick_nohz_full_cpu(rq->cpu)) { |
1224 | /* Order rq->nr_running write against the IPI */ | 1222 | /* |
1225 | smp_wmb(); | 1223 | * Tick is needed if more than one task runs on a CPU. |
1226 | smp_send_reschedule(rq->cpu); | 1224 | * Send the target an IPI to kick it out of nohz mode. |
1225 | * | ||
1226 | * We assume that IPI implies full memory barrier and the | ||
1227 | * new value of rq->nr_running is visible on reception | ||
1228 | * from the target. | ||
1229 | */ | ||
1230 | tick_nohz_full_kick_cpu(rq->cpu); | ||
1227 | } | 1231 | } |
1228 | } | ||
1229 | #endif | 1232 | #endif |
1233 | } | ||
1230 | } | 1234 | } |
1231 | 1235 | ||
1232 | static inline void sub_nr_running(struct rq *rq, unsigned count) | 1236 | static inline void sub_nr_running(struct rq *rq, unsigned count) |
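
The sched.h hunk above removes the static_branch__true()/static_branch__false() wrappers and pastes the SCHED_FEAT "enabled" token directly onto static_key_. For illustration, the simplified macro expands as below for a hypothetical feature; HYPOTHETICAL_FEAT is not a real feature, only the shape of the expansion matters.

        /* SCHED_FEAT(HYPOTHETICAL_FEAT, true) now generates: */
        static __always_inline bool static_branch_HYPOTHETICAL_FEAT(struct static_key *key)
        {
                return static_key_true(key);    /* in-line, default-enabled branch */
        }

        /* An "enabled" value of false pastes static_key_false(key) instead,
         * the out-of-line, default-disabled branch, with no intermediate helper. */
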
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 0ffa20ae657b..15cab1a4f84e 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c | |||
@@ -319,14 +319,14 @@ EXPORT_SYMBOL(wake_bit_function); | |||
319 | */ | 319 | */ |
320 | int __sched | 320 | int __sched |
321 | __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, | 321 | __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, |
322 | int (*action)(void *), unsigned mode) | 322 | wait_bit_action_f *action, unsigned mode) |
323 | { | 323 | { |
324 | int ret = 0; | 324 | int ret = 0; |
325 | 325 | ||
326 | do { | 326 | do { |
327 | prepare_to_wait(wq, &q->wait, mode); | 327 | prepare_to_wait(wq, &q->wait, mode); |
328 | if (test_bit(q->key.bit_nr, q->key.flags)) | 328 | if (test_bit(q->key.bit_nr, q->key.flags)) |
329 | ret = (*action)(q->key.flags); | 329 | ret = (*action)(&q->key); |
330 | } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); | 330 | } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); |
331 | finish_wait(wq, &q->wait); | 331 | finish_wait(wq, &q->wait); |
332 | return ret; | 332 | return ret; |
@@ -334,7 +334,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, | |||
334 | EXPORT_SYMBOL(__wait_on_bit); | 334 | EXPORT_SYMBOL(__wait_on_bit); |
335 | 335 | ||
336 | int __sched out_of_line_wait_on_bit(void *word, int bit, | 336 | int __sched out_of_line_wait_on_bit(void *word, int bit, |
337 | int (*action)(void *), unsigned mode) | 337 | wait_bit_action_f *action, unsigned mode) |
338 | { | 338 | { |
339 | wait_queue_head_t *wq = bit_waitqueue(word, bit); | 339 | wait_queue_head_t *wq = bit_waitqueue(word, bit); |
340 | DEFINE_WAIT_BIT(wait, word, bit); | 340 | DEFINE_WAIT_BIT(wait, word, bit); |
@@ -345,7 +345,7 @@ EXPORT_SYMBOL(out_of_line_wait_on_bit); | |||
345 | 345 | ||
346 | int __sched | 346 | int __sched |
347 | __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, | 347 | __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, |
348 | int (*action)(void *), unsigned mode) | 348 | wait_bit_action_f *action, unsigned mode) |
349 | { | 349 | { |
350 | do { | 350 | do { |
351 | int ret; | 351 | int ret; |
@@ -353,7 +353,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, | |||
353 | prepare_to_wait_exclusive(wq, &q->wait, mode); | 353 | prepare_to_wait_exclusive(wq, &q->wait, mode); |
354 | if (!test_bit(q->key.bit_nr, q->key.flags)) | 354 | if (!test_bit(q->key.bit_nr, q->key.flags)) |
355 | continue; | 355 | continue; |
356 | ret = action(q->key.flags); | 356 | ret = action(&q->key); |
357 | if (!ret) | 357 | if (!ret) |
358 | continue; | 358 | continue; |
359 | abort_exclusive_wait(wq, &q->wait, mode, &q->key); | 359 | abort_exclusive_wait(wq, &q->wait, mode, &q->key); |
@@ -365,7 +365,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, | |||
365 | EXPORT_SYMBOL(__wait_on_bit_lock); | 365 | EXPORT_SYMBOL(__wait_on_bit_lock); |
366 | 366 | ||
367 | int __sched out_of_line_wait_on_bit_lock(void *word, int bit, | 367 | int __sched out_of_line_wait_on_bit_lock(void *word, int bit, |
368 | int (*action)(void *), unsigned mode) | 368 | wait_bit_action_f *action, unsigned mode) |
369 | { | 369 | { |
370 | wait_queue_head_t *wq = bit_waitqueue(word, bit); | 370 | wait_queue_head_t *wq = bit_waitqueue(word, bit); |
371 | DEFINE_WAIT_BIT(wait, word, bit); | 371 | DEFINE_WAIT_BIT(wait, word, bit); |
@@ -502,3 +502,21 @@ void wake_up_atomic_t(atomic_t *p) | |||
502 | __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR); | 502 | __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR); |
503 | } | 503 | } |
504 | EXPORT_SYMBOL(wake_up_atomic_t); | 504 | EXPORT_SYMBOL(wake_up_atomic_t); |
505 | |||
506 | __sched int bit_wait(struct wait_bit_key *word) | ||
507 | { | ||
508 | if (signal_pending_state(current->state, current)) | ||
509 | return 1; | ||
510 | schedule(); | ||
511 | return 0; | ||
512 | } | ||
513 | EXPORT_SYMBOL(bit_wait); | ||
514 | |||
515 | __sched int bit_wait_io(struct wait_bit_key *word) | ||
516 | { | ||
517 | if (signal_pending_state(current->state, current)) | ||
518 | return 1; | ||
519 | io_schedule(); | ||
520 | return 0; | ||
521 | } | ||
522 | EXPORT_SYMBOL(bit_wait_io); | ||
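
With these wait.c changes the sleep behaviour of a bit wait is a wait_bit_action_f that receives the struct wait_bit_key rather than a bare void *word, and bit_wait()/bit_wait_io() become the stock actions. A caller that still needs special sleep behaviour can pass its own action to the low-level helpers; the sketch below is hypothetical (bit_wait_relax and its timeout are not from the patch).

        /* Hypothetical custom action: back off briefly instead of an
         * unconditional schedule().  @word describes the flag word and bit
         * being waited on but is not needed here. */
        static __sched int bit_wait_relax(struct wait_bit_key *word)
        {
                if (signal_pending_state(current->state, current))
                        return 1;       /* abort the wait, as bit_wait() does */
                schedule_timeout_uninterruptible(HZ / 50);
                return 0;
        }

        /* used with the helpers that still take an action, e.g.
         *      __wait_on_bit(wq, &q, bit_wait_relax, TASK_UNINTERRUPTIBLE);  */
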
diff --git a/kernel/smp.c b/kernel/smp.c index 80c33f8de14f..487653b5844f 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * | 3 | * |
4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 | 4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 |
5 | */ | 5 | */ |
6 | #include <linux/irq_work.h> | ||
6 | #include <linux/rcupdate.h> | 7 | #include <linux/rcupdate.h> |
7 | #include <linux/rculist.h> | 8 | #include <linux/rculist.h> |
8 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
@@ -251,6 +252,14 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) | |||
251 | csd->func(csd->info); | 252 | csd->func(csd->info); |
252 | csd_unlock(csd); | 253 | csd_unlock(csd); |
253 | } | 254 | } |
255 | |||
256 | /* | ||
257 | * Handle irq works queued remotely by irq_work_queue_on(). | ||
258 | * Smp functions above are typically synchronous so they | ||
259 | * better run first since some other CPUs may be busy waiting | ||
260 | * for them. | ||
261 | */ | ||
262 | irq_work_run(); | ||
254 | } | 263 | } |
255 | 264 | ||
256 | /* | 265 | /* |
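
The irq_work_run() call added here services work items that another CPU queued with irq_work_queue_on(); per the new comment, they run from the same IPI handler after any pending smp-call-function entries. A minimal sketch of the remote-queueing side follows; demo_work, demo_work_fn and demo_kick are made-up names and error handling is omitted.

        #include <linux/irq_work.h>
        #include <linux/kernel.h>
        #include <linux/smp.h>

        static struct irq_work demo_work;

        static void demo_work_fn(struct irq_work *work)
        {
                /* runs on the target CPU, from irq_work/IPI context */
                pr_info("irq_work ran on CPU%d\n", smp_processor_id());
        }

        static void demo_kick(int cpu)
        {
                init_irq_work(&demo_work, demo_work_fn);
                irq_work_queue_on(&demo_work, cpu);  /* flushed by the hunk above */
        }
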
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f784d83e29f1..99aa6ee3908f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -225,13 +225,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | |||
225 | }; | 225 | }; |
226 | 226 | ||
227 | /* | 227 | /* |
228 | * Kick the current CPU if it's full dynticks in order to force it to | 228 | * Kick the CPU if it's full dynticks in order to force it to |
229 | * re-evaluate its dependency on the tick and restart it if necessary. | 229 | * re-evaluate its dependency on the tick and restart it if necessary. |
230 | */ | 230 | */ |
231 | void tick_nohz_full_kick(void) | 231 | void tick_nohz_full_kick_cpu(int cpu) |
232 | { | 232 | { |
233 | if (tick_nohz_full_cpu(smp_processor_id())) | 233 | if (!tick_nohz_full_cpu(cpu)) |
234 | irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); | 234 | return; |
235 | |||
236 | irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu); | ||
235 | } | 237 | } |
236 | 238 | ||
237 | static void nohz_full_kick_ipi(void *info) | 239 | static void nohz_full_kick_ipi(void *info) |
diff --git a/mm/filemap.c b/mm/filemap.c index 900edfaf6df5..65d44fd88c78 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -241,18 +241,6 @@ void delete_from_page_cache(struct page *page) | |||
241 | } | 241 | } |
242 | EXPORT_SYMBOL(delete_from_page_cache); | 242 | EXPORT_SYMBOL(delete_from_page_cache); |
243 | 243 | ||
244 | static int sleep_on_page(void *word) | ||
245 | { | ||
246 | io_schedule(); | ||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | static int sleep_on_page_killable(void *word) | ||
251 | { | ||
252 | sleep_on_page(word); | ||
253 | return fatal_signal_pending(current) ? -EINTR : 0; | ||
254 | } | ||
255 | |||
256 | static int filemap_check_errors(struct address_space *mapping) | 244 | static int filemap_check_errors(struct address_space *mapping) |
257 | { | 245 | { |
258 | int ret = 0; | 246 | int ret = 0; |
@@ -692,7 +680,7 @@ void wait_on_page_bit(struct page *page, int bit_nr) | |||
692 | DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); | 680 | DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); |
693 | 681 | ||
694 | if (test_bit(bit_nr, &page->flags)) | 682 | if (test_bit(bit_nr, &page->flags)) |
695 | __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page, | 683 | __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io, |
696 | TASK_UNINTERRUPTIBLE); | 684 | TASK_UNINTERRUPTIBLE); |
697 | } | 685 | } |
698 | EXPORT_SYMBOL(wait_on_page_bit); | 686 | EXPORT_SYMBOL(wait_on_page_bit); |
@@ -705,7 +693,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr) | |||
705 | return 0; | 693 | return 0; |
706 | 694 | ||
707 | return __wait_on_bit(page_waitqueue(page), &wait, | 695 | return __wait_on_bit(page_waitqueue(page), &wait, |
708 | sleep_on_page_killable, TASK_KILLABLE); | 696 | bit_wait_io, TASK_KILLABLE); |
709 | } | 697 | } |
710 | 698 | ||
711 | /** | 699 | /** |
@@ -806,7 +794,7 @@ void __lock_page(struct page *page) | |||
806 | { | 794 | { |
807 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | 795 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); |
808 | 796 | ||
809 | __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page, | 797 | __wait_on_bit_lock(page_waitqueue(page), &wait, bit_wait_io, |
810 | TASK_UNINTERRUPTIBLE); | 798 | TASK_UNINTERRUPTIBLE); |
811 | } | 799 | } |
812 | EXPORT_SYMBOL(__lock_page); | 800 | EXPORT_SYMBOL(__lock_page); |
@@ -816,7 +804,7 @@ int __lock_page_killable(struct page *page) | |||
816 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); | 804 | DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); |
817 | 805 | ||
818 | return __wait_on_bit_lock(page_waitqueue(page), &wait, | 806 | return __wait_on_bit_lock(page_waitqueue(page), &wait, |
819 | sleep_on_page_killable, TASK_KILLABLE); | 807 | bit_wait_io, TASK_KILLABLE); |
820 | } | 808 | } |
821 | EXPORT_SYMBOL_GPL(__lock_page_killable); | 809 | EXPORT_SYMBOL_GPL(__lock_page_killable); |
822 | 810 | ||
diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c | |||
@@ -1978,18 +1978,12 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage) | |||
1978 | #endif /* CONFIG_MIGRATION */ | 1978 | #endif /* CONFIG_MIGRATION */ |
1979 | 1979 | ||
1980 | #ifdef CONFIG_MEMORY_HOTREMOVE | 1980 | #ifdef CONFIG_MEMORY_HOTREMOVE |
1981 | static int just_wait(void *word) | ||
1982 | { | ||
1983 | schedule(); | ||
1984 | return 0; | ||
1985 | } | ||
1986 | |||
1987 | static void wait_while_offlining(void) | 1981 | static void wait_while_offlining(void) |
1988 | { | 1982 | { |
1989 | while (ksm_run & KSM_RUN_OFFLINE) { | 1983 | while (ksm_run & KSM_RUN_OFFLINE) { |
1990 | mutex_unlock(&ksm_thread_mutex); | 1984 | mutex_unlock(&ksm_thread_mutex); |
1991 | wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE), | 1985 | wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE), |
1992 | just_wait, TASK_UNINTERRUPTIBLE); | 1986 | TASK_UNINTERRUPTIBLE); |
1993 | mutex_lock(&ksm_thread_mutex); | 1987 | mutex_lock(&ksm_thread_mutex); |
1994 | } | 1988 | } |
1995 | } | 1989 | } |
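
After these conversions a caller no longer passes a schedule()/io_schedule() helper: it either relies on the stock bit_wait() action (plain schedule(), as the KSM hunk now does) or calls an _io variant whose stock action is bit_wait_io(). A rough sketch of the two flavours at the call site; my_flags, MY_BUSY_BIT and the helper names are placeholders.

        static unsigned long my_flags;
        #define MY_BUSY_BIT     0

        /* non-I/O wait: default bit_wait() action, sleeps via schedule() */
        static void wait_until_idle(void)
        {
                wait_on_bit(&my_flags, MY_BUSY_BIT, TASK_UNINTERRUPTIBLE);
        }

        /* I/O-flavoured acquisition of the same bit: bit_wait_io() underneath,
         * so the sleep is accounted as iowait via io_schedule() */
        static void lock_for_io(void)
        {
                wait_on_bit_lock_io(&my_flags, MY_BUSY_BIT, TASK_UNINTERRUPTIBLE);
        }
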
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0a43cce9a914..e090bffe1bf8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c | |||
@@ -2186,12 +2186,6 @@ static void hci_inq_req(struct hci_request *req, unsigned long opt) | |||
2186 | hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); | 2186 | hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); |
2187 | } | 2187 | } |
2188 | 2188 | ||
2189 | static int wait_inquiry(void *word) | ||
2190 | { | ||
2191 | schedule(); | ||
2192 | return signal_pending(current); | ||
2193 | } | ||
2194 | |||
2195 | int hci_inquiry(void __user *arg) | 2189 | int hci_inquiry(void __user *arg) |
2196 | { | 2190 | { |
2197 | __u8 __user *ptr = arg; | 2191 | __u8 __user *ptr = arg; |
@@ -2242,7 +2236,7 @@ int hci_inquiry(void __user *arg) | |||
2242 | /* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is | 2236 | /* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is |
2243 | * cleared). If it is interrupted by a signal, return -EINTR. | 2237 | * cleared). If it is interrupted by a signal, return -EINTR. |
2244 | */ | 2238 | */ |
2245 | if (wait_on_bit(&hdev->flags, HCI_INQUIRY, wait_inquiry, | 2239 | if (wait_on_bit(&hdev->flags, HCI_INQUIRY, |
2246 | TASK_INTERRUPTIBLE)) | 2240 | TASK_INTERRUPTIBLE)) |
2247 | return -EINTR; | 2241 | return -EINTR; |
2248 | } | 2242 | } |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c0365c14b858..9358c79fd589 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -250,7 +250,7 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) | |||
250 | } | 250 | } |
251 | EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); | 251 | EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); |
252 | 252 | ||
253 | static int rpc_wait_bit_killable(void *word) | 253 | static int rpc_wait_bit_killable(struct wait_bit_key *key) |
254 | { | 254 | { |
255 | if (fatal_signal_pending(current)) | 255 | if (fatal_signal_pending(current)) |
256 | return -ERESTARTSYS; | 256 | return -ERESTARTSYS; |
@@ -309,7 +309,7 @@ static int rpc_complete_task(struct rpc_task *task) | |||
309 | * to enforce taking of the wq->lock and hence avoid races with | 309 | * to enforce taking of the wq->lock and hence avoid races with |
310 | * rpc_complete_task(). | 310 | * rpc_complete_task(). |
311 | */ | 311 | */ |
312 | int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) | 312 | int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action) |
313 | { | 313 | { |
314 | if (action == NULL) | 314 | if (action == NULL) |
315 | action = rpc_wait_bit_killable; | 315 | action = rpc_wait_bit_killable; |
diff --git a/security/keys/gc.c b/security/keys/gc.c index d3222b6d7d59..9609a7f0faea 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c | |||
@@ -92,15 +92,6 @@ static void key_gc_timer_func(unsigned long data) | |||
92 | } | 92 | } |
93 | 93 | ||
94 | /* | 94 | /* |
95 | * wait_on_bit() sleep function for uninterruptible waiting | ||
96 | */ | ||
97 | static int key_gc_wait_bit(void *flags) | ||
98 | { | ||
99 | schedule(); | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Reap keys of dead type. | 95 | * Reap keys of dead type. |
105 | * | 96 | * |
106 | * We use three flags to make sure we see three complete cycles of the garbage | 97 | * We use three flags to make sure we see three complete cycles of the garbage |
@@ -123,7 +114,7 @@ void key_gc_keytype(struct key_type *ktype) | |||
123 | schedule_work(&key_gc_work); | 114 | schedule_work(&key_gc_work); |
124 | 115 | ||
125 | kdebug("sleep"); | 116 | kdebug("sleep"); |
126 | wait_on_bit(&key_gc_flags, KEY_GC_REAPING_KEYTYPE, key_gc_wait_bit, | 117 | wait_on_bit(&key_gc_flags, KEY_GC_REAPING_KEYTYPE, |
127 | TASK_UNINTERRUPTIBLE); | 118 | TASK_UNINTERRUPTIBLE); |
128 | 119 | ||
129 | key_gc_dead_keytype = NULL; | 120 | key_gc_dead_keytype = NULL; |
diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 381411941cc1..26a94f18af94 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c | |||
@@ -21,24 +21,6 @@ | |||
21 | 21 | ||
22 | #define key_negative_timeout 60 /* default timeout on a negative key's existence */ | 22 | #define key_negative_timeout 60 /* default timeout on a negative key's existence */ |
23 | 23 | ||
24 | /* | ||
25 | * wait_on_bit() sleep function for uninterruptible waiting | ||
26 | */ | ||
27 | static int key_wait_bit(void *flags) | ||
28 | { | ||
29 | schedule(); | ||
30 | return 0; | ||
31 | } | ||
32 | |||
33 | /* | ||
34 | * wait_on_bit() sleep function for interruptible waiting | ||
35 | */ | ||
36 | static int key_wait_bit_intr(void *flags) | ||
37 | { | ||
38 | schedule(); | ||
39 | return signal_pending(current) ? -ERESTARTSYS : 0; | ||
40 | } | ||
41 | |||
42 | /** | 24 | /** |
43 | * complete_request_key - Complete the construction of a key. | 25 | * complete_request_key - Complete the construction of a key. |
44 | * @cons: The key construction record. | 26 | * @cons: The key construction record. |
@@ -592,10 +574,9 @@ int wait_for_key_construction(struct key *key, bool intr) | |||
592 | int ret; | 574 | int ret; |
593 | 575 | ||
594 | ret = wait_on_bit(&key->flags, KEY_FLAG_USER_CONSTRUCT, | 576 | ret = wait_on_bit(&key->flags, KEY_FLAG_USER_CONSTRUCT, |
595 | intr ? key_wait_bit_intr : key_wait_bit, | ||
596 | intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); | 577 | intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); |
597 | if (ret < 0) | 578 | if (ret) |
598 | return ret; | 579 | return -ERESTARTSYS; |
599 | if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { | 580 | if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { |
600 | smp_rmb(); | 581 | smp_rmb(); |
601 | return key->type_data.reject_error; | 582 | return key->type_data.reject_error; |
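
This last hunk also shows the changed return convention: with the custom interruptible action gone, an interrupted TASK_INTERRUPTIBLE wait is reported by wait_on_bit() as a non-zero return (coming from the stock bit_wait() action) rather than as a caller-chosen errno, so callers now map any non-zero result to their own error code, -ERESTARTSYS here and -EINTR in the hci_inquiry() hunk. A sketch of the resulting caller pattern; wait_for_ready and its arguments are placeholders.

        static int wait_for_ready(unsigned long *flags, int bit)
        {
                if (wait_on_bit(flags, bit, TASK_INTERRUPTIBLE))
                        return -ERESTARTSYS;    /* a signal ended the wait */
                return 0;
        }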