diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-19 16:50:07 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-19 16:50:07 -0400 |
commit | 75d65a425c0163d3ec476ddc12b51087217a070c (patch) | |
tree | 73fa02941dba62ff65fadc42eb03484428707cc5 | |
parent | 61c4f2c81c61f73549928dfd9f3e8f26aa36a8cf (diff) |
hlist: remove software prefetching in hlist iterators

The prefetches not only increase the code footprint, they actually make
things slower rather than faster. On internationally acclaimed benchmarks
("make -j16" on an already fully built kernel source tree) the hlist
prefetching slows down the build by up to 1%.

(Almost all of it comes from hlist_for_each_entry_rcu() as used by
avc_has_perm_noaudit(), which is very hot due to all the pathname
lookups to see if there is anything to do).

The cause seems to be two-fold:

- on at least some Intel cores, prefetch(NULL) ends up with some
  microarchitectural stall due to the TLB miss that it incurs. The
  hlist case triggers this very commonly, since the NULL pointer is the
  last entry in the list.

- the prefetch appears to cause more D$ activity, probably because it
  prefetches hash list entries that are never actually used (because we
  ended the search early due to a hit).

Regardless, the numbers clearly say that the implicit prefetching is
simply a bad idea. If some _particular_ user of the hlist iterators
wants to prefetch the next list entry, they can do so themselves
explicitly, rather than depend on all list iterators doing so
implicitly.

Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: linux-arch@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/list.h | 9 | ||||
-rw-r--r-- | include/linux/rculist.h | 10 |
2 files changed, 9 insertions, 10 deletions
diff --git a/include/linux/list.h b/include/linux/list.h index 3a54266a1e85..9ac11148e037 100644 --- a/include/linux/list.h +++ b/include/linux/list.h | |||
@@ -664,8 +664,7 @@ static inline void hlist_move_list(struct hlist_head *old, | |||
664 | #define hlist_entry(ptr, type, member) container_of(ptr,type,member) | 664 | #define hlist_entry(ptr, type, member) container_of(ptr,type,member) |
665 | 665 | ||
666 | #define hlist_for_each(pos, head) \ | 666 | #define hlist_for_each(pos, head) \ |
667 | for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ | 667 | for (pos = (head)->first; pos ; pos = pos->next) |
668 | pos = pos->next) | ||
669 | 668 | ||
670 | #define hlist_for_each_safe(pos, n, head) \ | 669 | #define hlist_for_each_safe(pos, n, head) \ |
671 | for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ | 670 | for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ |
@@ -680,7 +679,7 @@ static inline void hlist_move_list(struct hlist_head *old, | |||
680 | */ | 679 | */ |
681 | #define hlist_for_each_entry(tpos, pos, head, member) \ | 680 | #define hlist_for_each_entry(tpos, pos, head, member) \ |
682 | for (pos = (head)->first; \ | 681 | for (pos = (head)->first; \ |
683 | pos && ({ prefetch(pos->next); 1;}) && \ | 682 | pos && \ |
684 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ | 683 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ |
685 | pos = pos->next) | 684 | pos = pos->next) |
686 | 685 | ||
@@ -692,7 +691,7 @@ static inline void hlist_move_list(struct hlist_head *old, | |||
692 | */ | 691 | */ |
693 | #define hlist_for_each_entry_continue(tpos, pos, member) \ | 692 | #define hlist_for_each_entry_continue(tpos, pos, member) \ |
694 | for (pos = (pos)->next; \ | 693 | for (pos = (pos)->next; \ |
695 | pos && ({ prefetch(pos->next); 1;}) && \ | 694 | pos && \ |
696 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ | 695 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ |
697 | pos = pos->next) | 696 | pos = pos->next) |
698 | 697 | ||
@@ -703,7 +702,7 @@ static inline void hlist_move_list(struct hlist_head *old, | |||
703 | * @member: the name of the hlist_node within the struct. | 702 | * @member: the name of the hlist_node within the struct. |
704 | */ | 703 | */ |
705 | #define hlist_for_each_entry_from(tpos, pos, member) \ | 704 | #define hlist_for_each_entry_from(tpos, pos, member) \ |
706 | for (; pos && ({ prefetch(pos->next); 1;}) && \ | 705 | for (; pos && \ |
707 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ | 706 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ |
708 | pos = pos->next) | 707 | pos = pos->next) |
709 | 708 | ||
diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2dea94fc4402..900a97a44769 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h | |||
@@ -427,7 +427,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, | |||
427 | 427 | ||
428 | #define __hlist_for_each_rcu(pos, head) \ | 428 | #define __hlist_for_each_rcu(pos, head) \ |
429 | for (pos = rcu_dereference(hlist_first_rcu(head)); \ | 429 | for (pos = rcu_dereference(hlist_first_rcu(head)); \ |
430 | pos && ({ prefetch(pos->next); 1; }); \ | 430 | pos; \ |
431 | pos = rcu_dereference(hlist_next_rcu(pos))) | 431 | pos = rcu_dereference(hlist_next_rcu(pos))) |
432 | 432 | ||
433 | /** | 433 | /** |
@@ -443,7 +443,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, | |||
443 | */ | 443 | */ |
444 | #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ | 444 | #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ |
445 | for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ | 445 | for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ |
446 | pos && ({ prefetch(pos->next); 1; }) && \ | 446 | pos && \ |
447 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ | 447 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ |
448 | pos = rcu_dereference_raw(hlist_next_rcu(pos))) | 448 | pos = rcu_dereference_raw(hlist_next_rcu(pos))) |
449 | 449 | ||
@@ -460,7 +460,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, | |||
460 | */ | 460 | */ |
461 | #define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ | 461 | #define hlist_for_each_entry_rcu_bh(tpos, pos, head, member) \ |
462 | for (pos = rcu_dereference_bh((head)->first); \ | 462 | for (pos = rcu_dereference_bh((head)->first); \ |
463 | pos && ({ prefetch(pos->next); 1; }) && \ | 463 | pos && \ |
464 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ | 464 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ |
465 | pos = rcu_dereference_bh(pos->next)) | 465 | pos = rcu_dereference_bh(pos->next)) |
466 | 466 | ||
@@ -472,7 +472,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, | |||
472 | */ | 472 | */ |
473 | #define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ | 473 | #define hlist_for_each_entry_continue_rcu(tpos, pos, member) \ |
474 | for (pos = rcu_dereference((pos)->next); \ | 474 | for (pos = rcu_dereference((pos)->next); \ |
475 | pos && ({ prefetch(pos->next); 1; }) && \ | 475 | pos && \ |
476 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ | 476 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ |
477 | pos = rcu_dereference(pos->next)) | 477 | pos = rcu_dereference(pos->next)) |
478 | 478 | ||
@@ -484,7 +484,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, | |||
484 | */ | 484 | */ |
485 | #define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ | 485 | #define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member) \ |
486 | for (pos = rcu_dereference_bh((pos)->next); \ | 486 | for (pos = rcu_dereference_bh((pos)->next); \ |
487 | pos && ({ prefetch(pos->next); 1; }) && \ | 487 | pos && \ |
488 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ | 488 | ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ |
489 | pos = rcu_dereference_bh(pos->next)) | 489 | pos = rcu_dereference_bh(pos->next)) |
490 | 490 | ||