 drivers/hv/channel_mgmt.c | 68
 include/linux/hyperv.h    | 23
 2 files changed, 62 insertions(+), 29 deletions(-)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index d8b64ba45b1d..bbd812e87511 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -356,8 +356,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
          * We need to free the bit for init_vp_index() to work in the case
          * of sub-channel, when we reload drivers like hv_netvsc.
          */
-        cpumask_clear_cpu(channel->target_cpu,
-                          &primary_channel->alloced_cpus_in_node);
+        if (channel->affinity_policy == HV_LOCALIZED)
+                cpumask_clear_cpu(channel->target_cpu,
+                                  &primary_channel->alloced_cpus_in_node);
 
         vmbus_release_relid(relid);
 
@@ -548,17 +549,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
         }
 
         /*
-         * We distribute primary channels evenly across all the available
-         * NUMA nodes and within the assigned NUMA node we will assign the
-         * first available CPU to the primary channel.
-         * The sub-channels will be assigned to the CPUs available in the
-         * NUMA node evenly.
+         * Based on the channel affinity policy, we will assign the NUMA
+         * nodes.
          */
-        if (!primary) {
+
+        if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
                 while (true) {
                         next_node = next_numa_node_id++;
-                        if (next_node == nr_node_ids)
+                        if (next_node == nr_node_ids) {
                                 next_node = next_numa_node_id = 0;
+                                continue;
+                        }
                         if (cpumask_empty(cpumask_of_node(next_node)))
                                 continue;
                         break;
@@ -582,15 +583,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 
         cur_cpu = -1;
 
-        /*
-         * Normally Hyper-V host doesn't create more subchannels than there
-         * are VCPUs on the node but it is possible when not all present VCPUs
-         * on the node are initialized by guest. Clear the alloced_cpus_in_node
-         * to start over.
-         */
-        if (cpumask_equal(&primary->alloced_cpus_in_node,
-                          cpumask_of_node(primary->numa_node)))
-                cpumask_clear(&primary->alloced_cpus_in_node);
+        if (primary->affinity_policy == HV_LOCALIZED) {
+                /*
+                 * Normally Hyper-V host doesn't create more subchannels
+                 * than there are VCPUs on the node but it is possible when not
+                 * all present VCPUs on the node are initialized by guest.
+                 * Clear the alloced_cpus_in_node to start over.
+                 */
+                if (cpumask_equal(&primary->alloced_cpus_in_node,
+                                  cpumask_of_node(primary->numa_node)))
+                        cpumask_clear(&primary->alloced_cpus_in_node);
+        }
 
         while (true) {
                 cur_cpu = cpumask_next(cur_cpu, &available_mask);
@@ -601,17 +604,24 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
                         continue;
                 }
 
-                /*
-                 * NOTE: in the case of sub-channel, we clear the sub-channel
-                 * related bit(s) in primary->alloced_cpus_in_node in
-                 * hv_process_channel_removal(), so when we reload drivers
-                 * like hv_netvsc in SMP guest, here we're able to re-allocate
-                 * bit from primary->alloced_cpus_in_node.
-                 */
-                if (!cpumask_test_cpu(cur_cpu,
-                                &primary->alloced_cpus_in_node)) {
-                        cpumask_set_cpu(cur_cpu,
-                                        &primary->alloced_cpus_in_node);
+                if (primary->affinity_policy == HV_LOCALIZED) {
+                        /*
+                         * NOTE: in the case of sub-channel, we clear the
+                         * sub-channel related bit(s) in
+                         * primary->alloced_cpus_in_node in
+                         * hv_process_channel_removal(), so when we
+                         * reload drivers like hv_netvsc in SMP guest, here
+                         * we're able to re-allocate
+                         * bit from primary->alloced_cpus_in_node.
+                         */
+                        if (!cpumask_test_cpu(cur_cpu,
+                                              &primary->alloced_cpus_in_node)) {
+                                cpumask_set_cpu(cur_cpu,
+                                                &primary->alloced_cpus_in_node);
+                                cpumask_set_cpu(cur_cpu, alloced_mask);
+                                break;
+                        }
+                } else {
                         cpumask_set_cpu(cur_cpu, alloced_mask);
                         break;
                 }
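To make the behavior of the two policies concrete, the following is a small userspace sketch, not kernel code: the node/CPU counts, struct layout, and bitmap bookkeeping are simplified stand-ins for the cpumask logic above. It models how init_vp_index() spreads channels: under HV_BALANCED every channel takes the next NUMA node in round-robin order, while under HV_LOCALIZED sub-channels stay on the primary channel's node and draw distinct CPUs tracked in the primary's alloced_cpus_in_node.

/*
 * Standalone toy model of the CPU assignment policies (illustration only;
 * all names and sizes here are made up, not the kernel's).
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_NODES        2
#define CPUS_PER_NODE   4       /* CPU c lives on node c / CPUS_PER_NODE */

struct chan {
        int numa_node;
        int target_cpu;
        unsigned int alloced_cpus_in_node;      /* bitmap, LOCALIZED only */
};

static int next_node;                           /* round-robin node cursor */
static unsigned int alloced_cpus[NR_NODES];     /* CPUs already handed out */

/* Assign a node and CPU to @channel; @primary is NULL for a primary channel. */
static void assign(struct chan *channel, struct chan *primary, bool localized)
{
        unsigned int full = (1u << CPUS_PER_NODE) - 1;

        if (!localized || !primary) {
                /* BALANCED (and every primary): take the next node. */
                channel->numa_node = next_node;
                next_node = (next_node + 1) % NR_NODES;
                primary = channel;
        } else {
                /* LOCALIZED sub-channel: stay on the primary's node. */
                channel->numa_node = primary->numa_node;
        }

        /* Start over once every CPU on the node has been used. */
        if (alloced_cpus[channel->numa_node] == full)
                alloced_cpus[channel->numa_node] = 0;
        if (localized && primary->alloced_cpus_in_node == full)
                primary->alloced_cpus_in_node = 0;

        for (int i = 0; i < CPUS_PER_NODE; i++) {
                unsigned int bit = 1u << i;

                if (alloced_cpus[channel->numa_node] & bit)
                        continue;
                if (localized && (primary->alloced_cpus_in_node & bit))
                        continue;
                alloced_cpus[channel->numa_node] |= bit;
                primary->alloced_cpus_in_node |= bit;
                channel->target_cpu = channel->numa_node * CPUS_PER_NODE + i;
                return;
        }
}

int main(void)
{
        static const char * const name[] = { "HV_BALANCED ", "HV_LOCALIZED" };

        for (int policy = 0; policy < 2; policy++) {
                struct chan primary = {0}, sub[3] = {{0}};

                next_node = 0;
                alloced_cpus[0] = alloced_cpus[1] = 0;

                assign(&primary, NULL, policy);
                printf("%s primary -> node %d cpu %d\n",
                       name[policy], primary.numa_node, primary.target_cpu);
                for (int i = 0; i < 3; i++) {
                        assign(&sub[i], &primary, policy);
                        printf("%s sub %d   -> node %d cpu %d\n",
                               name[policy], i, sub[i].numa_node,
                               sub[i].target_cpu);
                }
        }
        return 0;
}

Running the model, the balanced pass alternates channels between nodes 0 and 1, while the localized pass keeps the primary and all three sub-channels on node 0 with distinct CPUs, which is the contrast the hunks above introduce.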
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e6ef571e6100..c877e7980585 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -674,6 +674,11 @@ enum hv_signal_policy {
         HV_SIGNAL_POLICY_EXPLICIT,
 };
 
+enum hv_numa_policy {
+        HV_BALANCED = 0,
+        HV_LOCALIZED,
+};
+
 enum vmbus_device_type {
         HV_IDE = 0,
         HV_SCSI,
@@ -876,6 +881,18 @@ struct vmbus_channel {
          */
         bool low_latency;
 
+        /*
+         * NUMA distribution policy:
+         * We support two policies:
+         * 1) Balanced: Here all performance critical channels are
+         *    distributed evenly amongst all the NUMA nodes.
+         *    This policy will be the default policy.
+         * 2) Localized: All channels of a given instance of a
+         *    performance critical service will be assigned CPUs
+         *    within a selected NUMA node.
+         */
+        enum hv_numa_policy affinity_policy;
+
 };
 
 static inline void set_channel_lock_state(struct vmbus_channel *c, bool state)
@@ -895,6 +912,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c,
         c->signal_policy = policy;
 }
 
+static inline void set_channel_affinity_state(struct vmbus_channel *c,
+                                              enum hv_numa_policy policy)
+{
+        c->affinity_policy = policy;
+}
+
 static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
 {
         c->batched_reading = state;
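For reference, a driver that wants all channels of a device kept on one NUMA node would select the policy before opening the channel. The snippet below is hypothetical: only set_channel_affinity_state(), HV_LOCALIZED, and the affinity_policy field come from this patch; the driver name, probe function, and ring-buffer sizes are made up for illustration. Because the primary channel's CPU is chosen when its offer is processed, the call mainly steers where sub-channels created later are placed.

/* Hypothetical driver probe path; everything except
 * set_channel_affinity_state() and HV_LOCALIZED is illustrative.
 */
#include <linux/hyperv.h>

static void example_on_channel_cb(void *context)
{
        /* normal channel interrupt handling would go here */
}

static int example_probe(struct hv_device *dev,
                         const struct hv_vmbus_device_id *dev_id)
{
        struct vmbus_channel *channel = dev->channel;

        /* Ask VMBus to place this device's sub-channels on the
         * primary channel's NUMA node.
         */
        set_channel_affinity_state(channel, HV_LOCALIZED);

        return vmbus_open(channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
                          example_on_channel_cb, channel);
}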