summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/hv/channel_mgmt.c68
-rw-r--r--include/linux/hyperv.h23
2 files changed, 62 insertions, 29 deletions
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index d8b64ba45b1d..bbd812e87511 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -356,8 +356,9 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
356 * We need to free the bit for init_vp_index() to work in the case 356 * We need to free the bit for init_vp_index() to work in the case
357 * of sub-channel, when we reload drivers like hv_netvsc. 357 * of sub-channel, when we reload drivers like hv_netvsc.
358 */ 358 */
359 cpumask_clear_cpu(channel->target_cpu, 359 if (channel->affinity_policy == HV_LOCALIZED)
360 &primary_channel->alloced_cpus_in_node); 360 cpumask_clear_cpu(channel->target_cpu,
361 &primary_channel->alloced_cpus_in_node);
361 362
362 vmbus_release_relid(relid); 363 vmbus_release_relid(relid);
363 364
@@ -548,17 +549,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
548 } 549 }
549 550
550 /* 551 /*
551 * We distribute primary channels evenly across all the available 552 * Based on the channel affinity policy, we will assign the NUMA
552 * NUMA nodes and within the assigned NUMA node we will assign the 553 * nodes.
553 * first available CPU to the primary channel.
554 * The sub-channels will be assigned to the CPUs available in the
555 * NUMA node evenly.
556 */ 554 */
557 if (!primary) { 555
556 if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
558 while (true) { 557 while (true) {
559 next_node = next_numa_node_id++; 558 next_node = next_numa_node_id++;
560 if (next_node == nr_node_ids) 559 if (next_node == nr_node_ids) {
561 next_node = next_numa_node_id = 0; 560 next_node = next_numa_node_id = 0;
561 continue;
562 }
562 if (cpumask_empty(cpumask_of_node(next_node))) 563 if (cpumask_empty(cpumask_of_node(next_node)))
563 continue; 564 continue;
564 break; 565 break;
@@ -582,15 +583,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
582 583
583 cur_cpu = -1; 584 cur_cpu = -1;
584 585
585 /* 586 if (primary->affinity_policy == HV_LOCALIZED) {
586 * Normally Hyper-V host doesn't create more subchannels than there 587 /*
587 * are VCPUs on the node but it is possible when not all present VCPUs 588 * Normally Hyper-V host doesn't create more subchannels
588 * on the node are initialized by guest. Clear the alloced_cpus_in_node 589 * than there are VCPUs on the node but it is possible when not
589 * to start over. 590 * all present VCPUs on the node are initialized by guest.
590 */ 591 * Clear the alloced_cpus_in_node to start over.
591 if (cpumask_equal(&primary->alloced_cpus_in_node, 592 */
592 cpumask_of_node(primary->numa_node))) 593 if (cpumask_equal(&primary->alloced_cpus_in_node,
593 cpumask_clear(&primary->alloced_cpus_in_node); 594 cpumask_of_node(primary->numa_node)))
595 cpumask_clear(&primary->alloced_cpus_in_node);
596 }
594 597
595 while (true) { 598 while (true) {
596 cur_cpu = cpumask_next(cur_cpu, &available_mask); 599 cur_cpu = cpumask_next(cur_cpu, &available_mask);
@@ -601,17 +604,24 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
601 continue; 604 continue;
602 } 605 }
603 606
604 /* 607 if (primary->affinity_policy == HV_LOCALIZED) {
605 * NOTE: in the case of sub-channel, we clear the sub-channel 608 /*
606 * related bit(s) in primary->alloced_cpus_in_node in 609 * NOTE: in the case of sub-channel, we clear the
607 * hv_process_channel_removal(), so when we reload drivers 610 * sub-channel related bit(s) in
608 * like hv_netvsc in SMP guest, here we're able to re-allocate 611 * primary->alloced_cpus_in_node in
609 * bit from primary->alloced_cpus_in_node. 612 * hv_process_channel_removal(), so when we
610 */ 613 * reload drivers like hv_netvsc in SMP guest, here
611 if (!cpumask_test_cpu(cur_cpu, 614 * we're able to re-allocate
612 &primary->alloced_cpus_in_node)) { 615 * bit from primary->alloced_cpus_in_node.
613 cpumask_set_cpu(cur_cpu, 616 */
614 &primary->alloced_cpus_in_node); 617 if (!cpumask_test_cpu(cur_cpu,
618 &primary->alloced_cpus_in_node)) {
619 cpumask_set_cpu(cur_cpu,
620 &primary->alloced_cpus_in_node);
621 cpumask_set_cpu(cur_cpu, alloced_mask);
622 break;
623 }
624 } else {
615 cpumask_set_cpu(cur_cpu, alloced_mask); 625 cpumask_set_cpu(cur_cpu, alloced_mask);
616 break; 626 break;
617 } 627 }
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index e6ef571e6100..c877e7980585 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -674,6 +674,11 @@ enum hv_signal_policy {
674 HV_SIGNAL_POLICY_EXPLICIT, 674 HV_SIGNAL_POLICY_EXPLICIT,
675}; 675};
676 676
677enum hv_numa_policy {
678 HV_BALANCED = 0,
679 HV_LOCALIZED,
680};
681
677enum vmbus_device_type { 682enum vmbus_device_type {
678 HV_IDE = 0, 683 HV_IDE = 0,
679 HV_SCSI, 684 HV_SCSI,
@@ -876,6 +881,18 @@ struct vmbus_channel {
876 */ 881 */
877 bool low_latency; 882 bool low_latency;
878 883
884 /*
885 * NUMA distribution policy:
886 * We support two policies:
887 * 1) Balanced: Here all performance critical channels are
888 * distributed evenly amongst all the NUMA nodes.
889 * This policy will be the default policy.
890 * 2) Localized: All channels of a given instance of a
891 * performance critical service will be assigned CPUs
892 * within a selected NUMA node.
893 */
894 enum hv_numa_policy affinity_policy;
895
879}; 896};
880 897
881static inline void set_channel_lock_state(struct vmbus_channel *c, bool state) 898static inline void set_channel_lock_state(struct vmbus_channel *c, bool state)
@@ -895,6 +912,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c,
895 c->signal_policy = policy; 912 c->signal_policy = policy;
896} 913}
897 914
915static inline void set_channel_affinity_state(struct vmbus_channel *c,
916 enum hv_numa_policy policy)
917{
918 c->affinity_policy = policy;
919}
920
898static inline void set_channel_read_state(struct vmbus_channel *c, bool state) 921static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
899{ 922{
900 c->batched_reading = state; 923 c->batched_reading = state;