author	Tadeusz Struk <tadeusz.struk@intel.com>	2016-09-25 10:44:23 -0400
committer	Doug Ledford <dledford@redhat.com>	2016-10-02 08:42:17 -0400
commit	0cb2aa690c7ef14ad1f544288349abb5434bb75d (patch)
tree	d62ef577d193187d80650315d160fc37d896ecbb
parent	3a6982dfd3a7931d679a1aac651fda83ecbad0a0 (diff)
IB/hfi1: Add sysfs interface for affinity setup
Some users want more control over which cpu cores are being used by the
driver. For example, users might want to restrict the driver to some
specified subset of the cores so that they can appropriately partition
processes, irq handlers, and work threads.

To allow the user to fine-tune system affinity settings, new sysfs
attributes are introduced per sdma engine. This patch adds a new
attribute type for sdma engine and a new cpu_list attribute. When the
user writes a cpu range to the cpu_list attribute, the driver creates
an internal cpu->sdma map, which is used later as a look-up table to
choose an optimal engine for a user request.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Reviewed-by: Jianxin Xiong <jianxin.xiong@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
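For illustration, a minimal user-space sketch of driving the new attribute.
The device name hfi1_0, the sdma0 directory, and the resulting sysfs path are
assumptions inferred from the "sdma%d" kobjects registered below, not
something this patch documents:

/*
 * Usage sketch (user space), not part of the patch. The path is an
 * assumption: per-engine kobjects are created as "sdma%d" under the
 * device's infiniband class directory by hfi1_verbs_register_sysfs().
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/class/infiniband/hfi1_0/sdma0/cpu_list";
	const char *cpus = "0-3";	/* cpulist_parse() range format */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Map CPUs 0-3 to sdma engine 0 for that engine's VL. */
	if (write(fd, cpus, strlen(cpus)) < 0)
		perror("write");
	close(fd);
	return 0;
}

Writing requires CAP_SYS_ADMIN, per sde_store() below; reading the attribute
back prints the current CPU mask, or "empty" when no mapping is set.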
-rw-r--r--	drivers/infiniband/hw/hfi1/hfi.h	2
-rw-r--r--	drivers/infiniband/hw/hfi1/sdma.c	310
-rw-r--r--	drivers/infiniband/hw/hfi1/sdma.h	8
-rw-r--r--	drivers/infiniband/hw/hfi1/sysfs.c	90
-rw-r--r--	drivers/infiniband/hw/hfi1/user_sdma.c	9
5 files changed, 412 insertions, 7 deletions
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 5711620bc748..59f69fabc1ef 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -65,6 +65,7 @@
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
 #include <rdma/ib_hdrs.h>
+#include <linux/rhashtable.h>
 #include <rdma/rdma_vt.h>
 
 #include "chip_registers.h"
@@ -1174,6 +1175,7 @@ struct hfi1_devdata {
 	atomic_t aspm_disabled_cnt;
 
 	struct hfi1_affinity *affinity;
+	struct rhashtable sdma_rht;
 	struct kobject kobj;
 };
 
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 0990fba660cf..8cfa960a1a4a 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void)
 }
 
 /**
+ * sdma_engine_get_vl() - return vl for a given sdma engine
+ * @sde: sdma engine
+ *
+ * This function returns the vl mapped to a given engine, or an error if
+ * the mapping can't be found. The mapping fields are protected by RCU.
+ */
+int sdma_engine_get_vl(struct sdma_engine *sde)
+{
+	struct hfi1_devdata *dd = sde->dd;
+	struct sdma_vl_map *m;
+	u8 vl;
+
+	if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
+		return -EINVAL;
+
+	rcu_read_lock();
+	m = rcu_dereference(dd->sdma_map);
+	if (unlikely(!m)) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	vl = m->engine_to_vl[sde->this_idx];
+	rcu_read_unlock();
+
+	return vl;
+}
+
+/**
  * sdma_select_engine_vl() - select sdma engine
  * @dd: devdata
  * @selector: a spreading factor
@@ -788,6 +816,283 @@ struct sdma_engine *sdma_select_engine_sc(
 	return sdma_select_engine_vl(dd, selector, vl);
 }
 
+struct sdma_rht_map_elem {
+	u32 mask;
+	u8 ctr;
+	struct sdma_engine *sde[0];
+};
+
+struct sdma_rht_node {
+	unsigned long cpu_id;
+	struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
+	struct rhash_head node;
+};
+
+#define NR_CPUS_HINT 192
+
+static const struct rhashtable_params sdma_rht_params = {
+	.nelem_hint = NR_CPUS_HINT,
+	.head_offset = offsetof(struct sdma_rht_node, node),
+	.key_offset = offsetof(struct sdma_rht_node, cpu_id),
+	.key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+	.max_size = NR_CPUS,
+	.min_size = 8,
+	.automatic_shrinking = true,
+};
+
+/*
+ * sdma_select_user_engine() - select sdma engine based on user setup
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * This function returns an sdma engine for a user sdma request.
+ * User defined sdma engine affinity setting is honored when applicable,
+ * otherwise system default sdma engine mapping is used. To ensure correct
+ * ordering, the mapping from <selector, vl> to sde must remain unchanged.
+ */
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+					    u32 selector, u8 vl)
+{
+	struct sdma_rht_node *rht_node;
+	struct sdma_engine *sde = NULL;
+	const struct cpumask *current_mask = tsk_cpus_allowed(current);
+	unsigned long cpu_id;
+
+	/*
+	 * To ensure that always the same sdma engine(s) will be
+	 * selected make sure the process is pinned to this CPU only.
+	 */
+	if (cpumask_weight(current_mask) != 1)
+		goto out;
+
+	cpu_id = smp_processor_id();
+	rcu_read_lock();
+	rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+					  sdma_rht_params);
+
+	if (rht_node && rht_node->map[vl]) {
+		struct sdma_rht_map_elem *map = rht_node->map[vl];
+
+		sde = map->sde[selector & map->mask];
+	}
+	rcu_read_unlock();
+
+	if (sde)
+		return sde;
+
+out:
+	return sdma_select_engine_vl(dd, selector, vl);
+}
+
+static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
+{
+	int i;
+
+	for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
+		map->sde[map->ctr + i] = map->sde[i];
+}
+
+static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
+				 struct sdma_engine *sde)
+{
+	unsigned int i, pow;
+
+	/* only need to check the first ctr entries for a match */
+	for (i = 0; i < map->ctr; i++) {
+		if (map->sde[i] == sde) {
+			memmove(&map->sde[i], &map->sde[i + 1],
+				(map->ctr - i - 1) * sizeof(map->sde[0]));
+			map->ctr--;
+			pow = roundup_pow_of_two(map->ctr ? : 1);
+			map->mask = pow - 1;
+			sdma_populate_sde_map(map);
+			break;
+		}
+	}
+}
+
+/*
+ * Prevents concurrent reads and writes of the sdma engine cpu_mask
+ */
+static DEFINE_MUTEX(process_to_sde_mutex);
+
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+				size_t count)
+{
+	struct hfi1_devdata *dd = sde->dd;
+	cpumask_var_t mask, new_mask;
+	unsigned long cpu;
+	int ret, vl, sz;
+
+	vl = sdma_engine_get_vl(sde);
+	if (unlikely(vl < 0))
+		return -EINVAL;
+
+	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+	if (!ret)
+		return -ENOMEM;
+
+	ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
+	if (!ret) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+	ret = cpulist_parse(buf, mask);
+	if (ret)
+		goto out_free;
+
+	if (!cpumask_subset(mask, cpu_online_mask)) {
+		dd_dev_warn(sde->dd, "Invalid CPU mask\n");
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	sz = sizeof(struct sdma_rht_map_elem) +
+			(TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
+
+	mutex_lock(&process_to_sde_mutex);
+
+	for_each_cpu(cpu, mask) {
+		struct sdma_rht_node *rht_node;
+
+		/* Check if we have this already mapped */
+		if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
+			cpumask_set_cpu(cpu, new_mask);
+			continue;
+		}
+
+		rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+						  sdma_rht_params);
+		if (!rht_node) {
+			rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
+			if (!rht_node) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+			if (!rht_node->map[vl]) {
+				kfree(rht_node);
+				ret = -ENOMEM;
+				goto out;
+			}
+			rht_node->cpu_id = cpu;
+			rht_node->map[vl]->mask = 0;
+			rht_node->map[vl]->ctr = 1;
+			rht_node->map[vl]->sde[0] = sde;
+
+			ret = rhashtable_insert_fast(&dd->sdma_rht,
+						     &rht_node->node,
+						     sdma_rht_params);
+			if (ret) {
+				kfree(rht_node->map[vl]);
+				kfree(rht_node);
+				dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
+					   cpu);
+				goto out;
+			}
+
+		} else {
+			int ctr, pow;
+
+			/* Add new user mappings */
+			if (!rht_node->map[vl])
+				rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+
+			if (!rht_node->map[vl]) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			rht_node->map[vl]->ctr++;
+			ctr = rht_node->map[vl]->ctr;
+			rht_node->map[vl]->sde[ctr - 1] = sde;
+			pow = roundup_pow_of_two(ctr);
+			rht_node->map[vl]->mask = pow - 1;
+
+			/* Populate the sde map table */
+			sdma_populate_sde_map(rht_node->map[vl]);
+		}
+		cpumask_set_cpu(cpu, new_mask);
+	}
+
+	/* Clean up old mappings */
+	for_each_cpu(cpu, cpu_online_mask) {
+		struct sdma_rht_node *rht_node;
+
+		/* Don't cleanup sdes that are set in the new mask */
+		if (cpumask_test_cpu(cpu, mask))
+			continue;
+
+		rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+						  sdma_rht_params);
+		if (rht_node) {
+			bool empty = true;
+			int i;
+
+			/* Remove mappings for old sde */
+			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+				if (rht_node->map[i])
+					sdma_cleanup_sde_map(rht_node->map[i],
+							     sde);
+
+			/* Free empty hash table entries */
+			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+				if (!rht_node->map[i])
+					continue;
+
+				if (rht_node->map[i]->ctr) {
+					empty = false;
+					break;
+				}
+			}
+
+			if (empty) {
+				ret = rhashtable_remove_fast(&dd->sdma_rht,
+							     &rht_node->node,
+							     sdma_rht_params);
+				WARN_ON(ret);
+
+				for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+					kfree(rht_node->map[i]);
+
+				kfree(rht_node);
+			}
+		}
+	}
+
+	cpumask_copy(&sde->cpu_mask, new_mask);
+out:
+	mutex_unlock(&process_to_sde_mutex);
+out_free:
+	free_cpumask_var(mask);
+	free_cpumask_var(new_mask);
+	return ret ? : strnlen(buf, PAGE_SIZE);
+}
+
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+	mutex_lock(&process_to_sde_mutex);
+	if (cpumask_empty(&sde->cpu_mask))
+		snprintf(buf, PAGE_SIZE, "%s\n", "empty");
+	else
+		cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
+	mutex_unlock(&process_to_sde_mutex);
+	return strnlen(buf, PAGE_SIZE);
+}
+
+static void sdma_rht_free(void *ptr, void *arg)
+{
+	struct sdma_rht_node *rht_node = ptr;
+	int i;
+
+	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+		kfree(rht_node->map[i]);
+
+	kfree(rht_node);
+}
+
 /*
  * Free the indicated map struct
  */
@@ -1161,6 +1466,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 	dd->num_sdma = num_engines;
 	if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
 		goto bail;
+
+	if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+		goto bail;
+
 	dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
 	return 0;
 
@@ -1252,6 +1561,7 @@ void sdma_exit(struct hfi1_devdata *dd)
 		sdma_finalput(&sde->state);
 	}
 	sdma_clean(dd, dd->num_sdma);
+	rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
 }
 
 /*
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index b333afa552fc..93025f6ded15 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -413,6 +413,8 @@ struct sdma_engine {
 	spinlock_t flushlist_lock;
 	/* private: */
 	struct list_head flushlist;
+	struct cpumask cpu_mask;
+	struct kobject kobj;
 };
 
 int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -1059,6 +1061,12 @@ struct sdma_engine *sdma_select_engine_vl(
 		u32 selector,
 		u8 vl);
 
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+					    u32 selector, u8 vl);
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+				size_t count);
+int sdma_engine_get_vl(struct sdma_engine *sde);
 void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);
 
 #ifdef CONFIG_SDMA_VERBOSITY
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 74c84c655f7e..836eea58e4ff 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -766,13 +766,82 @@ bail:
 	return ret;
 }
 
+struct sde_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct sdma_engine *sde, char *buf);
+	ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt);
+};
+
+static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct sde_attribute *sde_attr =
+		container_of(attr, struct sde_attribute, attr);
+	struct sdma_engine *sde =
+		container_of(kobj, struct sdma_engine, kobj);
+
+	if (!sde_attr->show)
+		return -EINVAL;
+
+	return sde_attr->show(sde, buf);
+}
+
+static ssize_t sde_store(struct kobject *kobj, struct attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct sde_attribute *sde_attr =
+		container_of(attr, struct sde_attribute, attr);
+	struct sdma_engine *sde =
+		container_of(kobj, struct sdma_engine, kobj);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!sde_attr->store)
+		return -EINVAL;
+
+	return sde_attr->store(sde, buf, count);
+}
+
+static const struct sysfs_ops sde_sysfs_ops = {
+	.show = sde_show,
+	.store = sde_store,
+};
+
+static struct kobj_type sde_ktype = {
+	.sysfs_ops = &sde_sysfs_ops,
+};
+
+#define SDE_ATTR(_name, _mode, _show, _store) \
+	struct sde_attribute sde_attr_##_name = \
+		__ATTR(_name, _mode, _show, _store)
+
+static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+	return sdma_get_cpu_to_sde_map(sde, buf);
+}
+
+static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde,
+					const char *buf, size_t count)
+{
+	return sdma_set_cpu_to_sde_map(sde, buf, count);
+}
+
+static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
+		sde_show_cpu_to_sde_map,
+		sde_store_cpu_to_sde_map);
+
+static struct sde_attribute *sde_attribs[] = {
+	&sde_attr_cpu_list
+};
+
 /*
  * Register and create our files in /sys/class/infiniband.
  */
 int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
 {
 	struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
-	int i, ret;
+	struct device *class_dev = &dev->dev;
+	int i, j, ret;
 
 	for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
 		ret = device_create_file(&dev->dev, hfi1_attributes[i]);
@@ -780,10 +849,29 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
 			goto bail;
 	}
 
+	for (i = 0; i < dd->num_sdma; i++) {
+		ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
+					   &sde_ktype, &class_dev->kobj,
+					   "sdma%d", i);
+		if (ret)
+			goto bail;
+
+		for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) {
+			ret = sysfs_create_file(&dd->per_sdma[i].kobj,
+						&sde_attribs[j]->attr);
+			if (ret)
+				goto bail;
+		}
+	}
+
 	return 0;
 bail:
 	for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
 		device_remove_file(&dev->dev, hfi1_attributes[i]);
+
+	for (i = 0; i < dd->num_sdma; i++)
+		kobject_del(&dd->per_sdma[i].kobj);
+
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index bc7e5c179f80..a761f804111e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -548,7 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	u8 opcode, sc, vl;
 	int req_queued = 0;
 	u16 dlid;
-	u8 selector;
+	u32 selector;
 
 	if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
 		hfi1_cdbg(
@@ -753,12 +753,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 
 	dlid = be16_to_cpu(req->hdr.lrh[1]);
 	selector = dlid_to_selector(dlid);
+	selector += uctxt->ctxt + fd->subctxt;
+	req->sde = sdma_select_user_engine(dd, selector, vl);
 
-	/* Have to select the engine */
-	req->sde = sdma_select_engine_vl(dd,
-					 (u32)(uctxt->ctxt + fd->subctxt +
-					       selector),
-					 vl);
 	if (!req->sde || !sdma_running(req->sde)) {
 		ret = -ECOMM;
 		goto free_req;
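
A note on the selection path above: sdma_select_user_engine() honors the user
mapping only when the submitting task is runnable on exactly one CPU (the
cpumask_weight() check); otherwise it falls back to sdma_select_engine_vl().
A sketch of how a user process might pin itself first (standard glibc calls,
not part of the patch; pin_to_cpu is a hypothetical helper name):

#define _GNU_SOURCE
#include <sched.h>

/* Pin the calling thread to a single CPU so the cpu_list mapping applies. */
static int pin_to_cpu(int cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	return sched_setaffinity(0, sizeof(set), &set); /* pid 0 = self */
}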