aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorMoshe Shemesh <moshe@mellanox.com>2019-04-29 05:41:45 -0400
committerDavid S. Miller <davem@davemloft.net>2019-05-01 11:07:03 -0400
commitb587bdaf5f820cf7dac2c1b351db97bf98e1f427 (patch)
tree506edca56c4af13657e8f1136b5de7e9c4784960 /net
parent5be90f993880052d95bbf6ccdca3fa081eb9b1ff (diff)
devlink: Change devlink health locking mechanism
The devlink health reporters create/destroy and user commands currently use the devlink->lock as a locking mechanism. Different reporters have different rules in the driver and are being created/destroyed during different stages of driver load/unload/running. So during execution of a reporter recover the flow can go through another reporter's destroy and create. Such flow leads to deadlock trying to lock a mutex already held. With the new locking mechanism the different reporters share mutex lock only to protect access to shared reporters list. Added refcount per reporter, to protect the reporters from destroy while being used. Signed-off-by: Moshe Shemesh <moshe@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/devlink.c97
1 files changed, 74 insertions, 23 deletions
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4e28d04c0165..d43bc52b8840 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -20,6 +20,7 @@
20#include <linux/list.h> 20#include <linux/list.h>
21#include <linux/netdevice.h> 21#include <linux/netdevice.h>
22#include <linux/spinlock.h> 22#include <linux/spinlock.h>
23#include <linux/refcount.h>
23#include <rdma/ib_verbs.h> 24#include <rdma/ib_verbs.h>
24#include <net/netlink.h> 25#include <net/netlink.h>
25#include <net/genetlink.h> 26#include <net/genetlink.h>
@@ -4432,6 +4433,7 @@ struct devlink_health_reporter {
4432 u64 error_count; 4433 u64 error_count;
4433 u64 recovery_count; 4434 u64 recovery_count;
4434 u64 last_recovery_ts; 4435 u64 last_recovery_ts;
4436 refcount_t refcount;
4435}; 4437};
4436 4438
4437void * 4439void *
@@ -4447,6 +4449,7 @@ devlink_health_reporter_find_by_name(struct devlink *devlink,
4447{ 4449{
4448 struct devlink_health_reporter *reporter; 4450 struct devlink_health_reporter *reporter;
4449 4451
4452 lockdep_assert_held(&devlink->reporters_lock);
4450 list_for_each_entry(reporter, &devlink->reporter_list, list) 4453 list_for_each_entry(reporter, &devlink->reporter_list, list)
4451 if (!strcmp(reporter->ops->name, reporter_name)) 4454 if (!strcmp(reporter->ops->name, reporter_name))
4452 return reporter; 4455 return reporter;
@@ -4470,7 +4473,7 @@ devlink_health_reporter_create(struct devlink *devlink,
4470{ 4473{
4471 struct devlink_health_reporter *reporter; 4474 struct devlink_health_reporter *reporter;
4472 4475
4473 mutex_lock(&devlink->lock); 4476 mutex_lock(&devlink->reporters_lock);
4474 if (devlink_health_reporter_find_by_name(devlink, ops->name)) { 4477 if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
4475 reporter = ERR_PTR(-EEXIST); 4478 reporter = ERR_PTR(-EEXIST);
4476 goto unlock; 4479 goto unlock;
@@ -4494,9 +4497,10 @@ devlink_health_reporter_create(struct devlink *devlink,
4494 reporter->graceful_period = graceful_period; 4497 reporter->graceful_period = graceful_period;
4495 reporter->auto_recover = auto_recover; 4498 reporter->auto_recover = auto_recover;
4496 mutex_init(&reporter->dump_lock); 4499 mutex_init(&reporter->dump_lock);
4500 refcount_set(&reporter->refcount, 1);
4497 list_add_tail(&reporter->list, &devlink->reporter_list); 4501 list_add_tail(&reporter->list, &devlink->reporter_list);
4498unlock: 4502unlock:
4499 mutex_unlock(&devlink->lock); 4503 mutex_unlock(&devlink->reporters_lock);
4500 return reporter; 4504 return reporter;
4501} 4505}
4502EXPORT_SYMBOL_GPL(devlink_health_reporter_create); 4506EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
@@ -4509,10 +4513,12 @@ EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
4509void 4513void
4510devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) 4514devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
4511{ 4515{
4512 mutex_lock(&reporter->devlink->lock); 4516 mutex_lock(&reporter->devlink->reporters_lock);
4513 list_del(&reporter->list); 4517 list_del(&reporter->list);
4518 mutex_unlock(&reporter->devlink->reporters_lock);
4519 while (refcount_read(&reporter->refcount) > 1)
4520 msleep(100);
4514 mutex_destroy(&reporter->dump_lock); 4521 mutex_destroy(&reporter->dump_lock);
4515 mutex_unlock(&reporter->devlink->lock);
4516 if (reporter->dump_fmsg) 4522 if (reporter->dump_fmsg)
4517 devlink_fmsg_free(reporter->dump_fmsg); 4523 devlink_fmsg_free(reporter->dump_fmsg);
4518 kfree(reporter); 4524 kfree(reporter);
@@ -4648,6 +4654,7 @@ static struct devlink_health_reporter *
4648devlink_health_reporter_get_from_info(struct devlink *devlink, 4654devlink_health_reporter_get_from_info(struct devlink *devlink,
4649 struct genl_info *info) 4655 struct genl_info *info)
4650{ 4656{
4657 struct devlink_health_reporter *reporter;
4651 char *reporter_name; 4658 char *reporter_name;
4652 4659
4653 if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) 4660 if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
@@ -4655,7 +4662,18 @@ devlink_health_reporter_get_from_info(struct devlink *devlink,
4655 4662
4656 reporter_name = 4663 reporter_name =
4657 nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); 4664 nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
4658 return devlink_health_reporter_find_by_name(devlink, reporter_name); 4665 mutex_lock(&devlink->reporters_lock);
4666 reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
4667 if (reporter)
4668 refcount_inc(&reporter->refcount);
4669 mutex_unlock(&devlink->reporters_lock);
4670 return reporter;
4671}
4672
4673static void
4674devlink_health_reporter_put(struct devlink_health_reporter *reporter)
4675{
4676 refcount_dec(&reporter->refcount);
4659} 4677}
4660 4678
4661static int 4679static int
@@ -4730,8 +4748,10 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
4730 return -EINVAL; 4748 return -EINVAL;
4731 4749
4732 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 4750 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
4733 if (!msg) 4751 if (!msg) {
4734 return -ENOMEM; 4752 err = -ENOMEM;
4753 goto out;
4754 }
4735 4755
4736 err = devlink_nl_health_reporter_fill(msg, devlink, reporter, 4756 err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
4737 DEVLINK_CMD_HEALTH_REPORTER_GET, 4757 DEVLINK_CMD_HEALTH_REPORTER_GET,
@@ -4739,10 +4759,13 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
4739 0); 4759 0);
4740 if (err) { 4760 if (err) {
4741 nlmsg_free(msg); 4761 nlmsg_free(msg);
4742 return err; 4762 goto out;
4743 } 4763 }
4744 4764
4745 return genlmsg_reply(msg, info); 4765 err = genlmsg_reply(msg, info);
4766out:
4767 devlink_health_reporter_put(reporter);
4768 return err;
4746} 4769}
4747 4770
4748static int 4771static int
@@ -4759,7 +4782,7 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
4759 list_for_each_entry(devlink, &devlink_list, list) { 4782 list_for_each_entry(devlink, &devlink_list, list) {
4760 if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) 4783 if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
4761 continue; 4784 continue;
4762 mutex_lock(&devlink->lock); 4785 mutex_lock(&devlink->reporters_lock);
4763 list_for_each_entry(reporter, &devlink->reporter_list, 4786 list_for_each_entry(reporter, &devlink->reporter_list,
4764 list) { 4787 list) {
4765 if (idx < start) { 4788 if (idx < start) {
@@ -4773,12 +4796,12 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
4773 cb->nlh->nlmsg_seq, 4796 cb->nlh->nlmsg_seq,
4774 NLM_F_MULTI); 4797 NLM_F_MULTI);
4775 if (err) { 4798 if (err) {
4776 mutex_unlock(&devlink->lock); 4799 mutex_unlock(&devlink->reporters_lock);
4777 goto out; 4800 goto out;
4778 } 4801 }
4779 idx++; 4802 idx++;
4780 } 4803 }
4781 mutex_unlock(&devlink->lock); 4804 mutex_unlock(&devlink->reporters_lock);
4782 } 4805 }
4783out: 4806out:
4784 mutex_unlock(&devlink_mutex); 4807 mutex_unlock(&devlink_mutex);
@@ -4793,6 +4816,7 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
4793{ 4816{
4794 struct devlink *devlink = info->user_ptr[0]; 4817 struct devlink *devlink = info->user_ptr[0];
4795 struct devlink_health_reporter *reporter; 4818 struct devlink_health_reporter *reporter;
4819 int err;
4796 4820
4797 reporter = devlink_health_reporter_get_from_info(devlink, info); 4821 reporter = devlink_health_reporter_get_from_info(devlink, info);
4798 if (!reporter) 4822 if (!reporter)
@@ -4800,8 +4824,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
4800 4824
4801 if (!reporter->ops->recover && 4825 if (!reporter->ops->recover &&
4802 (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || 4826 (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
4803 info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) 4827 info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) {
4804 return -EOPNOTSUPP; 4828 err = -EOPNOTSUPP;
4829 goto out;
4830 }
4805 4831
4806 if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) 4832 if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
4807 reporter->graceful_period = 4833 reporter->graceful_period =
@@ -4811,7 +4837,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
4811 reporter->auto_recover = 4837 reporter->auto_recover =
4812 nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); 4838 nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
4813 4839
4840 devlink_health_reporter_put(reporter);
4814 return 0; 4841 return 0;
4842out:
4843 devlink_health_reporter_put(reporter);
4844 return err;
4815} 4845}
4816 4846
4817static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, 4847static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
@@ -4819,12 +4849,16 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
4819{ 4849{
4820 struct devlink *devlink = info->user_ptr[0]; 4850 struct devlink *devlink = info->user_ptr[0];
4821 struct devlink_health_reporter *reporter; 4851 struct devlink_health_reporter *reporter;
4852 int err;
4822 4853
4823 reporter = devlink_health_reporter_get_from_info(devlink, info); 4854 reporter = devlink_health_reporter_get_from_info(devlink, info);
4824 if (!reporter) 4855 if (!reporter)
4825 return -EINVAL; 4856 return -EINVAL;
4826 4857
4827 return devlink_health_reporter_recover(reporter, NULL); 4858 err = devlink_health_reporter_recover(reporter, NULL);
4859
4860 devlink_health_reporter_put(reporter);
4861 return err;
4828} 4862}
4829 4863
4830static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, 4864static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
@@ -4839,12 +4873,16 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
4839 if (!reporter) 4873 if (!reporter)
4840 return -EINVAL; 4874 return -EINVAL;
4841 4875
4842 if (!reporter->ops->diagnose) 4876 if (!reporter->ops->diagnose) {
4877 devlink_health_reporter_put(reporter);
4843 return -EOPNOTSUPP; 4878 return -EOPNOTSUPP;
4879 }
4844 4880
4845 fmsg = devlink_fmsg_alloc(); 4881 fmsg = devlink_fmsg_alloc();
4846 if (!fmsg) 4882 if (!fmsg) {
4883 devlink_health_reporter_put(reporter);
4847 return -ENOMEM; 4884 return -ENOMEM;
4885 }
4848 4886
4849 err = devlink_fmsg_obj_nest_start(fmsg); 4887 err = devlink_fmsg_obj_nest_start(fmsg);
4850 if (err) 4888 if (err)
@@ -4863,6 +4901,7 @@ static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
4863 4901
4864out: 4902out:
4865 devlink_fmsg_free(fmsg); 4903 devlink_fmsg_free(fmsg);
4904 devlink_health_reporter_put(reporter);
4866 return err; 4905 return err;
4867} 4906}
4868 4907
@@ -4877,8 +4916,10 @@ static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
4877 if (!reporter) 4916 if (!reporter)
4878 return -EINVAL; 4917 return -EINVAL;
4879 4918
4880 if (!reporter->ops->dump) 4919 if (!reporter->ops->dump) {
4920 devlink_health_reporter_put(reporter);
4881 return -EOPNOTSUPP; 4921 return -EOPNOTSUPP;
4922 }
4882 4923
4883 mutex_lock(&reporter->dump_lock); 4924 mutex_lock(&reporter->dump_lock);
4884 err = devlink_health_do_dump(reporter, NULL); 4925 err = devlink_health_do_dump(reporter, NULL);
@@ -4890,6 +4931,7 @@ static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
4890 4931
4891out: 4932out:
4892 mutex_unlock(&reporter->dump_lock); 4933 mutex_unlock(&reporter->dump_lock);
4934 devlink_health_reporter_put(reporter);
4893 return err; 4935 return err;
4894} 4936}
4895 4937
@@ -4904,12 +4946,15 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
4904 if (!reporter) 4946 if (!reporter)
4905 return -EINVAL; 4947 return -EINVAL;
4906 4948
4907 if (!reporter->ops->dump) 4949 if (!reporter->ops->dump) {
4950 devlink_health_reporter_put(reporter);
4908 return -EOPNOTSUPP; 4951 return -EOPNOTSUPP;
4952 }
4909 4953
4910 mutex_lock(&reporter->dump_lock); 4954 mutex_lock(&reporter->dump_lock);
4911 devlink_health_dump_clear(reporter); 4955 devlink_health_dump_clear(reporter);
4912 mutex_unlock(&reporter->dump_lock); 4956 mutex_unlock(&reporter->dump_lock);
4957 devlink_health_reporter_put(reporter);
4913 return 0; 4958 return 0;
4914} 4959}
4915 4960
@@ -5191,7 +5236,8 @@ static const struct genl_ops devlink_nl_ops[] = {
5191 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 5236 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
5192 .doit = devlink_nl_cmd_health_reporter_get_doit, 5237 .doit = devlink_nl_cmd_health_reporter_get_doit,
5193 .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, 5238 .dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
5194 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, 5239 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
5240 DEVLINK_NL_FLAG_NO_LOCK,
5195 /* can be retrieved by unprivileged users */ 5241 /* can be retrieved by unprivileged users */
5196 }, 5242 },
5197 { 5243 {
@@ -5199,21 +5245,24 @@ static const struct genl_ops devlink_nl_ops[] = {
5199 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 5245 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
5200 .doit = devlink_nl_cmd_health_reporter_set_doit, 5246 .doit = devlink_nl_cmd_health_reporter_set_doit,
5201 .flags = GENL_ADMIN_PERM, 5247 .flags = GENL_ADMIN_PERM,
5202 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, 5248 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
5249 DEVLINK_NL_FLAG_NO_LOCK,
5203 }, 5250 },
5204 { 5251 {
5205 .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, 5252 .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
5206 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 5253 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
5207 .doit = devlink_nl_cmd_health_reporter_recover_doit, 5254 .doit = devlink_nl_cmd_health_reporter_recover_doit,
5208 .flags = GENL_ADMIN_PERM, 5255 .flags = GENL_ADMIN_PERM,
5209 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, 5256 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
5257 DEVLINK_NL_FLAG_NO_LOCK,
5210 }, 5258 },
5211 { 5259 {
5212 .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 5260 .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
5213 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 5261 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
5214 .doit = devlink_nl_cmd_health_reporter_diagnose_doit, 5262 .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
5215 .flags = GENL_ADMIN_PERM, 5263 .flags = GENL_ADMIN_PERM,
5216 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, 5264 .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
5265 DEVLINK_NL_FLAG_NO_LOCK,
5217 }, 5266 },
5218 { 5267 {
5219 .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 5268 .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
@@ -5284,6 +5333,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
5284 INIT_LIST_HEAD(&devlink->region_list); 5333 INIT_LIST_HEAD(&devlink->region_list);
5285 INIT_LIST_HEAD(&devlink->reporter_list); 5334 INIT_LIST_HEAD(&devlink->reporter_list);
5286 mutex_init(&devlink->lock); 5335 mutex_init(&devlink->lock);
5336 mutex_init(&devlink->reporters_lock);
5287 return devlink; 5337 return devlink;
5288} 5338}
5289EXPORT_SYMBOL_GPL(devlink_alloc); 5339EXPORT_SYMBOL_GPL(devlink_alloc);
@@ -5326,6 +5376,7 @@ EXPORT_SYMBOL_GPL(devlink_unregister);
5326 */ 5376 */
5327void devlink_free(struct devlink *devlink) 5377void devlink_free(struct devlink *devlink)
5328{ 5378{
5379 mutex_destroy(&devlink->reporters_lock);
5329 mutex_destroy(&devlink->lock); 5380 mutex_destroy(&devlink->lock);
5330 WARN_ON(!list_empty(&devlink->reporter_list)); 5381 WARN_ON(!list_empty(&devlink->reporter_list));
5331 WARN_ON(!list_empty(&devlink->region_list)); 5382 WARN_ON(!list_empty(&devlink->region_list));