aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Williamson <alex.williamson@redhat.com>2018-05-15 15:53:55 -0400
committerAlex Williamson <alex.williamson@redhat.com>2018-06-08 12:24:27 -0400
commit002fe996f67f4f46d8917b14cfb6e4313c20685a (patch)
tree3bec3e6fc0ffe5fee605a6680a8a9a727874df31
parentd59502bc0591d75feb5824c7c24faaa8ec48a7ae (diff)
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the parent device and return -EEXIST if found, but the mdev device namespace is global since we'll link all devices from the bus. We do catch this later in sysfs_do_create_link_sd() to return -EEXIST, but with it comes a kernel warning and stack trace for trying to create duplicate sysfs links, which makes it an undesirable response. Therefore we should really be looking for duplicates across all mdev parent devices, or as implemented here, against our mdev device list. Using mdev_list to prevent duplicates means that we can remove mdev_parent.lock, but in order not to serialize mdev device creation and removal globally, we add mdev_device.active which allows UUIDs to be reserved such that we can drop the mdev_list_lock before the mdev device is fully in place. Two behavioral notes; first, mdev_parent.lock had the side-effect of serializing mdev create and remove ops per parent device. This was an implementation detail, not an intentional guarantee provided to the mdev vendor drivers. Vendor drivers can trivially provide this serialization internally if necessary. Second, review comments note the new -EAGAIN behavior when the device, and in particular the remove attribute, becomes visible in sysfs. If a remove is triggered prior to completion of mdev_device_create() the user will see a -EAGAIN error. While the errno is different, receiving an error during this period is not, the previous implementation returned -ENODEV for the same condition. Furthermore, the consistency to the user is improved in the case where mdev_device_remove_ops() returns error. Previously concurrent calls to mdev_device_remove() could see the device disappear with -ENODEV and return in the case of error. Now a user would see -EAGAIN while the device is in this transitory state. Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com> Reviewed-by: Cornelia Huck <cohuck@redhat.com> Acked-by: Halil Pasic <pasic@linux.ibm.com> Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
-rw-r--r--Documentation/vfio-mediated-device.txt5
-rw-r--r--drivers/vfio/mdev/mdev_core.c102
-rw-r--r--drivers/vfio/mdev/mdev_private.h2
3 files changed, 42 insertions, 67 deletions
diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt
index 1b3950346532..c3f69bcaf96e 100644
--- a/Documentation/vfio-mediated-device.txt
+++ b/Documentation/vfio-mediated-device.txt
@@ -145,6 +145,11 @@ The functions in the mdev_parent_ops structure are as follows:
145* create: allocate basic resources in a driver for a mediated device 145* create: allocate basic resources in a driver for a mediated device
146* remove: free resources in a driver when a mediated device is destroyed 146* remove: free resources in a driver when a mediated device is destroyed
147 147
148(Note that mdev-core provides no implicit serialization of create/remove
149callbacks per mdev parent device, per mdev type, or any other categorization.
150Vendor drivers are expected to be fully asynchronous in this respect or
151provide their own internal resource protection.)
152
148The callbacks in the mdev_parent_ops structure are as follows: 153The callbacks in the mdev_parent_ops structure are as follows:
149 154
150* open: open callback of mediated device 155* open: open callback of mediated device
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 126991046eb7..0212f0ee8aea 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *mdev)
66} 66}
67EXPORT_SYMBOL(mdev_uuid); 67EXPORT_SYMBOL(mdev_uuid);
68 68
69static int _find_mdev_device(struct device *dev, void *data)
70{
71 struct mdev_device *mdev;
72
73 if (!dev_is_mdev(dev))
74 return 0;
75
76 mdev = to_mdev_device(dev);
77
78 if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0)
79 return 1;
80
81 return 0;
82}
83
84static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid)
85{
86 struct device *dev;
87
88 dev = device_find_child(parent->dev, &uuid, _find_mdev_device);
89 if (dev) {
90 put_device(dev);
91 return true;
92 }
93
94 return false;
95}
96
97/* Should be called holding parent_list_lock */ 69/* Should be called holding parent_list_lock */
98static struct mdev_parent *__find_parent_device(struct device *dev) 70static struct mdev_parent *__find_parent_device(struct device *dev)
99{ 71{
@@ -221,7 +193,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
221 } 193 }
222 194
223 kref_init(&parent->ref); 195 kref_init(&parent->ref);
224 mutex_init(&parent->lock);
225 196
226 parent->dev = dev; 197 parent->dev = dev;
227 parent->ops = ops; 198 parent->ops = ops;
@@ -297,6 +268,10 @@ static void mdev_device_release(struct device *dev)
297{ 268{
298 struct mdev_device *mdev = to_mdev_device(dev); 269 struct mdev_device *mdev = to_mdev_device(dev);
299 270
271 mutex_lock(&mdev_list_lock);
272 list_del(&mdev->next);
273 mutex_unlock(&mdev_list_lock);
274
300 dev_dbg(&mdev->dev, "MDEV: destroying\n"); 275 dev_dbg(&mdev->dev, "MDEV: destroying\n");
301 kfree(mdev); 276 kfree(mdev);
302} 277}
@@ -304,7 +279,7 @@ static void mdev_device_release(struct device *dev)
304int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) 279int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
305{ 280{
306 int ret; 281 int ret;
307 struct mdev_device *mdev; 282 struct mdev_device *mdev, *tmp;
308 struct mdev_parent *parent; 283 struct mdev_parent *parent;
309 struct mdev_type *type = to_mdev_type(kobj); 284 struct mdev_type *type = to_mdev_type(kobj);
310 285
@@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
312 if (!parent) 287 if (!parent)
313 return -EINVAL; 288 return -EINVAL;
314 289
315 mutex_lock(&parent->lock); 290 mutex_lock(&mdev_list_lock);
316 291
317 /* Check for duplicate */ 292 /* Check for duplicate */
318 if (mdev_device_exist(parent, uuid)) { 293 list_for_each_entry(tmp, &mdev_list, next) {
319 ret = -EEXIST; 294 if (!uuid_le_cmp(tmp->uuid, uuid)) {
320 goto create_err; 295 mutex_unlock(&mdev_list_lock);
296 ret = -EEXIST;
297 goto mdev_fail;
298 }
321 } 299 }
322 300
323 mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); 301 mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
324 if (!mdev) { 302 if (!mdev) {
303 mutex_unlock(&mdev_list_lock);
325 ret = -ENOMEM; 304 ret = -ENOMEM;
326 goto create_err; 305 goto mdev_fail;
327 } 306 }
328 307
329 memcpy(&mdev->uuid, &uuid, sizeof(uuid_le)); 308 memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
309 list_add(&mdev->next, &mdev_list);
310 mutex_unlock(&mdev_list_lock);
311
330 mdev->parent = parent; 312 mdev->parent = parent;
331 kref_init(&mdev->ref); 313 kref_init(&mdev->ref);
332 314
@@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
338 ret = device_register(&mdev->dev); 320 ret = device_register(&mdev->dev);
339 if (ret) { 321 if (ret) {
340 put_device(&mdev->dev); 322 put_device(&mdev->dev);
341 goto create_err; 323 goto mdev_fail;
342 } 324 }
343 325
344 ret = mdev_device_create_ops(kobj, mdev); 326 ret = mdev_device_create_ops(kobj, mdev);
345 if (ret) 327 if (ret)
346 goto create_failed; 328 goto create_fail;
347 329
348 ret = mdev_create_sysfs_files(&mdev->dev, type); 330 ret = mdev_create_sysfs_files(&mdev->dev, type);
349 if (ret) { 331 if (ret) {
350 mdev_device_remove_ops(mdev, true); 332 mdev_device_remove_ops(mdev, true);
351 goto create_failed; 333 goto create_fail;
352 } 334 }
353 335
354 mdev->type_kobj = kobj; 336 mdev->type_kobj = kobj;
337 mdev->active = true;
355 dev_dbg(&mdev->dev, "MDEV: created\n"); 338 dev_dbg(&mdev->dev, "MDEV: created\n");
356 339
357 mutex_unlock(&parent->lock); 340 return 0;
358
359 mutex_lock(&mdev_list_lock);
360 list_add(&mdev->next, &mdev_list);
361 mutex_unlock(&mdev_list_lock);
362
363 return ret;
364 341
365create_failed: 342create_fail:
366 device_unregister(&mdev->dev); 343 device_unregister(&mdev->dev);
367 344mdev_fail:
368create_err:
369 mutex_unlock(&parent->lock);
370 mdev_put_parent(parent); 345 mdev_put_parent(parent);
371 return ret; 346 return ret;
372} 347}
@@ -377,44 +352,39 @@ int mdev_device_remove(struct device *dev, bool force_remove)
377 struct mdev_parent *parent; 352 struct mdev_parent *parent;
378 struct mdev_type *type; 353 struct mdev_type *type;
379 int ret; 354 int ret;
380 bool found = false;
381 355
382 mdev = to_mdev_device(dev); 356 mdev = to_mdev_device(dev);
383 357
384 mutex_lock(&mdev_list_lock); 358 mutex_lock(&mdev_list_lock);
385 list_for_each_entry(tmp, &mdev_list, next) { 359 list_for_each_entry(tmp, &mdev_list, next) {
386 if (tmp == mdev) { 360 if (tmp == mdev)
387 found = true;
388 break; 361 break;
389 }
390 } 362 }
391 363
392 if (found) 364 if (tmp != mdev) {
393 list_del(&mdev->next); 365 mutex_unlock(&mdev_list_lock);
366 return -ENODEV;
367 }
394 368
395 mutex_unlock(&mdev_list_lock); 369 if (!mdev->active) {
370 mutex_unlock(&mdev_list_lock);
371 return -EAGAIN;
372 }
396 373
397 if (!found) 374 mdev->active = false;
398 return -ENODEV; 375 mutex_unlock(&mdev_list_lock);
399 376
400 type = to_mdev_type(mdev->type_kobj); 377 type = to_mdev_type(mdev->type_kobj);
401 parent = mdev->parent; 378 parent = mdev->parent;
402 mutex_lock(&parent->lock);
403 379
404 ret = mdev_device_remove_ops(mdev, force_remove); 380 ret = mdev_device_remove_ops(mdev, force_remove);
405 if (ret) { 381 if (ret) {
406 mutex_unlock(&parent->lock); 382 mdev->active = true;
407
408 mutex_lock(&mdev_list_lock);
409 list_add(&mdev->next, &mdev_list);
410 mutex_unlock(&mdev_list_lock);
411
412 return ret; 383 return ret;
413 } 384 }
414 385
415 mdev_remove_sysfs_files(dev, type); 386 mdev_remove_sysfs_files(dev, type);
416 device_unregister(dev); 387 device_unregister(dev);
417 mutex_unlock(&parent->lock);
418 mdev_put_parent(parent); 388 mdev_put_parent(parent);
419 389
420 return 0; 390 return 0;
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index a9cefd70a705..b5819b7d7ef7 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -20,7 +20,6 @@ struct mdev_parent {
20 struct device *dev; 20 struct device *dev;
21 const struct mdev_parent_ops *ops; 21 const struct mdev_parent_ops *ops;
22 struct kref ref; 22 struct kref ref;
23 struct mutex lock;
24 struct list_head next; 23 struct list_head next;
25 struct kset *mdev_types_kset; 24 struct kset *mdev_types_kset;
26 struct list_head type_list; 25 struct list_head type_list;
@@ -34,6 +33,7 @@ struct mdev_device {
34 struct kref ref; 33 struct kref ref;
35 struct list_head next; 34 struct list_head next;
36 struct kobject *type_kobj; 35 struct kobject *type_kobj;
36 bool active;
37}; 37};
38 38
39#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) 39#define to_mdev_device(dev) container_of(dev, struct mdev_device, dev)