diff options
author | Tom St Denis <tom.stdenis@amd.com> | 2019-01-07 17:39:10 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2019-01-14 15:04:53 -0500 |
commit | 22d6575b8db59097655797c740bf840a616a6816 (patch) | |
tree | a8d7ac8678e9940f2ebfe9d00268e83be1cc3c36 | |
parent | 4b9674e509ea4365b68c5e309c402ef6544d567a (diff) |
drm/amd/amdgpu: add missing mutex lock to amdgpu_get_xgmi_hive() (v3)
v2: Move locks around in other functions so that this
function can stand on its own. Also hold only the
hive-specific lock for add/remove device instead of the
driver-global lock, so that devices cannot be added to or
removed from one hive in parallel.
v3: add reset_lock
Acked-by: Shaoyun.liu <Shaoyun.liu@amd.com>
Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 40 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 5 |
3 files changed, 32 insertions, 19 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 39d5d058b2c7..1a558dc41ba6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
@@ -3525,9 +3525,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, | |||
3525 | * by different nodes. No point also since the one node already executing | 3525 | * by different nodes. No point also since the one node already executing |
3526 | * reset will also reset all the other nodes in the hive. | 3526 | * reset will also reset all the other nodes in the hive. |
3527 | */ | 3527 | */ |
3528 | hive = amdgpu_get_xgmi_hive(adev); | 3528 | hive = amdgpu_get_xgmi_hive(adev, 0); |
3529 | if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && | 3529 | if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && |
3530 | !mutex_trylock(&hive->hive_lock)) | 3530 | !mutex_trylock(&hive->reset_lock)) |
3531 | return 0; | 3531 | return 0; |
3532 | 3532 | ||
3533 | /* Start with adev pre asic reset first for soft reset check.*/ | 3533 | /* Start with adev pre asic reset first for soft reset check.*/ |
@@ -3606,7 +3606,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ | |||
3606 | } | 3606 | } |
3607 | 3607 | ||
3608 | if (hive && adev->gmc.xgmi.num_physical_nodes > 1) | 3608 | if (hive && adev->gmc.xgmi.num_physical_nodes > 1) |
3609 | mutex_unlock(&hive->hive_lock); | 3609 | mutex_unlock(&hive->reset_lock); |
3610 | 3610 | ||
3611 | if (r) | 3611 | if (r) |
3612 | dev_info(adev->dev, "GPU reset end with ret = %d\n", r); | 3612 | dev_info(adev->dev, "GPU reset end with ret = %d\n", r); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index ac57a8767283..dac187454b33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | |||
@@ -40,26 +40,40 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) | |||
40 | return &hive->device_list; | 40 | return &hive->device_list; |
41 | } | 41 | } |
42 | 42 | ||
43 | struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) | 43 | struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock) |
44 | { | 44 | { |
45 | int i; | 45 | int i; |
46 | struct amdgpu_hive_info *tmp; | 46 | struct amdgpu_hive_info *tmp; |
47 | 47 | ||
48 | if (!adev->gmc.xgmi.hive_id) | 48 | if (!adev->gmc.xgmi.hive_id) |
49 | return NULL; | 49 | return NULL; |
50 | |||
51 | mutex_lock(&xgmi_mutex); | ||
52 | |||
50 | for (i = 0 ; i < hive_count; ++i) { | 53 | for (i = 0 ; i < hive_count; ++i) { |
51 | tmp = &xgmi_hives[i]; | 54 | tmp = &xgmi_hives[i]; |
52 | if (tmp->hive_id == adev->gmc.xgmi.hive_id) | 55 | if (tmp->hive_id == adev->gmc.xgmi.hive_id) { |
56 | if (lock) | ||
57 | mutex_lock(&tmp->hive_lock); | ||
58 | mutex_unlock(&xgmi_mutex); | ||
53 | return tmp; | 59 | return tmp; |
60 | } | ||
54 | } | 61 | } |
55 | if (i >= AMDGPU_MAX_XGMI_HIVE) | 62 | if (i >= AMDGPU_MAX_XGMI_HIVE) { |
63 | mutex_unlock(&xgmi_mutex); | ||
56 | return NULL; | 64 | return NULL; |
65 | } | ||
57 | 66 | ||
58 | /* initialize new hive if not exist */ | 67 | /* initialize new hive if not exist */ |
59 | tmp = &xgmi_hives[hive_count++]; | 68 | tmp = &xgmi_hives[hive_count++]; |
60 | tmp->hive_id = adev->gmc.xgmi.hive_id; | 69 | tmp->hive_id = adev->gmc.xgmi.hive_id; |
61 | INIT_LIST_HEAD(&tmp->device_list); | 70 | INIT_LIST_HEAD(&tmp->device_list); |
62 | mutex_init(&tmp->hive_lock); | 71 | mutex_init(&tmp->hive_lock); |
72 | mutex_init(&tmp->reset_lock); | ||
73 | if (lock) | ||
74 | mutex_lock(&tmp->hive_lock); | ||
75 | |||
76 | mutex_unlock(&xgmi_mutex); | ||
63 | 77 | ||
64 | return tmp; | 78 | return tmp; |
65 | } | 79 | } |
@@ -111,8 +125,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) | |||
111 | return ret; | 125 | return ret; |
112 | } | 126 | } |
113 | 127 | ||
114 | mutex_lock(&xgmi_mutex); | 128 | hive = amdgpu_get_xgmi_hive(adev, 1); |
115 | hive = amdgpu_get_xgmi_hive(adev); | ||
116 | if (!hive) { | 129 | if (!hive) { |
117 | ret = -EINVAL; | 130 | ret = -EINVAL; |
118 | dev_err(adev->dev, | 131 | dev_err(adev->dev, |
@@ -147,8 +160,8 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) | |||
147 | break; | 160 | break; |
148 | } | 161 | } |
149 | 162 | ||
163 | mutex_unlock(&hive->hive_lock); | ||
150 | exit: | 164 | exit: |
151 | mutex_unlock(&xgmi_mutex); | ||
152 | return ret; | 165 | return ret; |
153 | } | 166 | } |
154 | 167 | ||
@@ -159,15 +172,14 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) | |||
159 | if (!adev->gmc.xgmi.supported) | 172 | if (!adev->gmc.xgmi.supported) |
160 | return; | 173 | return; |
161 | 174 | ||
162 | mutex_lock(&xgmi_mutex); | 175 | hive = amdgpu_get_xgmi_hive(adev, 1); |
163 | |||
164 | hive = amdgpu_get_xgmi_hive(adev); | ||
165 | if (!hive) | 176 | if (!hive) |
166 | goto exit; | 177 | return; |
167 | 178 | ||
168 | if (!(hive->number_devices--)) | 179 | if (!(hive->number_devices--)) { |
169 | mutex_destroy(&hive->hive_lock); | 180 | mutex_destroy(&hive->hive_lock); |
170 | 181 | mutex_destroy(&hive->reset_lock); | |
171 | exit: | 182 | } else { |
172 | mutex_unlock(&xgmi_mutex); | 183 | mutex_unlock(&hive->hive_lock); |
184 | } | ||
173 | } | 185 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 6151eb9c8ad3..14bc60664159 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | |||
@@ -29,10 +29,11 @@ struct amdgpu_hive_info { | |||
29 | struct list_head device_list; | 29 | struct list_head device_list; |
30 | struct psp_xgmi_topology_info topology_info; | 30 | struct psp_xgmi_topology_info topology_info; |
31 | int number_devices; | 31 | int number_devices; |
32 | struct mutex hive_lock; | 32 | struct mutex hive_lock, |
33 | reset_lock; | ||
33 | }; | 34 | }; |
34 | 35 | ||
35 | struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); | 36 | struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock); |
36 | int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); | 37 | int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); |
37 | int amdgpu_xgmi_add_device(struct amdgpu_device *adev); | 38 | int amdgpu_xgmi_add_device(struct amdgpu_device *adev); |
38 | void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); | 39 | void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); |