 drivers/gpu/drm/i915/i915_gem.c | 59 ++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ac73dd1b422..49c5a1798ac4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -171,6 +171,36 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+/*
+ * Try to write quickly with an atomic kmap. Return true on success.
+ *
+ * If this fails (which includes a partial write), we'll redo the whole
+ * thing with the slow version.
+ *
+ * This is a workaround for the low performance of iounmap (approximately
+ * 10% cpu cost on normal 3D workloads). kmap_atomic on HIGHMEM kernels
+ * happens to let us map card memory without taking IPIs. When the vmap
+ * rework lands we should be able to dump this hack.
+ */
+static inline int fast_user_write(unsigned long pfn, char __user *user_data, int l, int o)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long unwritten;
+	char *vaddr_atomic;
+
+	vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
+#if WATCH_PWRITE
+	DRM_INFO("pwrite o %d l %d pfn %ld vaddr %p\n",
+		 o, l, pfn, vaddr_atomic);
+#endif
+	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l);
+	kunmap_atomic(vaddr_atomic, KM_USER0);
+	return !unwritten;
+#else
+	return 0;
+#endif
+}
+
 static int
 i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 		    struct drm_i915_gem_pwrite *args,
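The comment on fast_user_write() above describes a try-fast-then-fall-back scheme: the atomic kmap either copies the whole chunk or reports failure, and any failure (including a partial copy) is redone through ioremap_wc(). A minimal sketch of the calling pattern, assuming the helper above and the pfn/user_data/l/o variables from the pwrite loop; the error handling shown is illustrative, not quoted from this patch:

	/* Sketch only: how the pwrite loop consumes fast_user_write().
	 * On !CONFIG_HIGHMEM builds the helper always returns 0, so
	 * every chunk takes the ioremap_wc() fallback.
	 */
	if (!fast_user_write(pfn, user_data, l, o)) {
		char __iomem *vaddr;
		unsigned long unwritten;

		vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
		if (vaddr == NULL)
			return -EFAULT;		/* illustrative */
		unwritten = __copy_from_user(vaddr + o, user_data, l);
		iounmap(vaddr);
		if (unwritten)
			return -EFAULT;		/* illustrative */
	}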
@@ -180,12 +210,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	ssize_t remain;
 	loff_t offset;
 	char __user *user_data;
-	char __iomem *vaddr;
-	char *vaddr_atomic;
-	int i, o, l;
 	int ret = 0;
-	unsigned long pfn;
-	unsigned long unwritten;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -209,6 +234,9 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	obj_priv->dirty = 1;
 
 	while (remain > 0) {
+		unsigned long pfn;
+		int i, o, l;
+
 		/* Operation in this page
 		 *
 		 * i = page number
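For readers of just this hunk: the i/o/l comment refers to per-page arithmetic that sits in the elided context lines between this hunk and the next. A hedged sketch of what those three quantities work out to (reconstructed, not quoted from the patch):

	/* Split the linear GTT offset into a page number, an offset
	 * within that page, and the byte count that still fits in it.
	 */
	i = offset >> PAGE_SHIFT;
	o = offset & (PAGE_SIZE - 1);
	l = remain;
	if ((o + l) > PAGE_SIZE)
		l = PAGE_SIZE - o;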
@@ -223,25 +251,10 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
 		pfn = (dev->agp->base >> PAGE_SHIFT) + i;
 
-#ifdef CONFIG_HIGHMEM
-		/* This is a workaround for the low performance of iounmap
-		 * (approximate 10% cpu cost on normal 3D workloads).
-		 * kmap_atomic on HIGHMEM kernels happens to let us map card
-		 * memory without taking IPIs. When the vmap rework lands
-		 * we should be able to dump this hack.
-		 */
-		vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
-#if WATCH_PWRITE
-		DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
-			 i, o, l, pfn, vaddr_atomic);
-#endif
-		unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o,
-							      user_data, l);
-		kunmap_atomic(vaddr_atomic, KM_USER0);
-
-		if (unwritten)
-#endif /* CONFIG_HIGHMEM */
-		{
+		if (!fast_user_write(pfn, user_data, l, o)) {
+			unsigned long unwritten;
+			char __iomem *vaddr;
+
 			vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
 #if WATCH_PWRITE
 			DRM_INFO("pwrite slow i %d o %d l %d "