-rw-r--r--  drivers/gpu/nvgpu/clk/clk_arb.c | 80
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_fifo.c | 8
-rw-r--r--  drivers/gpu/nvgpu/common/linux/dma.c | 4
-rw-r--r--  drivers/gpu/nvgpu/common/linux/module.c | 10
-rw-r--r--  drivers/gpu/nvgpu/common/mm/lockless_allocator.c | 13
-rw-r--r--  drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h | 4
-rw-r--r--  drivers/gpu/nvgpu/common/semaphore.c | 4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 39
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 14
-rw-r--r--  drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c | 4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h | 5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 3
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 2
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/atomic.h | 98
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h | 137
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 12
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 6
24 files changed, 355 insertions(+), 120 deletions(-)
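
This change converts nvgpu's remaining direct uses of the Linux atomic_t/atomic64_t types to the new nvgpu_atomic_t/nvgpu_atomic64_t wrappers declared in include/nvgpu/atomic.h. A minimal sketch of the conversion pattern applied across the files below (the struct and function names here are illustrative, not taken from the patch):

#include <nvgpu/atomic.h>

struct example_ctx {
        nvgpu_atomic_t ref_count;               /* was: atomic_t ref_count; */
};

static void example_get(struct example_ctx *ctx)
{
        nvgpu_atomic_inc(&ctx->ref_count);      /* was: atomic_inc(...) */
}

static bool example_put(struct example_ctx *ctx)
{
        /* was: atomic_dec_and_test(...) */
        return nvgpu_atomic_dec_and_test(&ctx->ref_count);
}
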
diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c
index 3caa5409..b00ecd31 100644
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -108,8 +108,8 @@ struct nvgpu_clk_notification {
108 108
109struct nvgpu_clk_notification_queue { 109struct nvgpu_clk_notification_queue {
110 u32 size; 110 u32 size;
111 atomic_t head; 111 nvgpu_atomic_t head;
112 atomic_t tail; 112 nvgpu_atomic_t tail;
113 struct nvgpu_clk_notification *notifications; 113 struct nvgpu_clk_notification *notifications;
114}; 114};
115 115
@@ -183,13 +183,13 @@ struct nvgpu_clk_arb {
183 u32 vf_table_index; 183 u32 vf_table_index;
184 184
185 u16 *mclk_f_points; 185 u16 *mclk_f_points;
186 atomic_t req_nr; 186 nvgpu_atomic_t req_nr;
187 187
188 u32 mclk_f_numpoints; 188 u32 mclk_f_numpoints;
189 u16 *gpc2clk_f_points; 189 u16 *gpc2clk_f_points;
190 u32 gpc2clk_f_numpoints; 190 u32 gpc2clk_f_numpoints;
191 191
192 atomic64_t alarm_mask; 192 nvgpu_atomic64_t alarm_mask;
193 struct nvgpu_clk_notification_queue notification_queue; 193 struct nvgpu_clk_notification_queue notification_queue;
194 194
195#ifdef CONFIG_DEBUG_FS 195#ifdef CONFIG_DEBUG_FS
@@ -206,11 +206,11 @@ struct nvgpu_clk_dev {
206 struct llist_node node; 206 struct llist_node node;
207 }; 207 };
208 wait_queue_head_t readout_wq; 208 wait_queue_head_t readout_wq;
209 atomic_t poll_mask; 209 nvgpu_atomic_t poll_mask;
210 u16 gpc2clk_target_mhz; 210 u16 gpc2clk_target_mhz;
211 u16 mclk_target_mhz; 211 u16 mclk_target_mhz;
212 u32 alarms_reported; 212 u32 alarms_reported;
213 atomic_t enabled_mask; 213 nvgpu_atomic_t enabled_mask;
214 struct nvgpu_clk_notification_queue queue; 214 struct nvgpu_clk_notification_queue queue;
215 u32 arb_queue_head; 215 u32 arb_queue_head;
216 struct kref refcount; 216 struct kref refcount;
@@ -253,8 +253,8 @@ static int nvgpu_clk_notification_queue_alloc(struct gk20a *g,
253 return -ENOMEM; 253 return -ENOMEM;
254 queue->size = events_number; 254 queue->size = events_number;
255 255
256 atomic_set(&queue->head, 0); 256 nvgpu_atomic_set(&queue->head, 0);
257 atomic_set(&queue->tail, 0); 257 nvgpu_atomic_set(&queue->tail, 0);
258 258
259 return 0; 259 return 0;
260} 260}
@@ -263,8 +263,8 @@ static void nvgpu_clk_notification_queue_free(struct gk20a *g,
263 struct nvgpu_clk_notification_queue *queue) { 263 struct nvgpu_clk_notification_queue *queue) {
264 nvgpu_kfree(g, queue->notifications); 264 nvgpu_kfree(g, queue->notifications);
265 queue->size = 0; 265 queue->size = 0;
266 atomic_set(&queue->head, 0); 266 nvgpu_atomic_set(&queue->head, 0);
267 atomic_set(&queue->tail, 0); 267 nvgpu_atomic_set(&queue->tail, 0);
268} 268}
269 269
270int nvgpu_clk_arb_init_arbiter(struct gk20a *g) 270int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
@@ -346,9 +346,9 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
346 346
347 arb->actual = &arb->actual_pool[0]; 347 arb->actual = &arb->actual_pool[0];
348 348
349 atomic_set(&arb->req_nr, 0); 349 nvgpu_atomic_set(&arb->req_nr, 0);
350 350
351 atomic64_set(&arb->alarm_mask, 0); 351 nvgpu_atomic64_set(&arb->alarm_mask, 0);
352 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, 352 err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue,
353 DEFAULT_EVENT_NUMBER); 353 DEFAULT_EVENT_NUMBER);
354 if (err < 0) 354 if (err < 0)
@@ -388,8 +388,8 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
388 /* Check that first run is completed */ 388 /* Check that first run is completed */
389 smp_mb(); 389 smp_mb();
390 wait_event_interruptible(arb->request_wq, 390 wait_event_interruptible(arb->request_wq,
391 atomic_read(&arb->req_nr)); 391 nvgpu_atomic_read(&arb->req_nr));
392 } while (!atomic_read(&arb->req_nr)); 392 } while (!nvgpu_atomic_read(&arb->req_nr));
393 393
394 394
395 return arb->status; 395 return arb->status;
@@ -430,7 +430,7 @@ static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
430 u64 new_mask; 430 u64 new_mask;
431 431
432 do { 432 do {
433 current_mask = atomic64_read(&arb->alarm_mask); 433 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
434 /* atomic operations are strong so they do not need masks */ 434 /* atomic operations are strong so they do not need masks */
435 435
436 refcnt = ((u32) (current_mask >> 32)) + 1; 436 refcnt = ((u32) (current_mask >> 32)) + 1;
@@ -438,7 +438,7 @@ static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
438 new_mask = ((u64) refcnt << 32) | alarm_mask; 438 new_mask = ((u64) refcnt << 32) | alarm_mask;
439 439
440 } while (unlikely(current_mask != 440 } while (unlikely(current_mask !=
441 (u64)atomic64_cmpxchg(&arb->alarm_mask, 441 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
442 current_mask, new_mask))); 442 current_mask, new_mask)));
443} 443}
444 444
@@ -452,7 +452,7 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
452 u64 new_mask; 452 u64 new_mask;
453 453
454 do { 454 do {
455 current_mask = atomic64_read(&arb->alarm_mask); 455 current_mask = nvgpu_atomic64_read(&arb->alarm_mask);
456 /* atomic operations are strong so they do not need masks */ 456 /* atomic operations are strong so they do not need masks */
457 457
458 refcnt = ((u32) (current_mask >> 32)) + 1; 458 refcnt = ((u32) (current_mask >> 32)) + 1;
@@ -460,7 +460,7 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm)
460 new_mask = ((u64) refcnt << 32) | alarm_mask; 460 new_mask = ((u64) refcnt << 32) | alarm_mask;
461 461
462 } while (unlikely(current_mask != 462 } while (unlikely(current_mask !=
463 (u64)atomic64_cmpxchg(&arb->alarm_mask, 463 (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask,
464 current_mask, new_mask))); 464 current_mask, new_mask)));
465 465
466 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm); 466 nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm);
@@ -537,7 +537,7 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g,
537 537
538 init_waitqueue_head(&dev->readout_wq); 538 init_waitqueue_head(&dev->readout_wq);
539 539
540 atomic_set(&dev->poll_mask, 0); 540 nvgpu_atomic_set(&dev->poll_mask, 0);
541 541
542 dev->session = session; 542 dev->session = session;
543 kref_init(&dev->refcount); 543 kref_init(&dev->refcount);
@@ -657,11 +657,11 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
657 * updated 657 * updated
658 */ 658 */
659 if (alarm_mask) 659 if (alarm_mask)
660 atomic_set(&dev->enabled_mask, alarm_mask); 660 nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
661 else 661 else
662 atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); 662 nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
663 663
664 dev->arb_queue_head = atomic_read(&arb->notification_queue.head); 664 dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
665 665
666 nvgpu_spinlock_acquire(&arb->users_lock); 666 nvgpu_spinlock_acquire(&arb->users_lock);
667 list_add_tail_rcu(&dev->link, &arb->users); 667 list_add_tail_rcu(&dev->link, &arb->users);
@@ -1056,7 +1056,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1056 gk20a_dbg_fn(""); 1056 gk20a_dbg_fn("");
1057 1057
1058 /* bail out if gpu is down */ 1058 /* bail out if gpu is down */
1059 if (atomic_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) 1059 if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST))
1060 goto exit_arb; 1060 goto exit_arb;
1061 1061
1062#ifdef CONFIG_DEBUG_FS 1062#ifdef CONFIG_DEBUG_FS
@@ -1247,7 +1247,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
1247 1247
1248 /* status must be visible before atomic inc */ 1248 /* status must be visible before atomic inc */
1249 smp_wmb(); 1249 smp_wmb();
1250 atomic_inc(&arb->req_nr); 1250 nvgpu_atomic_inc(&arb->req_nr);
1251 1251
1252 /* Unlock pstate change for PG */ 1252 /* Unlock pstate change for PG */
1253 nvgpu_mutex_release(&arb->pstate_lock); 1253 nvgpu_mutex_release(&arb->pstate_lock);
@@ -1298,17 +1298,17 @@ exit_arb:
1298 EVENT(ALARM_CLOCK_ARBITER_FAILED)); 1298 EVENT(ALARM_CLOCK_ARBITER_FAILED));
1299 } 1299 }
1300 1300
1301 current_alarm = (u32) atomic64_read(&arb->alarm_mask); 1301 current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask);
1302 /* notify completion for all requests */ 1302 /* notify completion for all requests */
1303 head = llist_del_all(&arb->requests); 1303 head = llist_del_all(&arb->requests);
1304 llist_for_each_entry_safe(dev, tmp, head, node) { 1304 llist_for_each_entry_safe(dev, tmp, head, node) {
1305 atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); 1305 nvgpu_atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
1306 wake_up_interruptible(&dev->readout_wq); 1306 wake_up_interruptible(&dev->readout_wq);
1307 kref_put(&dev->refcount, nvgpu_clk_arb_free_fd); 1307 kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
1308 } 1308 }
1309 1309
1310 atomic_set(&arb->notification_queue.head, 1310 nvgpu_atomic_set(&arb->notification_queue.head,
1311 atomic_read(&arb->notification_queue.tail)); 1311 nvgpu_atomic_read(&arb->notification_queue.tail));
1312 /* notify event for all users */ 1312 /* notify event for all users */
1313 rcu_read_lock(); 1313 rcu_read_lock();
1314 list_for_each_entry_rcu(dev, &arb->users, link) { 1314 list_for_each_entry_rcu(dev, &arb->users, link) {
@@ -1329,7 +1329,7 @@ static void nvgpu_clk_arb_queue_notification(struct gk20a *g,
1329 u32 queue_index; 1329 u32 queue_index;
1330 u64 timestamp; 1330 u64 timestamp;
1331 1331
1332 queue_index = (atomic_inc_return(&queue->tail)) % queue->size; 1332 queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size;
1333 /* get current timestamp */ 1333 /* get current timestamp */
1334 timestamp = (u64) sched_clock(); 1334 timestamp = (u64) sched_clock();
1335 1335
@@ -1355,14 +1355,14 @@ static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
1355 size_t size; 1355 size_t size;
1356 int index; 1356 int index;
1357 1357
1358 enabled_mask = atomic_read(&dev->enabled_mask); 1358 enabled_mask = nvgpu_atomic_read(&dev->enabled_mask);
1359 size = arb->notification_queue.size; 1359 size = arb->notification_queue.size;
1360 1360
1361 /* queue global arbiter notifications in buffer */ 1361 /* queue global arbiter notifications in buffer */
1362 do { 1362 do {
1363 tail = atomic_read(&arb->notification_queue.tail); 1363 tail = nvgpu_atomic_read(&arb->notification_queue.tail);
1364 /* copy items to the queue */ 1364 /* copy items to the queue */
1365 queue_index = atomic_read(&dev->queue.tail); 1365 queue_index = nvgpu_atomic_read(&dev->queue.tail);
1366 head = dev->arb_queue_head; 1366 head = dev->arb_queue_head;
1367 head = (tail - head) < arb->notification_queue.size ? 1367 head = (tail - head) < arb->notification_queue.size ?
1368 head : tail - arb->notification_queue.size; 1368 head : tail - arb->notification_queue.size;
@@ -1389,10 +1389,10 @@ static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
1389 1389
1390 queue_alarm_mask |= alarm_detected; 1390 queue_alarm_mask |= alarm_detected;
1391 } 1391 }
1392 } while (unlikely(atomic_read(&arb->notification_queue.tail) != 1392 } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) !=
1393 (int)tail)); 1393 (int)tail));
1394 1394
1395 atomic_set(&dev->queue.tail, queue_index); 1395 nvgpu_atomic_set(&dev->queue.tail, queue_index);
1396 /* update the last notification we processed from global queue */ 1396 /* update the last notification we processed from global queue */
1397 1397
1398 dev->arb_queue_head = tail; 1398 dev->arb_queue_head = tail;
@@ -1429,7 +1429,7 @@ static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev,
1429 } 1429 }
1430 1430
1431 if (poll_mask) { 1431 if (poll_mask) {
1432 atomic_set(&dev->poll_mask, poll_mask); 1432 nvgpu_atomic_set(&dev->poll_mask, poll_mask);
1433 wake_up_interruptible_all(&dev->readout_wq); 1433 wake_up_interruptible_all(&dev->readout_wq);
1434 } 1434 }
1435 1435
@@ -1454,7 +1454,7 @@ static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev,
1454 return -EFAULT; 1454 return -EFAULT;
1455 1455
1456 /* update alarm mask */ 1456 /* update alarm mask */
1457 atomic_set(&dev->enabled_mask, mask); 1457 nvgpu_atomic_set(&dev->enabled_mask, mask);
1458 1458
1459 return 0; 1459 return 0;
1460} 1460}
@@ -1539,8 +1539,8 @@ static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
1539 u32 events = 0; 1539 u32 events = 0;
1540 struct nvgpu_clk_notification *p_notif; 1540 struct nvgpu_clk_notification *p_notif;
1541 1541
1542 tail = atomic_read(&dev->queue.tail); 1542 tail = nvgpu_atomic_read(&dev->queue.tail);
1543 head = atomic_read(&dev->queue.head); 1543 head = nvgpu_atomic_read(&dev->queue.head);
1544 1544
1545 head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size; 1545 head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;
1546 1546
@@ -1550,7 +1550,7 @@ static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
1550 events |= p_notif->notification; 1550 events |= p_notif->notification;
1551 info->event_id = ffs(events) - 1; 1551 info->event_id = ffs(events) - 1;
1552 info->timestamp = p_notif->timestamp; 1552 info->timestamp = p_notif->timestamp;
1553 atomic_set(&dev->queue.head, head); 1553 nvgpu_atomic_set(&dev->queue.head, head);
1554 } 1554 }
1555 1555
1556 return events; 1556 return events;
@@ -1594,7 +1594,7 @@ static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
1594 gk20a_dbg_fn(""); 1594 gk20a_dbg_fn("");
1595 1595
1596 poll_wait(filp, &dev->readout_wq, wait); 1596 poll_wait(filp, &dev->readout_wq, wait);
1597 return atomic_xchg(&dev->poll_mask, 0); 1597 return nvgpu_atomic_xchg(&dev->poll_mask, 0);
1598} 1598}
1599 1599
1600static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, 1600static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
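
The alarm_mask updates above are lock-free read/modify/compare-exchange loops. A stripped-down sketch of that idiom with the new 64-bit wrapper (the bit packing follows the arbiter code, but the names and function are illustrative, not the driver's own):

#include <nvgpu/atomic.h>

static nvgpu_atomic64_t alarm_mask = NVGPU_ATOMIC64_INIT(0);

/* Low 32 bits: alarm bits; high 32 bits: update refcount. */
static void example_set_alarm(u32 alarm)
{
        u64 cur, next;

        do {
                cur = nvgpu_atomic64_read(&alarm_mask);
                next = (((cur >> 32) + 1) << 32) | ((u32)cur | alarm);
        } while ((u64)nvgpu_atomic64_cmpxchg(&alarm_mask, cur, next) != cur);
}
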
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
index a240a138..1763eb7e 100644
--- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -167,7 +167,7 @@ static int gk20a_fifo_profile_enable(void *data, u64 val)
167 } 167 }
168 kref_init(&f->profile.ref); 168 kref_init(&f->profile.ref);
169 } 169 }
170 atomic_set(&f->profile.get, 0); 170 atomic_set(&f->profile.get.atomic_var, 0);
171 f->profile.enabled = true; 171 f->profile.enabled = true;
172 } 172 }
173 } 173 }
@@ -246,7 +246,7 @@ static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
246 return 0; 246 return 0;
247 } 247 }
248 248
249 get = atomic_read(&g->fifo.profile.get); 249 get = atomic_read(&g->fifo.profile.get.atomic_var);
250 250
251 __gk20a_fifo_create_stats(g, percentiles_ioctl, 251 __gk20a_fifo_create_stats(g, percentiles_ioctl,
252 PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); 252 PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
@@ -311,7 +311,7 @@ void gk20a_fifo_debugfs_init(struct gk20a *g)
311 311
312 nvgpu_mutex_init(&g->fifo.profile.lock); 312 nvgpu_mutex_init(&g->fifo.profile.lock);
313 g->fifo.profile.enabled = false; 313 g->fifo.profile.enabled = false;
314 atomic_set(&g->fifo.profile.get, 0); 314 atomic_set(&g->fifo.profile.get.atomic_var, 0);
315 atomic_set(&g->fifo.profile.ref.refcount, 0); 315 atomic_set(&g->fifo.profile.ref.refcount, 0);
316 316
317 debugfs_create_file("enable", 0600, profile_root, g, 317 debugfs_create_file("enable", 0600, profile_root, g,
@@ -342,7 +342,7 @@ struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
342 /* If kref is zero, profiling is not enabled */ 342 /* If kref is zero, profiling is not enabled */
343 if (!kref_get_unless_zero(&f->profile.ref)) 343 if (!kref_get_unless_zero(&f->profile.ref))
344 return NULL; 344 return NULL;
345 index = atomic_inc_return(&f->profile.get); 345 index = atomic_inc_return(&f->profile.get.atomic_var);
346 profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; 346 profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
347 347
348 return profile; 348 return profile;
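
Note the transitional pattern used in this Linux-only file: the field becomes an nvgpu_atomic_t, but the call sites keep the raw Linux atomic_* helpers by reaching into the wrapper's atomic_var member. A minimal sketch of that escape hatch (illustrative names):

#include <nvgpu/atomic.h>

struct example_profile {
        nvgpu_atomic_t get;
};

static int example_next_index(struct example_profile *p)
{
        /* Linux-specific code may still operate on the inner atomic_t. */
        return atomic_inc_return(&p->get.atomic_var);
}
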
diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c
index ea5b2837..2116053d 100644
--- a/drivers/gpu/nvgpu/common/linux/dma.c
+++ b/drivers/gpu/nvgpu/common/linux/dma.c
@@ -197,7 +197,7 @@ int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
197 WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); 197 WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
198 198
199 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); 199 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
200 before_pending = atomic64_read(&g->mm.vidmem.bytes_pending); 200 before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
201 addr = __nvgpu_dma_alloc(vidmem_alloc, at, size); 201 addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
202 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); 202 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
203 if (!addr) { 203 if (!addr) {
@@ -394,7 +394,7 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
394 was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); 394 was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head);
395 nvgpu_list_add_tail(&mem->clear_list_entry, 395 nvgpu_list_add_tail(&mem->clear_list_entry,
396 &g->mm.vidmem.clear_list_head); 396 &g->mm.vidmem.clear_list_head);
397 atomic64_add(mem->size, &g->mm.vidmem.bytes_pending); 397 atomic64_add(mem->size, &g->mm.vidmem.bytes_pending.atomic_var);
398 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); 398 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
399 399
400 if (was_empty) { 400 if (was_empty) {
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index bfbe7a58..f5c6ca1f 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -68,13 +68,13 @@ int gk20a_busy(struct gk20a *g)
68 if (!g) 68 if (!g)
69 return -ENODEV; 69 return -ENODEV;
70 70
71 atomic_inc(&g->usage_count); 71 atomic_inc(&g->usage_count.atomic_var);
72 72
73 down_read(&g->busy_lock); 73 down_read(&g->busy_lock);
74 74
75 if (!gk20a_can_busy(g)) { 75 if (!gk20a_can_busy(g)) {
76 ret = -ENODEV; 76 ret = -ENODEV;
77 atomic_dec(&g->usage_count); 77 atomic_dec(&g->usage_count.atomic_var);
78 goto fail; 78 goto fail;
79 } 79 }
80 80
@@ -87,7 +87,7 @@ int gk20a_busy(struct gk20a *g)
87 /* Mark suspended so runtime pm will retry later */ 87 /* Mark suspended so runtime pm will retry later */
88 pm_runtime_set_suspended(dev); 88 pm_runtime_set_suspended(dev);
89 pm_runtime_put_noidle(dev); 89 pm_runtime_put_noidle(dev);
90 atomic_dec(&g->usage_count); 90 atomic_dec(&g->usage_count.atomic_var);
91 goto fail; 91 goto fail;
92 } 92 }
93 } else { 93 } else {
@@ -97,7 +97,7 @@ int gk20a_busy(struct gk20a *g)
97 vgpu_pm_finalize_poweron(dev) 97 vgpu_pm_finalize_poweron(dev)
98 : gk20a_pm_finalize_poweron(dev); 98 : gk20a_pm_finalize_poweron(dev);
99 if (ret) { 99 if (ret) {
100 atomic_dec(&g->usage_count); 100 atomic_dec(&g->usage_count.atomic_var);
101 nvgpu_mutex_release(&g->poweron_lock); 101 nvgpu_mutex_release(&g->poweron_lock);
102 goto fail; 102 goto fail;
103 } 103 }
@@ -120,7 +120,7 @@ void gk20a_idle(struct gk20a *g)
120{ 120{
121 struct device *dev; 121 struct device *dev;
122 122
123 atomic_dec(&g->usage_count); 123 atomic_dec(&g->usage_count.atomic_var);
124 124
125 dev = dev_from_gk20a(g); 125 dev = dev_from_gk20a(g);
126 126
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
index 2a569efd..eeb86095 100644
--- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c
@@ -65,7 +65,9 @@ static u64 nvgpu_lockless_alloc(struct nvgpu_allocator *a, u64 len)
65 ret = cmpxchg(&pa->head, head, new_head); 65 ret = cmpxchg(&pa->head, head, new_head);
66 if (ret == head) { 66 if (ret == head) {
67 addr = pa->base + head * pa->blk_size; 67 addr = pa->base + head * pa->blk_size;
68 atomic_inc(&pa->nr_allocs); 68 nvgpu_atomic_inc(&pa->nr_allocs);
69 alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head,
70 addr);
69 break; 71 break;
70 } 72 }
71 head = ACCESS_ONCE(pa->head); 73 head = ACCESS_ONCE(pa->head);
@@ -94,7 +96,8 @@ static void nvgpu_lockless_free(struct nvgpu_allocator *a, u64 addr)
94 ACCESS_ONCE(pa->next[cur_idx]) = head; 96 ACCESS_ONCE(pa->next[cur_idx]) = head;
95 ret = cmpxchg(&pa->head, head, cur_idx); 97 ret = cmpxchg(&pa->head, head, cur_idx);
96 if (ret == head) { 98 if (ret == head) {
97 atomic_dec(&pa->nr_allocs); 99 nvgpu_atomic_dec(&pa->nr_allocs);
100 alloc_dbg(a, "Free node # %llu\n", cur_idx);
98 break; 101 break;
99 } 102 }
100 } 103 }
@@ -125,9 +128,9 @@ static void nvgpu_lockless_print_stats(struct nvgpu_allocator *a,
125 /* Actual stats. */ 128 /* Actual stats. */
126 __alloc_pstat(s, a, "Stats:\n"); 129 __alloc_pstat(s, a, "Stats:\n");
127 __alloc_pstat(s, a, " Number allocs = %d\n", 130 __alloc_pstat(s, a, " Number allocs = %d\n",
128 atomic_read(&pa->nr_allocs)); 131 nvgpu_atomic_read(&pa->nr_allocs));
129 __alloc_pstat(s, a, " Number free = %d\n", 132 __alloc_pstat(s, a, " Number free = %d\n",
130 pa->nr_nodes - atomic_read(&pa->nr_allocs)); 133 pa->nr_nodes - nvgpu_atomic_read(&pa->nr_allocs));
131} 134}
132#endif 135#endif
133 136
@@ -193,7 +196,7 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
193 a->blk_size = blk_size; 196 a->blk_size = blk_size;
194 a->nr_nodes = nr_nodes; 197 a->nr_nodes = nr_nodes;
195 a->flags = flags; 198 a->flags = flags;
196 atomic_set(&a->nr_allocs, 0); 199 nvgpu_atomic_set(&a->nr_allocs, 0);
197 200
198 wmb(); 201 wmb();
199 a->inited = true; 202 a->inited = true;
diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h
index 32421ac1..c527bff9 100644
--- a/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h
+++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016 - 2017, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -109,7 +109,7 @@ struct nvgpu_lockless_allocator {
109 bool inited; 109 bool inited;
110 110
111 /* Statistics */ 111 /* Statistics */
112 atomic_t nr_allocs; 112 nvgpu_atomic_t nr_allocs;
113}; 113};
114 114
115static inline struct nvgpu_lockless_allocator *lockless_allocator( 115static inline struct nvgpu_lockless_allocator *lockless_allocator(
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 3e916b9d..ac45aaaa 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -364,7 +364,7 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
364 hw_sema->p = p; 364 hw_sema->p = p;
365 hw_sema->idx = hw_sema_idx; 365 hw_sema->idx = hw_sema_idx;
366 hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx; 366 hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
367 atomic_set(&hw_sema->next_value, 0); 367 nvgpu_atomic_set(&hw_sema->next_value, 0);
368 nvgpu_init_list_node(&hw_sema->hw_sema_list); 368 nvgpu_init_list_node(&hw_sema->hw_sema_list);
369 nvgpu_mem_wr(ch->g, &p->rw_mem, hw_sema->offset, 0); 369 nvgpu_mem_wr(ch->g, &p->rw_mem, hw_sema->offset, 0);
370 370
@@ -425,7 +425,7 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
425 425
426 kref_init(&s->ref); 426 kref_init(&s->ref);
427 s->hw_sema = ch->hw_sema; 427 s->hw_sema = ch->hw_sema;
428 atomic_set(&s->value, 0); 428 nvgpu_atomic_set(&s->value, 0);
429 429
430 /* 430 /*
431 * Take a ref on the pool so that we can keep this pool alive for 431 * Take a ref on the pool so that we can keep this pool alive for
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 62b312b2..d96872f3 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -100,7 +100,7 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
100 ch = nvgpu_list_first_entry(&f->free_chs, channel_gk20a, 100 ch = nvgpu_list_first_entry(&f->free_chs, channel_gk20a,
101 free_chs); 101 free_chs);
102 nvgpu_list_del(&ch->free_chs); 102 nvgpu_list_del(&ch->free_chs);
103 WARN_ON(atomic_read(&ch->ref_count)); 103 WARN_ON(nvgpu_atomic_read(&ch->ref_count));
104 WARN_ON(ch->referenceable); 104 WARN_ON(ch->referenceable);
105 f->used_channels++; 105 f->used_channels++;
106 } 106 }
@@ -394,20 +394,20 @@ void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
394} 394}
395 395
396static void gk20a_wait_until_counter_is_N( 396static void gk20a_wait_until_counter_is_N(
397 struct channel_gk20a *ch, atomic_t *counter, int wait_value, 397 struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value,
398 struct nvgpu_cond *c, const char *caller, const char *counter_name) 398 struct nvgpu_cond *c, const char *caller, const char *counter_name)
399{ 399{
400 while (true) { 400 while (true) {
401 if (NVGPU_COND_WAIT( 401 if (NVGPU_COND_WAIT(
402 c, 402 c,
403 atomic_read(counter) == wait_value, 403 nvgpu_atomic_read(counter) == wait_value,
404 5000) == 0) 404 5000) == 0)
405 break; 405 break;
406 406
407 nvgpu_warn(ch->g, 407 nvgpu_warn(ch->g,
408 "%s: channel %d, still waiting, %s left: %d, waiting for: %d", 408 "%s: channel %d, still waiting, %s left: %d, waiting for: %d",
409 caller, ch->chid, counter_name, 409 caller, ch->chid, counter_name,
410 atomic_read(counter), wait_value); 410 nvgpu_atomic_read(counter), wait_value);
411 411
412 gk20a_channel_dump_ref_actions(ch); 412 gk20a_channel_dump_ref_actions(ch);
413 } 413 }
@@ -491,7 +491,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
491 nvgpu_spinlock_release(&ch->ref_obtain_lock); 491 nvgpu_spinlock_release(&ch->ref_obtain_lock);
492 492
493 /* matches with the initial reference in gk20a_open_new_channel() */ 493 /* matches with the initial reference in gk20a_open_new_channel() */
494 atomic_dec(&ch->ref_count); 494 nvgpu_atomic_dec(&ch->ref_count);
495 495
496 /* wait until no more refs to the channel */ 496 /* wait until no more refs to the channel */
497 if (!force) 497 if (!force)
@@ -635,7 +635,7 @@ static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch)
635 nvgpu_spinlock_acquire(&ch->ref_actions_lock); 635 nvgpu_spinlock_acquire(&ch->ref_actions_lock);
636 636
637 dev_info(dev, "ch %d: refs %d. Actions, most recent last:\n", 637 dev_info(dev, "ch %d: refs %d. Actions, most recent last:\n",
638 ch->chid, atomic_read(&ch->ref_count)); 638 ch->chid, nvgpu_atomic_read(&ch->ref_count));
639 639
640 /* start at the oldest possible entry. put is next insertion point */ 640 /* start at the oldest possible entry. put is next insertion point */
641 get = ch->ref_actions_put; 641 get = ch->ref_actions_put;
@@ -709,7 +709,7 @@ struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
709 709
710 if (likely(ch->referenceable)) { 710 if (likely(ch->referenceable)) {
711 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); 711 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get);
712 atomic_inc(&ch->ref_count); 712 nvgpu_atomic_inc(&ch->ref_count);
713 ret = ch; 713 ret = ch;
714 } else 714 } else
715 ret = NULL; 715 ret = NULL;
@@ -726,17 +726,17 @@ void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
726{ 726{
727 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); 727 gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put);
728 trace_gk20a_channel_put(ch->chid, caller); 728 trace_gk20a_channel_put(ch->chid, caller);
729 atomic_dec(&ch->ref_count); 729 nvgpu_atomic_dec(&ch->ref_count);
730 nvgpu_cond_broadcast(&ch->ref_count_dec_wq); 730 nvgpu_cond_broadcast(&ch->ref_count_dec_wq);
731 731
732 /* More puts than gets. Channel is probably going to get 732 /* More puts than gets. Channel is probably going to get
733 * stuck. */ 733 * stuck. */
734 WARN_ON(atomic_read(&ch->ref_count) < 0); 734 WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0);
735 735
736 /* Also, more puts than gets. ref_count can go to 0 only if 736 /* Also, more puts than gets. ref_count can go to 0 only if
737 * the channel is closing. Channel is probably going to get 737 * the channel is closing. Channel is probably going to get
738 * stuck. */ 738 * stuck. */
739 WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable); 739 WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable);
740} 740}
741 741
742void gk20a_channel_close(struct channel_gk20a *ch) 742void gk20a_channel_close(struct channel_gk20a *ch)
@@ -879,7 +879,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
879 * references. The initial reference will be decreased in 879 * references. The initial reference will be decreased in
880 * gk20a_free_channel() */ 880 * gk20a_free_channel() */
881 ch->referenceable = true; 881 ch->referenceable = true;
882 atomic_set(&ch->ref_count, 1); 882 nvgpu_atomic_set(&ch->ref_count, 1);
883 wmb(); 883 wmb();
884 884
885 return ch; 885 return ch;
@@ -1745,7 +1745,7 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g)
1745 * pair. 1745 * pair.
1746 */ 1746 */
1747 1747
1748 put = atomic_inc_return(&g->channel_worker.put); 1748 put = nvgpu_atomic_inc_return(&g->channel_worker.put);
1749 nvgpu_cond_signal(&g->channel_worker.wq); 1749 nvgpu_cond_signal(&g->channel_worker.wq);
1750 1750
1751 return put; 1751 return put;
@@ -1761,7 +1761,7 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g)
1761 */ 1761 */
1762static bool __gk20a_channel_worker_pending(struct gk20a *g, int get) 1762static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
1763{ 1763{
1764 bool pending = atomic_read(&g->channel_worker.put) != get; 1764 bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get;
1765 1765
1766 /* 1766 /*
1767 * This would be the place for a rmb() pairing a wmb() for a wakeup 1767 * This would be the place for a rmb() pairing a wmb() for a wakeup
@@ -1864,7 +1864,7 @@ int nvgpu_channel_worker_init(struct gk20a *g)
1864 int err; 1864 int err;
1865 char thread_name[64]; 1865 char thread_name[64];
1866 1866
1867 atomic_set(&g->channel_worker.put, 0); 1867 nvgpu_atomic_set(&g->channel_worker.put, 0);
1868 nvgpu_cond_init(&g->channel_worker.wq); 1868 nvgpu_cond_init(&g->channel_worker.wq);
1869 nvgpu_init_list_node(&g->channel_worker.items); 1869 nvgpu_init_list_node(&g->channel_worker.items);
1870 nvgpu_spinlock_init(&g->channel_worker.items_lock); 1870 nvgpu_spinlock_init(&g->channel_worker.items_lock);
@@ -2086,7 +2086,8 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
2086 2086
2087 if (g->aggressive_sync_destroy_thresh) { 2087 if (g->aggressive_sync_destroy_thresh) {
2088 nvgpu_mutex_acquire(&c->sync_lock); 2088 nvgpu_mutex_acquire(&c->sync_lock);
2089 if (atomic_dec_and_test(&c->sync->refcount) && 2089 if (nvgpu_atomic_dec_and_test(
2090 &c->sync->refcount) &&
2090 g->aggressive_sync_destroy) { 2091 g->aggressive_sync_destroy) {
2091 gk20a_channel_sync_destroy(c->sync); 2092 gk20a_channel_sync_destroy(c->sync);
2092 c->sync = NULL; 2093 c->sync = NULL;
@@ -2321,7 +2322,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2321 } 2322 }
2322 new_sync_created = true; 2323 new_sync_created = true;
2323 } 2324 }
2324 atomic_inc(&c->sync->refcount); 2325 nvgpu_atomic_inc(&c->sync->refcount);
2325 nvgpu_mutex_release(&c->sync_lock); 2326 nvgpu_mutex_release(&c->sync_lock);
2326 } 2327 }
2327 2328
@@ -2774,9 +2775,9 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
2774 2775
2775 c->g = NULL; 2776 c->g = NULL;
2776 c->chid = chid; 2777 c->chid = chid;
2777 atomic_set(&c->bound, false); 2778 nvgpu_atomic_set(&c->bound, false);
2778 nvgpu_spinlock_init(&c->ref_obtain_lock); 2779 nvgpu_spinlock_init(&c->ref_obtain_lock);
2779 atomic_set(&c->ref_count, 0); 2780 nvgpu_atomic_set(&c->ref_count, 0);
2780 c->referenceable = false; 2781 c->referenceable = false;
2781 nvgpu_cond_init(&c->ref_count_dec_wq); 2782 nvgpu_cond_init(&c->ref_count_dec_wq);
2782 2783
@@ -2935,7 +2936,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
2935 for (chid = 0; chid < f->num_channels; chid++) { 2936 for (chid = 0; chid < f->num_channels; chid++) {
2936 struct channel_gk20a *c = g->fifo.channel+chid; 2937 struct channel_gk20a *c = g->fifo.channel+chid;
2937 if (gk20a_channel_get(c)) { 2938 if (gk20a_channel_get(c)) {
2938 if (atomic_read(&c->bound)) { 2939 if (nvgpu_atomic_read(&c->bound)) {
2939 nvgpu_cond_broadcast_interruptible( 2940 nvgpu_cond_broadcast_interruptible(
2940 &c->semaphore_wq); 2941 &c->semaphore_wq);
2941 if (post_events) { 2942 if (post_events) {
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index a9ccd93f..f022e630 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -24,6 +24,7 @@
24#include <nvgpu/lock.h> 24#include <nvgpu/lock.h>
25#include <nvgpu/timers.h> 25#include <nvgpu/timers.h>
26#include <nvgpu/cond.h> 26#include <nvgpu/cond.h>
27#include <nvgpu/atomic.h>
27 28
28struct gk20a; 29struct gk20a;
29struct gr_gk20a; 30struct gr_gk20a;
@@ -173,7 +174,7 @@ struct channel_gk20a {
173 174
174 struct nvgpu_spinlock ref_obtain_lock; 175 struct nvgpu_spinlock ref_obtain_lock;
175 bool referenceable; 176 bool referenceable;
176 atomic_t ref_count; 177 nvgpu_atomic_t ref_count;
177 struct nvgpu_cond ref_count_dec_wq; 178 struct nvgpu_cond ref_count_dec_wq;
178#if GK20A_CHANNEL_REFCOUNT_TRACKING 179#if GK20A_CHANNEL_REFCOUNT_TRACKING
179 /* 180 /*
@@ -191,7 +192,7 @@ struct channel_gk20a {
191 192
192 int chid; 193 int chid;
193 bool wdt_enabled; 194 bool wdt_enabled;
194 atomic_t bound; 195 nvgpu_atomic_t bound;
195 bool first_init; 196 bool first_init;
196 bool vpr; 197 bool vpr;
197 bool deterministic; 198 bool deterministic;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index c9c03d37..aa340ba6 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -346,7 +346,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
346 346
347 nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id); 347 nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);
348 348
349 atomic_set(&sp->ops.refcount, 0); 349 nvgpu_atomic_set(&sp->ops.refcount, 0);
350 sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt; 350 sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
351 sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd; 351 sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
352 sp->ops.incr = gk20a_channel_syncpt_incr; 352 sp->ops.incr = gk20a_channel_syncpt_incr;
@@ -619,7 +619,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
619 return err; 619 return err;
620 620
621 nvgpu_semaphore_get(sema); 621 nvgpu_semaphore_get(sema);
622 BUG_ON(!atomic_read(&sema->value)); 622 BUG_ON(!nvgpu_atomic_read(&sema->value));
623 add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false); 623 add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);
624 624
625 /* 625 /*
@@ -922,7 +922,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
922 return NULL; 922 return NULL;
923 } 923 }
924#endif 924#endif
925 atomic_set(&sema->ops.refcount, 0); 925 nvgpu_atomic_set(&sema->ops.refcount, 0);
926 sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt; 926 sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
927 sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd; 927 sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
928 sema->ops.incr = gk20a_channel_semaphore_incr; 928 sema->ops.incr = gk20a_channel_semaphore_incr;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 4efd1b76..9bdc5d12 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -25,7 +25,7 @@ struct gk20a_fence;
25struct gk20a; 25struct gk20a;
26 26
27struct gk20a_channel_sync { 27struct gk20a_channel_sync {
28 atomic_t refcount; 28 nvgpu_atomic_t refcount;
29 29
30 /* Generate a gpu wait cmdbuf from syncpoint. 30 /* Generate a gpu wait cmdbuf from syncpoint.
31 * Returns 31 * Returns
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index cc05ceff..546917f1 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -47,7 +47,7 @@ struct gk20a_ctxsw_dev {
47 size_t size; 47 size_t size;
48 u32 num_ents; 48 u32 num_ents;
49 49
50 atomic_t vma_ref; 50 nvgpu_atomic_t vma_ref;
51 51
52 struct nvgpu_mutex write_lock; 52 struct nvgpu_mutex write_lock;
53}; 53};
@@ -152,7 +152,7 @@ static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
152 void *buf; 152 void *buf;
153 int err; 153 int err;
154 154
155 if ((dev->write_enabled) || (atomic_read(&dev->vma_ref))) 155 if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
156 return -EBUSY; 156 return -EBUSY;
157 157
158 err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); 158 err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
@@ -438,18 +438,18 @@ static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
438{ 438{
439 struct gk20a_ctxsw_dev *dev = vma->vm_private_data; 439 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
440 440
441 atomic_inc(&dev->vma_ref); 441 nvgpu_atomic_inc(&dev->vma_ref);
442 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", 442 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
443 atomic_read(&dev->vma_ref)); 443 nvgpu_atomic_read(&dev->vma_ref));
444} 444}
445 445
446static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) 446static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
447{ 447{
448 struct gk20a_ctxsw_dev *dev = vma->vm_private_data; 448 struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
449 449
450 atomic_dec(&dev->vma_ref); 450 nvgpu_atomic_dec(&dev->vma_ref);
451 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", 451 gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
452 atomic_read(&dev->vma_ref)); 452 nvgpu_atomic_read(&dev->vma_ref));
453} 453}
454 454
455static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { 455static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
@@ -497,7 +497,7 @@ static int gk20a_ctxsw_init_devs(struct gk20a *g)
497 err = nvgpu_mutex_init(&dev->write_lock); 497 err = nvgpu_mutex_init(&dev->write_lock);
498 if (err) 498 if (err)
499 return err; 499 return err;
500 atomic_set(&dev->vma_ref, 0); 500 nvgpu_atomic_set(&dev->vma_ref, 0);
501 dev++; 501 dev++;
502 } 502 }
503 return 0; 503 return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 1572ff48..00050850 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -26,6 +26,7 @@
26#include <nvgpu/kmem.h> 26#include <nvgpu/kmem.h>
27#include <nvgpu/log.h> 27#include <nvgpu/log.h>
28#include <nvgpu/vm.h> 28#include <nvgpu/vm.h>
29#include <nvgpu/atomic.h>
29 30
30#include "gk20a.h" 31#include "gk20a.h"
31#include "gk20a/platform_gk20a.h" 32#include "gk20a/platform_gk20a.h"
@@ -74,10 +75,10 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
74} 75}
75 76
76/* silly allocator - just increment id */ 77/* silly allocator - just increment id */
77static atomic_t unique_id = ATOMIC_INIT(0); 78static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0);
78static int generate_unique_id(void) 79static int generate_unique_id(void)
79{ 80{
80 return atomic_add_return(1, &unique_id); 81 return nvgpu_atomic_add_return(1, &unique_id);
81} 82}
82 83
83static int alloc_session(struct gk20a *g, struct dbg_session_gk20a **_dbg_s) 84static int alloc_session(struct gk20a *g, struct dbg_session_gk20a **_dbg_s)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index abd455d7..47e7d82e 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3439,7 +3439,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
3439 gk20a_debug_output(o, "SEMA STATE: value: 0x%08x " 3439 gk20a_debug_output(o, "SEMA STATE: value: 0x%08x "
3440 "next_val: 0x%08x addr: 0x%010llx\n", 3440 "next_val: 0x%08x addr: 0x%010llx\n",
3441 __nvgpu_semaphore_read(hw_sema), 3441 __nvgpu_semaphore_read(hw_sema),
3442 atomic_read(&hw_sema->next_value), 3442 nvgpu_atomic_read(&hw_sema->next_value),
3443 nvgpu_hw_sema_addr(hw_sema)); 3443 nvgpu_hw_sema_addr(hw_sema));
3444 3444
3445#ifdef CONFIG_TEGRA_GK20A_NVHOST 3445#ifdef CONFIG_TEGRA_GK20A_NVHOST
@@ -3489,7 +3489,7 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
3489 continue; 3489 continue;
3490 3490
3491 ch_state[chid]->pid = ch->pid; 3491 ch_state[chid]->pid = ch->pid;
3492 ch_state[chid]->refs = atomic_read(&ch->ref_count); 3492 ch_state[chid]->refs = nvgpu_atomic_read(&ch->ref_count);
3493 ch_state[chid]->deterministic = ch->deterministic; 3493 ch_state[chid]->deterministic = ch->deterministic;
3494 nvgpu_mem_rd_n(g, &ch->inst_block, 0, 3494 nvgpu_mem_rd_n(g, &ch->inst_block, 0,
3495 &ch_state[chid]->inst_block[0], 3495 &ch_state[chid]->inst_block[0],
@@ -3591,7 +3591,7 @@ void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a)
3591 3591
3592 gk20a_dbg_fn(""); 3592 gk20a_dbg_fn("");
3593 3593
3594 if (atomic_cmpxchg(&ch_gk20a->bound, true, false)) { 3594 if (nvgpu_atomic_cmpxchg(&ch_gk20a->bound, true, false)) {
3595 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->chid), 3595 gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->chid),
3596 ccsr_channel_inst_ptr_f(0) | 3596 ccsr_channel_inst_ptr_f(0) |
3597 ccsr_channel_inst_bind_false_f()); 3597 ccsr_channel_inst_bind_false_f());
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index b19a7b68..a6eae8ca 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -156,7 +156,7 @@ struct fifo_gk20a {
156#ifdef CONFIG_DEBUG_FS 156#ifdef CONFIG_DEBUG_FS
157 struct { 157 struct {
158 struct fifo_profile_gk20a *data; 158 struct fifo_profile_gk20a *data;
159 atomic_t get; 159 nvgpu_atomic_t get;
160 bool enabled; 160 bool enabled;
161 u64 *sorted; 161 u64 *sorted;
162 struct kref ref; 162 struct kref ref;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index c50d800f..550b22c0 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -373,13 +373,13 @@ int gk20a_wait_for_idle(struct gk20a *g)
373 if (g->user_railgate_disabled) 373 if (g->user_railgate_disabled)
374 target_usage_count = 1; 374 target_usage_count = 1;
375 375
376 while ((atomic_read(&g->usage_count) != target_usage_count) 376 while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count)
377 && (wait_length-- >= 0)) 377 && (wait_length-- >= 0))
378 nvgpu_msleep(20); 378 nvgpu_msleep(20);
379 379
380 if (wait_length < 0) { 380 if (wait_length < 0) {
381 pr_warn("%s: Timed out waiting for idle (%d)!\n", 381 pr_warn("%s: Timed out waiting for idle (%d)!\n",
382 __func__, atomic_read(&g->usage_count)); 382 __func__, nvgpu_atomic_read(&g->usage_count));
383 return -ETIMEDOUT; 383 return -ETIMEDOUT;
384 } 384 }
385 385
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 4878fdd6..47fd3aef 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -48,6 +48,7 @@ struct nvgpu_cpu_time_correlation_sample;
48#include <nvgpu/kref.h> 48#include <nvgpu/kref.h>
49#include <nvgpu/falcon.h> 49#include <nvgpu/falcon.h>
50#include <nvgpu/pmu.h> 50#include <nvgpu/pmu.h>
51#include <nvgpu/atomic.h>
51 52
52#include "clk_gk20a.h" 53#include "clk_gk20a.h"
53#include "ce2_gk20a.h" 54#include "ce2_gk20a.h"
@@ -1038,7 +1039,7 @@ struct gk20a {
1038 */ 1039 */
1039 unsigned long *enabled_flags; 1040 unsigned long *enabled_flags;
1040 1041
1041 atomic_t usage_count; 1042 nvgpu_atomic_t usage_count;
1042 1043
1043 struct kref refcount; 1044 struct kref refcount;
1044 1045
@@ -1205,7 +1206,7 @@ struct gk20a {
1205 1206
1206 struct gk20a_channel_worker { 1207 struct gk20a_channel_worker {
1207 struct nvgpu_thread poll_task; 1208 struct nvgpu_thread poll_task;
1208 atomic_t put; 1209 nvgpu_atomic_t put;
1209 struct nvgpu_cond wq; 1210 struct nvgpu_cond wq;
1210 struct nvgpu_list_node items; 1211 struct nvgpu_list_node items;
1211 struct nvgpu_spinlock items_lock; 1212 struct nvgpu_spinlock items_lock;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 16fe7149..e21be1e5 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -617,7 +617,7 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
617 nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); 617 nvgpu_mutex_init(&mm->vidmem.first_clear_mutex);
618 618
619 INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker); 619 INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker);
620 atomic64_set(&mm->vidmem.bytes_pending, 0); 620 nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0);
621 nvgpu_init_list_node(&mm->vidmem.clear_list_head); 621 nvgpu_init_list_node(&mm->vidmem.clear_list_head);
622 nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); 622 nvgpu_mutex_init(&mm->vidmem.clear_list_mutex);
623 623
@@ -1165,7 +1165,7 @@ int gk20a_vidmem_get_space(struct gk20a *g, u64 *space)
1165 1165
1166 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); 1166 nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
1167 *space = nvgpu_alloc_space(allocator) + 1167 *space = nvgpu_alloc_space(allocator) +
1168 atomic64_read(&g->mm.vidmem.bytes_pending); 1168 nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending);
1169 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); 1169 nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
1170 return 0; 1170 return 0;
1171#else 1171#else
@@ -1483,7 +1483,7 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work)
1483 (u64)get_vidmem_page_alloc(mem->priv.sgt->sgl)); 1483 (u64)get_vidmem_page_alloc(mem->priv.sgt->sgl));
1484 nvgpu_free_sgtable(g, &mem->priv.sgt); 1484 nvgpu_free_sgtable(g, &mem->priv.sgt);
1485 1485
1486 WARN_ON(atomic64_sub_return(mem->size, 1486 WARN_ON(nvgpu_atomic64_sub_return(mem->size,
1487 &g->mm.vidmem.bytes_pending) < 0); 1487 &g->mm.vidmem.bytes_pending) < 0);
1488 mem->size = 0; 1488 mem->size = 0;
1489 mem->aperture = APERTURE_INVALID; 1489 mem->aperture = APERTURE_INVALID;
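
bytes_pending above is a 64-bit running total protected only by atomics. A minimal sketch of the same add/sub accounting with the new wrappers (simplified, illustrative names):

#include <nvgpu/atomic.h>

static nvgpu_atomic64_t bytes_pending = NVGPU_ATOMIC64_INIT(0);

static void example_queue_clear(long size)
{
        nvgpu_atomic64_add(size, &bytes_pending);
}

static void example_clear_done(long size)
{
        /* Going negative would indicate unbalanced accounting. */
        WARN_ON(nvgpu_atomic64_sub_return(size, &bytes_pending) < 0);
}
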
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7b2c0dfc..af176a73 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -30,6 +30,7 @@
30#include <nvgpu/list.h> 30#include <nvgpu/list.h>
31#include <nvgpu/rbtree.h> 31#include <nvgpu/rbtree.h>
32#include <nvgpu/kref.h> 32#include <nvgpu/kref.h>
33#include <nvgpu/atomic.h>
33 34
34struct nvgpu_pd_cache; 35struct nvgpu_pd_cache;
35 36
@@ -283,7 +284,7 @@ struct mm_gk20a {
283 struct nvgpu_mutex clear_list_mutex; 284 struct nvgpu_mutex clear_list_mutex;
284 285
285 struct work_struct clear_mem_worker; 286 struct work_struct clear_mem_worker;
286 atomic64_t bytes_pending; 287 nvgpu_atomic64_t bytes_pending;
287 } vidmem; 288 } vidmem;
288}; 289};
289 290
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index afdfba91..e688c863 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -51,7 +51,7 @@ void channel_gm20b_bind(struct channel_gk20a *c)
51 ~ccsr_channel_enable_set_f(~0)) | 51 ~ccsr_channel_enable_set_f(~0)) |
52 ccsr_channel_enable_set_true_f()); 52 ccsr_channel_enable_set_true_f());
53 wmb(); 53 wmb();
54 atomic_set(&c->bound, true); 54 nvgpu_atomic_set(&c->bound, true);
55} 55}
56 56
57static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id) 57static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/atomic.h b/drivers/gpu/nvgpu/include/nvgpu/atomic.h
index 700e29fe..c7a5fcd9 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/atomic.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/atomic.h
@@ -9,12 +9,102 @@
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details. 11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
12 */ 15 */
13#ifndef __NVGPU_ATOMIC_H__ 16#ifndef __NVGPU_ATOMIC_H__
14#define __NVGPU_ATOMIC_H__ 17#define __NVGPU_ATOMIC_H__
15 18
16#ifdef __KERNEL__ 19#include <nvgpu/linux/atomic.h>
17#include <linux/atomic.h> 20
18#endif 21#define NVGPU_ATOMIC_INIT(i) __nvgpu_atomic_init(i)
22#define NVGPU_ATOMIC64_INIT(i) __nvgpu_atomic64_init(i)
23
24static inline void nvgpu_atomic_set(nvgpu_atomic_t *v, int i)
25{
26 __nvgpu_atomic_set(v, i);
27}
28static inline int nvgpu_atomic_read(nvgpu_atomic_t *v)
29{
30 return __nvgpu_atomic_read(v);
31}
32static inline void nvgpu_atomic_inc(nvgpu_atomic_t *v)
33{
34 __nvgpu_atomic_inc(v);
35}
36static inline int nvgpu_atomic_inc_return(nvgpu_atomic_t *v)
37{
38 return __nvgpu_atomic_inc_return(v);
39}
40static inline void nvgpu_atomic_dec(nvgpu_atomic_t *v)
41{
42 __nvgpu_atomic_dec(v);
43}
44static inline int nvgpu_atomic_dec_return(nvgpu_atomic_t *v)
45{
46 return __nvgpu_atomic_dec_return(v);
47}
48static inline int nvgpu_atomic_cmpxchg(nvgpu_atomic_t *v, int old, int new)
49{
50 return __nvgpu_atomic_cmpxchg(v, old, new);
51}
52static inline int nvgpu_atomic_xchg(nvgpu_atomic_t *v, int new)
53{
54 return __nvgpu_atomic_xchg(v, new);
55}
56static inline bool nvgpu_atomic_inc_and_test(nvgpu_atomic_t *v)
57{
58 return __nvgpu_atomic_inc_and_test(v);
59}
60static inline bool nvgpu_atomic_dec_and_test(nvgpu_atomic_t *v)
61{
62 return __nvgpu_atomic_dec_and_test(v);
63}
64static inline int nvgpu_atomic_add_return(int i, nvgpu_atomic_t *v)
65{
66 return __nvgpu_atomic_add_return(i, v);
67}
68static inline void nvgpu_atomic64_set(nvgpu_atomic64_t *v, long i)
69{
70 return __nvgpu_atomic64_set(v, i);
71}
72static inline long nvgpu_atomic64_read(nvgpu_atomic64_t *v)
73{
74 return __nvgpu_atomic64_read(v);
75}
76static inline void nvgpu_atomic64_add(long x, nvgpu_atomic64_t *v)
77{
78 __nvgpu_atomic64_add(x, v);
79}
80static inline void nvgpu_atomic64_inc(nvgpu_atomic64_t *v)
81{
82 __nvgpu_atomic64_inc(v);
83}
84static inline long nvgpu_atomic64_inc_return(nvgpu_atomic64_t *v)
85{
86 return __nvgpu_atomic64_inc_return(v);
87}
88static inline void nvgpu_atomic64_dec(nvgpu_atomic64_t *v)
89{
90 __nvgpu_atomic64_dec(v);
91}
92static inline void nvgpu_atomic64_dec_return(nvgpu_atomic64_t *v)
93{
94 __nvgpu_atomic64_dec_return(v);
95}
96static inline long nvgpu_atomic64_cmpxchg(nvgpu_atomic64_t *v, long old,
97 long new)
98{
99 return __nvgpu_atomic64_cmpxchg(v, old, new);
100}
101static inline void nvgpu_atomic64_sub(long x, nvgpu_atomic64_t *v)
102{
103 __nvgpu_atomic64_sub(x, v);
104}
105static inline long nvgpu_atomic64_sub_return(long x, nvgpu_atomic64_t *v)
106{
107 return __nvgpu_atomic64_sub_return(x, v);
108}
19 109
20#endif 110#endif /* __NVGPU_ATOMIC_H__ */
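
Static initialization goes through NVGPU_ATOMIC_INIT()/NVGPU_ATOMIC64_INIT(), mirroring the dbg_gpu unique-id allocator converted above. A minimal usage sketch of the wrapper API (illustrative names):

#include <nvgpu/atomic.h>

static nvgpu_atomic_t example_id = NVGPU_ATOMIC_INIT(0);

static int example_generate_id(void)
{
        return nvgpu_atomic_add_return(1, &example_id);
}
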
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h b/drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h
new file mode 100644
index 00000000..c6dd4650
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h
@@ -0,0 +1,137 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16#ifndef __NVGPU_ATOMIC_LINUX_H__
17#define __NVGPU_ATOMIC_LINUX_H__
18
19#include <linux/atomic.h>
20
21typedef struct nvgpu_atomic {
22 atomic_t atomic_var;
23} nvgpu_atomic_t;
24
25typedef struct nvgpu_atomic64 {
26 atomic64_t atomic_var;
27} nvgpu_atomic64_t;
28
29#define __nvgpu_atomic_init(i) { ATOMIC_INIT(i) }
30#define __nvgpu_atomic64_init(i) { ATOMIC64_INIT(i) }
31
32static inline void __nvgpu_atomic_set(nvgpu_atomic_t *v, int i)
33{
34 atomic_set(&v->atomic_var, i);
35}
36
37static inline int __nvgpu_atomic_read(nvgpu_atomic_t *v)
38{
39 return atomic_read(&v->atomic_var);
40}
41
42static inline void __nvgpu_atomic_inc(nvgpu_atomic_t *v)
43{
44 atomic_inc(&v->atomic_var);
45}
46
47static inline int __nvgpu_atomic_inc_return(nvgpu_atomic_t *v)
48{
49 return atomic_inc_return(&v->atomic_var);
50}
51
52static inline void __nvgpu_atomic_dec(nvgpu_atomic_t *v)
53{
54 atomic_dec(&v->atomic_var);
55}
56
57static inline int __nvgpu_atomic_dec_return(nvgpu_atomic_t *v)
58{
59 return atomic_dec_return(&v->atomic_var);
60}
61
62static inline int __nvgpu_atomic_cmpxchg(nvgpu_atomic_t *v, int old, int new)
63{
64 return atomic_cmpxchg(&v->atomic_var, old, new);
65}
66
67static inline int __nvgpu_atomic_xchg(nvgpu_atomic_t *v, int new)
68{
69 return atomic_xchg(&v->atomic_var, new);
70}
71
72static inline bool __nvgpu_atomic_inc_and_test(nvgpu_atomic_t *v)
73{
74 return atomic_inc_and_test(&v->atomic_var);
75}
76
77static inline bool __nvgpu_atomic_dec_and_test(nvgpu_atomic_t *v)
78{
79 return atomic_dec_and_test(&v->atomic_var);
80}
81
82static inline int __nvgpu_atomic_add_return(int i, nvgpu_atomic_t *v)
83{
84 return atomic_add_return(i, &v->atomic_var);
85}
86
87static inline void __nvgpu_atomic64_set(nvgpu_atomic64_t *v, long i)
88{
89 atomic64_set(&v->atomic_var, i);
90}
91
92static inline long __nvgpu_atomic64_read(nvgpu_atomic64_t *v)
93{
94 return atomic64_read(&v->atomic_var);
95}
96
97static inline void __nvgpu_atomic64_add(long x, nvgpu_atomic64_t *v)
98{
99 atomic64_add(x, &v->atomic_var);
100}
101
102static inline void __nvgpu_atomic64_inc(nvgpu_atomic64_t *v)
103{
104 atomic64_inc(&v->atomic_var);
105}
106
107static inline long __nvgpu_atomic64_inc_return(nvgpu_atomic64_t *v)
108{
109 return atomic64_inc_return(&v->atomic_var);
110}
111
112static inline void __nvgpu_atomic64_dec(nvgpu_atomic64_t *v)
113{
114 atomic64_dec(&v->atomic_var);
115}
116
117static inline void __nvgpu_atomic64_dec_return(nvgpu_atomic64_t *v)
118{
119 atomic64_dec_return(&v->atomic_var);
120}
121
122static inline long __nvgpu_atomic64_cmpxchg(nvgpu_atomic64_t *v,
123 long old, long new)
124{
125 return atomic64_cmpxchg(&v->atomic_var, old, new);
126}
127
128static inline void __nvgpu_atomic64_sub(long x, nvgpu_atomic64_t *v)
129{
130 atomic64_sub(x, &v->atomic_var);
131}
132
133static inline long __nvgpu_atomic64_sub_return(long x, nvgpu_atomic64_t *v)
134{
135 return atomic64_sub_return(x, &v->atomic_var);
136}
137#endif /*__NVGPU_ATOMIC_LINUX_H__ */
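
Wrapping atomic_t inside a struct (rather than using a plain typedef) steers common code toward the nvgpu_atomic_* API and keeps the Linux primitives confined to this header. A purely hypothetical sketch of what a backend for another OS could look like, assuming compiler atomic builtins (not part of this change):

typedef struct nvgpu_atomic {
        int atomic_var;
} nvgpu_atomic_t;

#define __nvgpu_atomic_init(i)  { (i) }

static inline void __nvgpu_atomic_set(nvgpu_atomic_t *v, int i)
{
        __atomic_store_n(&v->atomic_var, i, __ATOMIC_SEQ_CST);
}

static inline int __nvgpu_atomic_read(nvgpu_atomic_t *v)
{
        return __atomic_load_n(&v->atomic_var, __ATOMIC_SEQ_CST);
}

static inline int __nvgpu_atomic_inc_return(nvgpu_atomic_t *v)
{
        return __atomic_add_fetch(&v->atomic_var, 1, __ATOMIC_SEQ_CST);
}
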
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index faa8d945..90261d81 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -48,7 +48,7 @@ struct nvgpu_semaphore_sea;
48struct nvgpu_semaphore_int { 48struct nvgpu_semaphore_int {
49 int idx; /* Semaphore index. */ 49 int idx; /* Semaphore index. */
50 u32 offset; /* Offset into the pool. */ 50 u32 offset; /* Offset into the pool. */
51 atomic_t next_value; /* Next available value. */ 51 nvgpu_atomic_t next_value; /* Next available value. */
52 u32 nr_incrs; /* Number of increments programmed. */ 52 u32 nr_incrs; /* Number of increments programmed. */
53 struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */ 53 struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */
54 struct channel_gk20a *ch; /* Channel that owns this sema. */ 54 struct channel_gk20a *ch; /* Channel that owns this sema. */
@@ -70,7 +70,7 @@ nvgpu_semaphore_int_from_hw_sema_list(struct nvgpu_list_node *node)
70struct nvgpu_semaphore { 70struct nvgpu_semaphore {
71 struct nvgpu_semaphore_int *hw_sema; 71 struct nvgpu_semaphore_int *hw_sema;
72 72
73 atomic_t value; 73 nvgpu_atomic_t value;
74 int incremented; 74 int incremented;
75 75
76 struct kref ref; 76 struct kref ref;
@@ -242,7 +242,7 @@ static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
242 * the value of the semaphore then the semaphore has been signaled 242 * the value of the semaphore then the semaphore has been signaled
243 * (a.k.a. released). 243 * (a.k.a. released).
244 */ 244 */
245 return (int)sema_val >= atomic_read(&s->value); 245 return (int)sema_val >= nvgpu_atomic_read(&s->value);
246} 246}
247 247
248static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) 248static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
@@ -252,12 +252,12 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
252 252
253static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) 253static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
254{ 254{
255 return (u32)atomic_read(&s->value); 255 return (u32)nvgpu_atomic_read(&s->value);
256} 256}
257 257
258static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) 258static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
259{ 259{
260 return (u32)atomic_read(&s->hw_sema->next_value); 260 return (u32)nvgpu_atomic_read(&s->hw_sema->next_value);
261} 261}
262 262
263/* 263/*
@@ -320,7 +320,7 @@ static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s)
320{ 320{
321 BUG_ON(s->incremented); 321 BUG_ON(s->incremented);
322 322
323 atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); 323 nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &s->hw_sema->next_value));
324 s->incremented = 1; 324 s->incremented = 1;
325 325
326 gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a, 326 gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a,
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index f1ae2f1f..c8519905 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -43,7 +43,7 @@ static void vgpu_channel_bind(struct channel_gk20a *ch)
43 WARN_ON(err || msg.ret); 43 WARN_ON(err || msg.ret);
44 44
45 wmb(); 45 wmb();
46 atomic_set(&ch->bound, true); 46 nvgpu_atomic_set(&ch->bound, true);
47} 47}
48 48
49static void vgpu_channel_unbind(struct channel_gk20a *ch) 49static void vgpu_channel_unbind(struct channel_gk20a *ch)
@@ -51,7 +51,7 @@ static void vgpu_channel_unbind(struct channel_gk20a *ch)
51 51
52 gk20a_dbg_fn(""); 52 gk20a_dbg_fn("");
53 53
54 if (atomic_cmpxchg(&ch->bound, true, false)) { 54 if (nvgpu_atomic_cmpxchg(&ch->bound, true, false)) {
55 struct tegra_vgpu_cmd_msg msg; 55 struct tegra_vgpu_cmd_msg msg;
56 struct tegra_vgpu_channel_config_params *p = 56 struct tegra_vgpu_channel_config_params *p =
57 &msg.params.channel_config; 57 &msg.params.channel_config;
@@ -425,7 +425,7 @@ static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 chid)
425 425
426 gk20a_dbg_fn(""); 426 gk20a_dbg_fn("");
427 427
428 if (!atomic_read(&ch->bound)) 428 if (!nvgpu_atomic_read(&ch->bound))
429 return 0; 429 return 0;
430 430
431 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT; 431 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT;