GRU Driver: TLB flushing, MMUOPS callouts

This file contains the functions for handling GRU TLB flushing. This includes functions to handle the MMUOPS callouts. Signed-off-by: Jack Steiner <steiner@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Jack Steiner <steiner@sgi.com> 2008-07-30 01:33:59 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-30 12:41:48 -0400
commit: ee5b8feca3af01400e26637209a72fbf137c82ff (patch)
tree: b49588318f0b3de586cd11e9e4aa5581f6666347 /drivers
parent: 1d09d737ab017ff7a9745962e19909713ac89b37 (diff)
1 files changed, 370 insertions, 0 deletions
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
new file mode 100644
index 000000000000..bb6b0e64e101
--- /dev/null
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,370 @@
+/*
+ * SN Platform GRU Driver
+ *
+ *              MMUOPS callbacks  + TLB flushing
+ *
+ * This file handles mmu notifier callbacks from the core kernel. The callbacks
+ * are used to update the TLB in the GRU as a result of changes in the
+ * state of a process address space. This file also handles TLB invalidates
+ * from the GRU driver.
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/hugetlb.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/delay.h>
+#include <linux/srcu.h>
+#include <asm/processor.h>
+#include "gru.h"
+#include "grutables.h"
+#include <asm/uv/uv_hub.h>
+/*
+ * gru_random() expands to get_cycles(); its value is reduced modulo the
+ * number of remote TGHs to pick an off-blade TGH index.
+ */
+#define gru_random()    get_cycles()
+/* ---------------------------------- TLB Invalidation functions --------
+ * get_tgh_handle
+ *
+ * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
+ * local blade, use a fixed TGH that is a function of the blade-local cpu
+ * number. Normally, this TGH is private to the cpu & no contention occurs for
+ * the TGH. For offblade GRUs, select a random TGH in the range above the
+ * private TGHs. A spinlock is required to access this TGH & the lock must be
+ * released when the invalidate completes. This sucks, but it is the best we
+ * can do.
+ *
+ * Note that the spinlock is IN the TGH handle so locking does not involve
+ * additional cache lines.
+ *
+ */
+/*
+ * Choose a TGH index for a GRU on another blade: a gru_random() value
+ * reduced to the range [gs_tgh_first_remote, GRU_NUM_TGH).
+ */
+static inline int get_off_blade_tgh(struct gru_state *gru)
+{
+        int span = GRU_NUM_TGH - gru->gs_tgh_first_remote;
+        int pick = gru_random() % span;
+
+        return pick + gru->gs_tgh_first_remote;
+}
+/*
+ * TGH index for a GRU on the caller's blade: the blade-local cpu number
+ * shifted right by gs_tgh_local_shift.
+ */
+static inline int get_on_blade_tgh(struct gru_state *gru)
+{
+        int tgh_index = uv_blade_processor_id() >> gru->gs_tgh_local_shift;
+
+        return tgh_index;
+}
+/*
+ * Select and lock a TGH for issuing an invalidate to @gru.
+ *
+ * Preemption is disabled before the blade comparison and is still disabled
+ * on return; the caller releases the handle with get_unlock_tgh_handle(),
+ * which re-enables it.
+ */
+static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
+                                                         *gru)
+{
+        struct gru_tlb_global_handle *handle;
+        int index;
+
+        preempt_disable();
+        /* on-blade GRUs use the per-cpu index, off-blade a random one */
+        index = (uv_numa_blade_id() == gru->gs_blade_id) ?
+                get_on_blade_tgh(gru) : get_off_blade_tgh(gru);
+        handle = get_tgh_by_index(gru, index);
+        lock_tgh_handle(handle);
+        return handle;
+}
+/*
+ * Release a TGH obtained from get_lock_tgh_handle(): unlock the handle, then
+ * re-enable the preemption that get_lock_tgh_handle() disabled.
+ */
+static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
+{
+        unlock_tgh_handle(tgh);
+        preempt_enable();
+}
+/*
+ * gru_flush_tlb_range
+ *
+ * General purpose TLB invalidation function. This function scans every GRU in
+ * the ENTIRE system (partition) looking for GRUs where the specified MM has
+ * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
+ * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
+ * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
+ * cost of (possibly) a large number of future TLBmisses.
+ *
+ * The current algorithm is optimized based on the following (somewhat true)
+ * assumptions:
+ *      - GRU contexts are not loaded into a GRU unless a reference is made to
+ *        the data segment or control block (this is true, not an assumption).
+ *        If a DS/CB is referenced, the user will also issue instructions that
+ *        cause TLBmisses. It is not necessary to optimize for the case where
+ *        contexts are loaded but no instructions cause TLB misses. (I know
+ *        this will happen but I'm not optimizing for it).
+ *      - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
+ *        a few usec but in unusual cases, it could be longer. Avoid if
+ *        possible.
+ *      - intrablade process migration between cpus is not frequent but is
+ *        common.
+ *      - a GRU context is not typically migrated to a different GRU on the
+ *        blade because of intrablade migration
+ *      - interblade migration is rare. Processes migrate their GRU context to
+ *        the new blade.
+ *      - if interblade migration occurs, migration back to the original blade
+ *        is very very rare (ie., no optimization for this case)
+ *      - most GRU instruction operate on a subset of the user REGIONS. Code
+ *        & shared library regions are not likely targets of GRU instructions.
+ *
+ * To help improve the efficiency of TLB invalidation, the GMS data
+ * structure is maintained for EACH address space (MM struct). The GMS is
+ * also the structure that contains the pointer to the mmu callout
+ * functions. This structure is linked to the mm_struct for the address space
+ * using the mmu "register" function. The mmu interfaces are used to
+ * provide the callbacks for TLB invalidation. The GMS contains:
+ *
+ *      - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
+ *        loaded into the GRU.
+ *      - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
+ *        the above array
+ *      - ctxbitmap[maxgrus]. Indicates the contexts that are currently active
+ *        in the GRU for the address space. This bitmap must be passed to the
+ *        GRU to do an invalidate.
+ *
+ * The current algorithm for invalidating TLBs is:
+ *      - scan the asidmap for GRUs where the context has been loaded, ie,
+ *        asid is non-zero.
+ *      - for each gru found:
+ *              - if the ctxtmap is non-zero, there are active contexts in the
+ *                GRU. TLB invalidate instructions must be issued to the GRU.
+ *              - if the ctxtmap is zero, no context is active. Set the ASID to
+ *                zero to force a full TLB invalidation. This is fast but will
+ *                cause a lot of TLB misses if the context is reloaded onto the
+ *                GRU
+ *
+ */
+void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
+                         unsigned long len)
+{
+        struct gru_state *gru;
+        struct gru_mm_tracker *tracker;
+        struct gru_tlb_global_handle *tgh;
+        unsigned long num;
+        int grupagesize, pagesize, pageshift, gid, asid;
+
+        /* ZZZ TODO - handle huge pages */
+        pageshift = PAGE_SHIFT;
+        pagesize = (1UL << pageshift);
+        grupagesize = GRU_PAGESIZE(pageshift);
+        /* number of pages spanned by len (rounded up), capped at GRUMAXINVAL */
+        num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
+
+        STAT(flush_tlb);
+        gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
+                start, len, gms->ms_asidmap[0]);
+
+        /* ms_asid_lock is held across the whole scan of the asidmap */
+        spin_lock(&gms->ms_asid_lock);
+        for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
+                STAT(flush_tlb_gru);
+                gru = GID_TO_GRU(gid);
+                tracker = &gms->ms_asids[gid];
+                asid = tracker->mt_asid;
+                if (!tracker->mt_ctxbitmap || !asid) {
+                        /*
+                         * No active context (or no asid): drop the asid and
+                         * its asidmap bit instead of issuing an invalidate.
+                         */
+                        STAT(flush_tlb_gru_zero_asid);
+                        tracker->mt_asid = 0;
+                        __clear_bit(gru->gs_gid, gms->ms_asidmap);
+                        gru_dbg(grudev,
+        "  CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
+                                gid, asid, tracker->mt_ctxbitmap,
+                                gms->ms_asidmap[0]);
+                } else {
+                        /* active contexts: issue the invalidate via a TGH */
+                        STAT(flush_tlb_gru_tgh);
+                        asid = GRUASID(asid, start);
+                        gru_dbg(grudev,
+        "  FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
+                                gid, asid, num, tracker->mt_ctxbitmap);
+                        tgh = get_lock_tgh_handle(gru);
+                        tgh_invalidate(tgh, start, 0, asid, grupagesize, 0,
+                                       num - 1, tracker->mt_ctxbitmap);
+                        get_unlock_tgh_handle(tgh);
+                }
+        }
+        spin_unlock(&gms->ms_asid_lock);
+}
+/*
+ * Flush the entire TLB on a chiplet.
+ *
+ * get_lock_tgh_handle() disables preemption and get_unlock_tgh_handle()
+ * re-enables it, so the pair is already balanced. No additional
+ * preempt_enable() may be issued here: an extra one would underflow the
+ * preempt count.
+ */
+void gru_flush_all_tlb(struct gru_state *gru)
+{
+        struct gru_tlb_global_handle *tgh;
+
+        gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
+        tgh = get_lock_tgh_handle(gru);
+        tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0);
+        get_unlock_tgh_handle(tgh);
+}
+/*
+ * MMUOPS notifier callout functions
+ */
+/*
+ * invalidate_range_start callout: bump ms_range_active, then flush the GRU
+ * TLBs for [start, end). The counter is dropped again in
+ * gru_invalidate_range_end().
+ */
+static void gru_invalidate_range_start(struct mmu_notifier *mn,
+                                       struct mm_struct *mm,
+                                       unsigned long start, unsigned long end)
+{
+        struct gru_mm_struct *gms;
+
+        gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+        STAT(mmu_invalidate_range);
+        atomic_inc(&gms->ms_range_active);
+        gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
+                start, end, atomic_read(&gms->ms_range_active));
+        gru_flush_tlb_range(gms, start, end - start);
+}
+/*
+ * invalidate_range_end callout: drop ms_range_active and wake every waiter
+ * on ms_wait_queue.
+ */
+static void gru_invalidate_range_end(struct mmu_notifier *mn,
+                                     struct mm_struct *mm, unsigned long start,
+                                     unsigned long end)
+{
+        struct gru_mm_struct *gms;
+
+        gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+        atomic_dec(&gms->ms_range_active);
+        wake_up_all(&gms->ms_wait_queue);
+        gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
+}
+/*
+ * invalidate_page callout: flush the GRU TLBs for the single page of
+ * PAGE_SIZE bytes starting at @address.
+ */
+static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
+                                unsigned long address)
+{
+        struct gru_mm_struct *gms;
+
+        gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+        STAT(mmu_invalidate_page);
+        gru_flush_tlb_range(gms, address, PAGE_SIZE);
+        gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
+}
+/*
+ * release callout: mark the GMS as released. gru_drop_mmu_notifier() checks
+ * ms_released and skips mmu_notifier_unregister() when it is set.
+ */
+static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+        struct gru_mm_struct *gms;
+
+        gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+        gms->ms_released = 1;
+        gru_dbg(grudev, "gms %p\n", gms);
+}
+/*
+ * Callout table installed in each GMS's ms_notifier. Its address is also
+ * used by mmu_find_ops() to recognise the GRU's notifier on an mm.
+ */
+static const struct mmu_notifier_ops gru_mmuops = {
+        .invalidate_page        = gru_invalidate_page,
+        .invalidate_range_start = gru_invalidate_range_start,
+        .invalidate_range_end   = gru_invalidate_range_end,
+        .release                = gru_release,
+};
+/* Move this to the basic mmu_notifier file. But for now... */
+/*
+ * Walk the notifier list of @mm under rcu_read_lock() and return the first
+ * notifier whose ops pointer equals @ops, or NULL if there is none (or if
+ * the mm has no mmu_notifier_mm at all).
+ */
+static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
+                        const struct mmu_notifier_ops *ops)
+{
+        struct mmu_notifier *cur, *found = NULL;
+        struct hlist_node *pos;
+
+        if (!mm->mmu_notifier_mm)
+                return NULL;
+
+        rcu_read_lock();
+        hlist_for_each_entry_rcu(cur, pos, &mm->mmu_notifier_mm->list,
+                                 hlist) {
+                if (cur->ops == ops) {
+                        found = cur;
+                        break;
+                }
+        }
+        rcu_read_unlock();
+        return found;
+}
+/*
+ * Get the GMS for the current task's mm, creating and registering one if the
+ * mm does not yet have a GRU notifier.
+ *
+ * If a notifier using gru_mmuops is already on current->mm, its GMS is
+ * reused and ms_refcnt is incremented. Otherwise a zeroed GMS is allocated,
+ * initialised with a refcnt of 1 and registered with
+ * __mmu_notifier_register().
+ *
+ * Returns the GMS, or NULL if the allocation or the registration fails. On
+ * registration failure the new GMS is freed rather than returned, since it
+ * would never receive invalidate callouts.
+ */
+struct gru_mm_struct *gru_register_mmu_notifier(void)
+{
+        struct gru_mm_struct *gms;
+        struct mmu_notifier *mn;
+
+        mn = mmu_find_ops(current->mm, &gru_mmuops);
+        if (mn) {
+                gms = container_of(mn, struct gru_mm_struct, ms_notifier);
+                atomic_inc(&gms->ms_refcnt);
+        } else {
+                gms = kzalloc(sizeof(*gms), GFP_KERNEL);
+                /* bail out before the debug print below dereferences gms */
+                if (!gms)
+                        return NULL;
+                spin_lock_init(&gms->ms_asid_lock);
+                gms->ms_notifier.ops = &gru_mmuops;
+                atomic_set(&gms->ms_refcnt, 1);
+                init_waitqueue_head(&gms->ms_wait_queue);
+                if (__mmu_notifier_register(&gms->ms_notifier, current->mm)) {
+                        kfree(gms);
+                        return NULL;
+                }
+        }
+        gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
+                atomic_read(&gms->ms_refcnt));
+        return gms;
+}
+/*
+ * Drop one reference on @gms. When the count reaches zero the notifier is
+ * unregistered from current->mm (unless gru_release() already marked the
+ * GMS released) and the GMS is freed.
+ */
+void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
+{
+        gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
+                atomic_read(&gms->ms_refcnt), gms->ms_released);
+
+        /* other references remain: nothing more to do */
+        if (atomic_dec_return(&gms->ms_refcnt) != 0)
+                return;
+
+        if (!gms->ms_released)
+                mmu_notifier_unregister(&gms->ms_notifier, current->mm);
+        kfree(gms);
+}
+/*
+ * Setup TGH parameters. There are:
+ *      - 24 TGH handles per GRU chiplet
+ *      - a portion (MAX_LOCAL_TGH) of the handles are reserved for
+ *        use by blade-local cpus
+ *      - the rest are used by off-blade cpus. This usage is
+ *        less frequent than blade-local usage.
+ *
+ * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
+ * has less than or equal to 16 cpus, each cpu has a unique handle that it can
+ * use.
+ */
+#define MAX_LOCAL_TGH   16
+/*
+ * Compute the two TGH parameters stored in @gru:
+ *      gs_tgh_local_shift  - right shift that converts a blade-local cpu
+ *                            number to a TGH index
+ *      gs_tgh_first_remote - first TGH index used for remote purges
+ */
+void gru_tgh_flush_init(struct gru_state *gru)
+{
+        int cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
+        int shift = 0;
+
+        if (cpus) {
+                /* cpus rounded up to next power of 2 */
+                int pow2 = 1 << fls(cpus - 1);
+
+                /*
+                 * shift count for converting local cpu# to TGH index
+                 *      0 if cpus <= MAX_LOCAL_TGH,
+                 *      1 if cpus <= 2*MAX_LOCAL_TGH,
+                 *      etc
+                 */
+                shift = fls(pow2 - 1) - fls(MAX_LOCAL_TGH - 1);
+                if (shift < 0)
+                        shift = 0;
+        }
+        gru->gs_tgh_local_shift = shift;
+
+        /* first starting TGH index to use for remote purges */
+        gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
+}
author	Jack Steiner <steiner@sgi.com>	2008-07-30 01:33:59 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-30 12:41:48 -0400
commit	ee5b8feca3af01400e26637209a72fbf137c82ff (patch)
tree	b49588318f0b3de586cd11e9e4aa5581f6666347 /drivers
parent	1d09d737ab017ff7a9745962e19909713ac89b37 (diff)

diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c new file mode 100644 index 000000000000..bb6b0e64e101 --- /dev/null +++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -0,0 +1,370 @@
	1	/*
	2	* SN Platform GRU Driver
	3	*
	4	* MMUOPS callbacks + TLB flushing
	5	*
	6	* This file handles mmu notifier callbacks from the core kernel. The callbacks
	7	* are used to update the TLB in the GRU as a result of changes in the
	8	* state of a process address space. This file also handles TLB invalidates
	9	* from the GRU driver.
	10	*
	11	* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
	12	*
	13	* This program is free software; you can redistribute it and/or modify
	14	* it under the terms of the GNU General Public License as published by
	15	* the Free Software Foundation; either version 2 of the License, or
	16	* (at your option) any later version.
	17	*
	18	* This program is distributed in the hope that it will be useful,
	19	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	20	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	21	* GNU General Public License for more details.
	22	*
	23	* You should have received a copy of the GNU General Public License
	24	* along with this program; if not, write to the Free Software
	25	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	26	*/
	27
	28	#include <linux/kernel.h>
	29	#include <linux/list.h>
	30	#include <linux/spinlock.h>
	31	#include <linux/mm.h>
	32	#include <linux/slab.h>
	33	#include <linux/device.h>
	34	#include <linux/hugetlb.h>
	35	#include <linux/delay.h>
	36	#include <linux/timex.h>
	37	#include <linux/delay.h>
	38	#include <linux/srcu.h>
	39	#include <asm/processor.h>
	40	#include "gru.h"
	41	#include "grutables.h"
	42	#include <asm/uv/uv_hub.h>
	43
	44	#define gru_random() get_cycles()
	45
	46	/* ---------------------------------- TLB Invalidation functions --------
	47	* get_tgh_handle
	48	*
	49	* Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the
	50	* local blade, use a fixed TGH that is a function of the blade-local cpu
	51	* number. Normally, this TGH is private to the cpu & no contention occurs for
	52	* the TGH. For offblade GRUs, select a random TGH in the range above the
	53	* private TGHs. A spinlock is required to access this TGH & the lock must be
	54	* released when the invalidate completes. This sucks, but it is the best we
	55	* can do.
	56	*
	57	* Note that the spinlock is IN the TGH handle so locking does not involve
	58	* additional cache lines.
	59	*
	60	*/
	61	static inline int get_off_blade_tgh(struct gru_state *gru)
	62	{
	63	int n;
	64
	65	n = GRU_NUM_TGH - gru->gs_tgh_first_remote;
	66	n = gru_random() % n;
	67	n += gru->gs_tgh_first_remote;
	68	return n;
	69	}
	70
	71	static inline int get_on_blade_tgh(struct gru_state *gru)
	72	{
	73	return uv_blade_processor_id() >> gru->gs_tgh_local_shift;
	74	}
	75
	76	static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state
	77	*gru)
	78	{
	79	struct gru_tlb_global_handle *tgh;
	80	int n;
	81
	82	preempt_disable();
	83	if (uv_numa_blade_id() == gru->gs_blade_id)
	84	n = get_on_blade_tgh(gru);
	85	else
	86	n = get_off_blade_tgh(gru);
	87	tgh = get_tgh_by_index(gru, n);
	88	lock_tgh_handle(tgh);
	89
	90	return tgh;
	91	}
	92
	93	static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh)
	94	{
	95	unlock_tgh_handle(tgh);
	96	preempt_enable();
	97	}
	98
	99	/*
	100	* gru_flush_tlb_range
	101	*
	102	* General purpose TLB invalidation function. This function scans every GRU in
	103	* the ENTIRE system (partition) looking for GRUs where the specified MM has
	104	* been accessed by the GRU. For each GRU found, the TLB must be invalidated OR
	105	* the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned
	106	* on the next fault. This effectively flushes the ENTIRE TLB for the MM at the
	107	* cost of (possibly) a large number of future TLBmisses.
	108	*
	109	* The current algorithm is optimized based on the following (somewhat true)
	110	* assumptions:
	111	* - GRU contexts are not loaded into a GRU unless a reference is made to
	112	* the data segment or control block (this is true, not an assumption).
	113	* If a DS/CB is referenced, the user will also issue instructions that
	114	* cause TLBmisses. It is not necessary to optimize for the case where
	115	* contexts are loaded but no instructions cause TLB misses. (I know
	116	* this will happen but I'm not optimizing for it).
	117	* - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally
	118	* a few usec but in unusual cases, it could be longer. Avoid if
	119	* possible.
	120	* - intrablade process migration between cpus is not frequent but is
	121	* common.
	122	* - a GRU context is not typically migrated to a different GRU on the
	123	* blade because of intrablade migration
	124	* - interblade migration is rare. Processes migrate their GRU context to
	125	* the new blade.
	126	* - if interblade migration occurs, migration back to the original blade
	127	* is very very rare (ie., no optimization for this case)
	128	* - most GRU instruction operate on a subset of the user REGIONS. Code
	129	* & shared library regions are not likely targets of GRU instructions.
	130	*
	131	* To help improve the efficiency of TLB invalidation, the GMS data
	132	* structure is maintained for EACH address space (MM struct). The GMS is
	133	* also the structure that contains the pointer to the mmu callout
	134	* functions. This structure is linked to the mm_struct for the address space
	135	* using the mmu "register" function. The mmu interfaces are used to
	136	* provide the callbacks for TLB invalidation. The GMS contains:
	137	*
	138	* - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is
	139	* loaded into the GRU.
	140	* - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in
	141	* the above array
	142	* - ctxbitmap[maxgrus]. Indicates the contexts that are currently active
	143	* in the GRU for the address space. This bitmap must be passed to the
	144	* GRU to do an invalidate.
	145	*
	146	* The current algorithm for invalidating TLBs is:
	147	* - scan the asidmap for GRUs where the context has been loaded, ie,
	148	* asid is non-zero.
	149	* - for each gru found:
	150	* - if the ctxtmap is non-zero, there are active contexts in the
	151	* GRU. TLB invalidate instructions must be issued to the GRU.
	152	* - if the ctxtmap is zero, no context is active. Set the ASID to
	153	* zero to force a full TLB invalidation. This is fast but will
	154	* cause a lot of TLB misses if the context is reloaded onto the
	155	* GRU
	156	*
	157	*/
	158
	159	void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
	160	unsigned long len)
	161	{
	162	struct gru_state *gru;
	163	struct gru_mm_tracker *asids;
	164	struct gru_tlb_global_handle *tgh;
	165	unsigned long num;
	166	int grupagesize, pagesize, pageshift, gid, asid;
	167
	168	/* ZZZ TODO - handle huge pages */
	169	pageshift = PAGE_SHIFT;
	170	pagesize = (1UL << pageshift);
	171	grupagesize = GRU_PAGESIZE(pageshift);
	172	num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL);
	173
	174	STAT(flush_tlb);
	175	gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms,
	176	start, len, gms->ms_asidmap[0]);
	177
	178	spin_lock(&gms->ms_asid_lock);
	179	for_each_gru_in_bitmap(gid, gms->ms_asidmap) {
	180	STAT(flush_tlb_gru);
	181	gru = GID_TO_GRU(gid);
	182	asids = gms->ms_asids + gid;
	183	asid = asids->mt_asid;
	184	if (asids->mt_ctxbitmap && asid) {
	185	STAT(flush_tlb_gru_tgh);
	186	asid = GRUASID(asid, start);
	187	gru_dbg(grudev,
	188	" FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
	189	gid, asid, num, asids->mt_ctxbitmap);
	190	tgh = get_lock_tgh_handle(gru);
	191	tgh_invalidate(tgh, start, 0, asid, grupagesize, 0,
	192	num - 1, asids->mt_ctxbitmap);
	193	get_unlock_tgh_handle(tgh);
	194	} else {
	195	STAT(flush_tlb_gru_zero_asid);
	196	asids->mt_asid = 0;
	197	__clear_bit(gru->gs_gid, gms->ms_asidmap);
	198	gru_dbg(grudev,
	199	" CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n",
	200	gid, asid, asids->mt_ctxbitmap,
	201	gms->ms_asidmap[0]);
	202	}
	203	}
	204	spin_unlock(&gms->ms_asid_lock);
	205	}
	206
	207	/*
	208	* Flush the entire TLB on a chiplet.
	209	*/
	210	void gru_flush_all_tlb(struct gru_state *gru)
	211	{
	212	struct gru_tlb_global_handle *tgh;
	213
	214	gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
	215	tgh = get_lock_tgh_handle(gru);
	216	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0);
	217	get_unlock_tgh_handle(tgh);
	218	preempt_enable();
	219	}
	220
	221	/*
	222	* MMUOPS notifier callout functions
	223	*/
	224	static void gru_invalidate_range_start(struct mmu_notifier *mn,
	225	struct mm_struct *mm,
	226	unsigned long start, unsigned long end)
	227	{
	228	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
	229	ms_notifier);
	230
	231	STAT(mmu_invalidate_range);
	232	atomic_inc(&gms->ms_range_active);
	233	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
	234	start, end, atomic_read(&gms->ms_range_active));
	235	gru_flush_tlb_range(gms, start, end - start);
	236	}
	237
	238	static void gru_invalidate_range_end(struct mmu_notifier *mn,
	239	struct mm_struct *mm, unsigned long start,
	240	unsigned long end)
	241	{
	242	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
	243	ms_notifier);
	244
	245	atomic_dec(&gms->ms_range_active);
	246	wake_up_all(&gms->ms_wait_queue);
	247	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
	248	}
	249
	250	static void gru_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
	251	unsigned long address)
	252	{
	253	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
	254	ms_notifier);
	255
	256	STAT(mmu_invalidate_page);
	257	gru_flush_tlb_range(gms, address, PAGE_SIZE);
	258	gru_dbg(grudev, "gms %p, address 0x%lx\n", gms, address);
	259	}
	260
	261	static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
	262	{
	263	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
	264	ms_notifier);
	265
	266	gms->ms_released = 1;
	267	gru_dbg(grudev, "gms %p\n", gms);
	268	}
	269
	270
	271	static const struct mmu_notifier_ops gru_mmuops = {
	272	.invalidate_page = gru_invalidate_page,
	273	.invalidate_range_start = gru_invalidate_range_start,
	274	.invalidate_range_end = gru_invalidate_range_end,
	275	.release = gru_release,
	276	};
	277
	278	/* Move this to the basic mmu_notifier file. But for now... */
	279	static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm,
	280	const struct mmu_notifier_ops *ops)
	281	{
	282	struct mmu_notifier *mn, *gru_mn = NULL;
	283	struct hlist_node *n;
	284
	285	if (mm->mmu_notifier_mm) {
	286	rcu_read_lock();
	287	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list,
	288	hlist)
	289	if (mn->ops == ops) {
	290	gru_mn = mn;
	291	break;
	292	}
	293	rcu_read_unlock();
	294	}
	295	return gru_mn;
	296	}
	297
	298	struct gru_mm_struct *gru_register_mmu_notifier(void)
	299	{
	300	struct gru_mm_struct *gms;
	301	struct mmu_notifier *mn;
	302
	303	mn = mmu_find_ops(current->mm, &gru_mmuops);
	304	if (mn) {
	305	gms = container_of(mn, struct gru_mm_struct, ms_notifier);
	306	atomic_inc(&gms->ms_refcnt);
	307	} else {
	308	gms = kzalloc(sizeof(*gms), GFP_KERNEL);
	309	if (gms) {
	310	spin_lock_init(&gms->ms_asid_lock);
	311	gms->ms_notifier.ops = &gru_mmuops;
	312	atomic_set(&gms->ms_refcnt, 1);
	313	init_waitqueue_head(&gms->ms_wait_queue);
	314	__mmu_notifier_register(&gms->ms_notifier, current->mm);
	315	}
	316	}
	317	gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
	318	atomic_read(&gms->ms_refcnt));
	319	return gms;
	320	}
	321
	322	void gru_drop_mmu_notifier(struct gru_mm_struct *gms)
	323	{
	324	gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms,
	325	atomic_read(&gms->ms_refcnt), gms->ms_released);
	326	if (atomic_dec_return(&gms->ms_refcnt) == 0) {
	327	if (!gms->ms_released)
	328	mmu_notifier_unregister(&gms->ms_notifier, current->mm);
	329	kfree(gms);
	330	}
	331	}
	332
	333	/*
	334	* Setup TGH parameters. There are:
	335	* - 24 TGH handles per GRU chiplet
	336	* - a portion (MAX_LOCAL_TGH) of the handles are reserved for
	337	* use by blade-local cpus
	338	* - the rest are used by off-blade cpus. This usage is
	339	* less frequent than blade-local usage.
	340	*
	341	* For now, use 16 handles for local flushes, 8 for remote flushes. If the blade
	342	* has less than or equal to 16 cpus, each cpu has a unique handle that it can
	343	* use.
	344	*/
	345	#define MAX_LOCAL_TGH 16
	346
	347	void gru_tgh_flush_init(struct gru_state *gru)
	348	{
	349	int cpus, shift = 0, n;
	350
	351	cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id);
	352
	353	/* n = cpus rounded up to next power of 2 */
	354	if (cpus) {
	355	n = 1 << fls(cpus - 1);
	356
	357	/*
	358	* shift count for converting local cpu# to TGH index
	359	* 0 if cpus <= MAX_LOCAL_TGH,
	360	* 1 if cpus <= 2*MAX_LOCAL_TGH,
	361	* etc
	362	*/
	363	shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1));
	364	}
	365	gru->gs_tgh_local_shift = shift;
	366
	367	/* first starting TGH index to use for remote purges */
	368	gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift;
	369
	370	}