#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/sysctl.h>
#include <linux/mutex.h>
#include <linux/math64.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/clock.h>
#ifdef CONFIG_CPU_V7
#include <asm/hardware/cache-l2x0.h>
#include <asm/cacheflush.h>
#endif
#include <litmus/color.h>
#include <litmus/debug_trace.h>
#include <litmus/lockdown.h>
#ifndef litmus_cycles_to_ns
/* Fallback when <litmus/clock.h> does not provide a cycles-to-nanoseconds
* conversion; the cast keeps the %lld format strings below well-formed. */
#define litmus_cycles_to_ns(x) ((s64)0)
#endif
#define MAX_NR_WAYS 16
/*
* unlocked_way[i]: lockdown register value that permits allocation only in
* way i (bit i clear, all other bits set).
*
* For each bit of a lockdown register:
* 0 = allocation can occur in the corresponding way
* 1 = allocation cannot occur in the corresponding way
*/
u32 unlocked_way[MAX_NR_WAYS] = {
0xFFFFFFFE, /* way 0 unlocked */
0xFFFFFFFD,
0xFFFFFFFB,
0xFFFFFFF7,
0xFFFFFFEF, /* way 4 unlocked */
0xFFFFFFDF,
0xFFFFFFBF,
0xFFFFFF7F,
0xFFFFFEFF, /* way 8 unlocked */
0xFFFFFDFF,
0xFFFFFBFF,
0xFFFFF7FF,
0xFFFFEFFF, /* way 12 unlocked */
0xFFFFDFFF,
0xFFFFBFFF,
0xFFFF7FFF,
};
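/*
* Illustrative usage sketch (buf and len are hypothetical, not defined in
* this file): prefetch a kernel-mapped buffer into way 3 only, then reopen
* all ways for allocation:
*
*	color_read_in_mem_lock(unlocked_way[3], UNLOCK_ALL, buf, buf + len);
*/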
#ifndef CONFIG_CPU_V7
u32 color_read_in_mem(u32 lock_val, u32 unlock_val, void *start, void *end)
{
TRACE_CUR("Dummy read_in_mem: lock_val: 0x%x unlock_val: 0x%x "
"start: 0x%p end: 0x%p\n", lock_val, unlock_val,
start, end);
return 0;
}
void set_lockdown(u32 lockdown_state)
{
TRACE_CUR("Dummy set_lockdown function lockdown_state: 0x%x\n",
lockdown_state);
}
void litmus_setup_lockdown(void __iomem *base, u32 id)
{
printk("LITMUS^RT Dummy Lockdown\n");
}
#else
static void __iomem *cache_base;
static void __iomem *lockreg_d;
static void __iomem *lockreg_i;
static raw_spinlock_t prefetch_lock;
static u32 cache_id;
static int nr_lockregs;
struct mutex actlr_mutex;
struct mutex l2x0_prefetch_mutex;
struct mutex lockdown_proc;
#define ld_d_reg(cpu) ({ int __cpu = cpu; \
void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_D_BASE + \
__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
#define ld_i_reg(cpu) ({ int __cpu = cpu; \
void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_I_BASE + \
__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
void set_lockdown(u32 lockdown_state)
{
writel_relaxed(lockdown_state, lockreg_d);
}
u32 color_read_in_mem(u32 lock_val, u32 unlock_val, void *start, void *end)
{
u32 v = 0;
__asm__ __volatile__ (
" .align 5\n"
" str %[lockval], [%[cachereg]]\n"
"1: ldr %[val], [%[addr]], #32 @ 32 bytes = 1 cache line\n"
" cmp %[end], %[addr] @ subtracts addr from end\n"
" bgt 1b\n @ read more, if necessary\n"
: [addr] "+r" (start),
[val] "+r" (v)
: [end] "r" (end),
[cachereg] "r" (lockreg_d),
[lockval] "r" (lock_val)
: "cc");
return v;
}
/*
* Prefetch the range [start, end) by reading the first word of each cache
* line while @lock_val is in the data lockdown register, then restore the
* register to @unlock_val.
*
* @lock_val: lockdown value in effect while the range is read in
* @unlock_val: lockdown value written back after the read loop
* @start: start address of the range to prefetch
* @end: end address of the range (exclusive)
*
* Assumes start < end. Unlike color_read_in_mem(), this variant takes
* prefetch_lock with interrupts disabled, so the whole read loop runs with
* @lock_val in place before the register is reset.
*/
u32 color_read_in_mem_lock(u32 lock_val, u32 unlock_val, void *start, void *end)
{
unsigned long flags;
u32 v = 0;
raw_spin_lock_irqsave(&prefetch_lock, flags);
__asm__ __volatile__ (
" .align 5\n"
" str %[lockval], [%[cachereg]]\n"
"1: ldr %[val], [%[addr]], #32 @ 32 bytes = 1 cache line\n"
" cmp %[end], %[addr] @ subtracts addr from end\n"
" bgt 1b\n @ read more, if necessary\n"
" str %[unlockval], [%[cachereg]]\n"
: [addr] "+r" (start),
[val] "+r" (v)
: [end] "r" (end),
[cachereg] "r" (lockreg_d),
[lockval] "r" (lock_val),
[unlockval] "r" (unlock_val)
: "cc");
raw_spin_unlock_irqrestore(&prefetch_lock, flags);
return v;
}
/*
* Ensure that this page is not in the L1 or L2 cache.
* Since the L1 cache is VIPT and the L2 cache is PIPT, we can use either the
* kernel or user vaddr.
*/
void color_flush_page(void *vaddr)
{
v7_flush_kern_dcache_area(vaddr, PAGE_SIZE);
}
static void print_lockdown_registers(void)
{
int i;
for (i = 0; i < nr_lockregs; i++) {
printk("Lockdown Data CPU %2d: 0x%8x\n",
i, readl_relaxed(ld_d_reg(i)));
printk("Lockdown Inst CPU %2d: 0x%8x\n",
i, readl_relaxed(ld_i_reg(i)));
}
}
/* Operate on the Cortex-A9's ACTLR register */
#define ACTLR_L2_PREFETCH_HINT (1 << 1)
#define ACTLR_L1_PREFETCH (1 << 2)
/*
* Change the ACTLR.
* @mode - If 1 (0), set (clear) the bit given in @mask in the ACTLR.
* @mask - A mask in which one bit is set to operate on the ACTLR.
*/
static void actlr_change(int mode, int mask)
{
u32 orig_value, new_value, reread_value;
if (0 != mode && 1 != mode) {
printk(KERN_WARNING "Called %s with mode != 0 and mode != 1.\n",
__FUNCTION__);
return;
}
/* get the original value */
asm volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (orig_value));
if (0 == mode)
new_value = orig_value & ~(mask);
else
new_value = orig_value | mask;
asm volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (new_value));
asm volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (reread_value));
printk("ACTLR: orig: 0x%8x wanted: 0x%8x new: 0x%8x\n",
orig_value, new_value, reread_value);
}
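/*
* The proc handlers below pass the user-written value straight through, so
* writing 0 to the L1 prefetch entry, for example, results in
*
*	actlr_change(0, ACTLR_L1_PREFETCH);
*
* which clears bit 2 of the ACTLR and disables L1 prefetching.
*/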
int litmus_l1_prefetch_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, mode;
mutex_lock(&actlr_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
mode = *((int*)table->data);
actlr_change(mode, ACTLR_L1_PREFETCH);
}
mutex_unlock(&actlr_mutex);
return ret;
}
int litmus_l2_prefetch_hint_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, mode;
mutex_lock(&actlr_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
mode = *((int*)table->data);
actlr_change(mode, ACTLR_L2_PREFETCH_HINT);
}
mutex_unlock(&actlr_mutex);
return ret;
}
/* Operate on the PL-310's Prefetch Control Register, L2X0_PREFETCH_CTRL */
#define L2X0_PREFETCH_DOUBLE_LINEFILL (1 << 30)
#define L2X0_PREFETCH_DATA_PREFETCH (1 << 28)
static void l2x0_prefetch_change(int mode, int mask)
{
u32 orig_value, new_value, reread_value;
if (0 != mode && 1 != mode) {
printk(KERN_WARNING "Called %s with mode != 0 and mode != 1.\n",
__FUNCTION__);
return;
}
orig_value = readl_relaxed(cache_base + L2X0_PREFETCH_CTRL);
if (0 == mode)
new_value = orig_value & ~(mask);
else
new_value = orig_value | mask;
writel_relaxed(new_value, cache_base + L2X0_PREFETCH_CTRL);
reread_value = readl_relaxed(cache_base + L2X0_PREFETCH_CTRL);
printk("l2x0 prefetch: orig: 0x%8x wanted: 0x%8x new: 0x%8x\n",
orig_value, new_value, reread_value);
}
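/*
* As with the ACTLR handlers, the handlers below forward the written value,
* e.g. writing 1 to the data prefetch entry results in
*
*	l2x0_prefetch_change(1, L2X0_PREFETCH_DATA_PREFETCH);
*
* which sets bit 28 of the PL310 Prefetch Control Register.
*/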
int litmus_l2_double_linefill_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, mode;
mutex_lock(&l2x0_prefetch_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
mode = *((int*)table->data);
l2x0_prefetch_change(mode, L2X0_PREFETCH_DOUBLE_LINEFILL);
}
mutex_unlock(&l2x0_prefetch_mutex);
return ret;
}
int litmus_l2_data_prefetch_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, mode;
mutex_lock(&l2x0_prefetch_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
mode = *((int*)table->data);
l2x0_prefetch_change(mode, L2X0_PREFETCH_DATA_PREFETCH);
}
mutex_unlock(&l2x0_prefetch_mutex);
return ret;
}
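/*
* Reading the lockdown proc entry reports the current contents of the first
* data lockdown register; writing stores the written value there directly
* (proc_dointvec parses it as a decimal integer).
*/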
int litmus_lockdown_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, lockdown, *data_ptr;
data_ptr = (int*) table->data;
mutex_lock(&lockdown_proc);
if (!write) {
lockdown = readl_relaxed(lockreg_d);
*data_ptr = lockdown;
}
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
lockdown = *((int*)table->data);
writel_relaxed(lockdown, lockreg_d);
}
mutex_unlock(&lockdown_proc);
return ret;
}
#define TRIALS 1000
static int test_get_cycles_overhead(void)
{
u64 sum = 0, min = (u64)-1, max = 0;
unsigned long flags;
cycles_t a, b;
int i;
for (i = 0; i < TRIALS; i++) {
u64 diff;
local_irq_save(flags);
preempt_disable();
a = litmus_get_cycles();
b = litmus_get_cycles();
preempt_enable();
local_irq_restore(flags);
diff = b - a;
if (diff > max)
max = diff;
if (diff < min)
min = diff;
sum += diff;
}
printk("cycle test: avg: %llu min: %llu max: %llu\n",
div64_u64(sum, TRIALS), min, max);
return 0;
}
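/*
* Return the elapsed time from lhs to rhs in nanoseconds. Despite the name,
* this operates on struct timespec values, not struct timeval.
*/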
static long update_timeval(struct timespec lhs, struct timespec rhs)
{
long val;
struct timespec ts;
ts = timespec_sub(rhs, lhs);
val = ts.tv_sec*NSEC_PER_SEC + ts.tv_nsec;
return val;
}
/*
* CTRL_PAGES: 4 pages of each of the 16 colors; the read-in test below
* prefetches these into ways WAY_OFFSET..WAY_OFFSET+3.
* THRASH_PAGES: 32 pages of each color, used only to pollute the cache
* between measurements.
* The read-in test depends on these exact sizes, so do not change them.
*/
#define NR_COLORS 16
#define CTRL_PAGES (NR_COLORS * 4)
#define THRASH_PAGES (NR_COLORS * 32)
#define TOTAL_PAGES (CTRL_PAGES + THRASH_PAGES)
#define WAY_OFFSET 4
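/*
* With 4 KiB pages (an assumption, not checked here) this is 64 control
* pages (256 KiB) plus 512 thrash pages (2 MiB), 576 pages in total.
*/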
static void thrash(void *vaddr)
{
void *thrash_pages = vaddr + CTRL_PAGES * PAGE_SIZE;
/* try and flush it */
v7_flush_kern_dcache_area(vaddr, CTRL_PAGES * PAGE_SIZE);
/* thrash. don't lock down, we want to fill the dcache with these */
color_read_in_mem(UNLOCK_ALL, UNLOCK_ALL, thrash_pages,
thrash_pages + THRASH_PAGES * PAGE_SIZE);
}
//#define READ_TRACE(fmt, args...) TRACE("read_trace: " fmt, ##args)
#define READ_TRACE(fmt, args...) do { } while (0)
static int test_read_in(void)
{
struct page **pages;
cycles_t start, stop;
unsigned long flags;
void *remapped;
int ret = 0, i, j;
/* zeroed so the cleanup path can stop at the first unallocated slot */
pages = kzalloc(TOTAL_PAGES * sizeof(*pages), GFP_KERNEL);
if (!pages) {
printk("could not allocate pages array.\n");
ret = -ENOMEM;
goto out;
}
/* Allocate a bunch of pages. */
for (i = 0; i < TOTAL_PAGES; i++) {
const unsigned long color = i % NR_COLORS;
pages[i] = get_colored_page(color);
if (!pages[i]) {
printk(KERN_WARNING "%s: no pages available.\n", __FUNCTION__);
ret = -ENOMEM;
/* return the colored pages allocated so far */
goto out_free_colors;
}
}
/* Put the pages in a contiguous virtual address space. */
remapped = vmap(pages, TOTAL_PAGES, VM_MAP, PAGE_KERNEL);
if (!remapped) {
printk(KERN_WARNING "%s: bad vmap\n", __FUNCTION__);
ret = -EINVAL;
goto out_free_colors;
}
/* smaller readings, in bytes */
for (i = 8; i <= 4096; i += 8) {
thrash(remapped);
READ_TRACE("small test, i:%d\n", i);
preempt_disable();
local_irq_save(flags);
start = litmus_get_cycles();
color_read_in_mem(unlocked_way[WAY_OFFSET], UNLOCK_ALL,
remapped, remapped + i);
stop = litmus_get_cycles();
local_irq_restore(flags);
preempt_enable();
TRACE("wss, nanoseconds: %4d, %lld\n",
i, litmus_cycles_to_ns(stop - start));
}
for (i = 1; i <= CTRL_PAGES; i += 1) {
/* i is the number of pages to read in */
/* we will read in from page zero to page i (exclusive) */
READ_TRACE("start on i:%d\n", i);
thrash(remapped);
preempt_disable();
local_irq_save(flags);
start = litmus_get_cycles();
for (j = 0; j < i; j += NR_COLORS) {
/* need to chunk the reads into groups of NR_COLORS
* so we can switch ways
*/
void *vaddr_start, *vaddr_end;
int read_start = j, read_end = j + NR_COLORS;
int way = WAY_OFFSET + j / NR_COLORS;
if (read_end > i)
read_end = i;
vaddr_start = remapped + PAGE_SIZE * read_start;
vaddr_end = remapped + PAGE_SIZE * read_end;
color_read_in_mem(unlocked_way[way], UNLOCK_ALL,
vaddr_start, vaddr_end);
READ_TRACE("i:%d j:%d read_start:%d read_end:%d way:%d\n",
i, j, read_start, read_end, way);
}
stop = litmus_get_cycles();
local_irq_restore(flags);
preempt_enable();
TRACE("wss, nanoseconds: %4lu, %lld\n",
PAGE_SIZE * i,
litmus_cycles_to_ns(stop - start));
}
/* Done with these pages */
vunmap(remapped);
out_free_colors:
for (i = 0; i < TOTAL_PAGES && pages[i]; i++) {
put_page(pages[i]);
add_page_to_color_list(pages[i]);
}
out_free:
kfree(pages);
out:
return ret;
}
static void test_lockdown(void *ignore)
{
int i;
printk("Start lockdown test on CPU %d.\n", smp_processor_id());
for (i = 0; i < nr_lockregs; i++) {
printk("CPU %2d data reg: 0x%8p\n", i, ld_d_reg(i));
printk("CPU %2d inst reg: 0x%8p\n", i, ld_i_reg(i));
}
printk("Lockdown initial state:\n");
print_lockdown_registers();
printk("---\n");
for (i = 0; i < nr_lockregs; i++) {
writel_relaxed(1, ld_d_reg(i));
writel_relaxed(2, ld_i_reg(i));
}
printk("Lockdown all data=1 instr=2:\n");
print_lockdown_registers();
printk("---\n");
for (i = 0; i < nr_lockregs; i++) {
writel_relaxed((1 << i), ld_d_reg(i));
writel_relaxed(((1 << 8) >> i), ld_i_reg(i));
}
printk("Lockdown varies:\n");
print_lockdown_registers();
printk("---\n");
for (i = 0; i < nr_lockregs; i++) {
writel_relaxed(UNLOCK_ALL, ld_d_reg(i));
writel_relaxed(UNLOCK_ALL, ld_i_reg(i));
}
printk("Lockdown all zero:\n");
print_lockdown_registers();
/* Checks that the unlocked array is set up correctly. */
for (i = 0; i < MAX_NR_WAYS; i++) {
unsigned long expected = 0xFFFFFFFF;
clear_bit(i, &expected);
if (expected != unlocked_way[i]) {
WARN(1, "Unlock %2d: expected 0x%8x but got 0x%8x\n",
i, ((u32)expected), unlocked_way[i]);
}
}
printk("End lockdown test.\n");
}
static int perf_test(void)
{
struct timespec before, after;
struct page *page;
void *vaddr;
u32 *data;
long time;
int i;
page = alloc_page(__GFP_MOVABLE);
if (!page) {
printk(KERN_WARNING "No memory\n");
return -ENOMEM;
}
vaddr = page_address(page);
if (!vaddr)
printk(KERN_WARNING "%s: vaddr is null\n", __FUNCTION__);
data = (u32*) vaddr;
getnstimeofday(&before);
barrier();
for (i = 0; i < TRIALS; i++) {
color_flush_page(vaddr);
}
barrier();
getnstimeofday(&after);
time = update_timeval(before, after);
printk("Average for flushes without re-reading: %ld\n", time / TRIALS);
color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
barrier();
getnstimeofday(&before);
barrier();
for (i = 0; i < TRIALS; i++) {
color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
}
barrier();
getnstimeofday(&after);
time = update_timeval(before, after);
printk("Average for read in (no flush): %ld\n", time / TRIALS);
getnstimeofday(&before);
barrier();
for (i = 0; i < TRIALS; i++) {
color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
color_flush_page(vaddr);
}
barrier();
getnstimeofday(&after);
time = update_timeval(before, after);
printk("Average for read in and then flush: %ld\n", time / TRIALS);
free_page((unsigned long)vaddr);
return 0;
}
#define LOCKREG_TEST_VAL 0x00000002
int litmus_test_prefetch_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct page *page;
void *vaddr;
u32 *data;
int i;
if (!write) {
*lenp = 0;
return 0;
}
page = alloc_page(__GFP_MOVABLE);
if (!page) {
printk(KERN_WARNING "No memory\n");
return -ENOMEM;
}
vaddr = page_address(page);
if (!vaddr)
printk(KERN_WARNING "%s: vaddr is null\n", __FUNCTION__);
data = (u32*)vaddr;
for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
data[i] = i;
color_read_in_mem(LOCKREG_TEST_VAL, UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
if (LOCKREG_TEST_VAL != readl_relaxed(lockreg_d)) {
printk("%s: Expected lockreg value 0x%8x but got 0x%8x!\n",
__FUNCTION__, LOCKREG_TEST_VAL,
readl_relaxed(lockreg_d));
} else {
printk("%s: Lockdown state after prefetch test passed.\n",
__FUNCTION__);
}
writel_relaxed(UNLOCK_ALL, lockreg_d);
free_page((unsigned long)vaddr);
//test_get_cycles_overhead();
test_read_in();
return 0;
}
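/*
* Presumably called once during boot from the platform's PL310 (l2x0)
* initialization code, with the ioremapped controller base and the value of
* the L2X0_CACHE_ID register; the exact call site is outside this file.
*/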
void litmus_setup_lockdown(void __iomem *base, u32 id)
{
cache_base = base;
cache_id = id;
lockreg_d = cache_base + L2X0_LOCKDOWN_WAY_D_BASE;
lockreg_i = cache_base + L2X0_LOCKDOWN_WAY_I_BASE;
if (L2X0_CACHE_ID_PART_L310 == (cache_id & L2X0_CACHE_ID_PART_MASK)) {
nr_lockregs = 8;
} else {
printk("Unknown cache ID!\n");
nr_lockregs = 1;
}
raw_spin_lock_init(&prefetch_lock);
mutex_init(&actlr_mutex);
mutex_init(&l2x0_prefetch_mutex);
mutex_init(&lockdown_proc);
WARN(MAX_NR_WAYS < color_cache_info.ways,
"Statically defined way maximum too small.\n");
test_lockdown(NULL);
}
#endif