#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/sysctl.h>
#include <linux/mutex.h>
#include <linux/math64.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/clock.h>
#ifdef CONFIG_CPU_V7
#include <asm/hardware/cache-l2x0.h>
#include <asm/cacheflush.h>
#endif
#include <litmus/color.h>
#include <litmus/debug_trace.h>
#include <litmus/lockdown.h>
#ifndef litmus_cycles_to_ns
/* Fallback when <litmus/clock.h> does not provide a cycles-to-nanoseconds
* conversion; the cast keeps the %lld format strings below well-formed. */
#define litmus_cycles_to_ns(x) ((s64)0)
#endif
#define MAX_NR_WAYS 16
/*
* unlocked_way[i]: lockdown register value that permits allocation only in
* way i (bit i clear, all other bits set).
*
* For each bit of a lockdown register:
* 0 = allocation can occur in the corresponding way
* 1 = allocation cannot occur in the corresponding way
*/
u32 unlocked_way[MAX_NR_WAYS] = {
0xFFFFFFFE, /* way 0 unlocked */
0xFFFFFFFD,
0xFFFFFFFB,
0xFFFFFFF7,
0xFFFFFFEF, /* way 4 unlocked */
0xFFFFFFDF,
0xFFFFFFBF,
0xFFFFFF7F,
0xFFFFFEFF, /* way 8 unlocked */
0xFFFFFDFF,
0xFFFFFBFF,
0xFFFFF7FF,
0xFFFFEFFF, /* way 12 unlocked */
0xFFFFDFFF,
0xFFFFBFFF,
0xFFFF7FFF,
};
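/*
* Illustrative usage sketch (buf and len are hypothetical, not defined in
* this file): prefetch a kernel-mapped buffer into way 3 only, then reopen
* all ways for allocation:
*
*	color_read_in_mem_lock(unlocked_way[3], UNLOCK_ALL, buf, buf + len);
*/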
#ifndef CONFIG_CPU_V7
u32 color_read_in_mem(u32 lock_val, u32 unlock_val, void *start, void *end)
{
TRACE_CUR("Dummy read_in_mem: lock_val: 0x%x unlock_val: 0x%x "
"start: 0x%p end: 0x%p\n", lock_val, unlock_val,
start, end);
return 0;
}
void set_lockdown(u32 lockdown_state)
{
TRACE_CUR("Dummy set_lockdown function lockdown_state: 0x%x\n",
lockdown_state);
}
void litmus_setup_lockdown(void __iomem *base, u32 id)
{
printk("LITMUS^RT Dummy Lockdown\n");
}
#else
static void __iomem *cache_base;
static void __iomem *lockreg_d;
static void __iomem *lockreg_i;
static raw_spinlock_t prefetch_lock;
static u32 cache_id;
static int nr_lockregs;
struct mutex actlr_mutex;
struct mutex l2x0_prefetch_mutex;
struct mutex lockdown_proc;
#define ld_d_reg(cpu) ({ int __cpu = cpu; \
void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_D_BASE + \
__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
#define ld_i_reg(cpu) ({ int __cpu = cpu; \
void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_I_BASE + \
__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
void set_lockdown(u32 lockdown_state)
{
writel_relaxed(lockdown_state, lockreg_d);
}
u32 color_read_in_mem(u32 lock_val, u32 unlock_val, void *start, void *end)
{
u32 v = 0;
__asm__ __volatile__ (
" .align 5\n"
" str %[lockval], [%[cachereg]]\n"
"1: ldr %[val], [%[addr]], #32 @ 32 bytes = 1 cache line\n"
" cmp %[end], %[addr] @ subtracts addr from end\n"
" bgt 1b\n @ read more, if necessary\n"
: [addr] "+r" (start),
[val] "+r" (v)
: [end] "r" (end),
[cachereg] "r" (lockreg_d),
[lockval] "r" (lock_val)
: "cc");
return v;
}
/*
* Prefetch the range [start, end) by reading the first word of each cache
* line while @lock_val is in the data lockdown register, then restore the
* register to @unlock_val.
*
* @lock_val: lockdown value in effect while the range is read in
* @unlock_val: lockdown value written back after the read loop
* @start: start address of the range to prefetch
* @end: end address of the range (exclusive)
*
* Assumes start < end. Unlike color_read_in_mem(), this variant takes
* prefetch_lock with interrupts disabled, so the whole read loop runs with
* @lock_val in place before the register is reset.
*/
u32 color_read_in_mem_lock(u32 lock_val, u32 unlock_val, void *start, void *end)
{
unsigned long flags;
u32 v = 0;
raw_spin_lock_irqsave(&prefetch_lock, flags);
__asm__ __volatile__ (
" .align 5\n"
" str %[lockval], [%[cachereg]]\n"
"1: ldr %[val], [%[addr]], #32 @ 32 bytes = 1 cache line\n"
" cmp %[end], %[addr] @ subtracts addr from end\n"
" bgt 1b\n @ read more, if necessary\n"
" str %[unlockval], [%[cachereg]]\n"
: [addr] "+r" (start),
[val] "+r" (v)
: [end] "r" (end),
[cachereg] "r" (lockreg_d),
[lockval] "r" (lock_val),
[unlockval] "r" (unlock_val)
: "cc");
raw_spin_unlock_irqrestore(&prefetch_lock, flags);
return v;
}
/*
* Ensure that this page is not in the L1 or L2 cache.
* Since the L1 cache is VIPT and the L2 cache is PIPT, we can use either the
* kernel or user vaddr.
*/
void color_flush_page(void *vaddr)
{
v7_flush_kern_dcache_area(vaddr, PAGE_SIZE);
}
static void print_lockdown_registers(void)
{
int i;
for (i = 0; i < nr_lockregs; i++) {
printk("Lockdown Data CPU %2d: 0x%8x\n",
i, readl_relaxed(ld_d_reg(i)));
printk("Lockdown Inst CPU %2d: 0x%8x\n",
i, readl_relaxed(ld_i_reg(i)));
}
}
/* Operate on the Cortex-A9's ACTLR register */
#define ACTLR_L2_PREFETCH_HINT (1 << 1)
#define ACTLR_L1_PREFETCH (1 << 2)
/*
* Change the ACTLR.
* @mode - If 1 (0), set (clear) the bit given in @mask in the ACTLR.
* @mask - A mask in which one bit is set to operate on the ACTLR.
*/
static void actlr_change(int mode, int mask)
{
u32 orig_value, new_value, reread_value;
if (0 != mode && 1 != mode) {
printk(KERN_WARNING "Called %s with mode != 0 and mode != 1.\n",
__FUNCTION__);
return;
}
/* get the original value */
asm volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (orig_value));
if (0 == mode)
new_value = orig_value & ~(mask);
else
new_value = orig_value | mask;
asm volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (new_value));
asm volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (reread_value));
printk("ACTLR: orig: 0x%8x wanted: 0x%8x new: 0x%8x\n",
orig_value, new_value, reread_value);
}
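/*
* The proc handlers below pass the user-written value straight through, so
* writing 0 to the L1 prefetch entry, for example, results in
*
*	actlr_change(0, ACTLR_L1_PREFETCH);
*
* which clears bit 2 of the ACTLR and disables L1 prefetching.
*/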
int litmus_l1_prefetch_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, mode;
mutex_lock(&actlr_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
mode = *((int*)table->data);
actlr_change(mode, ACTLR_L1_PREFETCH);
}
mutex_unlock(&actlr_mutex);
return ret;
}
int litmus_l2_prefetch_hint_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, mode;
mutex_lock(&actlr_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
mode = *((int*)table->data);
actlr_change(mode, ACTLR_L2_PREFETCH_HINT);
}
mutex_unlock(&actlr_mutex);
return ret;
}
/* Operate on the PL-310's Prefetch Control Register, L2X0_PREFETCH_CTRL */
#define L2X0_PREFETCH_DOUBLE_LINEFILL (1 << 30)
#define L2X0_PREFETCH_DATA_PREFETCH (1 << 28)
static void l2x0_prefetch_change(int mode, int mask)
{
u32 orig_value, new_value, reread_value;
if (0 != mode && 1 != mode) {
printk(KERN_WARNING "Called %s with mode != 0 and mode != 1.\n",
__FUNCTION__);
return;
}
orig_value = readl_relaxed(cache_base + L2X0_PREFETCH_CTRL);
if (0 == mode)
new_value = orig_value & ~(mask);
else
new_value = orig_value | mask;
writel_relaxed(new_value, cache_base + L2X0_PREFETCH_CTRL);
reread_value = readl_relaxed(cache_base + L2X0_PREFETCH_CTRL);
printk("l2x0 prefetch: orig: 0x%8x wanted: 0x%8x new: 0x%8x\n",
orig_value, new_value, reread_value);
}
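/*
* As with the ACTLR handlers, the handlers below forward the written value,
* e.g. writing 1 to the data prefetch entry results in
*
*	l2x0_prefetch_change(1, L2X0_PREFETCH_DATA_PREFETCH);
*
* which sets bit 28 of the PL310 Prefetch Control Register.
*/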
int litmus_l2_double_linefill_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, mode;
mutex_lock(&l2x0_prefetch_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
mode = *((int*)table->data);
l2x0_prefetch_change(mode, L2X0_PREFETCH_DOUBLE_LINEFILL);
}
mutex_unlock(&l2x0_prefetch_mutex);
return ret;
}
int litmus_l2_data_prefetch_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, mode;
mutex_lock(&l2x0_prefetch_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
mode = *((int*)table->data);
l2x0_prefetch_change(mode, L2X0_PREFETCH_DATA_PREFETCH);
}
mutex_unlock(&l2x0_prefetch_mutex);
return ret;
}
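/*
* Reading the lockdown proc entry reports the current contents of the first
* data lockdown register; writing stores the written value there directly
* (proc_dointvec parses it as a decimal integer).
*/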
int litmus_lockdown_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret, lockdown, *data_ptr;
data_ptr = (int*) table->data;
mutex_lock(&lockdown_proc);
if (!write) {
lockdown = readl_relaxed(lockreg_d);
*data_ptr = lockdown;
}
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write) {
lockdown = *((int*)table->data);
writel_relaxed(lockdown, lockreg_d);
}
mutex_unlock(&lockdown_proc);
return ret;
}
#define TRIALS 1000
static int test_get_cycles_overhead(void)
{
u64 sum = 0, min = (u64)-1, max = 0;
unsigned long flags;
cycles_t a, b;
int i;
for (i = 0; i < TRIALS; i++) {
u64 diff;
local_irq_save(flags);
preempt_disable();
a = litmus_get_cycles();
b = litmus_get_cycles();
preempt_enable();
local_irq_restore(flags);
diff = b - a;
if (diff > max)
max = diff;
if (diff < min)
min = diff;
sum += diff;
}
printk("cycle test: avg: %llu min: %llu max: %llu\n",
div64_u64(sum, TRIALS), min, max);
return 0;
}
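/*
* Return the elapsed time from lhs to rhs in nanoseconds. Despite the name,
* this operates on struct timespec values, not struct timeval.
*/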
static long update_timeval(struct timespec lhs, struct timespec rhs)
{
long val;
struct timespec ts;
ts = timespec_sub(rhs, lhs);
val = ts.tv_sec*NSEC_PER_SEC + ts.tv_nsec;
return val;
}
/*
* CTRL_PAGES: 4 pages of each of the 16 colors; the read-in test below
* prefetches these into ways WAY_OFFSET..WAY_OFFSET+3.
* THRASH_PAGES: 32 pages of each color, used only to pollute the cache
* between measurements.
* The read-in test depends on these exact sizes, so do not change them.
*/
#define NR_COLORS 16
#define CTRL_PAGES (NR_COLORS * 4)
#define THRASH_PAGES (NR_COLORS * 32)
#define TOTAL_PAGES (CTRL_PAGES + THRASH_PAGES)
#define WAY_OFFSET 4
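/*
* With 4 KiB pages (an assumption, not checked here) this is 64 control
* pages (256 KiB) plus 512 thrash pages (2 MiB), 576 pages in total.
*/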
static void thrash(void *vaddr)
{
void *thrash_pages = vaddr + CTRL_PAGES * PAGE_SIZE;
/* try and flush it */
v7_flush_kern_dcache_area(vaddr, CTRL_PAGES * PAGE_SIZE);
/* thrash. don't lock down, we want to fill the dcache with these */
color_read_in_mem(UNLOCK_ALL, UNLOCK_ALL, thrash_pages,
thrash_pages + THRASH_PAGES * PAGE_SIZE);
}
//#define READ_TRACE(fmt, args...) TRACE("read_trace: " fmt, ##args)
#define READ_TRACE(fmt, args...) do { } while (0)
static int test_read_in(void)
{
struct page **pages;
cycles_t start, stop;
unsigned long flags;
void *remapped;
int ret = 0, i, j;
/* zeroed so the cleanup path can stop at the first unallocated slot */
pages = kzalloc(TOTAL_PAGES * sizeof(*pages), GFP_KERNEL);
if (!pages) {
printk("could not allocate pages array.\n");
ret = -ENOMEM;
goto out;
}
/* Allocate a bunch of pages. */
for (i = 0; i < TOTAL_PAGES; i++) {
const unsigned long color = i % NR_COLORS;
pages[i] = get_colored_page(color);
if (!pages[i]) {
printk(KERN_WARNING "%s: no pages available.\n", __FUNCTION__);
ret = -ENOMEM;
/* return the colored pages allocated so far */
goto out_free_colors;
}
}
/* Put the pages in a contiguous virtual address space. */
remapped = vmap(pages, TOTAL_PAGES, VM_MAP, PAGE_KERNEL);
if (!remapped) {
printk(KERN_WARNING "%s: bad vmap\n", __FUNCTION__);
ret = -EINVAL;
goto out_free_colors;
}
/* smaller readings, in bytes */
for (i = 8; i <= 4096; i += 8) {
thrash(remapped);
READ_TRACE("small test, i:%d\n", i);
preempt_disable();
local_irq_save(flags);
start = litmus_get_cycles();
color_read_in_mem(unlocked_way[WAY_OFFSET], UNLOCK_ALL,
remapped, remapped + i);
stop = litmus_get_cycles();
local_irq_restore(flags);
preempt_enable();
TRACE("wss, nanoseconds: %4d, %lld\n",
i, litmus_cycles_to_ns(stop - start));
}
for (i = 1; i <= CTRL_PAGES; i += 1) {
/* i is the number of pages to read in */
/* we will read in from page zero to page i (exclusive) */
READ_TRACE("start on i:%d\n", i);
thrash(remapped);
preempt_disable();
local_irq_save(flags);
start = litmus_get_cycles();
for (j = 0; j < i; j += NR_COLORS) {
/* need to chunk the reads into groups of NR_COLORS
* so we can switch ways
*/
void *vaddr_start, *vaddr_end;
int read_start = j, read_end = j + NR_COLORS;
int way = WAY_OFFSET + j / NR_COLORS;
if (read_end > i)
read_end = i;
vaddr_start = remapped + PAGE_SIZE * read_start;
vaddr_end = remapped + PAGE_SIZE * read_end;
color_read_in_mem(unlocked_way[way], UNLOCK_ALL,
vaddr_start, vaddr_end);
READ_TRACE("i:%d j:%d read_start:%d read_end:%d way:%d\n",
i, j, read_start, read_end, way);
}
stop = litmus_get_cycles();
local_irq_restore(flags);
preempt_enable();
TRACE("wss, nanoseconds: %4lu, %lld\n",
PAGE_SIZE * i,
litmus_cycles_to_ns(stop - start));
}
/* Done with these pages */
vunmap(remapped);
out_free_colors:
for (i = 0; i < TOTAL_PAGES && pages[i]; i++) {
put_page(pages[i]);
add_page_to_color_list(pages[i]);
}
out_free:
kfree(pages);
out:
return ret;
}
static void test_lockdown(void *ignore)
{
int i;
printk("Start lockdown test on CPU %d.\n", smp_processor_id());
for (i = 0; i < nr_lockregs; i++) {
printk("CPU %2d data reg: 0x%8p\n", i, ld_d_reg(i));
printk("CPU %2d inst reg: 0x%8p\n", i, ld_i_reg(i));
}
printk("Lockdown initial state:\n");
print_lockdown_registers();
printk("---\n");
for (i = 0; i < nr_lockregs; i++) {
writel_relaxed(1, ld_d_reg(i));
writel_relaxed(2, ld_i_reg(i));
}
printk("Lockdown all data=1 instr=2:\n");
print_lockdown_registers();
printk("---\n");
for (i = 0; i < nr_lockregs; i++) {
writel_relaxed((1 << i), ld_d_reg(i));
writel_relaxed(((1 << 8) >> i), ld_i_reg(i));
}
printk("Lockdown varies:\n");
print_lockdown_registers();
printk("---\n");
for (i = 0; i < nr_lockregs; i++) {
writel_relaxed(UNLOCK_ALL, ld_d_reg(i));
writel_relaxed(UNLOCK_ALL, ld_i_reg(i));
}
printk("Lockdown all zero:\n");
print_lockdown_registers();
/* Checks that the unlocked array is set up correctly. */
for (i = 0; i < MAX_NR_WAYS; i++) {
unsigned long expected = 0xFFFFFFFF;
clear_bit(i, &expected);
if (expected != unlocked_way[i]) {
WARN(1, "Unlock %2d: expected 0x%8x but got 0x%8x\n",
i, ((u32)expected), unlocked_way[i]);
}
}
printk("End lockdown test.\n");
}
static int perf_test(void)
{
struct timespec before, after;
struct page *page;
void *vaddr;
u32 *data;
long time;
int i;
page = alloc_page(__GFP_MOVABLE);
if (!page) {
printk(KERN_WARNING "No memory\n");
return -ENOMEM;
}
vaddr = page_address(page);
if (!vaddr)
printk(KERN_WARNING "%s: vaddr is null\n", __FUNCTION__);
data = (u32*) vaddr;
getnstimeofday(&before);
barrier();
for (i = 0; i < TRIALS; i++) {
color_flush_page(vaddr);
}
barrier();
getnstimeofday(&after);
time = update_timeval(before, after);
printk("Average for flushes without re-reading: %ld\n", time / TRIALS);
color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
barrier();
getnstimeofday(&before);
barrier();
for (i = 0; i < TRIALS; i++) {
color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
}
barrier();
getnstimeofday(&after);
time = update_timeval(before, after);
printk("Average for read in (no flush): %ld\n", time / TRIALS);
getnstimeofday(&before);
barrier();
for (i = 0; i < TRIALS; i++) {
color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
color_flush_page(vaddr);
}
barrier();
getnstimeofday(&after);
time = update_timeval(before, after);
printk("Average for read in and then flush: %ld\n", time / TRIALS);
free_page((unsigned long)vaddr);
return 0;
}
#define LOCKREG_TEST_VAL 0x00000002
int litmus_test_prefetch_proc_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
struct page *page;
void *vaddr;
u32 *data;
int i;
if (!write) {
*lenp = 0;
return 0;
}
page = alloc_page(__GFP_MOVABLE);
if (!page) {
printk(KERN_WARNING "No memory\n");
return -ENOMEM;
}
vaddr = page_address(page);
if (!vaddr)
printk(KERN_WARNING "%s: vaddr is null\n", __FUNCTION__);
data = (u32*)vaddr;
for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
data[i] = i;
color_read_in_mem(LOCKREG_TEST_VAL, UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
if (LOCKREG_TEST_VAL != readl_relaxed(lockreg_d)) {
printk("%s: Expected lockreg value 0x%8x but got 0x%8x!\n",
__FUNCTION__, LOCKREG_TEST_VAL,
readl_relaxed(lockreg_d));
} else {
printk("%s: Lockdown state after prefetch test passed.\n",
__FUNCTION__);
}
writel_relaxed(UNLOCK_ALL, lockreg_d);
free_page((unsigned long)vaddr);
//test_get_cycles_overhead();
test_read_in();
return 0;
}
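/*
* Presumably called once during boot from the platform's PL310 (l2x0)
* initialization code, with the ioremapped controller base and the value of
* the L2X0_CACHE_ID register; the exact call site is outside this file.
*/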
void litmus_setup_lockdown(void __iomem *base, u32 id)
{
cache_base = base;
cache_id = id;
lockreg_d = cache_base + L2X0_LOCKDOWN_WAY_D_BASE;
lockreg_i = cache_base + L2X0_LOCKDOWN_WAY_I_BASE;
if (L2X0_CACHE_ID_PART_L310 == (cache_id & L2X0_CACHE_ID_PART_MASK)) {
nr_lockregs = 8;
} else {
printk("Unknown cache ID!\n");
nr_lockregs = 1;
}
raw_spin_lock_init(&prefetch_lock);
mutex_init(&actlr_mutex);
mutex_init(&l2x0_prefetch_mutex);
mutex_init(&lockdown_proc);
WARN(MAX_NR_WAYS < color_cache_info.ways,
"Statically defined way maximum too small.\n");
test_lockdown(NULL);
}
#endif