/* NOTE: the include list below is reconstructed from the symbols used in this file. */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/time.h>
#include <linux/smp.h>
#include <linux/math64.h>

#ifdef CONFIG_CPU_V7
#include <asm/hardware/cache-l2x0.h>
#include <asm/cacheflush.h>
#endif

#include <litmus/litmus.h>
#include <litmus/color.h>
#include <litmus/debug_trace.h>

#ifndef litmus_cycles_to_ns
#define litmus_cycles_to_ns(x) 0
#endif

#define MAX_NR_WAYS	16

/*
 * unlocked_way[i] : allocation can occur in way i
 *
 * 0 = allocation can occur in the corresponding way
 * 1 = allocation cannot occur in the corresponding way
 */
u32 unlocked_way[MAX_NR_WAYS] = {
	0xFFFFFFFE, /* way 0 unlocked */
	0xFFFFFFFD,
	0xFFFFFFFB,
	0xFFFFFFF7,
	0xFFFFFFEF, /* way 4 unlocked */
	0xFFFFFFDF,
	0xFFFFFFBF,
	0xFFFFFF7F,
	0xFFFFFEFF, /* way 8 unlocked */
	0xFFFFFDFF,
	0xFFFFFBFF,
	0xFFFFF7FF,
	0xFFFFEFFF, /* way 12 unlocked */
	0xFFFFDFFF,
	0xFFFFBFFF,
	0xFFFF7FFF,
};

#ifndef CONFIG_CPU_V7

u32 color_read_in_mem(u32 lock_val, u32 unlock_val, void *start, void *end)
{
	TRACE_CUR("Dummy read_in_mem: lock_val: 0x%x unlock_val: 0x%x "
			"start: 0x%p end: 0x%p\n", lock_val, unlock_val,
			start, end);
	return 0;
}

u32 color_read_in_mem_lock(u32 lock_val, u32 unlock_val, void *start, void *end)
{
	TRACE_CUR("Dummy read_in_mem: lock_val: 0x%x unlock_val: 0x%x "
			"start: 0x%p end: 0x%p\n", lock_val, unlock_val,
			start, end);
	return 0;
}

void set_lockdown(u32 lockdown_state)
{
	TRACE_CUR("Dummy set_lockdown function lockdown_state: 0x%x\n",
			lockdown_state);
}

void litmus_setup_lockdown(void __iomem *base, u32 id)
{
	printk("LITMUS^RT Dummy Lockdown\n");
}

#else

static void __iomem *cache_base;
static void __iomem *lockreg_d;
static void __iomem *lockreg_i;

static raw_spinlock_t prefetch_lock;
static u32 cache_id;
static int nr_lockregs;

struct mutex actlr_mutex;
struct mutex l2x0_prefetch_mutex;
struct mutex lockdown_proc;

#define ld_d_reg(cpu) ({ int __cpu = cpu; \
			void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_D_BASE + \
			__cpu * L2X0_LOCKDOWN_STRIDE; __v; })
#define ld_i_reg(cpu) ({ int __cpu = cpu; \
			void __iomem *__v = cache_base + L2X0_LOCKDOWN_WAY_I_BASE + \
			__cpu * L2X0_LOCKDOWN_STRIDE; __v; })

void set_lockdown(u32 lockdown_state)
{
	writel_relaxed(lockdown_state, lockreg_d);
}

u32 color_read_in_mem(u32 lock_val, u32 unlock_val, void *start, void *end)
{
	u32 v = 0;

	__asm__ __volatile__ (
"	.align 5\n"
"	str	%[lockval], [%[cachereg]]\n"
"1:	ldr	%[val], [%[addr]], #32		@ 32 bytes = 1 cache line\n"
"	cmp	%[end], %[addr]			@ subtracts addr from end\n"
"	bgt	1b\n				@ read more, if necessary\n"
	: [addr] "+r" (start),
	  [val] "+r" (v)
	: [end] "r" (end),
#ifdef CONFIG_CACHE_PL310
	  [cachereg] "r" (ld_d_reg(raw_smp_processor_id())),
#else
	  [cachereg] "r" (lockreg_d),
#endif
	  [lockval] "r" (lock_val)
	: "cc");

	return v;
}
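/*
 * Usage sketch (illustrative only; real call sites appear in perf_test() and
 * test_read_in() below).  To pull one page into way 3 of the L2:
 *
 *	color_read_in_mem(unlocked_way[3], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
 *
 * While the reads run, every way except way 3 is locked, so the fetched lines
 * can only be allocated in way 3.  Note that color_read_in_mem() ignores its
 * unlock_val argument and leaves the lockdown register set to lock_val;
 * color_read_in_mem_lock() below additionally restores unlock_val when done.
 */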
/*
 * Prefetch by reading the first word of each cache line in a page.
 *
 * @lock_val: lockdown value written while prefetching
 * @unlock_val: lockdown value restored once the prefetch is done
 * @start: start address to be prefetched
 * @end: end address to prefetch (exclusive)
 *
 * Assumes start < end.  The lockdown register written is the current CPU's
 * ld_d_reg() under CONFIG_CACHE_PL310, otherwise lockreg_d.
 */
u32 color_read_in_mem_lock(u32 lock_val, u32 unlock_val, void *start, void *end)
{
	unsigned long flags;
	u32 v = 0;

#ifndef CONFIG_CACHE_PL310
	raw_spin_lock_irqsave(&prefetch_lock, flags);
#endif

	__asm__ __volatile__ (
"	.align 5\n"
"	str	%[lockval], [%[cachereg]]\n"
"1:	ldr	%[val], [%[addr]], #32		@ 32 bytes = 1 cache line\n"
"	cmp	%[end], %[addr]			@ subtracts addr from end\n"
"	bgt	1b\n				@ read more, if necessary\n"
"	str	%[unlockval], [%[cachereg]]\n"
	: [addr] "+r" (start),
	  [val] "+r" (v)
	: [end] "r" (end),
#ifdef CONFIG_CACHE_PL310
	  [cachereg] "r" (ld_d_reg(raw_smp_processor_id())),
#else
	  [cachereg] "r" (lockreg_d),
#endif
	  [lockval] "r" (lock_val),
	  [unlockval] "r" (unlock_val)
	: "cc");

#ifndef CONFIG_CACHE_PL310
	raw_spin_unlock_irqrestore(&prefetch_lock, flags);
#endif

	return v;
}

/*
 * Ensure that this page is not in the L1 or L2 cache.
 * Since the L1 cache is VIPT and the L2 cache is PIPT, we can use either the
 * kernel or user vaddr.
 */
void color_flush_page(void *vaddr)
{
	v7_flush_kern_dcache_area(vaddr, PAGE_SIZE);
}

static void print_lockdown_registers(void)
{
	int i;

	for (i = 0; i < nr_lockregs; i++) {
		printk("Lockdown Data CPU %2d: 0x%8x\n",
				i, readl_relaxed(ld_d_reg(i)));
		printk("Lockdown Inst CPU %2d: 0x%8x\n",
				i, readl_relaxed(ld_i_reg(i)));
	}
}
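/*
 * Layout note (based on the mainline l2x0 register map, an assumption here):
 * each CPU owns a pair of lockdown registers, so with
 * L2X0_LOCKDOWN_WAY_D_BASE = 0x900, L2X0_LOCKDOWN_WAY_I_BASE = 0x904 and
 * L2X0_LOCKDOWN_STRIDE = 8, CPU n's registers sit at
 *
 *	ld_d_reg(n) = cache_base + 0x900 + 8 * n
 *	ld_i_reg(n) = cache_base + 0x904 + 8 * n
 *
 * For example, writing unlocked_way[0] (0xFFFFFFFE) to ld_d_reg(2) restricts
 * data allocations from CPU 2 to way 0 only.
 */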
/* Operate on the Cortex-A9's ACTLR register */
#define ACTLR_L2_PREFETCH_HINT	(1 << 1)
#define ACTLR_L1_PREFETCH	(1 << 2)

/*
 * Change the ACTLR.
 * @mode - If 1 (0), set (clear) the bit given in @mask in the ACTLR.
 * @mask - A mask in which one bit is set to operate on the ACTLR.
 */
static void actlr_change(int mode, int mask)
{
	u32 orig_value, new_value, reread_value;

	if (0 != mode && 1 != mode) {
		printk(KERN_WARNING "Called %s with mode != 0 and mode != 1.\n",
				__FUNCTION__);
		return;
	}

	/* get the original value */
	asm volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (orig_value));

	if (0 == mode)
		new_value = orig_value & ~(mask);
	else
		new_value = orig_value | mask;

	asm volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (new_value));
	asm volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (reread_value));

	printk("ACTLR: orig: 0x%8x wanted: 0x%8x new: 0x%8x\n",
			orig_value, new_value, reread_value);
}

int litmus_l1_prefetch_proc_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, mode;

	mutex_lock(&actlr_mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write) {
		mode = *((int*)table->data);
		actlr_change(mode, ACTLR_L1_PREFETCH);
	}
	mutex_unlock(&actlr_mutex);

	return ret;
}

int litmus_l2_prefetch_hint_proc_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, mode;

	mutex_lock(&actlr_mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		mode = *((int*)table->data);
		actlr_change(mode, ACTLR_L2_PREFETCH_HINT);
	}
	mutex_unlock(&actlr_mutex);

	return ret;
}

/* Operate on the PL-310's Prefetch Control Register, L2X0_PREFETCH_CTRL */
#define L2X0_PREFETCH_DOUBLE_LINEFILL	(1 << 30)
#define L2X0_PREFETCH_DATA_PREFETCH	(1 << 28)

static void l2x0_prefetch_change(int mode, int mask)
{
	u32 orig_value, new_value, reread_value;

	if (0 != mode && 1 != mode) {
		printk(KERN_WARNING "Called %s with mode != 0 and mode != 1.\n",
				__FUNCTION__);
		return;
	}

	orig_value = readl_relaxed(cache_base + L2X0_PREFETCH_CTRL);

	if (0 == mode)
		new_value = orig_value & ~(mask);
	else
		new_value = orig_value | mask;

	writel_relaxed(new_value, cache_base + L2X0_PREFETCH_CTRL);
	reread_value = readl_relaxed(cache_base + L2X0_PREFETCH_CTRL);

	printk("l2x0 prefetch: orig: 0x%8x wanted: 0x%8x new: 0x%8x\n",
			orig_value, new_value, reread_value);
}

int litmus_l2_double_linefill_proc_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, mode;

	mutex_lock(&l2x0_prefetch_mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		mode = *((int*)table->data);
		l2x0_prefetch_change(mode, L2X0_PREFETCH_DOUBLE_LINEFILL);
	}
	mutex_unlock(&l2x0_prefetch_mutex);

	return ret;
}

int litmus_l2_data_prefetch_proc_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, mode;

	mutex_lock(&l2x0_prefetch_mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		mode = *((int*)table->data);
		l2x0_prefetch_change(mode, L2X0_PREFETCH_DATA_PREFETCH);
	}
	mutex_unlock(&l2x0_prefetch_mutex);

	return ret;
}

int litmus_lockdown_proc_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, lockdown, *data_ptr;

	data_ptr = (int*) table->data;

	mutex_lock(&lockdown_proc);
	if (!write) {
		lockdown = readl_relaxed(lockreg_d);
		*data_ptr = lockdown;
	}

	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		lockdown = *((int*)table->data);
		writel_relaxed(lockdown, lockreg_d);
	}
	mutex_unlock(&lockdown_proc);

	return ret;
}
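/*
 * The handlers above are meant to be wired into a sysctl table elsewhere in
 * LITMUS^RT.  A minimal sketch of such an entry (the table and variable names
 * here are illustrative, not the actual ones used by the kernel):
 *
 *	static int lockdown_state;
 *	static struct ctl_table example_cache_table[] = {
 *		{
 *			.procname	= "lockdown",
 *			.data		= &lockdown_state,
 *			.maxlen		= sizeof(int),
 *			.mode		= 0644,
 *			.proc_handler	= litmus_lockdown_proc_handler,
 *		},
 *		{ }
 *	};
 *
 * Reading the file then reports the current data-lockdown register, and
 * writing an integer to it updates the register.
 */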
#define TRIALS 1000

static void sleep_ns(int ns)
{
	int i;
	lt_t start = litmus_clock();

	for (i = 0; litmus_clock() - start < ns; i++)
		;
}

static int test_get_cycles_overhead(void)
{
	u64 avg = 0, min = (u64)-1, max = 0;
	unsigned long flags;
	cycles_t a, b;
	int i;

	for (i = 0; i < TRIALS; i++) {
		u64 diff;

		local_irq_save(flags);
		preempt_disable();
		a = litmus_get_cycles();
		sleep_ns(15000);
		b = litmus_get_cycles();
		preempt_enable();
		local_irq_restore(flags);

		diff = b - a;
		if (diff > max)
			max = diff;
		if (diff < min)
			min = diff;
		avg += div64_u64(diff, TRIALS);
	}
	printk("cycle test 15us: avg: %llu min: %llu max: %llu\n",
			avg, min, max);
	return 0;
}

static long update_timeval(struct timespec lhs, struct timespec rhs)
{
	long val;
	struct timespec ts;

	ts = timespec_sub(rhs, lhs);
	val = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;

	return val;
}

/*
 * 16 * 4 pages to use as colors 0->15 in 4 ways, and
 * 16 * 32 pages to use as colors 0->15 in 32 ways.
 * Don't change these, because it will break things.
 */
#define NR_COLORS	16
#define CTRL_PAGES	(NR_COLORS * 4)
#define THRASH_PAGES	(NR_COLORS * 32)
#define TOTAL_PAGES	(CTRL_PAGES + THRASH_PAGES)
#define WAY_OFFSET	4

static void thrash(void *vaddr)
{
	void *thrash_pages = vaddr + CTRL_PAGES * PAGE_SIZE;

	/* try and flush it */
	v7_flush_kern_dcache_area(vaddr, CTRL_PAGES * PAGE_SIZE);
	/* thrash. don't lock down, we want to fill the dcache with these */
	color_read_in_mem(UNLOCK_ALL, UNLOCK_ALL, thrash_pages,
			thrash_pages + THRASH_PAGES * PAGE_SIZE);
}

//#define READ_TRACE(fmt, args...) TRACE("read_trace: " fmt, ##args)
#define READ_TRACE(fmt, args...) do { } while (0)
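/*
 * Sizing sketch (derived from the constants above, assuming 4 KiB pages):
 * CTRL_PAGES = 16 * 4 = 64 pages (256 KiB) are read into ways
 * WAY_OFFSET..WAY_OFFSET+3, and THRASH_PAGES = 16 * 32 = 512 pages (2 MiB)
 * pollute the cache between measurements, for TOTAL_PAGES = 576.
 * In test_read_in() below, page j of the control region is read into way
 * WAY_OFFSET + j / NR_COLORS, so pages 0-15 go to way 4, pages 16-31 to
 * way 5, and so on.
 */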
static int test_read_in(void)
{
	struct page **pages;
	cycles_t start, stop;
	unsigned long flags;
	void *remapped;
	u32 sum = 0;
	int ret = 0, i, j;

	pages = (struct page**) kmalloc(TOTAL_PAGES * sizeof(*pages),
			GFP_KERNEL);
	if (!pages) {
		printk("could not allocate pages array.\n");
		ret = -ENOMEM;
		goto out;
	}

	/* Allocate a bunch of pages. */
	for (i = 0; i < TOTAL_PAGES; i++) {
		const unsigned long color = i % NR_COLORS;

		pages[i] = get_colored_page(color);
		if (!pages[i]) {
			printk(KERN_WARNING "%s: no pages available.\n",
					__FUNCTION__);
			ret = -ENOMEM;
			goto out_free;
		}
	}

	/* Put the pages in a contiguous virtual address space. */
	remapped = vmap(pages, TOTAL_PAGES, VM_MAP, PAGE_KERNEL);
	if (!remapped) {
		printk(KERN_WARNING "%s: bad vmap\n", __FUNCTION__);
		ret = -EINVAL;
		goto out_free_colors;
	}

	/* smaller readings, in bytes */
	for (i = 8; i <= 4096; i += 8) {
		thrash(remapped);

		READ_TRACE("small test, i:%d\n", i);

		preempt_disable();
		local_irq_save(flags);
		start = litmus_get_cycles();
		color_read_in_mem(unlocked_way[WAY_OFFSET], UNLOCK_ALL,
				remapped, remapped + i);
		stop = litmus_get_cycles();
		local_irq_restore(flags);
		preempt_enable();

		TRACE("wss, nanoseconds: %4d, %lld\n",
				i, litmus_cycles_to_ns(stop - start));
	}

	for (i = 1; i <= CTRL_PAGES; i += 1) {
		/* i is the number of pages to read in */
		/* we will read in from page zero to page i (exclusive) */

		READ_TRACE("start on i:%d\n", i);

		thrash(remapped);

		preempt_disable();
		local_irq_save(flags);
		start = litmus_get_cycles();
		for (j = 0; j < i; j += NR_COLORS) {
			/* need to chunk the reads into groups of NR_COLORS
			 * so we can switch ways */
			void *vaddr_start, *vaddr_end;
			int read_start = j, read_end = j + NR_COLORS;
			int way = WAY_OFFSET + j / NR_COLORS;

			if (read_end > i)
				read_end = i;

			vaddr_start = remapped + PAGE_SIZE * read_start;
			vaddr_end = remapped + PAGE_SIZE * read_end;

			color_read_in_mem(unlocked_way[way], UNLOCK_ALL,
					vaddr_start, vaddr_end);

			READ_TRACE("i:%d j:%d read_start:%d read_end:%d way:%d\n",
					i, j, read_start, read_end, way);
		}
		stop = litmus_get_cycles();
		local_irq_restore(flags);
		preempt_enable();

		TRACE("wss, nanoseconds: %4lu, %lld\n",
				PAGE_SIZE * i, litmus_cycles_to_ns(stop - start));
	}

#if 0
	printk("read in %d pages (avg): %llu cycles %ld (getnstimeofday) sum: %u\n",
			NR_PAGES, div64_u64(stop - start, TRIALS),
			update_timeval(before, after) / TRIALS, sum);
#endif

	/* Done with these pages */
	vunmap(remapped);
out_free_colors:
	for (i = 0; i < TOTAL_PAGES; i++) {
		put_page(pages[i]);
		add_page_to_color_list(pages[i]);
	}
out_free:
	kfree(pages);
out:
	return ret;
}
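/*
 * Sanity-check sketch: each entry of unlocked_way[] should equal ~(1 << i)
 * truncated to 32 bits, e.g. unlocked_way[4] == 0xFFFFFFEF == ~(1u << 4),
 * while UNLOCK_ALL (all bits clear) permits allocation in every way.
 * test_lockdown() below verifies the array this way and also walks the
 * per-CPU data/instruction lockdown registers with a few test patterns.
 */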
static void test_lockdown(void *ignore)
{
	int i;

	printk("Start lockdown test on CPU %d.\n", smp_processor_id());

	for (i = 0; i < nr_lockregs; i++) {
		printk("CPU %2d data reg: 0x%8p\n", i, ld_d_reg(i));
		printk("CPU %2d inst reg: 0x%8p\n", i, ld_i_reg(i));
	}

	printk("Lockdown initial state:\n");
	print_lockdown_registers();
	printk("---\n");

	for (i = 0; i < nr_lockregs; i++) {
		writel_relaxed(1, ld_d_reg(i));
		writel_relaxed(2, ld_i_reg(i));
	}
	printk("Lockdown all data=1 instr=2:\n");
	print_lockdown_registers();
	printk("---\n");

	for (i = 0; i < nr_lockregs; i++) {
		writel_relaxed((1 << i), ld_d_reg(i));
		writel_relaxed(((1 << 8) >> i), ld_i_reg(i));
	}
	printk("Lockdown varies:\n");
	print_lockdown_registers();
	printk("---\n");

	for (i = 0; i < nr_lockregs; i++) {
		writel_relaxed(UNLOCK_ALL, ld_d_reg(i));
		writel_relaxed(UNLOCK_ALL, ld_i_reg(i));
	}
	printk("Lockdown all zero:\n");
	print_lockdown_registers();

	/* Check that the unlocked_way array is set up correctly. */
	for (i = 0; i < MAX_NR_WAYS; i++) {
		unsigned long expected = 0xFFFFFFFF;

		clear_bit(i, &expected);
		if (expected != unlocked_way[i]) {
			WARN(1, "Unlock %2d: expected 0x%8x but got 0x%8x\n",
					i, ((u32)expected), unlocked_way[i]);
		}
	}

	printk("End lockdown test.\n");
}

static int perf_test(void)
{
	struct timespec before, after;
	struct page *page;
	void *vaddr;
	u32 *data;
	long time;
	int i;

	page = alloc_page(__GFP_MOVABLE);
	if (!page) {
		printk(KERN_WARNING "No memory\n");
		return -ENOMEM;
	}

	vaddr = page_address(page);
	if (!vaddr)
		printk(KERN_WARNING "%s: vaddr is null\n", __FUNCTION__);
	data = (u32*) vaddr;

	getnstimeofday(&before);
	barrier();
	for (i = 0; i < TRIALS; i++) {
		color_flush_page(vaddr);
	}
	barrier();
	getnstimeofday(&after);
	time = update_timeval(before, after);
	printk("Average for flushes without re-reading: %ld\n", time / TRIALS);

	color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr, vaddr + PAGE_SIZE);
	barrier();
	getnstimeofday(&before);
	barrier();
	for (i = 0; i < TRIALS; i++) {
		color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr,
				vaddr + PAGE_SIZE);
	}
	barrier();
	getnstimeofday(&after);
	time = update_timeval(before, after);
	printk("Average for read in (no flush): %ld\n", time / TRIALS);

	getnstimeofday(&before);
	barrier();
	for (i = 0; i < TRIALS; i++) {
		color_read_in_mem(unlocked_way[0], UNLOCK_ALL, vaddr,
				vaddr + PAGE_SIZE);
		color_flush_page(vaddr);
	}
	barrier();
	getnstimeofday(&after);
	time = update_timeval(before, after);
	printk("Average for read in and then flush: %ld\n", time / TRIALS);

	free_page((unsigned long)vaddr);

	return 0;
}

#define LOCKREG_TEST_VAL 0x00000002

int litmus_test_prefetch_proc_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct page *page;
	void *vaddr;
	u32 *data;
	int i;

	if (!write) {
		*lenp = 0;
		return 0;
	}

	page = alloc_page(__GFP_MOVABLE);
	if (!page) {
		printk(KERN_WARNING "No memory\n");
		return -ENOMEM;
	}

	vaddr = page_address(page);
	if (!vaddr)
		printk(KERN_WARNING "%s: vaddr is null\n", __FUNCTION__);
	data = (u32*) vaddr;

	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
		data[i] = i;

	color_read_in_mem(UNLOCK_ALL, LOCKREG_TEST_VAL, vaddr, vaddr + PAGE_SIZE);

	if (LOCKREG_TEST_VAL != readl_relaxed(lockreg_d)) {
		printk("%s: Expected lockreg value 0x%8x but got 0x%8x!\n",
				__FUNCTION__, LOCKREG_TEST_VAL,
				readl_relaxed(lockreg_d));
	} else {
		printk("%s: Lockdown state after prefetch test passed.\n",
				__FUNCTION__);
	}

	writel_relaxed(UNLOCK_ALL, lockreg_d);

	free_page((unsigned long)vaddr);

	test_get_cycles_overhead();
	test_read_in();

	return 0;
}

void litmus_setup_lockdown(void __iomem *base, u32 id)
{
	cache_base = base;
	cache_id = id;
	lockreg_d = cache_base + L2X0_LOCKDOWN_WAY_D_BASE;
	lockreg_i = cache_base + L2X0_LOCKDOWN_WAY_I_BASE;

	if (L2X0_CACHE_ID_PART_L310 == (cache_id & L2X0_CACHE_ID_PART_MASK)) {
		nr_lockregs = 8;
	} else {
		printk("Unknown cache ID!\n");
		nr_lockregs = 1;
	}

	raw_spin_lock_init(&prefetch_lock);
	mutex_init(&actlr_mutex);
	mutex_init(&l2x0_prefetch_mutex);
	mutex_init(&lockdown_proc);

	WARN(MAX_NR_WAYS < color_cache_info.ways,
			"Statically defined way maximum too small.\n");

	test_lockdown(NULL);
}

#endif