/* * Copyright (C) 2009 Red Hat, Inc. * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "internal.h" /* * By default transparent hugepage support is disabled in order that avoid * to risk increase the memory footprint of applications without a guaranteed * benefit. When transparent hugepage support is enabled, is for all mappings, * and khugepaged scans all mappings. * Defrag is invoked by khugepaged hugepage allocations and by page faults * for all hugepage allocations. */ unsigned long transparent_hugepage_flags __read_mostly = #ifdef CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS (1< min_free_kbytes) { if (user_min_free_kbytes >= 0) pr_info("raising min_free_kbytes from %d to %lu " "to help transparent hugepage allocations\n", min_free_kbytes, recommended_min); min_free_kbytes = recommended_min; } setup_per_zone_wmarks(); return 0; } late_initcall(set_recommended_min_free_kbytes); static int start_khugepaged(void) { int err = 0; if (khugepaged_enabled()) { if (!khugepaged_thread) khugepaged_thread = kthread_run(khugepaged, NULL, "khugepaged"); if (unlikely(IS_ERR(khugepaged_thread))) { printk(KERN_ERR "khugepaged: kthread_run(khugepaged) failed\n"); err = PTR_ERR(khugepaged_thread); khugepaged_thread = NULL; } if (!list_empty(&khugepaged_scan.mm_head)) wake_up_interruptible(&khugepaged_wait); set_recommended_min_free_kbytes(); } else if (khugepaged_thread) { kthread_stop(khugepaged_thread); khugepaged_thread = NULL; } return err; } static atomic_t huge_zero_refcount; static struct page *huge_zero_page __read_mostly; static inline bool is_huge_zero_page(struct page *page) { return ACCESS_ONCE(huge_zero_page) == page; } static inline bool is_huge_zero_pmd(pmd_t pmd) { return is_huge_zero_page(pmd_page(pmd)); } static struct page 
*get_huge_zero_page(void) { struct page *zero_page; retry: if (likely(atomic_inc_not_zero(&huge_zero_refcount))) return ACCESS_ONCE(huge_zero_page); zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE, HPAGE_PMD_ORDER); if (!zero_page) { count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED); return NULL; } count_vm_event(THP_ZERO_PAGE_ALLOC); preempt_disable(); if (cmpxchg(&huge_zero_page, NULL, zero_page)) { preempt_enable(); __free_page(zero_page); goto retry; } /* We take additional reference here. It will be put back by shrinker */ atomic_set(&huge_zero_refcount, 2); preempt_enable(); return ACCESS_ONCE(huge_zero_page); } static void put_huge_zero_page(void) { /* * Counter should never go to zero here. Only shrinker can put * last reference. */ BUG_ON(atomic_dec_and_test(&huge_zero_refcount)); } static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink, struct shrink_control *sc) { /* we can free zero page only if last reference remains */ return atomic_read(&huge_zero_refcount) == 1 ? 
HPAGE_PMD_NR : 0; } static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, struct shrink_control *sc) { if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); __free_page(zero_page); return HPAGE_PMD_NR; } return 0; } static struct shrinker huge_zero_page_shrinker = { .count_objects = shrink_huge_zero_page_count, .scan_objects = shrink_huge_zero_page_scan, .seeks = DEFAULT_SEEKS, }; #ifdef CONFIG_SYSFS static ssize_t double_flag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf, enum transparent_hugepage_flag enabled, enum transparent_hugepage_flag req_madv) { if (test_bit(enabled, &transparent_hugepage_flags)) { VM_BUG_ON(test_bit(req_madv, &transparent_hugepage_flags)); return sprintf(buf, "[always] madvise never\n"); } else if (test_bit(req_madv, &transparent_hugepage_flags)) return sprintf(buf, "always [madvise] never\n"); else return sprintf(buf, "always madvise [never]\n"); } static ssize_t double_flag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count, enum transparent_hugepage_flag enabled, enum transparent_hugepage_flag req_madv) { if (!memcmp("always", buf, min(sizeof("always")-1, count))) { set_bit(enabled, &transparent_hugepage_flags); clear_bit(req_madv, &transparent_hugepage_flags); } else if (!memcmp("madvise", buf, min(sizeof("madvise")-1, count))) { clear_bit(enabled, &transparent_hugepage_flags); set_bit(req_madv, &transparent_hugepage_flags); } else if (!memcmp("never", buf, min(sizeof("never")-1, count))) { clear_bit(enabled, &transparent_hugepage_flags); clear_bit(req_madv, &transparent_hugepage_flags); } else return -EINVAL; return count; } static ssize_t enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return double_flag_show(kobj, attr, buf, TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG); } static ssize_t enabled_store(struct kobject *kobj, struct 
kobj_attribute *attr, const char *buf, size_t count) { ssize_t ret; ret = double_flag_store(kobj, attr, buf, count, TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG); if (ret > 0) { int err; mutex_lock(&khugepaged_mutex); err = start_khugepaged(); mutex_unlock(&khugepaged_mutex); if (err) ret = err; } return ret; } static struct kobj_attribute enabled_attr = __ATTR(enabled, 0644, enabled_show, enabled_store); static ssize_t single_flag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf, enum transparent_hugepage_flag flag) { return sprintf(buf, "%d\n", !!test_bit(flag, &transparent_hugepage_flags)); } static ssize_t single_flag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count, enum transparent_hugepage_flag flag) { unsigned long value; int ret; ret = kstrtoul(buf, 10, &value); if (ret < 0) return ret; if (value > 1) return -EINVAL; if (value) set_bit(flag, &transparent_hugepage_flags); else clear_bit(flag, &transparent_hugepage_flags); return count; } /* * Currently defrag only disables __GFP_NOWAIT for allocation. A blind * __GFP_REPEAT is too aggressive, it's never worth swapping tons of * memory just to allocate one more hugepage. 
*/ static ssize_t defrag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return double_flag_show(kobj, attr, buf, TRANSPARENT_HUGEPAGE_DEFRAG_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG); } static ssize_t defrag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { return double_flag_store(kobj, attr, buf, count, TRANSPARENT_HUGEPAGE_DEFRAG_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG); } static struct kobj_attribute defrag_attr = __ATTR(defrag, 0644, defrag_show, defrag_store); static ssize_t use_zero_page_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return single_flag_show(kobj, attr, buf, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); } static ssize_t use_zero_page_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { return single_flag_store(kobj, attr, buf, count, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); } static struct kobj_attribute use_zero_page_attr = __ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store); #ifdef CONFIG_DEBUG_VM static ssize_t debug_cow_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return single_flag_show(kobj, attr, buf, TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG); } static ssize_t debug_cow_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { return single_flag_store(kobj, attr, buf, count, TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG); } static struct kobj_attribute debug_cow_attr = __ATTR(debug_cow, 0644, debug_cow_show, debug_cow_store); #endif /* CONFIG_DEBUG_VM */ static struct attribute *hugepage_attr[] = { &enabled_attr.attr, &defrag_attr.attr, &use_zero_page_attr.attr, #ifdef CONFIG_DEBUG_VM &debug_cow_attr.attr, #endif NULL, }; static struct attribute_group hugepage_attr_group = { .attrs = hugepage_attr, }; static ssize_t scan_sleep_millisecs_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", 
khugepaged_scan_sleep_millisecs); } static ssize_t scan_sleep_millisecs_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { unsigned long msecs; int err; err = kstrtoul(buf, 10, &msecs); if (err || msecs > UINT_MAX) return -EINVAL; khugepaged_scan_sleep_millisecs = msecs; wake_up_interruptible(&khugepaged_wait); return count; } static struct kobj_attribute scan_sleep_millisecs_attr = __ATTR(scan_sleep_millisecs, 0644, scan_sleep_millisecs_show, scan_sleep_millisecs_store); static ssize_t alloc_sleep_millisecs_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", khugepaged_alloc_sleep_millisecs); } static ssize_t alloc_sleep_millisecs_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { unsigned long msecs; int err; err = kstrtoul(buf, 10, &msecs); if (err || msecs > UINT_MAX) return -EINVAL; khugepaged_alloc_sleep_millisecs = msecs; wake_up_interruptible(&khugepaged_wait); return count; } static struct kobj_attribute alloc_sleep_millisecs_attr = __ATTR(alloc_sleep_millisecs, 0644, alloc_sleep_millisecs_show, alloc_sleep_millisecs_store); static ssize_t pages_to_scan_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", khugepaged_pages_to_scan); } static ssize_t pages_to_scan_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int err; unsigned long pages; err = kstrtoul(buf, 10, &pages); if (err || !pages || pages > UINT_MAX) return -EINVAL; khugepaged_pages_to_scan = pages; return count; } static struct kobj_attribute pages_to_scan_attr = __ATTR(pages_to_scan, 0644, pages_to_scan_show, pages_to_scan_store); static ssize_t pages_collapsed_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%u\n", khugepaged_pages_collapsed); } static struct kobj_attribute pages_collapsed_attr = __ATTR_RO(pages_collapsed); static ssize_t 
full_scans_show(struct kobject *kobj,
				 struct kobj_attribute *attr,
				 char *buf)
{
	/* khugepaged "full_scans" show (read-only counter). */
	return sprintf(buf, "%u\n", khugepaged_full_scans);
}
static struct kobj_attribute full_scans_attr =
	__ATTR_RO(full_scans);

/* khugepaged "defrag" show: 0/1 view of the khugepaged defrag flag. */
static ssize_t khugepaged_defrag_show(struct kobject *kobj,
				      struct kobj_attribute *attr, char *buf)
{
	return single_flag_show(kobj, attr, buf,
				TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG);
}

/* khugepaged "defrag" store: accepts 0 or 1. */
static ssize_t khugepaged_defrag_store(struct kobject *kobj,
				       struct kobj_attribute *attr,
				       const char *buf, size_t count)
{
	return single_flag_store(kobj, attr, buf, count,
				 TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG);
}
static struct kobj_attribute khugepaged_defrag_attr =
	__ATTR(defrag, 0644, khugepaged_defrag_show,
	       khugepaged_defrag_store);

/*
 * max_ptes_none controls whether khugepaged should collapse hugepages over
 * unmapped ptes, which potentially increases the memory footprint of the
 * vmas. When max_ptes_none is 0 khugepaged will not reduce the available
 * free memory in the system as it runs. Increasing max_ptes_none will
 * instead potentially reduce the free memory in the system during the
 * khugepaged scan.
 */
static ssize_t khugepaged_max_ptes_none_show(struct kobject *kobj,
					     struct kobj_attribute *attr,
					     char *buf)
{
	return sprintf(buf, "%u\n", khugepaged_max_ptes_none);
}

/*
 * khugepaged "max_ptes_none" store: accepts a decimal value in the range
 * 0 .. HPAGE_PMD_NR-1. Returns @count, or -EINVAL otherwise.
 */
static ssize_t khugepaged_max_ptes_none_store(struct kobject *kobj,
					      struct kobj_attribute *attr,
					      const char *buf, size_t count)
{
	int err;
	unsigned long max_ptes_none;

	err = kstrtoul(buf, 10, &max_ptes_none);
	if (err || max_ptes_none > HPAGE_PMD_NR-1)
		return -EINVAL;

	khugepaged_max_ptes_none = max_ptes_none;

	return count;
}
static struct kobj_attribute khugepaged_max_ptes_none_attr =
	__ATTR(max_ptes_none, 0644, khugepaged_max_ptes_none_show,
	       khugepaged_max_ptes_none_store);

/* Attributes exposed in the "khugepaged" sysfs subdirectory. */
static struct attribute *khugepaged_attr[] = {
	&khugepaged_defrag_attr.attr,
	&khugepaged_max_ptes_none_attr.attr,
	&pages_to_scan_attr.attr,
	&pages_collapsed_attr.attr,
	&full_scans_attr.attr,
	&scan_sleep_millisecs_attr.attr,
	&alloc_sleep_millisecs_attr.attr,
	NULL,
};

static struct attribute_group khugepaged_attr_group = {
	.attrs = khugepaged_attr,
	.name = "khugepaged",
};

/*
 * Create the "transparent_hugepage" kobject under mm_kobj and register both
 * attribute groups on it. On success the kobject is returned through
 * @hugepage_kobj and 0 is returned. On failure everything created so far is
 * undone and a negative errno is returned.
 */
static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
{
	int err;

	*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
	if (unlikely(!*hugepage_kobj)) {
		printk(KERN_ERR "hugepage: failed to create transparent hugepage kobject\n");
		return -ENOMEM;
	}

	err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group);
	if (err) {
		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
		goto delete_obj;
	}

	/* NOTE(review): this failure logs the same message as the one above. */
	err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group);
	if (err) {
		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
		goto remove_hp_group;
	}

	return 0;

	/* Unwind in reverse order of creation. */
remove_hp_group:
	sysfs_remove_group(*hugepage_kobj, &hugepage_attr_group);
delete_obj:
	kobject_put(*hugepage_kobj);
	return err;
}

/* Tear down what hugepage_init_sysfs() set up, in reverse order. */
static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj)
{
	sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group);
	sysfs_remove_group(hugepage_kobj, &hugepage_attr_group);
kobject_put(hugepage_kobj);
}
#else
/* Without CONFIG_SYSFS there is nothing to create or remove. */
static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj)
{
	return 0;
}

static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
{
}
#endif /* CONFIG_SYSFS */

/*
 * Subsystem init: set up sysfs, the khugepaged slab, the huge zero page
 * shrinker, and start khugepaged. Returns 0 on success or a negative errno.
 * When has_transparent_hugepage() is false, all flags are cleared and
 * -EINVAL is returned.
 */
static int __init hugepage_init(void)
{
	int err;
	struct kobject *hugepage_kobj;

	if (!has_transparent_hugepage()) {
		transparent_hugepage_flags = 0;
		return -EINVAL;
	}

	err = hugepage_init_sysfs(&hugepage_kobj);
	if (err)
		return err;

	err = khugepaged_slab_init();
	if (err)
		goto out;

	/* NOTE(review): the return value of register_shrinker() is ignored. */
	register_shrinker(&huge_zero_page_shrinker);

	/*
	 * By default disable transparent hugepages on smaller systems,
	 * where the extra memory used could hurt more than TLB overhead
	 * is likely to save.  The admin can still enable it through /sys.
	 */
	if (totalram_pages < (512 << (20 - PAGE_SHIFT)))
		transparent_hugepage_flags = 0;

	/* NOTE(review): the return value of start_khugepaged() is ignored. */
	start_khugepaged();

	return 0;
out:
	hugepage_exit_sysfs(hugepage_kobj);
	return err;
}
subsys_initcall(hugepage_init);

/*
 * Handler for the "transparent_hugepage=" boot parameter. Accepts exactly
 * "always", "madvise" or "never" and sets the two global enable flags to
 * match. Returns 1 when the value was recognised; otherwise logs a warning
 * and returns 0.
 */
static int __init setup_transparent_hugepage(char *str)
{
	int ret = 0;
	if (!str)
		goto out;
	if (!strcmp(str, "always")) {
		set_bit(TRANSPARENT_HUGEPAGE_FLAG,
			&transparent_hugepage_flags);
		clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
			  &transparent_hugepage_flags);
		ret = 1;
	} else if (!strcmp(str, "madvise")) {
		clear_bit(TRANSPARENT_HUGEPAGE_FLAG,
			  &transparent_hugepage_flags);
		set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
			&transparent_hugepage_flags);
		ret = 1;
	} else if (!strcmp(str, "never")) {
		clear_bit(TRANSPARENT_HUGEPAGE_FLAG,
			  &transparent_hugepage_flags);
		clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
			  &transparent_hugepage_flags);
		ret = 1;
	}
out:
	if (!ret)
		printk(KERN_WARNING
		       "transparent_hugepage= cannot parse, ignored\n");
	return ret;
}
__setup("transparent_hugepage=", setup_transparent_hugepage);

/* Make @pmd writable, but only when @vma itself permits writes. */
pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
	if (likely(vma->vm_flags & VM_WRITE))
		pmd = pmd_mkwrite(pmd);
	return pmd;
}

/* Build a huge pmd entry mapping @page with protection @prot. */
static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
{
	pmd_t entry;
	entry = mk_pmd(page, prot);
entry = pmd_mkhuge(entry);
	return entry;
}

/*
 * Install the freshly allocated (and already charged) huge @page at @pmd.
 *
 * Returns VM_FAULT_OOM when the page table to deposit cannot be allocated;
 * in that case @page is left untouched for the caller to release.
 * Returns 0 otherwise. If the pmd was populated by someone else in the
 * meantime, @page is uncharged and released here and 0 is still returned.
 */
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
					struct vm_area_struct *vma,
					unsigned long haddr, pmd_t *pmd,
					struct page *page)
{
	pgtable_t pgtable;
	spinlock_t *ptl;

	VM_BUG_ON_PAGE(!PageCompound(page), page);
	pgtable = pte_alloc_one(mm, haddr);
	if (unlikely(!pgtable))
		return VM_FAULT_OOM;

	clear_huge_page(page, haddr, HPAGE_PMD_NR);
	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * clear_huge_page writes become visible before the set_pmd_at()
	 * write.
	 */
	__SetPageUptodate(page);

	ptl = pmd_lock(mm, pmd);
	if (unlikely(!pmd_none(*pmd))) {
		/* Raced with another fault: drop everything we prepared. */
		spin_unlock(ptl);
		mem_cgroup_uncharge_page(page);
		put_page(page);
		pte_free(mm, pgtable);
	} else {
		pmd_t entry;
		entry = mk_huge_pmd(page, vma->vm_page_prot);
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
		page_add_new_anon_rmap(page, vma, haddr);
		pgtable_trans_huge_deposit(mm, pmd, pgtable);
		set_pmd_at(mm, haddr, pmd, entry);
		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
		atomic_long_inc(&mm->nr_ptes);
		spin_unlock(ptl);
	}

	return 0;
}

/*
 * GFP mask for a hugepage allocation: GFP_TRANSHUGE plus @extra_gfp, with
 * __GFP_WAIT removed when @defrag is zero.
 */
static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
{
	return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp;
}

/* Allocate an HPAGE_PMD_ORDER page for @vma at @haddr, preferring node @nd. */
static inline struct page *alloc_hugepage_vma(int defrag,
					      struct vm_area_struct *vma,
					      unsigned long haddr, int nd,
					      gfp_t extra_gfp)
{
	return alloc_pages_vma(alloc_hugepage_gfpmask(defrag, extra_gfp),
			       HPAGE_PMD_ORDER, vma, haddr, nd);
}

/*
 * Map @zero_page read-only at @pmd and deposit @pgtable for it.
 * Returns false, without touching anything, if @pmd is already populated.
 *
 * Caller must hold page table lock.
 */
static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
		struct page *zero_page)
{
	pmd_t entry;
	if (!pmd_none(*pmd))
		return false;
	entry = mk_pmd(zero_page, vma->vm_page_prot);
	entry = pmd_wrprotect(entry);
	entry = pmd_mkhuge(entry);
	pgtable_trans_huge_deposit(mm, pmd, pgtable);
	set_pmd_at(mm, haddr, pmd, entry);
	atomic_long_inc(&mm->nr_ptes);
	return true;
}

/*
 * Handle a fault on an empty pmd in an anonymous vma by mapping a huge page.
 *
 * Read faults map the shared huge zero page when that feature is enabled;
 * everything else allocates, charges and maps a new huge page.
 *
 * Returns 0 on success, VM_FAULT_OOM when a required allocation fails, or
 * VM_FAULT_FALLBACK when the huge page does not fit in the vma or could not
 * be allocated, charged or installed.
 */
int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
			       unsigned long address, pmd_t *pmd,
			       unsigned int flags)
{
	struct page *page;
	unsigned long haddr = address & HPAGE_PMD_MASK;

	/* The whole huge page range must lie inside the vma. */
	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
		return VM_FAULT_FALLBACK;
	if (unlikely(anon_vma_prepare(vma)))
		return VM_FAULT_OOM;
	if (unlikely(khugepaged_enter(vma)))
		return VM_FAULT_OOM;
	if (!(flags & FAULT_FLAG_WRITE) &&
			transparent_hugepage_use_zero_page()) {
		spinlock_t *ptl;
		pgtable_t pgtable;
		struct page *zero_page;
		bool set;
		pgtable = pte_alloc_one(mm, haddr);
		if (unlikely(!pgtable))
			return VM_FAULT_OOM;
		zero_page = get_huge_zero_page();
		if (unlikely(!zero_page)) {
			pte_free(mm, pgtable);
			count_vm_event(THP_FAULT_FALLBACK);
			return VM_FAULT_FALLBACK;
		}
		ptl = pmd_lock(mm, pmd);
		set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
				zero_page);
		spin_unlock(ptl);
		if (!set) {
			/* pmd was populated meanwhile: undo our preparation. */
			pte_free(mm, pgtable);
			put_huge_zero_page();
		}
		return 0;
	}
	page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
			vma, haddr, numa_node_id(), 0);
	if (unlikely(!page)) {
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;
	}
	if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
		put_page(page);
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;
	}
	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
		mem_cgroup_uncharge_page(page);
		put_page(page);
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;
	}

	count_vm_event(THP_FAULT_ALLOC);
	return 0;
}

/*
 * Copy the huge pmd at @src_pmd of @src_mm into @dst_pmd of @dst_mm.
 * A normal huge page is shared between both and write-protected in both.
 *
 * Returns 0 on success, -ENOMEM when the page table to deposit cannot be
 * allocated, or -EAGAIN when the source is no longer a stable huge pmd.
 */
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
		  struct vm_area_struct *vma)
{
	spinlock_t *dst_ptl, *src_ptl;
	struct page *src_page;
	pmd_t pmd;
	pgtable_t pgtable;
	int ret;

	ret = -ENOMEM;
	pgtable = pte_alloc_one(dst_mm, addr);
	if (unlikely(!pgtable))
		goto out;

	/* Destination lock first, then the source lock nested inside it. */
	dst_ptl = pmd_lock(dst_mm, dst_pmd);
	src_ptl = pmd_lockptr(src_mm, src_pmd);
	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);

	ret = -EAGAIN;
	pmd = *src_pmd;
	if (unlikely(!pmd_trans_huge(pmd))) {
		pte_free(dst_mm, pgtable);
		goto out_unlock;
	}
	/*
	 * When page table lock is held, the huge zero pmd should not be
	 * under splitting since we don't split the page itself, only pmd to
	 * a page table.
	 */
	if (is_huge_zero_pmd(pmd)) {
		struct page *zero_page;
		bool set;
		/*
		 * get_huge_zero_page() will never allocate a new page here,
		 * since we already have a zero page to copy. It just takes a
		 * reference.
		 */
		zero_page = get_huge_zero_page();
		set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
				zero_page);
		BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
		ret = 0;
		goto out_unlock;
	}

	if (unlikely(pmd_trans_splitting(pmd))) {
		/* split huge page running from under us */
		spin_unlock(src_ptl);
		spin_unlock(dst_ptl);
		pte_free(dst_mm, pgtable);

		wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
		goto out;
	}
	src_page = pmd_page(pmd);
	VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
	get_page(src_page);
	page_dup_rmap(src_page);
	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);

	/* Write-protect the source mapping and the copy placed in dst. */
	pmdp_set_wrprotect(src_mm, addr, src_pmd);
	pmd = pmd_mkold(pmd_wrprotect(pmd));
	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
	set_pmd_at(dst_mm, addr, dst_pmd, pmd);
	atomic_long_inc(&dst_mm->nr_ptes);

	ret = 0;
out_unlock:
	spin_unlock(src_ptl);
	spin_unlock(dst_ptl);
out:
	return ret;
}

/*
 * Mark the huge pmd young (and dirty when @dirty is set), provided it still
 * equals @orig_pmd once the page table lock is held.
 */
void huge_pmd_set_accessed(struct mm_struct *mm,
			   struct vm_area_struct *vma,
			   unsigned long address,
			   pmd_t *pmd, pmd_t orig_pmd,
			   int dirty)
{
	spinlock_t *ptl;
	pmd_t entry;
	unsigned long haddr;

	ptl = pmd_lock(mm, pmd);
	if
(unlikely(!pmd_same(*pmd, orig_pmd)))
		goto unlock;

	entry = pmd_mkyoung(orig_pmd);
	haddr = address & HPAGE_PMD_MASK;
	if (pmdp_set_access_flags(vma, haddr, pmd, entry, dirty))
		update_mmu_cache_pmd(vma, address, pmd);

unlock:
	spin_unlock(ptl);
}

/*
 * Write-protect fault fallback used when no new huge page is available:
 * copy the huge @page into HPAGE_PMD_NR freshly allocated small pages and
 * replace the huge pmd with a page table that maps them.
 *
 * Returns VM_FAULT_WRITE on success, VM_FAULT_OOM when allocating or
 * charging the small pages fails, or 0 when the pmd changed underneath us
 * and the copies were discarded.
 */
static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
					struct vm_area_struct *vma,
					unsigned long address,
					pmd_t *pmd, pmd_t orig_pmd,
					struct page *page,
					unsigned long haddr)
{
	spinlock_t *ptl;
	pgtable_t pgtable;
	pmd_t _pmd;
	int ret = 0, i;
	struct page **pages;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	pages = kmalloc(sizeof(struct page *) * HPAGE_PMD_NR,
			GFP_KERNEL);
	if (unlikely(!pages)) {
		ret |= VM_FAULT_OOM;
		goto out;
	}

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
					       __GFP_OTHER_NODE,
					       vma, address, page_to_nid(page));
		if (unlikely(!pages[i] ||
			     mem_cgroup_charge_anon(pages[i], mm,
						       GFP_KERNEL))) {
			/* Release this page (if any) and all earlier ones. */
			if (pages[i])
				put_page(pages[i]);
			mem_cgroup_uncharge_start();
			while (--i >= 0) {
				mem_cgroup_uncharge_page(pages[i]);
				put_page(pages[i]);
			}
			mem_cgroup_uncharge_end();
			kfree(pages);
			ret |= VM_FAULT_OOM;
			goto out;
		}
	}

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		copy_user_highpage(pages[i], page + i,
				   haddr + PAGE_SIZE * i, vma);
		__SetPageUptodate(pages[i]);
		cond_resched();
	}

	mmun_start = haddr;
	mmun_end   = haddr + HPAGE_PMD_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	ptl = pmd_lock(mm, pmd);
	if (unlikely(!pmd_same(*pmd, orig_pmd)))
		goto out_free_pages;
	VM_BUG_ON_PAGE(!PageHead(page), page);

	pmdp_clear_flush(vma, haddr, pmd);
	/* leave pmd empty until pte is filled */

	/* Fill the deposited page table via a local pmd before publishing. */
	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
	pmd_populate(mm, &_pmd, pgtable);

	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
		pte_t *pte, entry;
		entry = mk_pte(pages[i], vma->vm_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		page_add_new_anon_rmap(pages[i], vma, haddr);
		pte = pte_offset_map(&_pmd, haddr);
		VM_BUG_ON(!pte_none(*pte));
		set_pte_at(mm, haddr, pte, entry);
		pte_unmap(pte);
	}
	kfree(pages);

	smp_wmb(); /* make pte visible before pmd */
	pmd_populate(mm, pmd, pgtable);
	page_remove_rmap(page);
	spin_unlock(ptl);

	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);

	ret |= VM_FAULT_WRITE;
	put_page(page);

out:
	return ret;

out_free_pages:
	/* The pmd changed while we were copying: throw the copies away. */
	spin_unlock(ptl);
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	mem_cgroup_uncharge_start();
	for (i = 0; i < HPAGE_PMD_NR; i++) {
		mem_cgroup_uncharge_page(pages[i]);
		put_page(pages[i]);
	}
	mem_cgroup_uncharge_end();
	kfree(pages);
	goto out;
}

/*
 * Handle a write fault on a write-protected huge pmd.
 *
 * If the page is mapped only once it is simply made writable. Otherwise a
 * new huge page is allocated and the old contents are copied (or the page is
 * cleared when replacing the huge zero page). When no huge page can be
 * allocated or charged, the mapping is split or copied to small pages.
 *
 * Returns a mask of VM_FAULT_* bits (VM_FAULT_WRITE, VM_FAULT_FALLBACK,
 * VM_FAULT_OOM), or 0 when the pmd changed and nothing was done.
 */
int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
{
	spinlock_t *ptl;
	int ret = 0;
	struct page *page = NULL, *new_page;
	unsigned long haddr;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	ptl = pmd_lockptr(mm, pmd);
	VM_BUG_ON(!vma->anon_vma);
	haddr = address & HPAGE_PMD_MASK;
	/* The huge zero page is never reused; page stays NULL on this path. */
	if (is_huge_zero_pmd(orig_pmd))
		goto alloc;
	spin_lock(ptl);
	if (unlikely(!pmd_same(*pmd, orig_pmd)))
		goto out_unlock;

	page = pmd_page(orig_pmd);
	VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
	if (page_mapcount(page) == 1) {
		/* Sole mapping: just make the existing pmd writable. */
		pmd_t entry;
		entry = pmd_mkyoung(orig_pmd);
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
		if (pmdp_set_access_flags(vma, haddr, pmd, entry,  1))
			update_mmu_cache_pmd(vma, address, pmd);
		ret |= VM_FAULT_WRITE;
		goto out_unlock;
	}
	/* Pin the old page while the lock is dropped for the copy. */
	get_page(page);
	spin_unlock(ptl);
alloc:
	if (transparent_hugepage_enabled(vma) &&
	    !transparent_hugepage_debug_cow())
		new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
					      vma, haddr, numa_node_id(), 0);
	else
		new_page = NULL;

	if (unlikely(!new_page)) {
		if (!page) {
			split_huge_page_pmd(vma, address, pmd);
			ret |= VM_FAULT_FALLBACK;
		} else {
			ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
					pmd, orig_pmd, page, haddr);
			if (ret & VM_FAULT_OOM) {
				split_huge_page(page);
				ret |= VM_FAULT_FALLBACK;
			}
			put_page(page);
		}
		count_vm_event(THP_FAULT_FALLBACK);
		goto out;
	}

	if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
		put_page(new_page);
		if (page) {
			split_huge_page(page);
			put_page(page);
		} else
			split_huge_page_pmd(vma, address, pmd);
		ret |= VM_FAULT_FALLBACK;
		count_vm_event(THP_FAULT_FALLBACK);
		goto out;
	}

	count_vm_event(THP_FAULT_ALLOC);

	if (!page)
		clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
	else
		copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
	__SetPageUptodate(new_page);

	mmun_start = haddr;
	mmun_end   = haddr + HPAGE_PMD_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	spin_lock(ptl);
	/* Drop the pin taken with get_page() before the copy. */
	if (page)
		put_page(page);
	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
		spin_unlock(ptl);
		mem_cgroup_uncharge_page(new_page);
		put_page(new_page);
		goto out_mn;
	} else {
		pmd_t entry;
		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
		pmdp_clear_flush(vma, haddr, pmd);
		page_add_new_anon_rmap(new_page, vma, haddr);
		set_pmd_at(mm, haddr, pmd, entry);
		update_mmu_cache_pmd(vma, address, pmd);
		if (!page) {
			add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
			put_huge_zero_page();
		} else {
			VM_BUG_ON_PAGE(!PageHead(page), page);
			page_remove_rmap(page);
			put_page(page);
		}
		ret |= VM_FAULT_WRITE;
	}
	spin_unlock(ptl);
out_mn:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
	return ret;
out_unlock:
	spin_unlock(ptl);
	return ret;
}

/*
 * Look up the page backing @addr inside the huge pmd @pmd, applying the
 * FOLL_* @flags. The pmd lock must already be held (asserted below).
 * Returns the subpage, NULL when the request cannot be satisfied here, or
 * ERR_PTR(-EFAULT) for FOLL_DUMP on the huge zero page.
 */
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
				   unsigned long addr,
				   pmd_t *pmd,
				   unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page = NULL;

	assert_spin_locked(pmd_lockptr(mm, pmd));

	if (flags & FOLL_WRITE && !pmd_write(*pmd))
		goto out;

	/* Avoid dumping huge zero page */
	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
		return ERR_PTR(-EFAULT);

	/* Full NUMA hinting faults to serialise migration in fault paths */
	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
		goto out;

	page = pmd_page(*pmd);
	VM_BUG_ON_PAGE(!PageHead(page), page);
if (flags & FOLL_TOUCH) {
		pmd_t _pmd;
		/*
		 * We should set the dirty bit only for FOLL_WRITE but
		 * for now the dirty bit in the pmd is meaningless.
		 * And if the dirty bit will become meaningful and
		 * we'll only set it with FOLL_WRITE, an atomic
		 * set_bit will be required on the pmd to set the
		 * young bit, instead of the current set_pmd_at.
		 */
		_pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
		if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
					  pmd, _pmd,  1))
			update_mmu_cache_pmd(vma, addr, pmd);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* page->mapping is re-checked once the page lock is held. */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();
			if (page->mapping)
				mlock_vma_page(page);
			unlock_page(page);
		}
	}
	/* Advance from the head page to the subpage that backs @addr. */
	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
	VM_BUG_ON_PAGE(!PageCompound(page), page);
	if (flags & FOLL_GET)
		get_page_foll(page);

out:
	return page;
}

/* NUMA hinting page fault entry point for trans huge pmds */
int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
{
	spinlock_t *ptl;
	struct anon_vma *anon_vma = NULL;
	struct page *page;
	unsigned long haddr = addr & HPAGE_PMD_MASK;
	int page_nid = -1, this_nid = numa_node_id();
	int target_nid, last_cpupid = -1;
	bool page_locked;
	bool migrated = false;
	int flags = 0;

	ptl = pmd_lock(mm, pmdp);
	if (unlikely(!pmd_same(pmd, *pmdp)))
		goto out_unlock;

	/*
	 * If there are potential migrations, wait for completion and retry
	 * without disrupting NUMA hinting information. Do not relock and
	 * check_same as the page may no longer be mapped.
	 */
	if (unlikely(pmd_trans_migrating(*pmdp))) {
		spin_unlock(ptl);
		wait_migrate_huge_page(vma->anon_vma, pmdp);
		goto out;
	}

	page = pmd_page(pmd);
	BUG_ON(is_huge_zero_page(page));
	page_nid = page_to_nid(page);
	last_cpupid = page_cpupid_last(page);
	count_vm_numa_event(NUMA_HINT_FAULTS);
	if (page_nid == this_nid) {
		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
		flags |= TNF_FAULT_LOCAL;
	}

	/*
	 * Avoid grouping on DSO/COW pages in specific and RO pages
	 * in general, RO pages shouldn't hurt as much anyway since
	 * they can be in shared cache state.
	 */
	if (!pmd_write(pmd))
		flags |= TNF_NO_GROUP;

	/*
	 * Acquire the page lock to serialise THP migrations but avoid dropping
	 * page_table_lock if at all possible
	 */
	page_locked = trylock_page(page);
	target_nid = mpol_misplaced(page, vma, haddr);
	if (target_nid == -1) {
		/* If the page was locked, there are no parallel migrations */
		if (page_locked)
			goto clear_pmdnuma;
	}

	/* Migration could have started since the pmd_trans_migrating check */
	if (!page_locked) {
		spin_unlock(ptl);
		wait_on_page_locked(page);
		/* page_nid == -1 suppresses the task_numa_fault() call at out. */
		page_nid = -1;
		goto out;
	}

	/*
	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
	 * to serialise splits
	 */
	get_page(page);
	spin_unlock(ptl);
	anon_vma = page_lock_anon_vma_read(page);

	/* Confirm the PMD did not change while page_table_lock was released */
	spin_lock(ptl);
	if (unlikely(!pmd_same(pmd, *pmdp))) {
		unlock_page(page);
		put_page(page);
		page_nid = -1;
		goto out_unlock;
	}

	/* Bail if we fail to protect against THP splits for any reason */
	if (unlikely(!anon_vma)) {
		put_page(page);
		page_nid = -1;
		goto clear_pmdnuma;
	}

	/*
	 * Migrate the THP to the requested node, returns with page unlocked
	 * and pmd_numa cleared.
	 */
	spin_unlock(ptl);
	migrated = migrate_misplaced_transhuge_page(mm, vma,
				pmdp, pmd, addr, page, target_nid);
	if (migrated) {
		flags |= TNF_MIGRATED;
		page_nid = target_nid;
	}

	goto out;
clear_pmdnuma:
	/* Page stays where it is: clear the NUMA hinting state of the pmd. */
	BUG_ON(!PageLocked(page));
	pmd = pmd_mknonnuma(pmd);
	set_pmd_at(mm, haddr, pmdp, pmd);
	VM_BUG_ON(pmd_numa(*pmdp));
	update_mmu_cache_pmd(vma, addr, pmdp);
	unlock_page(page);
out_unlock:
	spin_unlock(ptl);

out:
	if (anon_vma)
		page_unlock_anon_vma_read(anon_vma);

	if (page_nid != -1)
		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);

	return 0;
}

/*
 * Tear down the huge pmd mapping at @addr, if @pmd is a stable huge pmd.
 * Returns 1 when a huge pmd was zapped, 0 otherwise.
 */
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
		 pmd_t *pmd, unsigned long addr)
{
	spinlock_t *ptl;
	int ret = 0;

	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		struct page *page;
		pgtable_t pgtable;
		pmd_t orig_pmd;
		/*
		 * For architectures like ppc64 we look at deposited pgtable
		 * when calling pmdp_get_and_clear. So do the
		 * pgtable_trans_huge_withdraw after finishing pmdp related
		 * operations.
		 */
		orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd);
		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
		pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
		if (is_huge_zero_pmd(orig_pmd)) {
			atomic_long_dec(&tlb->mm->nr_ptes);
			spin_unlock(ptl);
			put_huge_zero_page();
		} else {
			page = pmd_page(orig_pmd);
			page_remove_rmap(page);
			VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
			VM_BUG_ON_PAGE(!PageHead(page), page);
			atomic_long_dec(&tlb->mm->nr_ptes);
			spin_unlock(ptl);
			tlb_remove_page(tlb, page);
		}
		/* Free the page table that was deposited for this huge pmd. */
		pte_free(tlb->mm, pgtable);
		ret = 1;
	}
	return ret;
}

/*
 * mincore() helper: if @pmd is a stable huge pmd, mark every page of
 * [@addr, @end) as resident in @vec and return 1; otherwise return 0.
 */
int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end,
		unsigned char *vec)
{
	spinlock_t *ptl;
	int ret = 0;

	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		/*
		 * All logical pages in the range are present
		 * if backed by a huge page.
		 */
		spin_unlock(ptl);
		memset(vec, 1, (end - addr) >> PAGE_SHIFT);
		ret = 1;
	}

	return ret;
}

/*
 * mremap() helper: move a whole huge pmd from @old_addr to @new_addr.
 * Returns 1 when the pmd was moved; otherwise 0, or the non-1 result of
 * __pmd_trans_huge_lock().
 */
int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
		  unsigned long old_addr,
		  unsigned long new_addr, unsigned long old_end,
		  pmd_t *old_pmd, pmd_t *new_pmd)
{
	spinlock_t *old_ptl, *new_ptl;
	int ret = 0;
	pmd_t pmd;

	struct mm_struct *mm = vma->vm_mm;

	/* Both addresses must be huge-page aligned and cover a full page. */
	if ((old_addr & ~HPAGE_PMD_MASK) ||
	    (new_addr & ~HPAGE_PMD_MASK) ||
	    old_end - old_addr < HPAGE_PMD_SIZE ||
	    (new_vma->vm_flags & VM_NOHUGEPAGE))
		goto out;

	/*
	 * The destination pmd shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON(!pmd_none(*new_pmd))) {
		VM_BUG_ON(pmd_trans_huge(*new_pmd));
		goto out;
	}

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_sem prevents deadlock.
	 */
	ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
	if (ret == 1) {
		new_ptl = pmd_lockptr(mm, new_pmd);
		if (new_ptl != old_ptl)
			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
		VM_BUG_ON(!pmd_none(*new_pmd));

		if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
			pgtable_t pgtable;
			pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
		}

		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
		if (new_ptl != old_ptl)
			spin_unlock(new_ptl);
		spin_unlock(old_ptl);
	}
out:
	return ret;
}

/*
 * Returns
 *  - 0 if PMD could not be locked
 *  - 1 if PMD was locked but protections unchanged and TLB flush unnecessary
 *  - HPAGE_PMD_NR if protections changed and TLB flush necessary
 */
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, pgprot_t newprot, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	int ret = 0;

	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		pmd_t entry;
		ret = 1;
		if (!prot_numa) {
			entry = pmdp_get_and_clear(mm, addr, pmd);
			if (pmd_numa(entry))
				entry = pmd_mknonnuma(entry);
			entry = pmd_modify(entry, newprot);
			ret =
HPAGE_PMD_NR; set_pmd_at(mm, addr, pmd, entry); BUG_ON(pmd_write(entry)); } else { struct page *page = pmd_page(*pmd); /* * Do not trap faults against the zero page. The * read-only data is likely to be read-cached on the * local CPU cache and it is less useful to know about * local vs remote hits on the zero page. */ if (!is_huge_zero_page(page) && !pmd_numa(*pmd)) { pmdp_set_numa(mm, addr, pmd); ret = HPAGE_PMD_NR; } } spin_unlock(ptl); } return ret; } /* * Returns 1 if a given pmd maps a stable (not under splitting) thp. * Returns -1 if it maps a thp under splitting. Returns 0 otherwise. * * Note that if it returns 1, this routine returns without unlocking page * table locks. So callers must unlock them. */ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, spinlock_t **ptl) { *ptl = pmd_lock(vma->vm_mm, pmd); if (likely(pmd_trans_huge(*pmd))) { if (unlikely(pmd_trans_splitting(*pmd))) { spin_unlock(*ptl); wait_split_huge_page(vma->anon_vma, pmd); return -1; } else { /* Thp mapped by 'pmd' is stable, so we can * handle it as it is. */ return 1; } } spin_unlock(*ptl); return 0; } /* * This function returns whether a given @page is mapped onto the @address * in the virtual space of @mm. * * When it's true, this function returns *pmd with holding the page table lock * and passing it back to the caller via @ptl. * If it's false, returns NULL without holding the page table lock. */ pmd_t *page_check_address_pmd(struct page *page, struct mm_struct *mm, unsigned long address, enum page_check_address_pmd_flag flag, spinlock_t **ptl) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; if (address & ~HPAGE_PMD_MASK) return NULL; pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) return NULL; pud = pud_offset(pgd, address); if (!pud_present(*pud)) return NULL; pmd = pmd_offset(pud, address); *ptl = pmd_lock(mm, pmd); if (!pmd_present(*pmd)) goto unlock; if (pmd_page(*pmd) != page) goto unlock; /* * split_vma() may create temporary aliased mappings. 
There is * no risk as long as all huge pmd are found and have their * splitting bit set before __split_huge_page_refcount * runs. Finding the same huge pmd more than once during the * same rmap walk is not a problem. */ if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG && pmd_trans_splitting(*pmd)) goto unlock; if (pmd_trans_huge(*pmd)) { VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG && !pmd_trans_splitt tm->tm_mday = -1; tm->tm_year = -1; alrm->enabled = !(time[3] & WM8350_RTC_ALMSTS); return 0; } static int wm8350_rtc_stop_alarm(struct wm8350 *wm8350) { int retries = WM8350_SET_ALM_RETRIES; u16 rtc_ctrl; int ret; /* Set RTC_SET to stop the clock */ ret = wm8350_set_bits(wm8350, WM8350_RTC_TIME_CONTROL, WM8350_RTC_ALMSET); if (ret < 0) return ret; /* Wait until confirmation of stopping */ do { rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); schedule_timeout_uninterruptible(msecs_to_jiffies(1)); } while (retries-- && !(rtc_ctrl & WM8350_RTC_ALMSTS)); if (!(rtc_ctrl & WM8350_RTC_ALMSTS)) return -ETIMEDOUT; return 0; } static int wm8350_rtc_start_alarm(struct wm8350 *wm8350) { int ret; int retries = WM8350_SET_ALM_RETRIES; u16 rtc_ctrl; ret = wm8350_clear_bits(wm8350, WM8350_RTC_TIME_CONTROL, WM8350_RTC_ALMSET); if (ret < 0) return ret; /* Wait until confirmation */ do { rtc_ctrl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); schedule_timeout_uninterruptible(msecs_to_jiffies(1)); } while (retries-- && rtc_ctrl & WM8350_RTC_ALMSTS); if (rtc_ctrl & WM8350_RTC_ALMSTS) return -ETIMEDOUT; return 0; } static int wm8350_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) { struct wm8350 *wm8350 = dev_get_drvdata(dev); if (enabled) return wm8350_rtc_start_alarm(wm8350); else return wm8350_rtc_stop_alarm(wm8350); } static int wm8350_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) { struct wm8350 *wm8350 = dev_get_drvdata(dev); struct rtc_time *tm = &alrm->time; u16 time[3]; int ret; memset(time, 0, sizeof(time)); if 
(tm->tm_sec != -1) time[0] |= tm->tm_sec; else time[0] |= WM8350_RTC_ALMSECS_MASK; if (tm->tm_min != -1) time[0] |= tm->tm_min << WM8350_RTC_ALMMINS_SHIFT; else time[0] |= WM8350_RTC_ALMMINS_MASK; if (tm->tm_hour != -1) time[1] |= tm->tm_hour; else time[1] |= WM8350_RTC_ALMHRS_MASK; if (tm->tm_wday != -1) time[1] |= (tm->tm_wday + 1) << WM8350_RTC_ALMDAY_SHIFT; else time[1] |= WM8350_RTC_ALMDAY_MASK; if (tm->tm_mday != -1) time[2] |= tm->tm_mday; else time[2] |= WM8350_RTC_ALMDATE_MASK; if (tm->tm_mon != -1) time[2] |= (tm->tm_mon + 1) << WM8350_RTC_ALMMTH_SHIFT; else time[2] |= WM8350_RTC_ALMMTH_MASK; ret = wm8350_rtc_stop_alarm(wm8350); if (ret < 0) return ret; /* Write time to RTC */ ret = wm8350_block_write(wm8350, WM8350_ALARM_SECONDS_MINUTES, 3, time); if (ret < 0) return ret; if (alrm->enabled) ret = wm8350_rtc_start_alarm(wm8350); return ret; } static int wm8350_rtc_update_irq_enable(struct device *dev, unsigned int enabled) { struct wm8350 *wm8350 = dev_get_drvdata(dev); /* Suppress duplicate changes since genirq nests enable and * disable calls. 
*/ if (enabled == wm8350->rtc.update_enabled) return 0; if (enabled) wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_SEC); else wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); wm8350->rtc.update_enabled = enabled; return 0; } static irqreturn_t wm8350_rtc_alarm_handler(int irq, void *data) { struct wm8350 *wm8350 = data; struct rtc_device *rtc = wm8350->rtc.rtc; int ret; rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF); /* Make it one shot */ ret = wm8350_set_bits(wm8350, WM8350_RTC_TIME_CONTROL, WM8350_RTC_ALMSET); if (ret != 0) { dev_err(&(wm8350->rtc.pdev->dev), "Failed to disable alarm: %d\n", ret); } return IRQ_HANDLED; } static irqreturn_t wm8350_rtc_update_handler(int irq, void *data) { struct wm8350 *wm8350 = data; struct rtc_device *rtc = wm8350->rtc.rtc; rtc_update_irq(rtc, 1, RTC_IRQF | RTC_UF); return IRQ_HANDLED; } static const struct rtc_class_ops wm8350_rtc_ops = { .read_time = wm8350_rtc_readtime, .set_time = wm8350_rtc_settime, .read_alarm = wm8350_rtc_readalarm, .set_alarm = wm8350_rtc_setalarm, .alarm_irq_enable = wm8350_rtc_alarm_irq_enable, .update_irq_enable = wm8350_rtc_update_irq_enable, }; #ifdef CONFIG_PM static int wm8350_rtc_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct wm8350 *wm8350 = dev_get_drvdata(&pdev->dev); int ret = 0; u16 reg; reg = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); if (device_may_wakeup(&wm8350->rtc.pdev->dev) && reg & WM8350_RTC_ALMSTS) { ret = wm8350_rtc_stop_alarm(wm8350); if (ret != 0) dev_err(&pdev->dev, "Failed to stop RTC alarm: %d\n", ret); } return ret; } static int wm8350_rtc_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct wm8350 *wm8350 = dev_get_drvdata(&pdev->dev); int ret; if (wm8350->rtc.alarm_enabled) { ret = wm8350_rtc_start_alarm(wm8350); if (ret != 0) dev_err(&pdev->dev, "Failed to restart RTC alarm: %d\n", ret); } return 0; } #else #define wm8350_rtc_suspend NULL #define wm8350_rtc_resume NULL #endif static int 
wm8350_rtc_probe(struct platform_device *pdev) { struct wm8350 *wm8350 = platform_get_drvdata(pdev); struct wm8350_rtc *wm_rtc = &wm8350->rtc; int ret = 0; u16 timectl, power5; timectl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); if (timectl & WM8350_RTC_BCD) { dev_err(&pdev->dev, "RTC BCD mode not supported\n"); return -EINVAL; } if (timectl & WM8350_RTC_12HR) { dev_err(&pdev->dev, "RTC 12 hour mode not supported\n"); return -EINVAL; } /* enable the RTC if it's not already enabled */ power5 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_5); if (!(power5 & WM8350_RTC_TICK_ENA)) { dev_info(wm8350->dev, "Starting RTC\n"); wm8350_reg_unlock(wm8350); ret = wm8350_set_bits(wm8350, WM8350_POWER_MGMT_5, WM8350_RTC_TICK_ENA); if (ret < 0) { dev_err(&pdev->dev, "failed to enable RTC: %d\n", ret); return ret; } wm8350_reg_lock(wm8350); } if (timectl & WM8350_RTC_STS) { int retries; ret = wm8350_clear_bits(wm8350, WM8350_RTC_TIME_CONTROL, WM8350_RTC_SET); if (ret < 0) { dev_err(&pdev->dev, "failed to start: %d\n", ret); return ret; } retries = WM8350_SET_TIME_RETRIES; do { timectl = wm8350_reg_read(wm8350, WM8350_RTC_TIME_CONTROL); } while (timectl & WM8350_RTC_STS && --retries); if (retries == 0) { dev_err(&pdev->dev, "failed to start: timeout\n"); return -ENODEV; } } device_init_wakeup(&pdev->dev, 1); wm_rtc->rtc = rtc_device_register("wm8350", &pdev->dev, &wm8350_rtc_ops, THIS_MODULE); if (IS_ERR(wm_rtc->rtc)) { ret = PTR_ERR(wm_rtc->rtc); dev_err(&pdev->dev, "failed to register RTC: %d\n", ret); return ret; } wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350_rtc_update_handler, 0, "RTC Seconds", wm8350); wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC); wm8350_register_irq(wm8350, WM8350_IRQ_RTC_ALM, wm8350_rtc_alarm_handler, 0, "RTC Alarm", wm8350); return 0; } static int __devexit wm8350_rtc_remove(struct platform_device *pdev) { struct wm8350 *wm8350 = platform_get_drvdata(pdev); struct wm8350_rtc *wm_rtc = &wm8350->rtc; wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, 
wm8350); wm8350_free_irq(wm8350, WM8350_IRQ_RTC_ALM, wm8350); rtc_device_unregister(wm_rtc->rtc); return 0; } static struct dev_pm_ops wm8350_rtc_pm_ops = { .suspend = wm8350_rtc_suspend, .resume = wm8350_rtc_resume, }; static struct platform_driver wm8350_rtc_driver = { .probe = wm8350_rtc_probe, .remove = __devexit_p(wm8350_rtc_remove), .driver = { .name = "wm8350-rtc", .pm = &wm8350_rtc_pm_ops, }, }; static int __init wm8350_rtc_init(void) { return platform_driver_register(&wm8350_rtc_driver); } module_init(wm8350_rtc_init); static void __exit wm8350_rtc_exit(void) { platform_driver_unregister(&wm8350_rtc_driver); } module_exit(wm8350_rtc_exit); MODULE_AUTHOR("Mark Brown <broonie@opensource.wolfsonmicro.com>"); MODULE_DESCRIPTION("RTC driver for the WM8350"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:wm8350-rtc"); new * vm_next->vm_start isn't page aligned and it could previously * contain an hugepage: check if we need to split an huge pmd. */ if (adjust_next > 0) { struct vm_area_struct *next = vma->vm_next; unsigned long nstart = next->vm_start; nstart += adjust_next << PAGE_SHIFT; if (nstart & ~HPAGE_PMD_MASK && (nstart & HPAGE_PMD_MASK) >= next->vm_start && (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end) split_huge_page_address(next->vm_mm, nstart); } }