/*
 *  Copyright (C) 2009  Red Hat, Inc.
 *
 *  This work is licensed under the terms of the GNU GPL, version 2. See
 *  the COPYING file in the top-level directory.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/mmu_notifier.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/shrinker.h>
#include <linux/mm_inline.h>
#include <linux/kthread.h>
#include <linux/khugepaged.h>
#include <linux/freezer.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/migrate.h>
#include <linux/hashtable.h>

#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"

/*
 * By default, transparent hugepage support is disabled in order to avoid
 * risking an increased memory footprint for applications that are not
 * guaranteed to benefit from it. When transparent hugepage support is
 * enabled, it is for all mappings, and khugepaged scans all mappings.
 * Defrag is invoked by khugepaged hugepage allocations and by page faults
 * for all hugepage allocations.
 *
 * The initial value below depends on the build configuration: the "always"
 * bit is set only with CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS and the "madvise"
 * bit only with CONFIG_TRANSPARENT_HUGEPAGE_MADVISE, while the defrag,
 * khugepaged-defrag and use-zero-page bits are set unconditionally.
 */
unsigned long transparent_hugepage_flags __read_mostly =
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS
	(1<<TRANSPARENT_HUGEPAGE_FLAG)|
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
	(1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
#endif
	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)|
	(1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
	(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);

/*
 * default scan 8*HPAGE_PMD_NR pte (or vmas) every 10 seconds (the default
 * of khugepaged_scan_sleep_millisecs below is 10000 ms)
 */
static unsigned int khugepaged_pages_to_scan __read_mostly = HPAGE_PMD_NR*8;
/* counters reported read-only by the pages_collapsed/full_scans sysfs files */
static unsigned int khugepaged_pages_collapsed;
static unsigned int khugepaged_full_scans;
static unsigned int khugepaged_scan_sleep_millisecs __read_mostly = 10000;
/* during fragmentation poll the hugepage allocator once every minute */
static unsigned int khugepaged_alloc_sleep_millisecs __read_mostly = 60000;
/* the khugepaged kthread, or NULL when none is running (see start_khugepaged) */
static struct task_struct *khugepaged_thread __read_mostly;
/* held around start_khugepaged() when "enabled" is written through sysfs */
static DEFINE_MUTEX(khugepaged_mutex);
static DEFINE_SPINLOCK(khugepaged_mm_lock);
static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
/*
 * By default, collapse into a hugepage as soon as at least one pte is
 * mapped, as would have happened if the vma had been large enough at
 * page fault time.
 */
static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;

/* forward declarations; the definitions are further down in this file */
static int khugepaged(void *none);
static int khugepaged_slab_init(void);

/* mm_slots_hash has 2^MM_SLOTS_HASH_BITS buckets */
#define MM_SLOTS_HASH_BITS 10
static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);

static struct kmem_cache *mm_slot_cache __read_mostly;

/**
 * struct mm_slot - hash lookup from mm to mm_slot
 * @hash: hash collision list
 * @mm_node: khugepaged scan list headed in khugepaged_scan.mm_head
 * @mm: the mm that this information is valid for
 *
 * Each slot carries two linkages: @hash for lookup by mm and @mm_node for
 * the list that the khugepaged scan cursor walks.
 */
struct mm_slot {
	struct hlist_node hash;
	struct list_head mm_node;
	struct mm_struct *mm;
};

/**
 * struct khugepaged_scan - cursor for scanning
 * @mm_head: the head of the mm list to scan
 * @mm_slot: the current mm_slot we are scanning
 * @address: the next address inside that to be scanned
 *
 * There is only the one khugepaged_scan instance of this cursor structure.
 */
struct khugepaged_scan {
	struct list_head mm_head;
	struct mm_slot *mm_slot;
	unsigned long address;
};
/*
 * The single cursor instance. Only mm_head is initialized explicitly (to
 * an empty list); mm_slot and address start out zeroed because the object
 * has static storage duration.
 */
static struct khugepaged_scan khugepaged_scan = {
	.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
};


static int set_recommended_min_free_kbytes(void)
{
	struct zone *zone;
	int nr_zones = 0;
	unsigned long recommended_min;

	if (!khugepaged_enabled())
		return 0;

	for_each_populated_zone(zone)
		nr_zones++;

	/* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */
	recommended_min = pageblock_nr_pages * nr_zones * 2;

	/*
	 * Make sure that on average at least two pageblocks are almost free
	 * of another type, one for a migratetype to fall back to and a
	 * second to avoid subsequent fallbacks of other types There are 3
	 * MIGRATE_TYPES we care about.
	 */
	recommended_min += pageblock_nr_pages * nr_zones *
			   MIGRATE_PCPTYPES * MIGRATE_PCPTYPES;

	/* don't ever allow to reserve more than 5% of the lowmem */
	recommended_min = min(recommended_min,
			      (unsigned long) nr_free_buffer_pages() / 20);
	recommended_min <<= (PAGE_SHIFT-10);

	if (recommended_min > min_free_kbytes) {
		if (user_min_free_kbytes >= 0)
			pr_info("raising min_free_kbytes from %d to %lu "
				"to help transparent hugepage allocations\n",
				min_free_kbytes, recommended_min);

		min_free_kbytes = recommended_min;
	}
	setup_per_zone_wmarks();
	return 0;
}
late_initcall(set_recommended_min_free_kbytes);

/*
 * Bring the khugepaged thread in line with the current enabled state:
 * start it if khugepaged is enabled and no thread exists yet, stop it if
 * khugepaged is disabled and a thread is running.
 *
 * enabled_store() calls this with khugepaged_mutex held.
 *
 * Returns 0 on success or the kthread_run() error code if the thread could
 * not be created.
 */
static int start_khugepaged(void)
{
	int err = 0;

	if (!khugepaged_enabled()) {
		/* Disabled: tear down the thread if there is one. */
		if (khugepaged_thread) {
			kthread_stop(khugepaged_thread);
			khugepaged_thread = NULL;
		}
		return err;
	}

	if (!khugepaged_thread)
		khugepaged_thread = kthread_run(khugepaged, NULL,
						"khugepaged");

	if (unlikely(IS_ERR(khugepaged_thread))) {
		printk(KERN_ERR
		       "khugepaged: kthread_run(khugepaged) failed\n");
		err = PTR_ERR(khugepaged_thread);
		/* never leave an ERR_PTR behind in the global */
		khugepaged_thread = NULL;
	}

	/* There is already work queued: kick the waiter. */
	if (!list_empty(&khugepaged_scan.mm_head))
		wake_up_interruptible(&khugepaged_wait);

	set_recommended_min_free_kbytes();

	return err;
}

/* Reference count and pointer for the shared huge zero page. */
static atomic_t huge_zero_refcount;
static struct page *huge_zero_page __read_mostly;

/* Return true if @page is the page currently installed as huge zero page. */
static inline bool is_huge_zero_page(struct page *page)
{
	struct page *current_zero = ACCESS_ONCE(huge_zero_page);

	return current_zero == page;
}

/* Return true if @pmd maps the huge zero page. */
static inline bool is_huge_zero_pmd(pmd_t pmd)
{
	struct page *mapped = pmd_page(pmd);

	return is_huge_zero_page(mapped);
}

/*
 * Take a reference on the huge zero page, allocating it first if it does
 * not exist yet.
 *
 * Fast path: if the refcount is already non-zero it is simply incremented.
 * Otherwise a zeroed huge page is allocated and installed with cmpxchg().
 * If another CPU installed its page first, ours is freed and the whole
 * sequence is retried.
 *
 * Returns the huge zero page, or NULL if the allocation failed.
 */
static struct page *get_huge_zero_page(void)
{
	struct page *zero_page;
retry:
	if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
		return ACCESS_ONCE(huge_zero_page);

	zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
			HPAGE_PMD_ORDER);
	if (!zero_page) {
		count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
		return NULL;
	}
	count_vm_event(THP_ZERO_PAGE_ALLOC);
	preempt_disable();
	if (cmpxchg(&huge_zero_page, NULL, zero_page)) {
		preempt_enable();
		/*
		 * Lost the race. The page was allocated with order
		 * HPAGE_PMD_ORDER, so it must be freed with its real
		 * order: __free_page() would free it as an order-0 page.
		 */
		__free_pages(zero_page, compound_order(zero_page));
		goto retry;
	}

	/* We take additional reference here. It will be put back by shrinker */
	atomic_set(&huge_zero_refcount, 2);
	preempt_enable();
	return ACCESS_ONCE(huge_zero_page);
}

static void put_huge_zero_page(void)
{
	/*
	 * Counter should never go to zero here. Only shrinker can put
	 * last reference.
	 */
	BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
}

/*
 * Shrinker ->count_objects callback: report HPAGE_PMD_NR freeable pages
 * when only the last reference on the huge zero page remains, 0 otherwise.
 */
static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	/* we can free zero page only if last reference remains */
	if (atomic_read(&huge_zero_refcount) != 1)
		return 0;

	return HPAGE_PMD_NR;
}

/*
 * Shrinker ->scan_objects callback: free the huge zero page if only the
 * last reference remains.
 *
 * The refcount is moved from 1 to 0 atomically; if that succeeds the page
 * pointer is cleared and the page is freed.
 *
 * Returns HPAGE_PMD_NR if the page was freed, 0 otherwise.
 */
static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
		struct page *zero_page = xchg(&huge_zero_page, NULL);
		BUG_ON(zero_page == NULL);
		/*
		 * The zero page is a compound page of order HPAGE_PMD_ORDER;
		 * free it with its real order rather than as a single
		 * order-0 page.
		 */
		__free_pages(zero_page, compound_order(zero_page));
		return HPAGE_PMD_NR;
	}

	return 0;
}

/*
 * Shrinker for the huge zero page, wired to the count/scan callbacks above.
 * It is registered from hugepage_init().
 */
static struct shrinker huge_zero_page_shrinker = {
	.count_objects = shrink_huge_zero_page_count,
	.scan_objects = shrink_huge_zero_page_scan,
	.seeks = DEFAULT_SEEKS,
};

#ifdef CONFIG_SYSFS

/*
 * Format a three-state setting controlled by two flag bits into @buf,
 * putting brackets around the active choice.
 *
 * @enabled selects "always", @req_madv selects "madvise"; with neither bit
 * set the state is "never". Both bits being set at once is considered a
 * bug (VM_BUG_ON). Returns the number of characters written.
 */
static ssize_t double_flag_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf,
				enum transparent_hugepage_flag enabled,
				enum transparent_hugepage_flag req_madv)
{
	if (test_bit(enabled, &transparent_hugepage_flags)) {
		VM_BUG_ON(test_bit(req_madv, &transparent_hugepage_flags));
		return sprintf(buf, "[always] madvise never\n");
	}

	if (test_bit(req_madv, &transparent_hugepage_flags))
		return sprintf(buf, "always [madvise] never\n");

	return sprintf(buf, "always madvise [never]\n");
}
/*
 * Parse a three-state sysfs write ("always", "madvise" or "never") and
 * update the two flag bits @enabled and @req_madv accordingly.
 *
 * The input must start with one complete keyword; anything after it (such
 * as a trailing newline) is ignored. Input shorter than a keyword is
 * rejected, so an empty write or a bare prefix such as "a" no longer
 * selects a state by accident.
 *
 * Returns @count on success or -EINVAL if no keyword matches.
 */
static ssize_t double_flag_store(struct kobject *kobj,
				 struct kobj_attribute *attr,
				 const char *buf, size_t count,
				 enum transparent_hugepage_flag enabled,
				 enum transparent_hugepage_flag req_madv)
{
	if (count >= sizeof("always")-1 &&
	    !memcmp("always", buf, sizeof("always")-1)) {
		set_bit(enabled, &transparent_hugepage_flags);
		clear_bit(req_madv, &transparent_hugepage_flags);
	} else if (count >= sizeof("madvise")-1 &&
		   !memcmp("madvise", buf, sizeof("madvise")-1)) {
		clear_bit(enabled, &transparent_hugepage_flags);
		set_bit(req_madv, &transparent_hugepage_flags);
	} else if (count >= sizeof("never")-1 &&
		   !memcmp("never", buf, sizeof("never")-1)) {
		clear_bit(enabled, &transparent_hugepage_flags);
		clear_bit(req_madv, &transparent_hugepage_flags);
	} else
		return -EINVAL;

	return count;
}

static ssize_t enabled_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *buf)
{
	return double_flag_show(kobj, attr, buf,
				TRANSPARENT_HUGEPAGE_FLAG,
				TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG);
}
/*
 * sysfs write handler for "enabled".
 *
 * Updates the always/madvise flag bits and, if the write was accepted,
 * starts or stops khugepaged to match, under khugepaged_mutex.
 *
 * Returns the number of bytes consumed, the parse error from
 * double_flag_store(), or the error from start_khugepaged().
 */
static ssize_t enabled_store(struct kobject *kobj,
			     struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	ssize_t ret;
	int err;

	ret = double_flag_store(kobj, attr, buf, count,
				TRANSPARENT_HUGEPAGE_FLAG,
				TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG);
	if (ret <= 0)
		return ret;

	mutex_lock(&khugepaged_mutex);
	err = start_khugepaged();
	mutex_unlock(&khugepaged_mutex);

	return err ? err : ret;
}
static struct kobj_attribute enabled_attr =
	__ATTR(enabled, 0644, enabled_show, enabled_store);

/*
 * Print a single bit of transparent_hugepage_flags as "0\n" or "1\n".
 * Returns the number of characters written to @buf.
 */
static ssize_t single_flag_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf,
				enum transparent_hugepage_flag flag)
{
	int is_set = test_bit(flag, &transparent_hugepage_flags) ? 1 : 0;

	return sprintf(buf, "%d\n", is_set);
}

/*
 * Parse a decimal 0 or 1 from @buf and clear or set @flag in
 * transparent_hugepage_flags.
 *
 * Returns @count on success, the kstrtoul() error if the text is not a
 * number, or -EINVAL for any value other than 0 or 1.
 */
static ssize_t single_flag_store(struct kobject *kobj,
				 struct kobj_attribute *attr,
				 const char *buf, size_t count,
				 enum transparent_hugepage_flag flag)
{
	unsigned long value;
	int ret = kstrtoul(buf, 10, &value);

	if (ret < 0)
		return ret;

	switch (value) {
	case 0:
		clear_bit(flag, &transparent_hugepage_flags);
		break;
	case 1:
		set_bit(flag, &transparent_hugepage_flags);
		break;
	default:
		return -EINVAL;
	}

	return count;
}

/*
 * Currently defrag only disables __GFP_NOWAIT for allocation. A blind
 * __GFP_REPEAT is too aggressive, it's never worth swapping tons of
 * memory just to allocate one more hugepage.
 */
static ssize_t defrag_show(struct kobject *kobj,
			   struct kobj_attribute *attr, char *buf)
{
	return double_flag_show(kobj, attr, buf,
				TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
				TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG);
}
static ssize_t defrag_store(struct kobject *kobj,
			    struct kobj_attribute *attr,
			    const char *buf, size_t count)
{
	return double_flag_store(kobj, attr, buf, count,
				 TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
				 TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG);
}
static struct kobj_attribute defrag_attr =
	__ATTR(defrag, 0644, defrag_show, defrag_store);

static ssize_t use_zero_page_show(struct kobject *kobj,
		struct kobj_attribute *attr, char *buf)
{
	return single_flag_show(kobj, attr, buf,
				TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
}
static ssize_t use_zero_page_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	return single_flag_store(kobj, attr, buf, count,
				 TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
}
static struct kobj_attribute use_zero_page_attr =
	__ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store);
#ifdef CONFIG_DEBUG_VM
/* sysfs read handler for "debug_cow" (CONFIG_DEBUG_VM only): prints 0 or 1. */
static ssize_t debug_cow_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	const enum transparent_hugepage_flag bit =
		TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG;

	return single_flag_show(kobj, attr, buf, bit);
}

/* sysfs write handler for "debug_cow" (CONFIG_DEBUG_VM only): accepts 0 or 1. */
static ssize_t debug_cow_store(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       const char *buf, size_t count)
{
	const enum transparent_hugepage_flag bit =
		TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG;

	return single_flag_store(kobj, attr, buf, count, bit);
}
static struct kobj_attribute debug_cow_attr =
	__ATTR(debug_cow, 0644, debug_cow_show, debug_cow_store);
#endif /* CONFIG_DEBUG_VM */

/*
 * Attributes placed directly in the "transparent_hugepage" sysfs directory.
 * debug_cow is only present on CONFIG_DEBUG_VM builds. The array is
 * NULL-terminated.
 */
static struct attribute *hugepage_attr[] = {
	&enabled_attr.attr,
	&defrag_attr.attr,
	&use_zero_page_attr.attr,
#ifdef CONFIG_DEBUG_VM
	&debug_cow_attr.attr,
#endif
	NULL,
};

/* Unnamed group: no .name is set, unlike khugepaged_attr_group. */
static struct attribute_group hugepage_attr_group = {
	.attrs = hugepage_attr,
};

/* sysfs read handler: prints khugepaged_scan_sleep_millisecs. */
static ssize_t scan_sleep_millisecs_show(struct kobject *kobj,
					 struct kobj_attribute *attr,
					 char *buf)
{
	const unsigned int msecs = khugepaged_scan_sleep_millisecs;

	return sprintf(buf, "%u\n", msecs);
}

/*
 * sysfs write handler: sets khugepaged_scan_sleep_millisecs from a decimal
 * number and wakes up waiters on khugepaged_wait.
 *
 * Returns @count, or -EINVAL if the text is not a number or exceeds
 * UINT_MAX.
 */
static ssize_t scan_sleep_millisecs_store(struct kobject *kobj,
					  struct kobj_attribute *attr,
					  const char *buf, size_t count)
{
	unsigned long parsed;

	if (kstrtoul(buf, 10, &parsed))
		return -EINVAL;
	if (parsed > UINT_MAX)
		return -EINVAL;

	khugepaged_scan_sleep_millisecs = parsed;
	wake_up_interruptible(&khugepaged_wait);

	return count;
}
static struct kobj_attribute scan_sleep_millisecs_attr =
	__ATTR(scan_sleep_millisecs, 0644, scan_sleep_millisecs_show,
	       scan_sleep_millisecs_store);

/* sysfs read handler: prints khugepaged_alloc_sleep_millisecs. */
static ssize_t alloc_sleep_millisecs_show(struct kobject *kobj,
					  struct kobj_attribute *attr,
					  char *buf)
{
	const unsigned int msecs = khugepaged_alloc_sleep_millisecs;

	return sprintf(buf, "%u\n", msecs);
}

/*
 * sysfs write handler: sets khugepaged_alloc_sleep_millisecs from a decimal
 * number and wakes up waiters on khugepaged_wait.
 *
 * Returns @count, or -EINVAL if the text is not a number or exceeds
 * UINT_MAX.
 */
static ssize_t alloc_sleep_millisecs_store(struct kobject *kobj,
					   struct kobj_attribute *attr,
					   const char *buf, size_t count)
{
	unsigned long parsed;

	if (kstrtoul(buf, 10, &parsed))
		return -EINVAL;
	if (parsed > UINT_MAX)
		return -EINVAL;

	khugepaged_alloc_sleep_millisecs = parsed;
	wake_up_interruptible(&khugepaged_wait);

	return count;
}
static struct kobj_attribute alloc_sleep_millisecs_attr =
	__ATTR(alloc_sleep_millisecs, 0644, alloc_sleep_millisecs_show,
	       alloc_sleep_millisecs_store);

/* sysfs read handler: prints khugepaged_pages_to_scan. */
static ssize_t pages_to_scan_show(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  char *buf)
{
	const unsigned int nr = khugepaged_pages_to_scan;

	return sprintf(buf, "%u\n", nr);
}

/*
 * sysfs write handler: sets khugepaged_pages_to_scan from a decimal number.
 *
 * Returns @count, or -EINVAL if the text is not a number, is zero, or
 * exceeds UINT_MAX.
 */
static ssize_t pages_to_scan_store(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   const char *buf, size_t count)
{
	unsigned long parsed;

	if (kstrtoul(buf, 10, &parsed))
		return -EINVAL;
	if (parsed == 0 || parsed > UINT_MAX)
		return -EINVAL;

	khugepaged_pages_to_scan = parsed;

	return count;
}
static struct kobj_attribute pages_to_scan_attr =
	__ATTR(pages_to_scan, 0644, pages_to_scan_show,
	       pages_to_scan_store);

/* Read-only sysfs handler: prints the khugepaged_pages_collapsed counter. */
static ssize_t pages_collapsed_show(struct kobject *kobj,
				    struct kobj_attribute *attr,
				    char *buf)
{
	const unsigned int collapsed = khugepaged_pages_collapsed;

	return sprintf(buf, "%u\n", collapsed);
}
static struct kobj_attribute pages_collapsed_attr =
	__ATTR_RO(pages_collapsed);

/* Read-only sysfs handler: prints the khugepaged_full_scans counter. */
static ssize_t full_scans_show(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       char *buf)
{
	const unsigned int scans = khugepaged_full_scans;

	return sprintf(buf, "%u\n", scans);
}
static struct kobj_attribute full_scans_attr =
	__ATTR_RO(full_scans);

static ssize_t khugepaged_defrag_show(struct kobject *kobj,
				      struct kobj_attribute *attr, char *buf)
{
	return single_flag_show(kobj, attr, buf,
				TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG);
}
static ssize_t khugepaged_defrag_store(struct kobject *kobj,
				       struct kobj_attribute *attr,
				       const char *buf, size_t count)
{
	return single_flag_store(kobj, attr, buf, count,
				 TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG);
}
static struct kobj_attribute khugepaged_defrag_attr =
	__ATTR(defrag, 0644, khugepaged_defrag_show,
	       khugepaged_defrag_store);

/*
 * max_ptes_none controls whether khugepaged should collapse hugepages over
 * unmapped ptes, which can increase the memory footprint of the vmas.
 * When max_ptes_none is 0, khugepaged will not reduce the available free
 * memory in the system as it runs. Increasing max_ptes_none will instead
 * potentially reduce the free memory in the system during the khugepaged
 * scan.
 */
static ssize_t khugepaged_max_ptes_none_show(struct kobject *kobj,
					     struct kobj_attribute *attr,
					     char *buf)
{
	const unsigned int limit = khugepaged_max_ptes_none;

	return sprintf(buf, "%u\n", limit);
}

/*
 * sysfs write handler: sets khugepaged_max_ptes_none from a decimal number.
 *
 * Returns @count, or -EINVAL if the text is not a number or is larger than
 * HPAGE_PMD_NR-1.
 */
static ssize_t khugepaged_max_ptes_none_store(struct kobject *kobj,
					      struct kobj_attribute *attr,
					      const char *buf, size_t count)
{
	unsigned long parsed;

	if (kstrtoul(buf, 10, &parsed))
		return -EINVAL;
	if (parsed > HPAGE_PMD_NR-1)
		return -EINVAL;

	khugepaged_max_ptes_none = parsed;

	return count;
}
static struct kobj_attribute khugepaged_max_ptes_none_attr =
	__ATTR(max_ptes_none, 0644, khugepaged_max_ptes_none_show,
	       khugepaged_max_ptes_none_store);

/* Attributes of the "khugepaged" sysfs group. The array is NULL-terminated. */
static struct attribute *khugepaged_attr[] = {
	&khugepaged_defrag_attr.attr,
	&khugepaged_max_ptes_none_attr.attr,
	&pages_to_scan_attr.attr,
	&pages_collapsed_attr.attr,
	&full_scans_attr.attr,
	&scan_sleep_millisecs_attr.attr,
	&alloc_sleep_millisecs_attr.attr,
	NULL,
};

/* Named group: .name is "khugepaged", unlike the unnamed hugepage_attr_group. */
static struct attribute_group khugepaged_attr_group = {
	.attrs = khugepaged_attr,
	.name = "khugepaged",
};

/*
 * Create the "transparent_hugepage" kobject under mm_kobj and register both
 * attribute groups on it.
 *
 * The kobject pointer is stored in *@hugepage_kobj. On failure everything
 * created so far is undone again.
 *
 * Returns 0 on success, -ENOMEM if the kobject cannot be created, or the
 * sysfs_create_group() error.
 */
static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
{
	struct kobject *kobj;
	int err;

	kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
	*hugepage_kobj = kobj;
	if (unlikely(!kobj)) {
		printk(KERN_ERR "hugepage: failed to create transparent hugepage kobject\n");
		return -ENOMEM;
	}

	err = sysfs_create_group(kobj, &hugepage_attr_group);
	if (err) {
		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
		kobject_put(kobj);
		return err;
	}

	err = sysfs_create_group(kobj, &khugepaged_attr_group);
	if (err) {
		printk(KERN_ERR "hugepage: failed to register transparent hugepage group\n");
		/* unwind in reverse order of creation */
		sysfs_remove_group(kobj, &hugepage_attr_group);
		kobject_put(kobj);
		return err;
	}

	return 0;
}

/*
 * Undo hugepage_init_sysfs(): remove both attribute groups in reverse
 * order of registration and drop the kobject reference.
 */
static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj)
{
	struct kobject *kobj = hugepage_kobj;

	sysfs_remove_group(kobj, &khugepaged_attr_group);
	sysfs_remove_group(kobj, &hugepage_attr_group);
	kobject_put(kobj);
}
#else
/*
 * !CONFIG_SYSFS stub: nothing to create, always succeeds.
 * Note that *hugepage_kobj is not written here.
 */
static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj)
{
	return 0;
}

/* !CONFIG_SYSFS stub: nothing to tear down. */
static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
{
}
#endif /* CONFIG_SYSFS */

/*
 * Boot-time initialization of transparent hugepage support.
 *
 * Sets up the sysfs interface and the khugepaged slab, registers the huge
 * zero page shrinker and starts khugepaged.
 *
 * Returns -EINVAL (after clearing all flags) if the hardware has no
 * transparent hugepage support, the error from the sysfs or slab setup, or
 * 0 on success.
 *
 * NOTE(review): the return values of register_shrinker() and
 * start_khugepaged() are not checked here.
 */
static int __init hugepage_init(void)
{
	struct kobject *hugepage_kobj;
	int err;

	if (!has_transparent_hugepage()) {
		transparent_hugepage_flags = 0;
		return -EINVAL;
	}

	err = hugepage_init_sysfs(&hugepage_kobj);
	if (err)
		return err;

	err = khugepaged_slab_init();
	if (err) {
		/* slab setup failed: take the sysfs entries down again */
		hugepage_exit_sysfs(hugepage_kobj);
		return err;
	}

	register_shrinker(&huge_zero_page_shrinker);

	/*
	 * By default disable transparent hugepages on smaller systems,
	 * where the extra memory used could hurt more than TLB overhead
	 * is likely to save.  The admin can still enable it through /sys.
	 */
	if (totalram_pages < (512 << (20 - PAGE_SHIFT)))
		transparent_hugepage_flags = 0;

	start_khugepaged();

	return 0;
}
subsys_initcall(hugepage_init);

/*
 * Handle the "transparent_hugepage=" boot parameter.
 *
 * Accepts exactly "always", "madvise" or "never" and sets the two enable
 * bits accordingly. Anything else, including a missing value, is reported
 * with a warning and ignored.
 *
 * Returns 1 if the value was recognized, 0 otherwise.
 */
static int __init setup_transparent_hugepage(char *str)
{
	int ret = 0;

	if (str) {
		if (!strcmp(str, "always")) {
			set_bit(TRANSPARENT_HUGEPAGE_FLAG,
				&transparent_hugepage_flags);
			clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
				  &transparent_hugepage_flags);
			ret = 1;
		} else if (!strcmp(str, "madvise")) {
			clear_bit(TRANSPARENT_HUGEPAGE_FLAG,
				  &transparent_hugepage_flags);
			set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
				&transparent_hugepage_flags);
			ret = 1;
		} else if (!strcmp(str, "never")) {
			clear_bit(TRANSPARENT_HUGEPAGE_FLAG,
				  &transparent_hugepage_flags);
			clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
				  &transparent_hugepage_flags);
			ret = 1;
		}
	}

	if (!ret)
		printk(KERN_WARNING
		       "transparent_hugepage= cannot parse, ignored\n");
	return ret;
}
__setup("transparent_hugepage=", setup_transparent_hugepage);

/*
 * Return @pmd made writable if @vma has VM_WRITE set, otherwise return
 * @pmd unchanged.
 */
pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
{
	if (unlikely(!(vma->vm_flags & VM_WRITE)))
		return pmd;

	return pmd_mkwrite(pmd);
}

/* Build a huge pmd entry for @page with protection @prot. */
static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
{
	return pmd_mkhuge(mk_pmd(page, prot));
}

/*
 * Map the freshly allocated huge page @page at @haddr in @mm.
 *
 * A page table is allocated up front and deposited next to the huge pmd
 * when the mapping is installed. The page is cleared and marked uptodate
 * before the pmd lock is taken.
 *
 * Returns VM_FAULT_OOM if the page table allocation fails; the caller's
 * reference on @page is not dropped in that case. Returns 0 otherwise,
 * including when the pmd turned out to be populated already, in which case
 * @page is uncharged and released here.
 */
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
					struct vm_area_struct *vma,
					unsigned long haddr, pmd_t *pmd,
					struct page *page)
{
	pgtable_t pgtable;
	spinlock_t *ptl;

	VM_BUG_ON_PAGE(!PageCompound(page), page);
	pgtable = pte_alloc_one(mm, haddr);
	if (unlikely(!pgtable))
		return VM_FAULT_OOM;

	clear_huge_page(page, haddr, HPAGE_PMD_NR);
	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * clear_huge_page writes become visible before the set_pmd_at()
	 * write.
	 */
	__SetPageUptodate(page);

	ptl = pmd_lock(mm, pmd);
	if (unlikely(!pmd_none(*pmd))) {
		/* the pmd got populated meanwhile: back out and report success */
		spin_unlock(ptl);
		mem_cgroup_uncharge_page(page);
		put_page(page);
		pte_free(mm, pgtable);
	} else {
		pmd_t entry;
		entry = mk_huge_pmd(page, vma->vm_page_prot);
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
		page_add_new_anon_rmap(page, vma, haddr);
		pgtable_trans_huge_deposit(mm, pmd, pgtable);
		set_pmd_at(mm, haddr, pmd, entry);
		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
		atomic_long_inc(&mm->nr_ptes);
		spin_unlock(ptl);
	}

	return 0;
}

/*
 * Build the gfp mask for a huge page allocation: GFP_TRANSHUGE, with
 * __GFP_WAIT removed when @defrag is zero, plus any @extra_gfp bits.
 */
static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
{
	gfp_t mask = GFP_TRANSHUGE;

	if (!defrag)
		mask &= ~__GFP_WAIT;

	return mask | extra_gfp;
}

/*
 * Allocate a page of order HPAGE_PMD_ORDER for @vma at @haddr on node @nd,
 * using the gfp mask computed by alloc_hugepage_gfpmask().
 * Returns whatever alloc_pages_vma() returns.
 */
static inline struct page *alloc_hugepage_vma(int defrag,
					      struct vm_area_struct *vma,
					      unsigned long haddr, int nd,
					      gfp_t extra_gfp)
{
	gfp_t gfp = alloc_hugepage_gfpmask(defrag, extra_gfp);

	return alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, nd);
}

/*
 * Install a write-protected huge pmd mapping @zero_page at @haddr and
 * deposit @pgtable for it.
 *
 * Caller must hold page table lock.
 *
 * Returns false, leaving everything untouched, if *@pmd is already
 * populated; returns true once the entry has been installed.
 */
static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
		struct page *zero_page)
{
	pmd_t zero_entry;

	if (!pmd_none(*pmd))
		return false;

	zero_entry = pmd_mkhuge(pmd_wrprotect(mk_pmd(zero_page,
						     vma->vm_page_prot)));
	pgtable_trans_huge_deposit(mm, pmd, pgtable);
	set_pmd_at(mm, haddr, pmd, zero_entry);
	atomic_long_inc(&mm->nr_ptes);
	return true;
}

/*
 * Try to satisfy an anonymous fault at @address with a huge page.
 *
 * Read faults map the shared huge zero page when
 * transparent_hugepage_use_zero_page() allows it; all other faults
 * allocate, charge and map a new huge page.
 *
 * Returns:
 *   VM_FAULT_FALLBACK - the huge-page-aligned range around @address does
 *                       not fit inside @vma, or the (zero) huge page
 *                       allocation, the memcg charge or the mapping step
 *                       failed;
 *   VM_FAULT_OOM      - anon_vma_prepare(), khugepaged_enter() or the page
 *                       table allocation on the zero-page path failed;
 *   0                 - otherwise.
 */
int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
			       unsigned long address, pmd_t *pmd,
			       unsigned int flags)
{
	struct page *page;
	unsigned long haddr = address & HPAGE_PMD_MASK;

	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
		return VM_FAULT_FALLBACK;
	if (unlikely(anon_vma_prepare(vma)))
		return VM_FAULT_OOM;
	if (unlikely(khugepaged_enter(vma)))
		return VM_FAULT_OOM;
	if (!(flags & FAULT_FLAG_WRITE) &&
			transparent_hugepage_use_zero_page()) {
		spinlock_t *ptl;
		pgtable_t pgtable;
		struct page *zero_page;
		bool set;
		pgtable = pte_alloc_one(mm, haddr);
		if (unlikely(!pgtable))
			return VM_FAULT_OOM;
		zero_page = get_huge_zero_page();
		if (unlikely(!zero_page)) {
			pte_free(mm, pgtable);
			count_vm_event(THP_FAULT_FALLBACK);
			return VM_FAULT_FALLBACK;
		}
		ptl = pmd_lock(mm, pmd);
		set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
				zero_page);
		spin_unlock(ptl);
		if (!set) {
			/* pmd was already populated: give back what we took */
			pte_free(mm, pgtable);
			put_huge_zero_page();
		}
		return 0;
	}
	page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
			vma, haddr, numa_node_id(), 0);
	if (unlikely(!page)) {
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;
	}
	if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) {
		put_page(page);
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;
	}
	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
		/* the helper did not consume the page on failure: undo here */
		mem_cgroup_uncharge_page(page);
		put_page(page);
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;
	}

	count_vm_event(THP_FAULT_ALLOC);
	return 0;
}

/*
 * Copy the huge pmd at @src_pmd in @src_mm to @dst_pmd in @dst_mm.
 *
 * A page table is preallocated for the destination and deposited with the
 * new pmd. For a regular huge page an extra page reference and rmap
 * duplicate are taken, the source pmd is write-protected, and the
 * destination gets a write-protected, "old" copy of the entry. A huge zero
 * pmd is copied by taking another reference on the huge zero page.
 *
 * Both pmd locks are taken: the destination lock first, then the source
 * lock with the SINGLE_DEPTH_NESTING annotation.
 *
 * Returns 0 on success, -ENOMEM if the page table cannot be allocated, or
 * -EAGAIN if the source pmd is no longer a trans huge pmd or is being
 * split (the split is waited for before returning).
 */
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
		  struct vm_area_struct *vma)
{
	spinlock_t *dst_ptl, *src_ptl;
	struct page *src_page;
	pmd_t pmd;
	pgtable_t pgtable;
	int ret;

	ret = -ENOMEM;
	pgtable = pte_alloc_one(dst_mm, addr);
	if (unlikely(!pgtable))
		goto out;

	dst_ptl = pmd_lock(dst_mm, dst_pmd);
	src_ptl = pmd_lockptr(src_mm, src_pmd);
	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);

	ret = -EAGAIN;
	pmd = *src_pmd;
	if (unlikely(!pmd_trans_huge(pmd))) {
		pte_free(dst_mm, pgtable);
		goto out_unlock;
	}
	/*
	 * When page table lock is held, the huge zero pmd should not be
	 * under splitting since we don't split the page itself, only pmd to
	 * a page table.
	 */
	if (is_huge_zero_pmd(pmd)) {
		struct page *zero_page;
		bool set;
		/*
		 * get_huge_zero_page() will never allocate a new page here,
		 * since we already have a zero page to copy. It just takes a
		 * reference.
		 */
		zero_page = get_huge_zero_page();
		set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
				zero_page);
		BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
		ret = 0;
		goto out_unlock;
	}

	if (unlikely(pmd_trans_splitting(pmd))) {
		/* split huge page running from under us */
		spin_unlock(src_ptl);
		spin_unlock(dst_ptl);
		pte_free(dst_mm, pgtable);

		wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
		goto out;
	}
	src_page = pmd_page(pmd);
	VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
	get_page(src_page);
	page_dup_rmap(src_page);
	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);

	/* write-protect the source, then install a wrprotected+old copy */
	pmdp_set_wrprotect(src_mm, addr, src_pmd);
	pmd = pmd_mkold(pmd_wrprotect(pmd));
	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
	set_pmd_at(dst_mm, addr, dst_pmd, pmd);
	atomic_long_inc(&dst_mm->nr_ptes);

	ret = 0;
out_unlock:
	spin_unlock(src_ptl);
	spin_unlock(dst_ptl);
out:
	return ret;
}

/*
 * Mark the huge pmd young (accessed), and dirty if @dirty is set.
 *
 * Nothing is done if *@pmd no longer equals @orig_pmd once the pmd lock
 * has been taken. The MMU cache is updated only when
 * pmdp_set_access_flags() reports a change.
 */
void huge_pmd_set_accessed(struct mm_struct *mm,
			   struct vm_area_struct *vma,
			   unsigned long address,
			   pmd_t *pmd, pmd_t orig_pmd,
			   int dirty)
{
	spinlock_t *ptl = pmd_lock(mm, pmd);

	if (likely(pmd_same(*pmd, orig_pmd))) {
		pmd_t young = pmd_mkyoung(orig_pmd);
		unsigned long haddr = address & HPAGE_PMD_MASK;

		if (pmdp_set_access_flags(vma, haddr, pmd, young, dirty))
			update_mmu_cache_pmd(vma, address, pmd);
	}

	spin_unlock(ptl);
}

/*
 * Write-protect fault fallback used when no new huge page is available:
 * replace the huge mapping of @page with HPAGE_PMD_NR individually
 * allocated small pages holding a copy of its contents.
 *
 * All small pages are allocated and charged first, then filled from @page.
 * Under the pmd lock, and only if *@pmd still equals @orig_pmd, the huge
 * pmd is cleared, the deposited page table is withdrawn and filled with
 * ptes for the new pages, and finally the pmd is pointed at that table.
 *
 * Returns VM_FAULT_OOM if the array or any small page cannot be allocated
 * or charged, VM_FAULT_WRITE on success, or 0 if the pmd changed before
 * the lock was taken (all new pages are released in that case).
 */
static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
					struct vm_area_struct *vma,
					unsigned long address,
					pmd_t *pmd, pmd_t orig_pmd,
					struct page *page,
					unsigned long haddr)
{
	spinlock_t *ptl;
	pgtable_t pgtable;
	pmd_t _pmd;
	int ret = 0, i;
	struct page **pages;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	pages = kmalloc(sizeof(struct page *) * HPAGE_PMD_NR,
			GFP_KERNEL);
	if (unlikely(!pages)) {
		ret |= VM_FAULT_OOM;
		goto out;
	}

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
					       __GFP_OTHER_NODE,
					       vma, address, page_to_nid(page));
		if (unlikely(!pages[i] ||
			     mem_cgroup_charge_anon(pages[i], mm,
						       GFP_KERNEL))) {
			/* pages[i] was allocated but not charged: just put it */
			if (pages[i])
				put_page(pages[i]);
			/* unwind every page that was fully set up before it */
			mem_cgroup_uncharge_start();
			while (--i >= 0) {
				mem_cgroup_uncharge_page(pages[i]);
				put_page(pages[i]);
			}
			mem_cgroup_uncharge_end();
			kfree(pages);
			ret |= VM_FAULT_OOM;
			goto out;
		}
	}

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		copy_user_highpage(pages[i], page + i,
				   haddr + PAGE_SIZE * i, vma);
		__SetPageUptodate(pages[i]);
		cond_resched();
	}

	/* saved now because haddr is advanced by the pte loop below */
	mmun_start = haddr;
	mmun_end   = haddr + HPAGE_PMD_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	ptl = pmd_lock(mm, pmd);
	if (unlikely(!pmd_same(*pmd, orig_pmd)))
		goto out_free_pages;
	VM_BUG_ON_PAGE(!PageHead(page), page);

	pmdp_clear_flush(vma, haddr, pmd);
	/* leave pmd empty until pte is filled */

	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
	/* fill the page table through a local pmd before publishing it */
	pmd_populate(mm, &_pmd, pgtable);

	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
		pte_t *pte, entry;
		entry = mk_pte(pages[i], vma->vm_page_prot);
		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
		page_add_new_anon_rmap(pages[i], vma, haddr);
		pte = pte_offset_map(&_pmd, haddr);
		VM_BUG_ON(!pte_none(*pte));
		set_pte_at(mm, haddr, pte, entry);
		pte_unmap(pte);
	}
	kfree(pages);

	smp_wmb(); /* make pte visible before pmd */
	pmd_populate(mm, pmd, pgtable);
	page_remove_rmap(page);
	spin_unlock(ptl);

	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);

	ret |= VM_FAULT_WRITE;
	put_page(page);

out:
	return ret;

out_free_pages:
	/* the pmd changed under us: drop all new pages and return 0 */
	spin_unlock(ptl);
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
	mem_cgroup_uncharge_start();
	for (i = 0; i < HPAGE_PMD_NR; i++) {
		mem_cgroup_uncharge_page(pages[i]);
		put_page(pages[i]);
	}
	mem_cgroup_uncharge_end();
	kfree(pages);
	goto out;
}

/*
 * Handle a write-protect fault on a huge pmd.
 *
 * - If the pmd maps a regular huge page with a mapcount of 1, the entry is
 *   simply made young, dirty and (if the vma allows it) writable in place.
 * - Otherwise a new huge page is allocated and filled: cleared when the
 *   pmd mapped the huge zero page, copied from the old page if not. It
 *   replaces the old mapping, provided *@pmd still equals @orig_pmd once
 *   the lock is retaken.
 * - If no new huge page can be allocated or charged, the huge mapping is
 *   split, or replaced by small pages through
 *   do_huge_pmd_wp_page_fallback().
 *
 * Returns a mask of VM_FAULT_* bits: VM_FAULT_WRITE when a writable
 * mapping was installed, VM_FAULT_FALLBACK when the huge mapping was split
 * instead, plus whatever do_huge_pmd_wp_page_fallback() returned on that
 * path; 0 if the pmd changed under us.
 */
int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
{
	spinlock_t *ptl;
	int ret = 0;
	/* page stays NULL when the fault is on the huge zero page */
	struct page *page = NULL, *new_page;
	unsigned long haddr;
	unsigned long mmun_start;	/* For mmu_notifiers */
	unsigned long mmun_end;		/* For mmu_notifiers */

	ptl = pmd_lockptr(mm, pmd);
	VM_BUG_ON(!vma->anon_vma);
	haddr = address & HPAGE_PMD_MASK;
	if (is_huge_zero_pmd(orig_pmd))
		goto alloc;
	spin_lock(ptl);
	if (unlikely(!pmd_same(*pmd, orig_pmd)))
		goto out_unlock;

	page = pmd_page(orig_pmd);
	VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
	if (page_mapcount(page) == 1) {
		/* mapcount of 1: no copy is made, the entry is upgraded in place */
		pmd_t entry;
		entry = pmd_mkyoung(orig_pmd);
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
		if (pmdp_set_access_flags(vma, haddr, pmd, entry,  1))
			update_mmu_cache_pmd(vma, address, pmd);
		ret |= VM_FAULT_WRITE;
		goto out_unlock;
	}
	/* hold a reference on the old page while the lock is dropped */
	get_page(page);
	spin_unlock(ptl);
alloc:
	if (transparent_hugepage_enabled(vma) &&
	    !transparent_hugepage_debug_cow())
		new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
					      vma, haddr, numa_node_id(), 0);
	else
		new_page = NULL;

	if (unlikely(!new_page)) {
		if (!page) {
			split_huge_page_pmd(vma, address, pmd);
			ret |= VM_FAULT_FALLBACK;
		} else {
			ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
					pmd, orig_pmd, page, haddr);
			if (ret & VM_FAULT_OOM) {
				split_huge_page(page);
				ret |= VM_FAULT_FALLBACK;
			}
			put_page(page);
		}
		count_vm_event(THP_FAULT_FALLBACK);
		goto out;
	}

	if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) {
		put_page(new_page);
		if (page) {
			split_huge_page(page);
			put_page(page);
		} else
			split_huge_page_pmd(vma, address, pmd);
		ret |= VM_FAULT_FALLBACK;
		count_vm_event(THP_FAULT_FALLBACK);
		goto out;
	}

	count_vm_event(THP_FAULT_ALLOC);

	if (!page)
		clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
	else
		copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
	__SetPageUptodate(new_page);

	mmun_start = haddr;
	mmun_end   = haddr + HPAGE_PMD_SIZE;
	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

	spin_lock(ptl);
	/* drop the reference taken with get_page() before the lock was released */
	if (page)
		put_page(page);
	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
		spin_unlock(ptl);
		mem_cgroup_uncharge_page(new_page);
		put_page(new_page);
		goto out_mn;
	} else {
		pmd_t entry;
		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
		pmdp_clear_flush(vma, haddr, pmd);
		page_add_new_anon_rmap(new_page, vma, haddr);
		set_pmd_at(mm, haddr, pmd, entry);
		update_mmu_cache_pmd(vma, address, pmd);
		if (!page) {
			/* the zero page mapping is replaced by a counted anon page */
			add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
			put_huge_zero_page();
		} else {
			VM_BUG_ON_PAGE(!PageHead(page), page);
			page_remove_rmap(page);
			/* NOTE(review): presumably the old mapping's reference — confirm */
			put_page(page);
		}
		ret |= VM_FAULT_WRITE;
	}
	spin_unlock(ptl);
out_mn:
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out:
	return ret;
out_unlock:
	spin_unlock(ptl);
	return ret;
}

/*
 * Look up the subpage that backs @addr inside the transparent huge page
 * mapped by @pmd, honouring the FOLL_* bits in @flags.
 *
 * The pmd lock must already be held by the caller (asserted below).
 *
 * Returns NULL when the lookup is refused (FOLL_WRITE on a non-writable
 * pmd, or FOLL_NUMA on a NUMA hinting pmd), ERR_PTR(-EFAULT) when
 * FOLL_DUMP hits the huge zero page, and the subpage otherwise. With
 * FOLL_GET a reference is taken on the returned subpage.
 */
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
				   unsigned long addr,
				   pmd_t *pmd,
				   unsigned int flags)
{
	struct page *head, *subpage;

	assert_spin_locked(pmd_lockptr(vma->vm_mm, pmd));

	/* A write lookup cannot be satisfied by a read-only pmd */
	if ((flags & FOLL_WRITE) && !pmd_write(*pmd))
		return NULL;

	/* Avoid dumping huge zero page */
	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
		return ERR_PTR(-EFAULT);

	/* Full NUMA hinting faults to serialise migration in fault paths */
	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
		return NULL;

	head = pmd_page(*pmd);
	VM_BUG_ON_PAGE(!PageHead(head), head);

	if (flags & FOLL_TOUCH) {
		/*
		 * The dirty bit should really only be set for FOLL_WRITE,
		 * but for now the dirty bit in the pmd is meaningless.
		 * Should it become meaningful and be set only with
		 * FOLL_WRITE, an atomic set_bit on the pmd will be needed
		 * to set the young bit, instead of the current set_pmd_at.
		 */
		pmd_t touched = pmd_mkyoung(pmd_mkdirty(*pmd));

		if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
					  pmd, touched, 1))
			update_mmu_cache_pmd(vma, addr, pmd);
	}

	/*
	 * mlock on a best-effort basis: only when the page lock can be
	 * taken without sleeping, rechecking ->mapping once it is held.
	 */
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED) &&
	    head->mapping && trylock_page(head)) {
		lru_add_drain();
		if (head->mapping)
			mlock_vma_page(head);
		unlock_page(head);
	}

	/* Step from the head page to the subpage that covers @addr */
	subpage = head + ((addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT);
	VM_BUG_ON_PAGE(!PageCompound(subpage), subpage);
	if (flags & FOLL_GET)
		get_page_foll(subpage);

	return subpage;
}

/*
 * NUMA hinting page fault entry point for trans huge pmds.
 *
 * @mm:   address space the fault happened in
 * @vma:  vma covering @addr
 * @addr: faulting address (rounded down to the huge page boundary in haddr)
 * @pmd:  pmd value sampled by the caller before calling in
 * @pmdp: pointer to the live pmd entry
 *
 * Either clears the NUMA hinting bit in place (clear_pmdnuma) or hands the
 * page to migrate_misplaced_transhuge_page(). On every path where page_nid
 * is left != -1 the fault is reported through task_numa_fault().
 *
 * Always returns 0.
 */
int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
{
	spinlock_t *ptl;
	struct anon_vma *anon_vma = NULL;
	struct page *page;
	unsigned long haddr = addr & HPAGE_PMD_MASK;
	/* page_nid == -1 means "do not account this fault" at the out label */
	int page_nid = -1, this_nid = numa_node_id();
	int target_nid, last_cpupid = -1;
	bool page_locked;
	bool migrated = false;
	int flags = 0;

	/* Nothing to do if the pmd changed since the caller sampled it */
	ptl = pmd_lock(mm, pmdp);
	if (unlikely(!pmd_same(pmd, *pmdp)))
		goto out_unlock;

	/*
	 * If there are potential migrations, wait for completion and retry
	 * without disrupting NUMA hinting information. Do not relock and
	 * check_same as the page may no longer be mapped.
	 */
	if (unlikely(pmd_trans_migrating(*pmdp))) {
		spin_unlock(ptl);
		wait_migrate_huge_page(vma->anon_vma, pmdp);
		goto out;
	}

	page = pmd_page(pmd);
	BUG_ON(is_huge_zero_page(page));
	page_nid = page_to_nid(page);
	last_cpupid = page_cpupid_last(page);
	count_vm_numa_event(NUMA_HINT_FAULTS);
	if (page_nid == this_nid) {
		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
		flags |= TNF_FAULT_LOCAL;
	}

	/*
	 * Avoid grouping on DSO/COW pages in specific and RO pages
	 * in general, RO pages shouldn't hurt as much anyway since
	 * they can be in shared cache state.
	 */
	if (!pmd_write(pmd))
		flags |= TNF_NO_GROUP;

	/*
	 * Acquire the page lock to serialise THP migrations but avoid dropping
	 * page_table_lock if at all possible
	 */
	page_locked = trylock_page(page);
	target_nid = mpol_misplaced(page, vma, haddr);
	if (target_nid == -1) {
		/* If the page was locked, there are no parallel migrations */
		if (page_locked)
			goto clear_pmdnuma;
	}

	/* Migration could have started since the pmd_trans_migrating check */
	if (!page_locked) {
		spin_unlock(ptl);
		wait_on_page_locked(page);
		/* Suppress the task_numa_fault() accounting for this fault */
		page_nid = -1;
		goto out;
	}

	/*
	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
	 * to serialises splits
	 */
	/* Take a page reference before the page table lock is dropped */
	get_page(page);
	spin_unlock(ptl);
	anon_vma = page_lock_anon_vma_read(page);

	/* Confirm the PMD did not change while page_table_lock was released */
	spin_lock(ptl);
	if (unlikely(!pmd_same(pmd, *pmdp))) {
		/* Undo the trylock_page() and get_page() taken above */
		unlock_page(page);
		put_page(page);
		page_nid = -1;
		goto out_unlock;
	}

	/* Bail if we fail to protect against THP splits for any reason */
	if (unlikely(!anon_vma)) {
		/* The page lock is still held; clear_pmdnuma releases it */
		put_page(page);
		page_nid = -1;
		goto clear_pmdnuma;
	}

	/*
	 * Migrate the THP to the requested node, returns with page unlocked
	 * and pmd_numa cleared.
	 */
	/*
	 * NOTE(review): no put_page() on this path; presumably the reference
	 * taken above is consumed by migrate_misplaced_transhuge_page() -
	 * confirm against its implementation.
	 */
	spin_unlock(ptl);
	migrated = migrate_misplaced_transhuge_page(mm, vma,
				pmdp, pmd, addr, page, target_nid);
	if (migrated) {
		flags |= TNF_MIGRATED;
		/* Account the fault against the node the page now lives on */
		page_nid = target_nid;
	}

	goto out;
clear_pmdnuma:
	/* Reached with both ptl and the page lock held */
	BUG_ON(!PageLocked(page));
	pmd = pmd_mknonnuma(pmd);
	set_pmd_at(mm, haddr, pmdp, pmd);
	VM_BUG_ON(pmd_numa(*pmdp));
	update_mmu_cache_pmd(vma, addr, pmdp);
	unlock_page(page);
out_unlock:
	spin_unlock(ptl);

out:
	/* Only set if page_lock_anon_vma_read() succeeded above */
	if (anon_vma)
		page_unlock_anon_vma_read(anon_vma);

	if (page_nid != -1)
		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);

	return 0;
}

/*
 * Tear down the huge pmd mapping at @addr, if @pmd currently maps a stable
 * transparent huge page.
 *
 * Returns 1 when a huge pmd was zapped, 0 when __pmd_trans_huge_lock() did
 * not hand back a locked, stable huge pmd (nothing is touched in that case).
 */
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
		 pmd_t *pmd, unsigned long addr)
{
	struct mm_struct *mm = tlb->mm;
	spinlock_t *ptl;
	pgtable_t deposited;
	pmd_t old_entry;

	if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
		return 0;

	/*
	 * For architectures like ppc64 we look at deposited pgtable
	 * when calling pmdp_get_and_clear. So do the
	 * pgtable_trans_huge_withdraw after finishing pmdp related
	 * operations.
	 */
	old_entry = pmdp_get_and_clear(mm, addr, pmd);
	tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
	deposited = pgtable_trans_huge_withdraw(mm, pmd);

	if (is_huge_zero_pmd(old_entry)) {
		/* Huge zero page: no rmap or anon counter to unwind */
		atomic_long_dec(&mm->nr_ptes);
		spin_unlock(ptl);
		put_huge_zero_page();
	} else {
		struct page *page = pmd_page(old_entry);

		page_remove_rmap(page);
		VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
		add_mm_counter(mm, MM_ANONPAGES, -HPAGE_PMD_NR);
		VM_BUG_ON_PAGE(!PageHead(page), page);
		atomic_long_dec(&mm->nr_ptes);
		spin_unlock(ptl);
		tlb_remove_page(tlb, page);
	}

	/* The withdrawn page table is freed once the pmd lock is dropped */
	pte_free(mm, deposited);
	return 1;
}

/*
 * mincore() helper for a huge pmd: if @pmd maps a stable transparent huge
 * page, mark every page of [@addr, @end) as resident in @vec.
 *
 * Returns 1 when @vec was filled in, 0 when __pmd_trans_huge_lock() did not
 * return a locked, stable huge pmd (@vec is left untouched).
 */
int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end,
		unsigned char *vec)
{
	spinlock_t *ptl;

	if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
		return 0;

	/*
	 * All logical pages in the range are present
	 * if backed by a huge page.
	 */
	spin_unlock(ptl);
	memset(vec, 1, (end - addr) >> PAGE_SHIFT);
	return 1;
}

/*
 * Move a huge pmd mapping from @old_pmd/@old_addr to @new_pmd/@new_addr.
 *
 * Returns 0 without touching anything when the move is not eligible
 * (either address not huge-page aligned, fewer than HPAGE_PMD_SIZE bytes
 * left before @old_end, VM_NOHUGEPAGE on @new_vma, or a destination pmd
 * that is unexpectedly populated). Otherwise returns the result of
 * __pmd_trans_huge_lock() on @old_pmd; the entry is moved only when that
 * result is 1.
 */
int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
		  unsigned long old_addr,
		  unsigned long new_addr, unsigned long old_end,
		  pmd_t *old_pmd, pmd_t *new_pmd)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *old_ptl, *new_ptl;
	bool distinct_locks;
	pmd_t moved;
	int locked;

	if (old_addr & ~HPAGE_PMD_MASK)
		return 0;
	if (new_addr & ~HPAGE_PMD_MASK)
		return 0;
	if (old_end - old_addr < HPAGE_PMD_SIZE)
		return 0;
	if (new_vma->vm_flags & VM_NOHUGEPAGE)
		return 0;

	/*
	 * The destination pmd shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON(!pmd_none(*new_pmd))) {
		VM_BUG_ON(pmd_trans_huge(*new_pmd));
		return 0;
	}

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_sem prevents deadlock.
	 */
	locked = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl);
	if (locked != 1)
		return locked;

	/* Source and destination may or may not share one pmd lock */
	new_ptl = pmd_lockptr(mm, new_pmd);
	distinct_locks = (new_ptl != old_ptl);
	if (distinct_locks)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	moved = pmdp_get_and_clear(mm, old_addr, old_pmd);
	VM_BUG_ON(!pmd_none(*new_pmd));

	/* Carry the deposited page table along when the helper asks for it */
	if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
		pgtable_t pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);

		pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
	}
	set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(moved));

	if (distinct_locks)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return locked;
}

/*
 * Change the protection of a huge pmd to @newprot or, when @prot_numa is
 * set, mark it as a NUMA hinting pmd instead.
 *
 * Returns
 *  - 0 if the PMD could not be locked
 *  - 1 if the PMD was locked but protections are unchanged and a TLB flush
 *    is unnecessary
 *  - HPAGE_PMD_NR if protections changed and a TLB flush is necessary
 */
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, pgprot_t newprot, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	int ret;

	if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
		return 0;

	/* Locked; assume nothing changes until one of the branches says so */
	ret = 1;

	if (prot_numa) {
		struct page *page = pmd_page(*pmd);

		/*
		 * Do not trap faults against the zero page. The
		 * read-only data is likely to be read-cached on the
		 * local CPU cache and it is less useful to know about
		 * local vs remote hits on the zero page.
		 */
		if (!is_huge_zero_page(page) && !pmd_numa(*pmd)) {
			pmdp_set_numa(mm, addr, pmd);
			ret = HPAGE_PMD_NR;
		}
	} else {
		pmd_t entry = pmdp_get_and_clear(mm, addr, pmd);

		/* A plain protection change drops any NUMA hinting bit */
		if (pmd_numa(entry))
			entry = pmd_mknonnuma(entry);
		entry = pmd_modify(entry, newprot);
		set_pmd_at(mm, addr, pmd, entry);
		BUG_ON(pmd_write(entry));
		ret = HPAGE_PMD_NR;
	}

	spin_unlock(ptl);
	return ret;
}

/*
 * Lock @pmd and classify what it maps:
 *   1  - a stable (not under splitting) thp; the page table lock stored in
 *        *@ptl is still held on return and the caller must unlock it
 *  -1  - a thp that was under splitting; the lock has been dropped and the
 *        split has been waited for
 *   0  - not a transparent huge pmd; the lock has been dropped
 */
int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
		spinlock_t **ptl)
{
	*ptl = pmd_lock(vma->vm_mm, pmd);

	if (unlikely(!pmd_trans_huge(*pmd))) {
		spin_unlock(*ptl);
		return 0;
	}

	/* Thp mapped by 'pmd' is stable, so we can handle it as it is. */
	if (likely(!pmd_trans_splitting(*pmd)))
		return 1;

	spin_unlock(*ptl);
	wait_split_huge_page(vma->anon_vma, pmd);
	return -1;
}

/*
 * This function checks whether a given @page is mapped at @address
 * in the virtual address space of @mm.
 *
 * If it is, this function returns a pointer to the pmd with the page table
 * lock held; the lock is passed back to the caller via @ptl.
 * If it is not, it returns NULL without holding the page table lock.
 */
pmd_t *page_check_address_pmd(struct page *page,
			      struct mm_struct *mm,
			      unsigned long address,
			      enum page_check_address_pmd_flag flag,
			      spinlock_t **ptl)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (address & ~HPAGE_PMD_MASK)
		return NULL;

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);

	*ptl = pmd_lock(mm, pmd);
	if (!pmd_present(*pmd))
		goto unlock;
	if (pmd_page(*pmd) != page)
		goto unlock;
	/*
	 * split_vma() may create temporary aliased mappings. There is
	 * no risk as long as all huge pmd are found and have their
	 * splitting bit set before __split_huge_page_refcount
	 * runs. Finding the same huge pmd more than once during the
	 * same rmap walk is not a problem.
	 */
	if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG &&
	    pmd_trans_splitting(*pmd))
		goto unlock;
	if (pmd_trans_huge(*pmd)) {
		VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG &&
			  !pmd_trans_splitt</span>
		tm<span class="hl opt">-&gt;</span>tm_mday <span class="hl opt">= -</span><span class="hl num">1</span><span class="hl opt">;</span>

	tm<span class="hl opt">-&gt;</span>tm_year <span class="hl opt">= -</span><span class="hl num">1</span><span class="hl opt">;</span>

	alrm<span class="hl opt">-&gt;</span>enabled <span class="hl opt">= !(</span>time<span class="hl opt">[</span><span class="hl num">3</span><span class="hl opt">] &amp;</span> WM8350_RTC_ALMSTS<span class="hl opt">);</span>

	<span class="hl kwa">return</span> <span class="hl num">0</span><span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static int</span> <span class="hl kwd">wm8350_rtc_stop_alarm</span><span class="hl opt">(</span><span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">int</span> retries <span class="hl opt">=</span> WM8350_SET_ALM_RETRIES<span class="hl opt">;</span>
	u16 rtc_ctrl<span class="hl opt">;</span>
	<span class="hl kwb">int</span> ret<span class="hl opt">;</span>

	<span class="hl com">/* Set RTC_SET to stop the clock */</span>
	ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_set_bits</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_RTC_TIME_CONTROL<span class="hl opt">,</span>
			      WM8350_RTC_ALMSET<span class="hl opt">);</span>
	<span class="hl kwa">if</span> <span class="hl opt">(</span>ret <span class="hl opt">&lt;</span> <span class="hl num">0</span><span class="hl opt">)</span>
		<span class="hl kwa">return</span> ret<span class="hl opt">;</span>

	<span class="hl com">/* Wait until confirmation of stopping */</span>
	<span class="hl kwa">do</span> <span class="hl opt">{</span>
		rtc_ctrl <span class="hl opt">=</span> <span class="hl kwd">wm8350_reg_read</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_RTC_TIME_CONTROL<span class="hl opt">);</span>
		<span class="hl kwd">schedule_timeout_uninterruptible</span><span class="hl opt">(</span><span class="hl kwd">msecs_to_jiffies</span><span class="hl opt">(</span><span class="hl num">1</span><span class="hl opt">));</span>
	<span class="hl opt">}</span> <span class="hl kwa">while</span> <span class="hl opt">(</span>retries<span class="hl opt">-- &amp;&amp; !(</span>rtc_ctrl <span class="hl opt">&amp;</span> WM8350_RTC_ALMSTS<span class="hl opt">));</span>

	<span class="hl kwa">if</span> <span class="hl opt">(!(</span>rtc_ctrl <span class="hl opt">&amp;</span> WM8350_RTC_ALMSTS<span class="hl opt">))</span>
		<span class="hl kwa">return</span> <span class="hl opt">-</span>ETIMEDOUT<span class="hl opt">;</span>

	<span class="hl kwa">return</span> <span class="hl num">0</span><span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static int</span> <span class="hl kwd">wm8350_rtc_start_alarm</span><span class="hl opt">(</span><span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">int</span> ret<span class="hl opt">;</span>
	<span class="hl kwb">int</span> retries <span class="hl opt">=</span> WM8350_SET_ALM_RETRIES<span class="hl opt">;</span>
	u16 rtc_ctrl<span class="hl opt">;</span>

	ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_clear_bits</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_RTC_TIME_CONTROL<span class="hl opt">,</span>
				WM8350_RTC_ALMSET<span class="hl opt">);</span>
	<span class="hl kwa">if</span> <span class="hl opt">(</span>ret <span class="hl opt">&lt;</span> <span class="hl num">0</span><span class="hl opt">)</span>
		<span class="hl kwa">return</span> ret<span class="hl opt">;</span>

	<span class="hl com">/* Wait until confirmation */</span>
	<span class="hl kwa">do</span> <span class="hl opt">{</span>
		rtc_ctrl <span class="hl opt">=</span> <span class="hl kwd">wm8350_reg_read</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_RTC_TIME_CONTROL<span class="hl opt">);</span>
		<span class="hl kwd">schedule_timeout_uninterruptible</span><span class="hl opt">(</span><span class="hl kwd">msecs_to_jiffies</span><span class="hl opt">(</span><span class="hl num">1</span><span class="hl opt">));</span>
	<span class="hl opt">}</span> <span class="hl kwa">while</span> <span class="hl opt">(</span>retries<span class="hl opt">-- &amp;&amp;</span> rtc_ctrl <span class="hl opt">&amp;</span> WM8350_RTC_ALMSTS<span class="hl opt">);</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>rtc_ctrl <span class="hl opt">&amp;</span> WM8350_RTC_ALMSTS<span class="hl opt">)</span>
		<span class="hl kwa">return</span> <span class="hl opt">-</span>ETIMEDOUT<span class="hl opt">;</span>

	<span class="hl kwa">return</span> <span class="hl num">0</span><span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static int</span> <span class="hl kwd">wm8350_rtc_alarm_irq_enable</span><span class="hl opt">(</span><span class="hl kwb">struct</span> device <span class="hl opt">*</span>dev<span class="hl opt">,</span>
				       <span class="hl kwb">unsigned int</span> enabled<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350 <span class="hl opt">=</span> <span class="hl kwd">dev_get_drvdata</span><span class="hl opt">(</span>dev<span class="hl opt">);</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>enabled<span class="hl opt">)</span>
		<span class="hl kwa">return</span> <span class="hl kwd">wm8350_rtc_start_alarm</span><span class="hl opt">(</span>wm8350<span class="hl opt">);</span>
	<span class="hl kwa">else</span>
		<span class="hl kwa">return</span> <span class="hl kwd">wm8350_rtc_stop_alarm</span><span class="hl opt">(</span>wm8350<span class="hl opt">);</span>
<span class="hl opt">}</span>

<span class="hl kwb">static int</span> <span class="hl kwd">wm8350_rtc_setalarm</span><span class="hl opt">(</span><span class="hl kwb">struct</span> device <span class="hl opt">*</span>dev<span class="hl opt">,</span> <span class="hl kwb">struct</span> rtc_wkalrm <span class="hl opt">*</span>alrm<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350 <span class="hl opt">=</span> <span class="hl kwd">dev_get_drvdata</span><span class="hl opt">(</span>dev<span class="hl opt">);</span>
	<span class="hl kwb">struct</span> rtc_time <span class="hl opt">*</span>tm <span class="hl opt">= &amp;</span>alrm<span class="hl opt">-&gt;</span>time<span class="hl opt">;</span>
	u16 time<span class="hl opt">[</span><span class="hl num">3</span><span class="hl opt">];</span>
	<span class="hl kwb">int</span> ret<span class="hl opt">;</span>

	<span class="hl kwd">memset</span><span class="hl opt">(</span>time<span class="hl opt">,</span> <span class="hl num">0</span><span class="hl opt">,</span> <span class="hl kwa">sizeof</span><span class="hl opt">(</span>time<span class="hl opt">));</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>tm<span class="hl opt">-&gt;</span>tm_sec <span class="hl opt">!= -</span><span class="hl num">1</span><span class="hl opt">)</span>
		time<span class="hl opt">[</span><span class="hl num">0</span><span class="hl opt">] |=</span> tm<span class="hl opt">-&gt;</span>tm_sec<span class="hl opt">;</span>
	<span class="hl kwa">else</span>
		time<span class="hl opt">[</span><span class="hl num">0</span><span class="hl opt">] |=</span> WM8350_RTC_ALMSECS_MASK<span class="hl opt">;</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>tm<span class="hl opt">-&gt;</span>tm_min <span class="hl opt">!= -</span><span class="hl num">1</span><span class="hl opt">)</span>
		time<span class="hl opt">[</span><span class="hl num">0</span><span class="hl opt">] |=</span> tm<span class="hl opt">-&gt;</span>tm_min <span class="hl opt">&lt;&lt;</span> WM8350_RTC_ALMMINS_SHIFT<span class="hl opt">;</span>
	<span class="hl kwa">else</span>
		time<span class="hl opt">[</span><span class="hl num">0</span><span class="hl opt">] |=</span> WM8350_RTC_ALMMINS_MASK<span class="hl opt">;</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>tm<span class="hl opt">-&gt;</span>tm_hour <span class="hl opt">!= -</span><span class="hl num">1</span><span class="hl opt">)</span>
		time<span class="hl opt">[</span><span class="hl num">1</span><span class="hl opt">] |=</span> tm<span class="hl opt">-&gt;</span>tm_hour<span class="hl opt">;</span>
	<span class="hl kwa">else</span>
		time<span class="hl opt">[</span><span class="hl num">1</span><span class="hl opt">] |=</span> WM8350_RTC_ALMHRS_MASK<span class="hl opt">;</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>tm<span class="hl opt">-&gt;</span>tm_wday <span class="hl opt">!= -</span><span class="hl num">1</span><span class="hl opt">)</span>
		time<span class="hl opt">[</span><span class="hl num">1</span><span class="hl opt">] |= (</span>tm<span class="hl opt">-&gt;</span>tm_wday <span class="hl opt">+</span> <span class="hl num">1</span><span class="hl opt">) &lt;&lt;</span> WM8350_RTC_ALMDAY_SHIFT<span class="hl opt">;</span>
	<span class="hl kwa">else</span>
		time<span class="hl opt">[</span><span class="hl num">1</span><span class="hl opt">] |=</span> WM8350_RTC_ALMDAY_MASK<span class="hl opt">;</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>tm<span class="hl opt">-&gt;</span>tm_mday <span class="hl opt">!= -</span><span class="hl num">1</span><span class="hl opt">)</span>
		time<span class="hl opt">[</span><span class="hl num">2</span><span class="hl opt">] |=</span> tm<span class="hl opt">-&gt;</span>tm_mday<span class="hl opt">;</span>
	<span class="hl kwa">else</span>
		time<span class="hl opt">[</span><span class="hl num">2</span><span class="hl opt">] |=</span> WM8350_RTC_ALMDATE_MASK<span class="hl opt">;</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>tm<span class="hl opt">-&gt;</span>tm_mon <span class="hl opt">!= -</span><span class="hl num">1</span><span class="hl opt">)</span>
		time<span class="hl opt">[</span><span class="hl num">2</span><span class="hl opt">] |= (</span>tm<span class="hl opt">-&gt;</span>tm_mon <span class="hl opt">+</span> <span class="hl num">1</span><span class="hl opt">) &lt;&lt;</span> WM8350_RTC_ALMMTH_SHIFT<span class="hl opt">;</span>
	<span class="hl kwa">else</span>
		time<span class="hl opt">[</span><span class="hl num">2</span><span class="hl opt">] |=</span> WM8350_RTC_ALMMTH_MASK<span class="hl opt">;</span>

	ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_rtc_stop_alarm</span><span class="hl opt">(</span>wm8350<span class="hl opt">);</span>
	<span class="hl kwa">if</span> <span class="hl opt">(</span>ret <span class="hl opt">&lt;</span> <span class="hl num">0</span><span class="hl opt">)</span>
		<span class="hl kwa">return</span> ret<span class="hl opt">;</span>

	<span class="hl com">/* Write time to RTC */</span>
	ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_block_write</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_ALARM_SECONDS_MINUTES<span class="hl opt">,</span>
				 <span class="hl num">3</span><span class="hl opt">,</span> time<span class="hl opt">);</span>
	<span class="hl kwa">if</span> <span class="hl opt">(</span>ret <span class="hl opt">&lt;</span> <span class="hl num">0</span><span class="hl opt">)</span>
		<span class="hl kwa">return</span> ret<span class="hl opt">;</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>alrm<span class="hl opt">-&gt;</span>enabled<span class="hl opt">)</span>
		ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_rtc_start_alarm</span><span class="hl opt">(</span>wm8350<span class="hl opt">);</span>

	<span class="hl kwa">return</span> ret<span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static int</span> <span class="hl kwd">wm8350_rtc_update_irq_enable</span><span class="hl opt">(</span><span class="hl kwb">struct</span> device <span class="hl opt">*</span>dev<span class="hl opt">,</span>
					<span class="hl kwb">unsigned int</span> enabled<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350 <span class="hl opt">=</span> <span class="hl kwd">dev_get_drvdata</span><span class="hl opt">(</span>dev<span class="hl opt">);</span>

	<span class="hl com">/* Suppress duplicate changes since genirq nests enable and</span>
<span class="hl com">	 * disable calls. */</span>
	<span class="hl kwa">if</span> <span class="hl opt">(</span>enabled <span class="hl opt">==</span> wm8350<span class="hl opt">-&gt;</span>rtc<span class="hl opt">.</span>update_enabled<span class="hl opt">)</span>
		<span class="hl kwa">return</span> <span class="hl num">0</span><span class="hl opt">;</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>enabled<span class="hl opt">)</span>
		<span class="hl kwd">wm8350_unmask_irq</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_IRQ_RTC_SEC<span class="hl opt">);</span>
	<span class="hl kwa">else</span>
		<span class="hl kwd">wm8350_mask_irq</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_IRQ_RTC_SEC<span class="hl opt">);</span>

	wm8350<span class="hl opt">-&gt;</span>rtc<span class="hl opt">.</span>update_enabled <span class="hl opt">=</span> enabled<span class="hl opt">;</span>

	<span class="hl kwa">return</span> <span class="hl num">0</span><span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static</span> irqreturn_t <span class="hl kwd">wm8350_rtc_alarm_handler</span><span class="hl opt">(</span><span class="hl kwb">int</span> irq<span class="hl opt">,</span> <span class="hl kwb">void</span> <span class="hl opt">*</span>data<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350 <span class="hl opt">=</span> data<span class="hl opt">;</span>
	<span class="hl kwb">struct</span> rtc_device <span class="hl opt">*</span>rtc <span class="hl opt">=</span> wm8350<span class="hl opt">-&gt;</span>rtc<span class="hl opt">.</span>rtc<span class="hl opt">;</span>
	<span class="hl kwb">int</span> ret<span class="hl opt">;</span>

	<span class="hl kwd">rtc_update_irq</span><span class="hl opt">(</span>rtc<span class="hl opt">,</span> <span class="hl num">1</span><span class="hl opt">,</span> RTC_IRQF <span class="hl opt">|</span> RTC_AF<span class="hl opt">);</span>

	<span class="hl com">/* Make it one shot */</span>
	ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_set_bits</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_RTC_TIME_CONTROL<span class="hl opt">,</span>
			      WM8350_RTC_ALMSET<span class="hl opt">);</span>
	<span class="hl kwa">if</span> <span class="hl opt">(</span>ret <span class="hl opt">!=</span> <span class="hl num">0</span><span class="hl opt">) {</span>
		<span class="hl kwd">dev_err</span><span class="hl opt">(&amp;(</span>wm8350<span class="hl opt">-&gt;</span>rtc<span class="hl opt">.</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">),</span>
			<span class="hl str">&quot;Failed to disable alarm: %d</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">,</span> ret<span class="hl opt">);</span>
	<span class="hl opt">}</span>

	<span class="hl kwa">return</span> IRQ_HANDLED<span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static</span> irqreturn_t <span class="hl kwd">wm8350_rtc_update_handler</span><span class="hl opt">(</span><span class="hl kwb">int</span> irq<span class="hl opt">,</span> <span class="hl kwb">void</span> <span class="hl opt">*</span>data<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350 <span class="hl opt">=</span> data<span class="hl opt">;</span>
	<span class="hl kwb">struct</span> rtc_device <span class="hl opt">*</span>rtc <span class="hl opt">=</span> wm8350<span class="hl opt">-&gt;</span>rtc<span class="hl opt">.</span>rtc<span class="hl opt">;</span>

	<span class="hl kwd">rtc_update_irq</span><span class="hl opt">(</span>rtc<span class="hl opt">,</span> <span class="hl num">1</span><span class="hl opt">,</span> RTC_IRQF <span class="hl opt">|</span> RTC_UF<span class="hl opt">);</span>

	<span class="hl kwa">return</span> IRQ_HANDLED<span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static const struct</span> rtc_class_ops wm8350_rtc_ops <span class="hl opt">= {</span>
	<span class="hl opt">.</span>read_time <span class="hl opt">=</span> wm8350_rtc_readtime<span class="hl opt">,</span>
	<span class="hl opt">.</span>set_time <span class="hl opt">=</span> wm8350_rtc_settime<span class="hl opt">,</span>
	<span class="hl opt">.</span>read_alarm <span class="hl opt">=</span> wm8350_rtc_readalarm<span class="hl opt">,</span>
	<span class="hl opt">.</span>set_alarm <span class="hl opt">=</span> wm8350_rtc_setalarm<span class="hl opt">,</span>
	<span class="hl opt">.</span>alarm_irq_enable <span class="hl opt">=</span> wm8350_rtc_alarm_irq_enable<span class="hl opt">,</span>
	<span class="hl opt">.</span>update_irq_enable <span class="hl opt">=</span> wm8350_rtc_update_irq_enable<span class="hl opt">,</span>
<span class="hl opt">};</span>

<span class="hl ppc">#ifdef CONFIG_PM</span>
<span class="hl kwb">static int</span> <span class="hl kwd">wm8350_rtc_suspend</span><span class="hl opt">(</span><span class="hl kwb">struct</span> device <span class="hl opt">*</span>dev<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">struct</span> platform_device <span class="hl opt">*</span>pdev <span class="hl opt">=</span> <span class="hl kwd">to_platform_device</span><span class="hl opt">(</span>dev<span class="hl opt">);</span>
	<span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350 <span class="hl opt">=</span> <span class="hl kwd">dev_get_drvdata</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">);</span>
	<span class="hl kwb">int</span> ret <span class="hl opt">=</span> <span class="hl num">0</span><span class="hl opt">;</span>
	u16 reg<span class="hl opt">;</span>

	reg <span class="hl opt">=</span> <span class="hl kwd">wm8350_reg_read</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_RTC_TIME_CONTROL<span class="hl opt">);</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span><span class="hl kwd">device_may_wakeup</span><span class="hl opt">(&amp;</span>wm8350<span class="hl opt">-&gt;</span>rtc<span class="hl opt">.</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">) &amp;&amp;</span>
	    reg <span class="hl opt">&amp;</span> WM8350_RTC_ALMSTS<span class="hl opt">) {</span>
		ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_rtc_stop_alarm</span><span class="hl opt">(</span>wm8350<span class="hl opt">);</span>
		<span class="hl kwa">if</span> <span class="hl opt">(</span>ret <span class="hl opt">!=</span> <span class="hl num">0</span><span class="hl opt">)</span>
			<span class="hl kwd">dev_err</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span> <span class="hl str">&quot;Failed to stop RTC alarm: %d</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">,</span>
				ret<span class="hl opt">);</span>
	<span class="hl opt">}</span>

	<span class="hl kwa">return</span> ret<span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static int</span> <span class="hl kwd">wm8350_rtc_resume</span><span class="hl opt">(</span><span class="hl kwb">struct</span> device <span class="hl opt">*</span>dev<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">struct</span> platform_device <span class="hl opt">*</span>pdev <span class="hl opt">=</span> <span class="hl kwd">to_platform_device</span><span class="hl opt">(</span>dev<span class="hl opt">);</span>
	<span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350 <span class="hl opt">=</span> <span class="hl kwd">dev_get_drvdata</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">);</span>
	<span class="hl kwb">int</span> ret<span class="hl opt">;</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>wm8350<span class="hl opt">-&gt;</span>rtc<span class="hl opt">.</span>alarm_enabled<span class="hl opt">) {</span>
		ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_rtc_start_alarm</span><span class="hl opt">(</span>wm8350<span class="hl opt">);</span>
		<span class="hl kwa">if</span> <span class="hl opt">(</span>ret <span class="hl opt">!=</span> <span class="hl num">0</span><span class="hl opt">)</span>
			<span class="hl kwd">dev_err</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span>
				<span class="hl str">&quot;Failed to restart RTC alarm: %d</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">,</span> ret<span class="hl opt">);</span>
	<span class="hl opt">}</span>

	<span class="hl kwa">return</span> <span class="hl num">0</span><span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl ppc">#else</span>
<span class="hl ppc">#define wm8350_rtc_suspend NULL</span>
<span class="hl ppc">#define wm8350_rtc_resume NULL</span>
<span class="hl ppc">#endif</span>

<span class="hl kwb">static int</span> <span class="hl kwd">wm8350_rtc_probe</span><span class="hl opt">(</span><span class="hl kwb">struct</span> platform_device <span class="hl opt">*</span>pdev<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350 <span class="hl opt">=</span> <span class="hl kwd">platform_get_drvdata</span><span class="hl opt">(</span>pdev<span class="hl opt">);</span>
	<span class="hl kwb">struct</span> wm8350_rtc <span class="hl opt">*</span>wm_rtc <span class="hl opt">= &amp;</span>wm8350<span class="hl opt">-&gt;</span>rtc<span class="hl opt">;</span>
	<span class="hl kwb">int</span> ret <span class="hl opt">=</span> <span class="hl num">0</span><span class="hl opt">;</span>
	u16 timectl<span class="hl opt">,</span> power5<span class="hl opt">;</span>

	timectl <span class="hl opt">=</span> <span class="hl kwd">wm8350_reg_read</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_RTC_TIME_CONTROL<span class="hl opt">);</span>
	<span class="hl kwa">if</span> <span class="hl opt">(</span>timectl <span class="hl opt">&amp;</span> WM8350_RTC_BCD<span class="hl opt">) {</span>
		<span class="hl kwd">dev_err</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span> <span class="hl str">&quot;RTC BCD mode not supported</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">);</span>
		<span class="hl kwa">return</span> <span class="hl opt">-</span>EINVAL<span class="hl opt">;</span>
	<span class="hl opt">}</span>
	<span class="hl kwa">if</span> <span class="hl opt">(</span>timectl <span class="hl opt">&amp;</span> WM8350_RTC_12HR<span class="hl opt">) {</span>
		<span class="hl kwd">dev_err</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span> <span class="hl str">&quot;RTC 12 hour mode not supported</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">);</span>
		<span class="hl kwa">return</span> <span class="hl opt">-</span>EINVAL<span class="hl opt">;</span>
	<span class="hl opt">}</span>

	<span class="hl com">/* enable the RTC if it&apos;s not already enabled */</span>
	power5 <span class="hl opt">=</span> <span class="hl kwd">wm8350_reg_read</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_POWER_MGMT_5<span class="hl opt">);</span>
	<span class="hl kwa">if</span> <span class="hl opt">(!(</span>power5 <span class="hl opt">&amp;</span>  WM8350_RTC_TICK_ENA<span class="hl opt">)) {</span>
		<span class="hl kwd">dev_info</span><span class="hl opt">(</span>wm8350<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span> <span class="hl str">&quot;Starting RTC</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">);</span>

		<span class="hl kwd">wm8350_reg_unlock</span><span class="hl opt">(</span>wm8350<span class="hl opt">);</span>

		ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_set_bits</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_POWER_MGMT_5<span class="hl opt">,</span>
				      WM8350_RTC_TICK_ENA<span class="hl opt">);</span>
		<span class="hl kwa">if</span> <span class="hl opt">(</span>ret <span class="hl opt">&lt;</span> <span class="hl num">0</span><span class="hl opt">) {</span>
			<span class="hl kwd">dev_err</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span> <span class="hl str">&quot;failed to enable RTC: %d</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">,</span> ret<span class="hl opt">);</span>
			<span class="hl kwa">return</span> ret<span class="hl opt">;</span>
		<span class="hl opt">}</span>

		<span class="hl kwd">wm8350_reg_lock</span><span class="hl opt">(</span>wm8350<span class="hl opt">);</span>
	<span class="hl opt">}</span>

	<span class="hl kwa">if</span> <span class="hl opt">(</span>timectl <span class="hl opt">&amp;</span> WM8350_RTC_STS<span class="hl opt">) {</span>
		<span class="hl kwb">int</span> retries<span class="hl opt">;</span>

		ret <span class="hl opt">=</span> <span class="hl kwd">wm8350_clear_bits</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_RTC_TIME_CONTROL<span class="hl opt">,</span>
					WM8350_RTC_SET<span class="hl opt">);</span>
		<span class="hl kwa">if</span> <span class="hl opt">(</span>ret <span class="hl opt">&lt;</span> <span class="hl num">0</span><span class="hl opt">) {</span>
			<span class="hl kwd">dev_err</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span> <span class="hl str">&quot;failed to start: %d</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">,</span> ret<span class="hl opt">);</span>
			<span class="hl kwa">return</span> ret<span class="hl opt">;</span>
		<span class="hl opt">}</span>

		retries <span class="hl opt">=</span> WM8350_SET_TIME_RETRIES<span class="hl opt">;</span>
		<span class="hl kwa">do</span> <span class="hl opt">{</span>
			timectl <span class="hl opt">=</span> <span class="hl kwd">wm8350_reg_read</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span>
						  WM8350_RTC_TIME_CONTROL<span class="hl opt">);</span>
		<span class="hl opt">}</span> <span class="hl kwa">while</span> <span class="hl opt">(</span>timectl <span class="hl opt">&amp;</span> WM8350_RTC_STS <span class="hl opt">&amp;&amp; --</span>retries<span class="hl opt">);</span>

		<span class="hl kwa">if</span> <span class="hl opt">(</span>retries <span class="hl opt">==</span> <span class="hl num">0</span><span class="hl opt">) {</span>
			<span class="hl kwd">dev_err</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span> <span class="hl str">&quot;failed to start: timeout</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">);</span>
			<span class="hl kwa">return</span> <span class="hl opt">-</span>ENODEV<span class="hl opt">;</span>
		<span class="hl opt">}</span>
	<span class="hl opt">}</span>

	<span class="hl kwd">device_init_wakeup</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span> <span class="hl num">1</span><span class="hl opt">);</span>

	wm_rtc<span class="hl opt">-&gt;</span>rtc <span class="hl opt">=</span> <span class="hl kwd">rtc_device_register</span><span class="hl opt">(</span><span class="hl str">&quot;wm8350&quot;</span><span class="hl opt">, &amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span>
					  <span class="hl opt">&amp;</span>wm8350_rtc_ops<span class="hl opt">,</span> THIS_MODULE<span class="hl opt">);</span>
	<span class="hl kwa">if</span> <span class="hl opt">(</span><span class="hl kwd">IS_ERR</span><span class="hl opt">(</span>wm_rtc<span class="hl opt">-&gt;</span>rtc<span class="hl opt">)) {</span>
		ret <span class="hl opt">=</span> <span class="hl kwd">PTR_ERR</span><span class="hl opt">(</span>wm_rtc<span class="hl opt">-&gt;</span>rtc<span class="hl opt">);</span>
		<span class="hl kwd">dev_err</span><span class="hl opt">(&amp;</span>pdev<span class="hl opt">-&gt;</span>dev<span class="hl opt">,</span> <span class="hl str">&quot;failed to register RTC: %d</span><span class="hl esc">\n</span><span class="hl str">&quot;</span><span class="hl opt">,</span> ret<span class="hl opt">);</span>
		<span class="hl kwa">return</span> ret<span class="hl opt">;</span>
	<span class="hl opt">}</span>

	<span class="hl kwd">wm8350_register_irq</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_IRQ_RTC_SEC<span class="hl opt">,</span>
			    wm8350_rtc_update_handler<span class="hl opt">,</span> <span class="hl num">0</span><span class="hl opt">,</span>
			    <span class="hl str">&quot;RTC Seconds&quot;</span><span class="hl opt">,</span> wm8350<span class="hl opt">);</span>
	<span class="hl kwd">wm8350_mask_irq</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_IRQ_RTC_SEC<span class="hl opt">);</span>

	<span class="hl kwd">wm8350_register_irq</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_IRQ_RTC_ALM<span class="hl opt">,</span>
			    wm8350_rtc_alarm_handler<span class="hl opt">,</span> <span class="hl num">0</span><span class="hl opt">,</span>
			    <span class="hl str">&quot;RTC Alarm&quot;</span><span class="hl opt">,</span> wm8350<span class="hl opt">);</span>

	<span class="hl kwa">return</span> <span class="hl num">0</span><span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static int</span> __devexit <span class="hl kwd">wm8350_rtc_remove</span><span class="hl opt">(</span><span class="hl kwb">struct</span> platform_device <span class="hl opt">*</span>pdev<span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwb">struct</span> wm8350 <span class="hl opt">*</span>wm8350 <span class="hl opt">=</span> <span class="hl kwd">platform_get_drvdata</span><span class="hl opt">(</span>pdev<span class="hl opt">);</span>
	<span class="hl kwb">struct</span> wm8350_rtc <span class="hl opt">*</span>wm_rtc <span class="hl opt">= &amp;</span>wm8350<span class="hl opt">-&gt;</span>rtc<span class="hl opt">;</span>

	<span class="hl kwd">wm8350_free_irq</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_IRQ_RTC_SEC<span class="hl opt">,</span> wm8350<span class="hl opt">);</span>
	<span class="hl kwd">wm8350_free_irq</span><span class="hl opt">(</span>wm8350<span class="hl opt">,</span> WM8350_IRQ_RTC_ALM<span class="hl opt">,</span> wm8350<span class="hl opt">);</span>

	<span class="hl kwd">rtc_device_unregister</span><span class="hl opt">(</span>wm_rtc<span class="hl opt">-&gt;</span>rtc<span class="hl opt">);</span>

	<span class="hl kwa">return</span> <span class="hl num">0</span><span class="hl opt">;</span>
<span class="hl opt">}</span>

<span class="hl kwb">static struct</span> dev_pm_ops wm8350_rtc_pm_ops <span class="hl opt">= {</span>
	<span class="hl opt">.</span>suspend <span class="hl opt">=</span> wm8350_rtc_suspend<span class="hl opt">,</span>
	<span class="hl opt">.</span>resume <span class="hl opt">=</span> wm8350_rtc_resume<span class="hl opt">,</span>
<span class="hl opt">};</span>

<span class="hl kwb">static struct</span> platform_driver wm8350_rtc_driver <span class="hl opt">= {</span>
	<span class="hl opt">.</span>probe <span class="hl opt">=</span> wm8350_rtc_probe<span class="hl opt">,</span>
	<span class="hl opt">.</span>remove <span class="hl opt">=</span> <span class="hl kwd">__devexit_p</span><span class="hl opt">(</span>wm8350_rtc_remove<span class="hl opt">),</span>
	<span class="hl opt">.</span>driver <span class="hl opt">= {</span>
		<span class="hl opt">.</span>name <span class="hl opt">=</span> <span class="hl str">&quot;wm8350-rtc&quot;</span><span class="hl opt">,</span>
		<span class="hl opt">.</span>pm <span class="hl opt">= &amp;</span>wm8350_rtc_pm_ops<span class="hl opt">,</span>
	<span class="hl opt">},</span>
<span class="hl opt">};</span>

<span class="hl kwb">static int</span> __init <span class="hl kwd">wm8350_rtc_init</span><span class="hl opt">(</span><span class="hl kwb">void</span><span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwa">return</span> <span class="hl kwd">platform_driver_register</span><span class="hl opt">(&amp;</span>wm8350_rtc_driver<span class="hl opt">);</span>
<span class="hl opt">}</span>
<span class="hl kwd">module_init</span><span class="hl opt">(</span>wm8350_rtc_init<span class="hl opt">);</span>

<span class="hl kwb">static void</span> __exit <span class="hl kwd">wm8350_rtc_exit</span><span class="hl opt">(</span><span class="hl kwb">void</span><span class="hl opt">)</span>
<span class="hl opt">{</span>
	<span class="hl kwd">platform_driver_unregister</span><span class="hl opt">(&amp;</span>wm8350_rtc_driver<span class="hl opt">);</span>
<span class="hl opt">}</span>
<span class="hl kwd">module_exit</span><span class="hl opt">(</span>wm8350_rtc_exit<span class="hl opt">);</span>

<span class="hl kwd">MODULE_AUTHOR</span><span class="hl opt">(</span><span class="hl str">&quot;Mark Brown &lt;broonie&#64;opensource.wolfsonmicro.com&gt;&quot;</span><span class="hl opt">);</span>
<span class="hl kwd">MODULE_DESCRIPTION</span><span class="hl opt">(</span><span class="hl str">&quot;RTC driver for the WM8350&quot;</span><span class="hl opt">);</span>
<span class="hl kwd">MODULE_LICENSE</span><span class="hl opt">(</span><span class="hl str">&quot;GPL&quot;</span><span class="hl opt">);</span>
<span class="hl kwd">MODULE_ALIAS</span><span class="hl opt">(</span><span class="hl str">&quot;platform:wm8350-rtc&quot;</span><span class="hl opt">);</span>
</code></pre></td></tr></table>
</div> <!-- class=content -->
<div class='footer'>generated by <a href='https://git.zx2c4.com/cgit/about/'>cgit v1.2.2</a> (<a href='https://git-scm.com/'>git 2.25.0</a>) at 2025-10-01 19:06:20 -0400</div>
</div> <!-- id=cgit -->
</body>
</html>
 new
	 * vm_next->vm_start isn't page aligned and it could previously
	 * contain an hugepage: check if we need to split an huge pmd.
	 */
	if (adjust_next > 0) {
		struct vm_area_struct *next = vma->vm_next;
		unsigned long nstart = next->vm_start;
		nstart += adjust_next << PAGE_SHIFT;
		if (nstart & ~HPAGE_PMD_MASK &&
		    (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
		    (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
			split_huge_page_address(next->vm_mm, nstart);
	}
}