x86: lockless get_user_pages_fast()

Implement get_user_pages_fast without locking in the fastpath on x86. Do an optimistic lockless pagetable walk, without taking any page table locks or even mmap_sem. Page table existence is guaranteed by turning interrupts off (combined with the fact that we're always looking up the current mm, this means we can do the lockless page table walk within the constraints of the TLB shootdown design). Basically we can do this lockless pagetable walk in a similar manner to the way the CPU's pagetable walker does not have to take any locks to find present ptes. This patch (combined with the subsequent ones to convert direct IO to use it) was found to give about 10% performance improvement on a 2 socket 8 core Intel Xeon system running an OLTP workload on DB2 v9.5: "To test the effects of the patch, an OLTP workload was run on an IBM x3850 M2 server with 2 processors (quad-core Intel Xeon processors at 2.93 GHz) using IBM DB2 v9.5 running Linux 2.6.24rc7 kernel. Comparing runs with and without the patch resulted in an overall performance benefit of ~9.8%. Correspondingly, oprofiles showed that samples from __up_read and __down_read routines that is seen during thread contention for system resources was reduced from 2.8% down to .05%. Monitoring the /proc/vmstat output from the patched run showed that the counter for fast_gup contained a very high number while the fast_gup_slow value was zero." (fast_gup is the old name for get_user_pages_fast, fast_gup_slow is a counter we had for the number of times the slowpath was invoked). The main reason for the improvement is that DB2 has multiple threads each issuing direct-IO. Direct-IO uses get_user_pages, and thus the threads contend on the mmap_sem cacheline, and can also contend on page table locks. I would anticipate larger performance gains on larger systems, however I think DB2 uses an adaptive mix of threads and processes, so it could be that thread contention remains pretty constant as machine size increases. 
In which case, we're stuck with "only" a 10% gain. The downside of using get_user_pages_fast is that if there is not a pte with the correct permissions for the access, we end up falling back to get_user_pages, and so the get_user_pages_fast attempt is a bit of extra work. However this should not be the common case in most performance critical code. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: Kconfig fix] [akpm@linux-foundation.org: Makefile fix/cleanup] [akpm@linux-foundation.org: warning fix] Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Andy Whitcroft <apw@shadowen.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Andi Kleen <andi@firstfloor.org> Cc: Dave Kleikamp <shaggy@austin.ibm.com> Cc: Badari Pulavarty <pbadari@us.ibm.com> Cc: Zach Brown <zach.brown@oracle.com> Cc: Jens Axboe <jens.axboe@oracle.com> Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Nick Piggin <npiggin@suse.de> 2008-07-25 22:45:24 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-07-26 15:00:06 -0400
commit: 8174c430e445a93016ef18f717fe570214fa38bf (patch)
tree: f1b4426eae7401425e9102c7b3e141be86f0930c /arch
parent: 21cc199baa815d7b3f1ace4be20b9558cbddc00f (diff)
3 files changed, 260 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6b2debfabddc..6bdde845818e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,7 @@ config X86
        select HAVE_IDE
        select HAVE_OPROFILE
        select HAVE_IOREMAP_PROT
+        select HAVE_GET_USER_PAGES_FAST
        select HAVE_KPROBES
        select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X
        select HAVE_KRETPROBES
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 1fbb844c3d7a..2977ea37791f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,6 +1,7 @@
 obj-y   :=  init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
            pat.o pgtable.o
+obj-$(CONFIG_HAVE_GET_USER_PAGES_FAST) += gup.o
 obj-$(CONFIG_X86_32)            += pgtable_32.o
 obj-$(CONFIG_HUGETLB_PAGE)      += hugetlbpage.o
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
new file mode 100644
index 000000000000..6f733121f32e
--- /dev/null
+++ b/arch/x86/mm/gup.c
@@ -0,0 +1,258 @@
+/*
+ * Lockless get_user_pages_fast for x86
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
+/*
+ * Read the pte at @ptep for the lockless walk.  Without CONFIG_X86_PAE this
+ * is a plain dereference.  With PAE the low and high halves are loaded
+ * separately and the read is retried until pte_low is seen unchanged; the
+ * comment below explains why that is sufficient for present ptes.
+ */
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+#ifndef CONFIG_X86_PAE
+        return *ptep;
+#else
+        /*
+         * With get_user_pages_fast, we walk down the pagetables without taking
+         * any locks.  For this we would like to load the pointers atomically,
+         * but that is not possible (without expensive cmpxchg8b) on PAE.  What
+         * we do have is the guarantee that a pte will only either go from not
+         * present to present, or present to not present or both -- it will not
+         * switch to a completely different present page without a TLB flush in
+         * between; something that we are blocking by holding interrupts off.
+         *
+         * Setting ptes from not present to present goes:
+         * ptep->pte_high = h;
+         * smp_wmb();
+         * ptep->pte_low = l;
+         *
+         * And present to not present goes:
+         * ptep->pte_low = 0;
+         * smp_wmb();
+         * ptep->pte_high = 0;
+         *
+         * We must ensure here that the load of pte_low sees l iff pte_high
+         * sees h. We load pte_high *after* loading pte_low, which ensures we
+         * don't see an older value of pte_high.  *Then* we recheck pte_low,
+         * which ensures that we haven't picked up a changed pte high. We might
+         * have got rubbish values from pte_low and pte_high, but we are
+         * guaranteed that pte_low will not have the present bit set *unless*
+         * it is 'l'. And get_user_pages_fast only operates on present ptes, so
+         * we're safe.
+         *
+         * gup_get_pte should not be used or copied outside gup.c without being
+         * very careful -- it does not atomically load the pte or anything that
+         * is likely to be useful for you.
+         */
+        pte_t pte;
+retry:
+        /* low half first, then high half, then re-read low to detect a change */
+        pte.pte_low = ptep->pte_low;
+        smp_rmb();
+        pte.pte_high = ptep->pte_high;
+        smp_rmb();
+        if (unlikely(pte.pte_low != ptep->pte_low))
+                goto retry;
+        return pte;
+#endif
+}
+/*
+ * The leaf walkers on the hot path are deliberately noinline: when gcc folds
+ * everything into a single function the register pressure becomes too high.
+ */
+/*
+ * Take a reference on every page mapped by the ptes under @pmd that cover
+ * [addr, end), storing each page in @pages and advancing *@nr.
+ *
+ * Returns 1 once the whole range has been collected.  Returns 0 as soon as a
+ * pte is missing one of the required bits (present, user, plus writable when
+ * @write is set) or has _PAGE_SPECIAL set; pages recorded before that point
+ * stay in @pages and remain counted in *@nr.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+                unsigned long end, int write, struct page **pages, int *nr)
+{
+        unsigned long required = _PAGE_PRESENT|_PAGE_USER;
+        pte_t *cur;
+
+        if (write)
+                required |= _PAGE_RW;
+
+        cur = pte_offset_map(&pmd, addr);
+        for (;;) {
+                pte_t entry = gup_get_pte(cur);
+                struct page *pg;
+
+                if ((pte_val(entry) & (required | _PAGE_SPECIAL)) != required) {
+                        pte_unmap(cur);
+                        return 0;
+                }
+                VM_BUG_ON(!pfn_valid(pte_pfn(entry)));
+                pg = pte_page(entry);
+                get_page(pg);
+                pages[*nr] = pg;
+                (*nr)++;
+
+                addr += PAGE_SIZE;
+                if (addr == end)
+                        break;
+                cur++;
+        }
+        /* cur still refers to the last pte that was examined */
+        pte_unmap(cur);
+        return 1;
+}
+/*
+ * Add @nr references to @page with a single atomic add.  @page is asserted to
+ * be its own compound head and to already have a non-zero reference count.
+ */
+static inline void get_head_page_multiple(struct page *page, int nr)
+{
+        VM_BUG_ON(compound_head(page) != page);
+        VM_BUG_ON(!page_count(page));
+        atomic_add(nr, &page->_count);
+}
+/*
+ * Collect the pages of a large (huge page) pmd mapping that fall inside
+ * [addr, end).  Each sub-page is stored in @pages and counted in *@nr; the
+ * matching number of references is then added to the head page in one step.
+ *
+ * Returns 0 without touching @pages when the entry lacks the required bits
+ * (present, user, plus writable when @write is set), 1 otherwise.
+ */
+static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
+                unsigned long end, int write, struct page **pages, int *nr)
+{
+        pte_t pte = *(pte_t *)&pmd;
+        unsigned long required = _PAGE_PRESENT|_PAGE_USER;
+        struct page *head, *sub;
+        int taken = 0;
+
+        if (write)
+                required |= _PAGE_RW;
+        if ((pte_val(pte) & required) != required)
+                return 0;
+
+        /* hugepages are never "special" */
+        VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
+        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+        head = pte_page(pte);
+        /* first sub-page is selected by addr's offset within the huge page */
+        sub = head + ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
+        for (;;) {
+                VM_BUG_ON(compound_head(sub) != head);
+                pages[*nr] = sub;
+                (*nr)++;
+                sub++;
+                taken++;
+
+                addr += PAGE_SIZE;
+                if (addr == end)
+                        break;
+        }
+        get_head_page_multiple(head, taken);
+        return 1;
+}
+/*
+ * Walk the pmd entries under @pud that cover [addr, end), handing each one to
+ * gup_huge_pmd() when it is a large mapping and to gup_pte_range() otherwise.
+ *
+ * Returns 1 when every entry was handled, 0 when an entry is empty or a leaf
+ * walker reported failure.
+ */
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+                int write, struct page **pages, int *nr)
+{
+        pmd_t *slot = pmd_offset(&pud, addr);
+        unsigned long stop;
+
+        for (;;) {
+                pmd_t entry = *slot;
+                int ok;
+
+                stop = pmd_addr_end(addr, end);
+                if (pmd_none(entry))
+                        return 0;
+
+                if (unlikely(pmd_large(entry)))
+                        ok = gup_huge_pmd(entry, addr, stop, write, pages, nr);
+                else
+                        ok = gup_pte_range(entry, addr, stop, write, pages, nr);
+                if (!ok)
+                        return 0;
+
+                addr = stop;
+                if (addr == end)
+                        break;
+                slot++;
+        }
+        return 1;
+}
+/*
+ * Walk the pud entries under @pgd that cover [addr, end) and descend into
+ * gup_pmd_range() for each of them.
+ *
+ * Returns 1 when every entry was handled, 0 when an entry is empty or the
+ * lower-level walk reported failure.
+ */
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+                        int write, struct page **pages, int *nr)
+{
+        pud_t *slot = pud_offset(&pgd, addr);
+        unsigned long stop;
+
+        for (;;) {
+                pud_t entry = *slot;
+
+                stop = pud_addr_end(addr, end);
+                if (pud_none(entry))
+                        return 0;
+                if (!gup_pmd_range(entry, addr, stop, write, pages, nr))
+                        return 0;
+
+                addr = stop;
+                if (addr == end)
+                        break;
+                slot++;
+        }
+        return 1;
+}
+/*
+ * get_user_pages_fast() - pin user pages without taking mmap_sem in the
+ * common case.
+ * @start:    starting user address (rounded down to a page boundary here)
+ * @nr_pages: number of pages to pin from @start
+ * @write:    non-zero if the pages must be writable
+ * @pages:    array that receives the pinned pages; must have room for
+ *            @nr_pages entries
+ *
+ * First tries a walk of current->mm's page tables with interrupts disabled.
+ * If that walk cannot complete, the remaining pages are requested from
+ * get_user_pages() under mmap_sem.
+ *
+ * Returns the number of pages pinned, which may be fewer than requested.  If
+ * nothing was pinned by the fast walk, the return value of get_user_pages()
+ * (possibly a negative error) is returned as is.  Returns 0 for
+ * @nr_pages <= 0.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+                        struct page **pages)
+{
+        struct mm_struct *mm = current->mm;
+        unsigned long addr, len, end;
+        unsigned long next;
+        pgd_t *pgdp;
+        int nr = 0;
+        int ret;
+
+        /*
+         * The walkers below are do/while loops and would store at least one
+         * page, so an empty or negative request must not reach them.
+         */
+        if (unlikely(nr_pages <= 0))
+                return 0;
+
+        /*
+         * The walkers terminate on "addr != end" against page-table-aligned
+         * boundaries, so the range has to be page aligned.  Do the length
+         * arithmetic in unsigned long; "nr_pages << PAGE_SHIFT" in int can
+         * overflow.
+         */
+        start &= PAGE_MASK;
+        addr = start;
+        len = (unsigned long)nr_pages << PAGE_SHIFT;
+        end = start + len;
+
+        /* A wrapped range cannot be walked here; let the slow path judge it */
+        if (unlikely(end < start))
+                goto slow_irqon;
+        if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+                                        start, len)))
+                goto slow_irqon;
+        /*
+         * XXX: batch / limit 'nr', to avoid large irq off latency
+         * needs some instrumenting to determine the common sizes used by
+         * important workloads (eg. DB2), and whether limiting the batch size
+         * will decrease performance.
+         *
+         * It seems like we're in the clear for the moment. Direct-IO is
+         * the main guy that batches up lots of get_user_pages, and even
+         * they are limited to 64-at-a-time which is not so many.
+         */
+        /*
+         * This doesn't prevent pagetable teardown, but does prevent
+         * the pagetables and pages from being freed on x86.
+         *
+         * So long as we atomically load page table pointers versus teardown
+         * (which we do on x86, with the above PAE exception), we can follow the
+         * address down to the page and take a ref on it.
+         */
+        local_irq_disable();
+        pgdp = pgd_offset(mm, addr);
+        do {
+                pgd_t pgd = *pgdp;
+
+                next = pgd_addr_end(addr, end);
+                if (pgd_none(pgd))
+                        goto slow;
+                if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+                        goto slow;
+        } while (pgdp++, addr = next, addr != end);
+        local_irq_enable();
+        VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+        return nr;
+
+slow:
+        local_irq_enable();
+slow_irqon:
+        /* Try to get the remaining pages with get_user_pages */
+        start += (unsigned long)nr << PAGE_SHIFT;
+        pages += nr;
+        down_read(&mm->mmap_sem);
+        ret = get_user_pages(current, mm, start,
+                (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+        up_read(&mm->mmap_sem);
+        /*
+         * Have to be a bit careful with return values: pages already pinned
+         * by the fast walk must be reported even if the slow path failed.
+         */
+        if (nr > 0) {
+                if (ret < 0)
+                        ret = nr;
+                else
+                        ret += nr;
+        }
+        return ret;
+}
author	Nick Piggin <npiggin@suse.de>	2008-07-25 22:45:24 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-26 15:00:06 -0400
commit	8174c430e445a93016ef18f717fe570214fa38bf (patch)
tree	f1b4426eae7401425e9102c7b3e141be86f0930c /arch
parent	21cc199baa815d7b3f1ace4be20b9558cbddc00f (diff)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6b2debfabddc..6bdde845818e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig
@@ -22,6 +22,7 @@ config X86
22	select HAVE_IDE	22	select HAVE_IDE
23	select HAVE_OPROFILE	23	select HAVE_OPROFILE
24	select HAVE_IOREMAP_PROT	24	select HAVE_IOREMAP_PROT
		25	select HAVE_GET_USER_PAGES_FAST
25	select HAVE_KPROBES	26	select HAVE_KPROBES
26	select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X	27	select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X
27	select HAVE_KRETPROBES	28	select HAVE_KRETPROBES


diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 1fbb844c3d7a..2977ea37791f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile
@@ -1,6 +1,7 @@
1	obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \	1	obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2	pat.o pgtable.o	2	pat.o pgtable.o
3		3
		4	obj-$(CONFIG_HAVE_GET_USER_PAGES_FAST) += gup.o
4	obj-$(CONFIG_X86_32) += pgtable_32.o	5	obj-$(CONFIG_X86_32) += pgtable_32.o
5		6
6	obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o	7	obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o


diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c new file mode 100644 index 000000000000..6f733121f32e --- /dev/null +++ b/arch/x86/mm/gup.c
@@ -0,0 +1,258 @@
		1	/*
		2	* Lockless get_user_pages_fast for x86
		3	*
		4	* Copyright (C) 2008 Nick Piggin
		5	* Copyright (C) 2008 Novell Inc.
		6	*/
		7	#include <linux/sched.h>
		8	#include <linux/mm.h>
		9	#include <linux/vmstat.h>
		10	#include <linux/highmem.h>
		11
		12	#include <asm/pgtable.h>
		13
		14	static inline pte_t gup_get_pte(pte_t *ptep)
		15	{
		16	#ifndef CONFIG_X86_PAE
		17	return *ptep;
		18	#else
		19	/*
		20	* With get_user_pages_fast, we walk down the pagetables without taking
		21	* any locks. For this we would like to load the pointers atomically,
		22	* but that is not possible (without expensive cmpxchg8b) on PAE. What
		23	* we do have is the guarantee that a pte will only either go from not
		24	* present to present, or present to not present or both -- it will not
		25	* switch to a completely different present page without a TLB flush in
		26	* between; something that we are blocking by holding interrupts off.
		27	*
		28	* Setting ptes from not present to present goes:
		29	* ptep->pte_high = h;
		30	* smp_wmb();
		31	* ptep->pte_low = l;
		32	*
		33	* And present to not present goes:
		34	* ptep->pte_low = 0;
		35	* smp_wmb();
		36	* ptep->pte_high = 0;
		37	*
		38	* We must ensure here that the load of pte_low sees l iff pte_high
		39	* sees h. We load pte_high after loading pte_low, which ensures we
		40	* don't see an older value of pte_high. Then we recheck pte_low,
		41	* which ensures that we haven't picked up a changed pte high. We might
		42	* have got rubbish values from pte_low and pte_high, but we are
		43	* guaranteed that pte_low will not have the present bit set unless
		44	* it is 'l'. And get_user_pages_fast only operates on present ptes, so
		45	* we're safe.
		46	*
		47	* gup_get_pte should not be used or copied outside gup.c without being
		48	* very careful -- it does not atomically load the pte or anything that
		49	* is likely to be useful for you.
		50	*/
		51	pte_t pte;
		52
		53	retry:
		54	pte.pte_low = ptep->pte_low;
		55	smp_rmb();
		56	pte.pte_high = ptep->pte_high;
		57	smp_rmb();
		58	if (unlikely(pte.pte_low != ptep->pte_low))
		59	goto retry;
		60
		61	return pte;
		62	#endif
		63	}
		64
		65	/*
		66	* The performance critical leaf functions are made noinline otherwise gcc
		67	* inlines everything into a single function which results in too much
		68	* register pressure.
		69	*/
		70	static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		71	unsigned long end, int write, struct page **pages, int *nr)
		72	{
		73	unsigned long mask;
		74	pte_t *ptep;
		75
		76	mask = _PAGE_PRESENT|_PAGE_USER;
		77	if (write)
		78	mask |= _PAGE_RW;
		79
		80	ptep = pte_offset_map(&pmd, addr);
		81	do {
		82	pte_t pte = gup_get_pte(ptep);
		83	struct page *page;
		84
		85	if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) {
		86	pte_unmap(ptep);
		87	return 0;
		88	}
		89	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		90	page = pte_page(pte);
		91	get_page(page);
		92	pages[*nr] = page;
		93	(*nr)++;
		94
		95	} while (ptep++, addr += PAGE_SIZE, addr != end);
		96	pte_unmap(ptep - 1);
		97
		98	return 1;
		99	}
		100
		101	static inline void get_head_page_multiple(struct page *page, int nr)
		102	{
		103	VM_BUG_ON(page != compound_head(page));
		104	VM_BUG_ON(page_count(page) == 0);
		105	atomic_add(nr, &page->_count);
		106	}
		107
		108	static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
		109	unsigned long end, int write, struct page **pages, int *nr)
		110	{
		111	unsigned long mask;
		112	pte_t pte = *(pte_t *)&pmd;
		113	struct page *head, *page;
		114	int refs;
		115
		116	mask = _PAGE_PRESENT|_PAGE_USER;
		117	if (write)
		118	mask |= _PAGE_RW;
		119	if ((pte_val(pte) & mask) != mask)
		120	return 0;
		121	/* hugepages are never "special" */
		122	VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL);
		123	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		124
		125	refs = 0;
		126	head = pte_page(pte);
		127	page = head + ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
		128	do {
		129	VM_BUG_ON(compound_head(page) != head);
		130	pages[*nr] = page;
		131	(*nr)++;
		132	page++;
		133	refs++;
		134	} while (addr += PAGE_SIZE, addr != end);
		135	get_head_page_multiple(head, refs);
		136
		137	return 1;
		138	}
		139
		140	static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		141	int write, struct page **pages, int *nr)
		142	{
		143	unsigned long next;
		144	pmd_t *pmdp;
		145
		146	pmdp = pmd_offset(&pud, addr);
		147	do {
		148	pmd_t pmd = *pmdp;
		149
		150	next = pmd_addr_end(addr, end);
		151	if (pmd_none(pmd))
		152	return 0;
		153	if (unlikely(pmd_large(pmd))) {
		154	if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
		155	return 0;
		156	} else {
		157	if (!gup_pte_range(pmd, addr, next, write, pages, nr))
		158	return 0;
		159	}
		160	} while (pmdp++, addr = next, addr != end);
		161
		162	return 1;
		163	}
		164
		165	static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
		166	int write, struct page **pages, int *nr)
		167	{
		168	unsigned long next;
		169	pud_t *pudp;
		170
		171	pudp = pud_offset(&pgd, addr);
		172	do {
		173	pud_t pud = *pudp;
		174
		175	next = pud_addr_end(addr, end);
		176	if (pud_none(pud))
		177	return 0;
		178	if (!gup_pmd_range(pud, addr, next, write, pages, nr))
		179	return 0;
		180	} while (pudp++, addr = next, addr != end);
		181
		182	return 1;
		183	}
		184
		185	int get_user_pages_fast(unsigned long start, int nr_pages, int write,
		186	struct page **pages)
		187	{
		188	struct mm_struct *mm = current->mm;
		189	unsigned long end = start + (nr_pages << PAGE_SHIFT);
		190	unsigned long addr = start;
		191	unsigned long next;
		192	pgd_t *pgdp;
		193	int nr = 0;
		194
		195	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
		196	start, nr_pages*PAGE_SIZE)))
		197	goto slow_irqon;
		198
		199	/*
		200	* XXX: batch / limit 'nr', to avoid large irq off latency
		201	* needs some instrumenting to determine the common sizes used by
		202	* important workloads (eg. DB2), and whether limiting the batch size
		203	* will decrease performance.
		204	*
		205	* It seems like we're in the clear for the moment. Direct-IO is
		206	* the main guy that batches up lots of get_user_pages, and even
		207	* they are limited to 64-at-a-time which is not so many.
		208	*/
		209	/*
		210	* This doesn't prevent pagetable teardown, but does prevent
		211	* the pagetables and pages from being freed on x86.
		212	*
		213	* So long as we atomically load page table pointers versus teardown
		214	* (which we do on x86, with the above PAE exception), we can follow the
		215	* address down to the page and take a ref on it.
		216	*/
		217	local_irq_disable();
		218	pgdp = pgd_offset(mm, addr);
		219	do {
		220	pgd_t pgd = *pgdp;
		221
		222	next = pgd_addr_end(addr, end);
		223	if (pgd_none(pgd))
		224	goto slow;
		225	if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
		226	goto slow;
		227	} while (pgdp++, addr = next, addr != end);
		228	local_irq_enable();
		229
		230	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
		231	return nr;
		232
		233	{
		234	int ret;
		235
		236	slow:
		237	local_irq_enable();
		238	slow_irqon:
		239	/* Try to get the remaining pages with get_user_pages */
		240	start += nr << PAGE_SHIFT;
		241	pages += nr;
		242
		243	down_read(&mm->mmap_sem);
		244	ret = get_user_pages(current, mm, start,
		245	(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		246	up_read(&mm->mmap_sem);
		247
		248	/* Have to be a bit careful with return values */
		249	if (nr > 0) {
		250	if (ret < 0)
		251	ret = nr;
		252	else
		253	ret += nr;
		254	}
		255
		256	return ret;
		257	}
		258	}