aboutsummaryrefslogblamecommitdiffstats
path: root/crypto/cts.c
blob: ccf9c5de39584b32d34f26dc9a0e278f812825ac (plain) (tree)


























































































                                                                        


                                


















































































                                                                               


                                












































































































































































                                                                               
/*
 * CTS: Cipher Text Stealing mode
 *
 * COPYRIGHT (c) 2008
 * The Regents of the University of Michigan
 * ALL RIGHTS RESERVED
 *
 * Permission is granted to use, copy, create derivative works
 * and redistribute this software and such derivative works
 * for any purpose, so long as the name of The University of
 * Michigan is not used in any advertising or publicity
 * pertaining to the use of distribution of this software
 * without specific, written prior authorization.  If the
 * above copyright notice or any other identification of the
 * University of Michigan is included in any copy of any
 * portion of this software, then the disclaimer below must
 * also be included.
 *
 * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
 * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
 * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
 * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
 * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
 * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
 * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
 * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
 * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGES.
 */

/* Derived from various:
 *	Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
 */

/*
 * This is the Cipher Text Stealing mode as described by
 * Section 8 of rfc2040 and referenced by rfc3962.
 * rfc3962 includes errata information in its Appendix A.
 */

#include <crypto/algapi.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <crypto/scatterwalk.h>
#include <linux/slab.h>

struct crypto_cts_ctx {
	struct crypto_blkcipher *child;
};

static int crypto_cts_setkey(struct crypto_tfm *parent, const u8 *key,
			     unsigned int keylen)
{
	struct crypto_cts_ctx *ctx = crypto_tfm_ctx(parent);
	struct crypto_blkcipher *child = ctx->child;
	int err;

	crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
	crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
				       CRYPTO_TFM_REQ_MASK);
	err = crypto_blkcipher_setkey(child, key, keylen);
	crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
				     CRYPTO_TFM_RES_MASK);
	return err;
}

static int cts_cbc_encrypt(struct crypto_cts_ctx *ctx,
			   struct blkcipher_desc *desc,
			   struct scatterlist *dst,
			   struct scatterlist *src,
			   unsigned int offset,
			   unsigned int nbytes)
{
	int bsize = crypto_blkcipher_blocksize(desc->tfm);
	u8 tmp[bsize], tmp2[bsize];
	struct blkcipher_desc lcldesc;
	struct scatterlist sgsrc[1], sgdst[1];
	int lastn = nbytes - bsize;
	u8 iv[bsize];
	u8 s[bsize * 2], d[bsize * 2];
	int err;

	if (lastn < 0)
		return -EINVAL;

	sg_init_table(sgsrc, 1);
	sg_init_table(sgdst, 1);

	memset(s, 0, sizeof(s));
	scatterwalk_map_and_copy(s, src, offset, nbytes, 0);

	memcpy(iv, desc->info, bsize);

	lcldesc.tfm = ctx->child;
	lcldesc.info = iv;
	lcldesc.flags = desc->flags;

	sg_set_buf(&sgsrc[0], s, bsize);
	sg_set_buf(&sgdst[0], tmp, bsize);
	err = crypto_blkcipher_encrypt_iv(&lcldesc, sgdst, sgsrc, bsize);

	memcpy(d + bsize, tmp, lastn);

	lcldesc.info = tmp;

	sg_set_buf(&sgsrc[0], s + bsize, bsize);
	sg_set_buf(&sgdst[0], tmp2, bsize);
	err = crypto_blkcipher_encrypt_iv(&lcldesc, sgdst, sgsrc, bsize);

	memcpy(d, tmp2, bsize);

	scatterwalk_map_and_copy(d, dst, offset, nbytes, 1);

	memcpy(desc->info, tmp2, bsize);

	return err;
}

static int crypto_cts_encrypt(struct blkcipher_desc *desc,
			      struct scatterlist *dst, struct scatterlist *src,
			      unsigned int nbytes)
{
	struct crypto_cts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	int bsize = crypto_blkcipher_blocksize(desc->tfm);
	int tot_blocks = (nbytes + bsize - 1) / bsize;
	int cbc_blocks = tot_blocks > 2 ? tot_blocks - 2 : 0;
	struct blkcipher_desc lcldesc;
	int err;

	lcldesc.tfm = ctx->child;
	lcldesc.info = desc->info;
	lcldesc.flags = desc->flags;

	if (tot_blocks == 1) {
		err = crypto_blkcipher_encrypt_iv(&lcldesc, dst, src, bsize);
	} else if (nbytes <= bsize * 2) {
		err = cts_cbc_encrypt(ctx, desc, dst, src, 0, nbytes);
	} else {
		/* do normal function for tot_blocks - 2 */
		err = crypto_blkcipher_encrypt_iv(&lcldesc, dst, src,
							cbc_blocks * bsize);
		if (err == 0) {
			/* do cts for final two blocks */
			err = cts_cbc_encrypt(ctx, desc, dst, src,
						cbc_blocks * bsize,
						nbytes - (cbc_blocks * bsize));
		}
	}

	return err;
}

static int cts_cbc_decrypt(struct crypto_cts_ctx *ctx,
			   struct blkcipher_desc *desc,
			   struct scatterlist *dst,
			   struct scatterlist *src,
			   unsigned int offset,
			   unsigned int nbytes)
{
	int bsize = crypto_blkcipher_blocksize(desc->tfm);
	u8 tmp[bsize];
	struct blkcipher_desc lcldesc;
	struct scatterlist sgsrc[1], sgdst[1];
	int lastn = nbytes - bsize;
	u8 iv[bsize];
	u8 s[bsize * 2], d[bsize * 2];
	int err;

	if (lastn < 0)
		return -EINVAL;

	sg_init_table(sgsrc, 1);
	sg_init_table(sgdst, 1);

	scatterwalk_map_and_copy(s, src, offset, nbytes, 0);

	lcldesc.tfm = ctx->child;
	lcldesc.info = iv;
	lcldesc.flags = desc->flags;

	/* 1. Decrypt Cn-1 (s) to create Dn (tmp)*/
	memset(iv, 0, sizeof(iv));
	sg_set_buf(&sgsrc[0], s, bsize);
	sg_set_buf(&sgdst[0], tmp, bsize);
	err = crypto_blkcipher_decrypt_iv(&lcldesc, sgdst, sgsrc, bsize);
	if (err)
		return err;
	/* 2. Pad Cn with zeros at the end to create C of length BB */
	memset(iv, 0, sizeof(iv));
	memcpy(iv, s + bsize, lastn);
	/* 3. Exclusive-or Dn (tmp) with C (iv) to create Xn (tmp) */
	crypto_xor(tmp, iv, bsize);
	/* 4. Select the first Ln bytes of Xn (tmp) to create Pn */
	memcpy(d + bsize, tmp, lastn);

	/* 5. Append the tail (BB - Ln) bytes of Xn (tmp) to Cn to create En */
	memcpy(s + bsize + lastn, tmp + lastn, bsize - lastn);
	/* 6. Decrypt En to create Pn-1 */
	memset(iv, 0, sizeof(iv));
	sg_set_buf(&sgsrc[0], s + bsize, bsize);
	sg_set_buf(&sgdst[0], d, bsize);
	err = crypto_blkcipher_decrypt_iv(&lcldesc, sgdst, sgsrc, bsize);

	/* XOR with previous block */
	crypto_xor(d, desc->info, bsize);

	scatterwalk_map_and_copy(d, dst, offset, nbytes, 1);

	memcpy(desc->info, s, bsize);
	return err;
}

static int crypto_cts_decrypt(struct blkcipher_desc *desc,
			      struct scatterlist *dst, struct scatterlist *src,
			      unsigned int nbytes)
{
	struct crypto_cts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	int bsize = crypto_blkcipher_blocksize(desc->tfm);
	int tot_blocks = (nbytes + bsize - 1) / bsize;
	int cbc_blocks = tot_blocks > 2 ? tot_blocks - 2 : 0;
	struct blkcipher_desc lcldesc;
	int err;

	lcldesc.tfm = ctx->child;
	lcldesc.info = desc->info;
	lcldesc.flags = desc->flags;

	if (tot_blocks == 1) {
		err = crypto_blkcipher_decrypt_iv(&lcldesc, dst, src, bsize);
	} else if (nbytes <= bsize * 2) {
		err = cts_cbc_decrypt(ctx, desc, dst, src, 0, nbytes);
	} else {
		/* do normal function for tot_blocks - 2 */
		err = crypto_blkcipher_decrypt_iv(&lcldesc, dst, src,
							cbc_blocks * bsize);
		if (err == 0) {
			/* do cts for final two blocks */
			err = cts_cbc_decrypt(ctx, desc, dst, src,
						cbc_blocks * bsize,
						nbytes - (cbc_blocks * bsize));
		}
	}
	return err;
}

static int crypto_cts_init_tfm(struct crypto_tfm *tfm)
{
	struct crypto_instance *inst = (void *)tfm->__crt_alg;
	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
	struct crypto_cts_ctx *ctx = crypto_tfm_ctx(tfm);
	struct crypto_blkcipher *cipher;

	cipher = crypto_spawn_blkcipher(spawn);
	if (IS_ERR(cipher))
		return PTR_ERR(cipher);

	ctx->child = cipher;
	return 0;
}

static void crypto_cts_exit_tfm(struct crypto_tfm *tfm)
{
	struct crypto_cts_ctx *ctx = crypto_tfm_ctx(tfm);
	crypto_free_blkcipher(ctx->child);
}

static struct crypto_instance *crypto_cts_alloc(struct rtattr **tb)
{
	struct crypto_instance *inst;
	struct crypto_alg *alg;
	int err;

	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
	if (err)
		return ERR_PTR(err);

	alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_BLKCIPHER,
				  CRYPTO_ALG_TYPE_MASK);
	err = PTR_ERR(alg);
	if (IS_ERR(alg))
		return ERR_PTR(err);

	inst = ERR_PTR(-EINVAL);
	if (!is_power_of_2(alg->cra_blocksize))
		goto out_put_alg;

	inst = crypto_alloc_instance("cts", alg);
	if (IS_ERR(inst))
		goto out_put_alg;

	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
	inst->alg.cra_priority = alg->cra_priority;
	inst->alg.cra_blocksize = alg->cra_blocksize;
	inst->alg.cra_alignmask = alg->cra_alignmask;
	inst->alg.cra_type = &crypto_blkcipher_type;

	/* We access the data as u32s when xoring. */
	inst->alg.cra_alignmask |= __alignof__(u32) - 1;

	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
	inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
	inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;

	inst->alg.cra_blkcipher.geniv = "seqiv";

	inst->alg.cra_ctxsize = sizeof(struct crypto_cts_ctx);

	inst->alg.cra_init = crypto_cts_init_tfm;
	inst->alg.cra_exit = crypto_cts_exit_tfm;

	inst->alg.cra_blkcipher.setkey = crypto_cts_setkey;
	inst->alg.cra_blkcipher.encrypt = crypto_cts_encrypt;
	inst->alg.cra_blkcipher.decrypt = crypto_cts_decrypt;

out_put_alg:
	crypto_mod_put(alg);
	return inst;
}

static void crypto_cts_free(struct crypto_instance *inst)
{
	crypto_drop_spawn(crypto_instance_ctx(inst));
	kfree(inst);
}

static struct crypto_template crypto_cts_tmpl = {
	.name = "cts",
	.alloc = crypto_cts_alloc,
	.free = crypto_cts_free,
	.module = THIS_MODULE,
};

static int __init crypto_cts_module_init(void)
{
	return crypto_register_template(&crypto_cts_tmpl);
}

static void __exit crypto_cts_module_exit(void)
{
	crypto_unregister_template(&crypto_cts_tmpl);
}

module_init(crypto_cts_module_init);
module_exit(crypto_cts_module_exit);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("CTS-CBC CipherText Stealing for CBC");
1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204
/*
 * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Further, this software is distributed without any warranty that it is
 * free of the rightful claim of any third person regarding infringement
 * or the like.  Any license provided herein, whether implied or
 * otherwise, applies only to this software file.  Patent licenses, if
 * any, provided herein do not apply to combinations of this program with
 * other software, or any other product whatsoever.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
 *
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA  94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
 */

/*
 * Free space allocation for XFS.
 */

#include "xfs.h"
#include "xfs_macros.h"
#include "xfs_types.h"
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"

/*
 * Prototypes for internal functions.
 */

STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int);
STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *);
STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *);
STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *);
STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
		xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);

/*
 * Internal functions.
 */

/*
 * Single level of the xfs_alloc_delete record deletion routine.
 * Delete record pointed to by cur/level.
 * Remove the record from its block then rebalance the tree.
 * Return 0 for error, 1 for done, 2 to go on to the next level.
 */
STATIC int				/* error */
xfs_alloc_delrec(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level,	/* level removing record from */
	int			*stat)	/* fail/done/go-on */
{
	xfs_agf_t		*agf;	/* allocation group freelist header */
	xfs_alloc_block_t	*block;	/* btree block record/key lives in */
	xfs_agblock_t		bno;	/* btree block number */
	xfs_buf_t		*bp;	/* buffer for block */
	int			error;	/* error return value */
	int			i;	/* loop index */
	xfs_alloc_key_t		key;	/* kp points here if block is level 0 */
	xfs_agblock_t		lbno;	/* left block's block number */
	xfs_buf_t		*lbp;	/* left block's buffer pointer */
	xfs_alloc_block_t	*left;	/* left btree block */
	xfs_alloc_key_t		*lkp=NULL;	/* left block key pointer */
	xfs_alloc_ptr_t		*lpp=NULL;	/* left block address pointer */
	int			lrecs=0;	/* number of records in left block */
	xfs_alloc_rec_t		*lrp;	/* left block record pointer */
	xfs_mount_t		*mp;	/* mount structure */
	int			ptr;	/* index in btree block for this rec */
	xfs_agblock_t		rbno;	/* right block's block number */
	xfs_buf_t		*rbp;	/* right block's buffer pointer */
	xfs_alloc_block_t	*right;	/* right btree block */
	xfs_alloc_key_t		*rkp;	/* right block key pointer */
	xfs_alloc_ptr_t		*rpp;	/* right block address pointer */
	int			rrecs=0;	/* number of records in right block */
	xfs_alloc_rec_t		*rrp;	/* right block record pointer */
	xfs_btree_cur_t		*tcur;	/* temporary btree cursor */

	/*
	 * Get the index of the entry being deleted, check for nothing there.
	 */
	ptr = cur->bc_ptrs[level];
	if (ptr == 0) {
		*stat = 0;
		return 0;
	}
	/*
	 * Get the buffer & block containing the record or key/ptr.
	 */
	bp = cur->bc_bufs[level];
	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
		return error;
#endif
	/*
	 * Fail if we're off the end of the block.
	 */
	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
		*stat = 0;
		return 0;
	}
	XFS_STATS_INC(xs_abt_delrec);
	/*
	 * It's a nonleaf.  Excise the key and ptr being deleted, by
	 * sliding the entries past them down one.
	 * Log the changed areas of the block.
	 */
	if (level > 0) {
		lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
		lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
#ifdef DEBUG
		for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
			if ((error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)))
				return error;
		}
#endif
		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
			memmove(&lkp[ptr - 1], &lkp[ptr],
				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lkp)); /* INT_: mem copy */
			memmove(&lpp[ptr - 1], &lpp[ptr],
				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lpp)); /* INT_: mem copy */
			xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
			xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
		}
	}
	/*
	 * It's a leaf.  Excise the record being deleted, by sliding the
	 * entries past it down one.  Log the changed areas of the block.
	 */
	else {
		lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
			memmove(&lrp[ptr - 1], &lrp[ptr],
				(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*lrp));
			xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
		}
		/*
		 * If it's the first record in the block, we'll need a key
		 * structure to pass up to the next level (updkey).
		 */
		if (ptr == 1) {
			key.ar_startblock = lrp->ar_startblock; /* INT_: direct copy */
			key.ar_blockcount = lrp->ar_blockcount; /* INT_: direct copy */
			lkp = &key;
		}
	}
	/*
	 * Decrement and log the number of entries in the block.
	 */
	INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1);
	xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
	/*
	 * See if the longest free extent in the allocation group was
	 * changed by this operation.  True if it's the by-size btree, and
	 * this is the leaf level, and there is no right sibling block,
	 * and this was the last record.
	 */
	agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
	mp = cur->bc_mp;

	if (level == 0 &&
	    cur->bc_btnum == XFS_BTNUM_CNT &&
	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
	    ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
		ASSERT(ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT) + 1);
		/*
		 * There are still records in the block.  Grab the size
		 * from the last one.
		 */
		if (INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
			rrp = XFS_ALLOC_REC_ADDR(block, INT_GET(block->bb_numrecs, ARCH_CONVERT), cur);
			INT_COPY(agf->agf_longest, rrp->ar_blockcount, ARCH_CONVERT);
		}
		/*
		 * No free extents left.
		 */
		else
			agf->agf_longest = 0;
		mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest =
			INT_GET(agf->agf_longest, ARCH_CONVERT);
		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
			XFS_AGF_LONGEST);
	}
	/*
	 * Is this the root level?  If so, we're almost done.
	 */
	if (level == cur->bc_nlevels - 1) {
		/*
		 * If this is the root level,
		 * and there's only one entry left,
		 * and it's NOT the leaf level,
		 * then we can get rid of this level.
		 */
		if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == 1 && level > 0) {
			/*
			 * lpp is still set to the first pointer in the block.
			 * Make it the new root of the btree.
			 */
			bno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT);
			INT_COPY(agf->agf_roots[cur->bc_btnum], *lpp, ARCH_CONVERT);
			INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, -1);
			mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_levels[cur->bc_btnum]--;
			/*
			 * Put this buffer/block on the ag's freelist.
			 */
			if ((error = xfs_alloc_put_freelist(cur->bc_tp,
					cur->bc_private.a.agbp, NULL, bno)))
				return error;
			/*
			 * Since blocks move to the free list without the
			 * coordination used in xfs_bmap_finish, we can't allow
			 * block to be available for reallocation and
			 * non-transaction writing (user data) until we know
			 * that the transaction that moved it to the free list
			 * is permanently on disk. We track the blocks by
			 * declaring these blocks as "busy"; the busy list is
			 * maintained on a per-ag basis and each transaction
			 * records which entries should be removed when the
			 * iclog commits to disk. If a busy block is
			 * allocated, the iclog is pushed up to the LSN
			 * that freed the block.
			 */
			xfs_alloc_mark_busy(cur->bc_tp,
				INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1);

			xfs_trans_agbtree_delta(cur->bc_tp, -1);
			xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
				XFS_AGF_ROOTS | XFS_AGF_LEVELS);
			/*
			 * Update the cursor so there's one fewer level.
			 */
			xfs_btree_setbuf(cur, level, NULL);
			cur->bc_nlevels--;
		} else if (level > 0 &&
			   (error = xfs_alloc_decrement(cur, level, &i)))
			return error;
		*stat = 1;
		return 0;
	}
	/*
	 * If we deleted the leftmost entry in the block, update the
	 * key values above us in the tree.
	 */
	if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
		return error;
	/*
	 * If the number of records remaining in the block is at least
	 * the minimum, we're done.
	 */
	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
		if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
			return error;
		*stat = 1;
		return 0;
	}
	/*
	 * Otherwise, we have to move some records around to keep the
	 * tree balanced.  Look at the left and right sibling blocks to
	 * see if we can re-balance by moving only one record.
	 */
	rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
	lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
	bno = NULLAGBLOCK;
	ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
	/*
	 * Duplicate the cursor so our btree manipulations here won't
	 * disrupt the next level up.
	 */
	if ((error = xfs_btree_dup_cursor(cur, &tcur)))
		return error;
	/*
	 * If there's a right sibling, see if it's ok to shift an entry
	 * out of it.
	 */
	if (rbno != NULLAGBLOCK) {
		/*
		 * Move the temp cursor to the last entry in the next block.
		 * Actually any entry but the first would suffice.
		 */
		i = xfs_btree_lastrec(tcur, level);
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		if ((error = xfs_alloc_increment(tcur, level, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		i = xfs_btree_lastrec(tcur, level);
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		/*
		 * Grab a pointer to the block.
		 */
		rbp = tcur->bc_bufs[level];
		right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
#ifdef DEBUG
		if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
			goto error0;
#endif
		/*
		 * Grab the current block number, for future use.
		 */
		bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
		/*
		 * If right block is full enough so that removing one entry
		 * won't make it too empty, and left-shifting an entry out
		 * of right to us works, we're done.
		 */
		if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
		     XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
			if ((error = xfs_alloc_lshift(tcur, level, &i)))
				goto error0;
			if (i) {
				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
				       XFS_ALLOC_BLOCK_MINRECS(level, cur));
				xfs_btree_del_cursor(tcur,
						     XFS_BTREE_NOERROR);
				if (level > 0 &&
				    (error = xfs_alloc_decrement(cur, level,
					    &i)))
					return error;
				*stat = 1;
				return 0;
			}
		}
		/*
		 * Otherwise, grab the number of records in right for
		 * future reference, and fix up the temp cursor to point
		 * to our block again (last record).
		 */
		rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
		if (lbno != NULLAGBLOCK) {
			i = xfs_btree_firstrec(tcur, level);
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			if ((error = xfs_alloc_decrement(tcur, level, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		}
	}
	/*
	 * If there's a left sibling, see if it's ok to shift an entry
	 * out of it.
	 */
	if (lbno != NULLAGBLOCK) {
		/*
		 * Move the temp cursor to the first entry in the
		 * previous block.
		 */
		i = xfs_btree_firstrec(tcur, level);
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		if ((error = xfs_alloc_decrement(tcur, level, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		xfs_btree_firstrec(tcur, level);
		/*
		 * Grab a pointer to the block.
		 */
		lbp = tcur->bc_bufs[level];
		left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
#ifdef DEBUG
		if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
			goto error0;
#endif
		/*
		 * Grab the current block number, for future use.
		 */
		bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
		/*
		 * If left block is full enough so that removing one entry
		 * won't make it too empty, and right-shifting an entry out
		 * of left to us works, we're done.
		 */
		if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
		     XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
			if ((error = xfs_alloc_rshift(tcur, level, &i)))
				goto error0;
			if (i) {
				ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
				       XFS_ALLOC_BLOCK_MINRECS(level, cur));
				xfs_btree_del_cursor(tcur,
						     XFS_BTREE_NOERROR);
				if (level == 0)
					cur->bc_ptrs[0]++;
				*stat = 1;
				return 0;
			}
		}
		/*
		 * Otherwise, grab the number of records in right for
		 * future reference.
		 */
		lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
	}
	/*
	 * Delete the temp cursor, we're done with it.
	 */
	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
	/*
	 * If here, we need to do a join to keep the tree balanced.
	 */
	ASSERT(bno != NULLAGBLOCK);
	/*
	 * See if we can join with the left neighbor block.
	 */
	if (lbno != NULLAGBLOCK &&
	    lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
		/*
		 * Set "right" to be the starting block,
		 * "left" to be the left neighbor.
		 */
		rbno = bno;
		right = block;
		rbp = bp;
		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
				cur->bc_private.a.agno, lbno, 0, &lbp,
				XFS_ALLOC_BTREE_REF)))
			return error;
		left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
		if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
			return error;
	}
	/*
	 * If that won't work, see if we can join with the right neighbor block.
	 */
	else if (rbno != NULLAGBLOCK &&
		 rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
		  XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
		/*
		 * Set "left" to be the starting block,
		 * "right" to be the right neighbor.
		 */
		lbno = bno;
		left = block;
		lbp = bp;
		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
				cur->bc_private.a.agno, rbno, 0, &rbp,
				XFS_ALLOC_BTREE_REF)))
			return error;
		right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
		if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
			return error;
	}
	/*
	 * Otherwise, we can't fix the imbalance.
	 * Just return.  This is probably a logic error, but it's not fatal.
	 */
	else {
		if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
			return error;
		*stat = 1;
		return 0;
	}
	/*
	 * We're now going to join "left" and "right" by moving all the stuff
	 * in "right" to "left" and deleting "right".
	 */
	if (level > 0) {
		/*
		 * It's a non-leaf.  Move keys and pointers.
		 */
		lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
		lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
#ifdef DEBUG
		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
			if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
				return error;
		}
#endif
		memcpy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); /* INT_: structure copy */
		memcpy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); /* INT_: structure copy */
		xfs_alloc_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
		xfs_alloc_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
	} else {
		/*
		 * It's a leaf.  Move records.
		 */
		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
		memcpy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
		xfs_alloc_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
				   INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
	}
	/*
	 * If we joined with the left neighbor, set the buffer in the
	 * cursor to the left block, and fix up the index.
	 */
	if (bp != lbp) {
		xfs_btree_setbuf(cur, level, lbp);
		cur->bc_ptrs[level] += INT_GET(left->bb_numrecs, ARCH_CONVERT);
	}
	/*
	 * If we joined with the right neighbor and there's a level above
	 * us, increment the cursor at that level.
	 */
	else if (level + 1 < cur->bc_nlevels &&
		 (error = xfs_alloc_increment(cur, level + 1, &i)))
		return error;
	/*
	 * Fix up the number of records in the surviving block.
	 */
	INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT));
	/*
	 * Fix up the right block pointer in the surviving block, and log it.
	 */
	left->bb_rightsib = right->bb_rightsib; /* INT_: direct copy */
	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
	/*
	 * If there is a right sibling now, make it point to the
	 * remaining block.
	 */
	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
		xfs_alloc_block_t	*rrblock;
		xfs_buf_t		*rrbp;

		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
				cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
				&rrbp, XFS_ALLOC_BTREE_REF)))
			return error;
		rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
			return error;
		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
		xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
	}
	/*
	 * Free the deleting block by putting it on the freelist.
	 */
	if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp,
			NULL, rbno)))
		return error;
	/*
	 * Since blocks move to the free list without the coordination
	 * used in xfs_bmap_finish, we can't allow block to be available
	 * for reallocation and non-transaction writing (user data)
	 * until we know that the transaction that moved it to the free
	 * list is permanently on disk. We track the blocks by declaring
	 * these blocks as "busy"; the busy list is maintained on a
	 * per-ag basis and each transaction records which entries
	 * should be removed when the iclog commits to disk. If a
	 * busy block is allocated, the iclog is pushed up to the
	 * LSN that freed the block.
	 */
	xfs_alloc_mark_busy(cur->bc_tp,
		INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1);

	xfs_trans_agbtree_delta(cur->bc_tp, -1);
	/*
	 * Adjust the current level's cursor so that we're left referring
	 * to the right node, after we're done.
	 * If this leaves the ptr value 0 our caller will fix it up.
	 */
	if (level > 0)
		cur->bc_ptrs[level]--;
	/*
	 * Return value means the next level up has something to do.
	 */
	*stat = 2;
	return 0;

error0:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Insert one record/level.  Return information to the caller
 * allowing the next level up to proceed if necessary.
 */
STATIC int				/* error */
xfs_alloc_insrec(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level,	/* level to insert record at */
	xfs_agblock_t		*bnop,	/* i/o: block number inserted */
	xfs_alloc_rec_t		*recp,	/* i/o: record data inserted */
	xfs_btree_cur_t		**curp,	/* output: new cursor replacing cur */
	int			*stat)	/* output: success/failure */
{
	xfs_agf_t		*agf;	/* allocation group freelist header */
	xfs_alloc_block_t	*block;	/* btree block record/key lives in */
	xfs_buf_t		*bp;	/* buffer for block */
	int			error;	/* error return value */
	int			i;	/* loop index */
	xfs_alloc_key_t		key;	/* key value being inserted */
	xfs_alloc_key_t		*kp;	/* pointer to btree keys */
	xfs_agblock_t		nbno;	/* block number of allocated block */
	xfs_btree_cur_t		*ncur;	/* new cursor to be used at next lvl */
	xfs_alloc_key_t		nkey;	/* new key value, from split */
	xfs_alloc_rec_t		nrec;	/* new record value, for caller */
	int			optr;	/* old ptr value */
	xfs_alloc_ptr_t		*pp;	/* pointer to btree addresses */
	int			ptr;	/* index in btree block for this rec */
	xfs_alloc_rec_t		*rp;	/* pointer to btree records */

	ASSERT(INT_GET(recp->ar_blockcount, ARCH_CONVERT) > 0);
	/*
	 * If we made it to the root level, allocate a new root block
	 * and we're done.
	 */
	if (level >= cur->bc_nlevels) {
		XFS_STATS_INC(xs_abt_insrec);
		if ((error = xfs_alloc_newroot(cur, &i)))
			return error;
		*bnop = NULLAGBLOCK;
		*stat = i;
		return 0;
	}
	/*
	 * Make a key out of the record data to be inserted, and save it.
	 */
	key.ar_startblock = recp->ar_startblock; /* INT_: direct copy */
	key.ar_blockcount = recp->ar_blockcount; /* INT_: direct copy */
	optr = ptr = cur->bc_ptrs[level];
	/*
	 * If we're off the left edge, return failure.
	 */
	if (ptr == 0) {
		*stat = 0;
		return 0;
	}
	XFS_STATS_INC(xs_abt_insrec);
	/*
	 * Get pointers to the btree buffer and block.
	 */
	bp = cur->bc_bufs[level];
	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
		return error;
	/*
	 * Check that the new entry is being inserted in the right place.
	 */
	if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
		if (level == 0) {
			rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
			xfs_btree_check_rec(cur->bc_btnum, recp, rp);
		} else {
			kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
			xfs_btree_check_key(cur->bc_btnum, &key, kp);
		}
	}
#endif
	nbno = NULLAGBLOCK;
	ncur = (xfs_btree_cur_t *)0;
	/*
	 * If the block is full, we can't insert the new entry until we
	 * make the block un-full.
	 */
	if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
		/*
		 * First, try shifting an entry to the right neighbor.
		 */
		if ((error = xfs_alloc_rshift(cur, level, &i)))
			return error;
		if (i) {
			/* nothing */
		}
		/*
		 * Next, try shifting an entry to the left neighbor.
		 */
		else {
			if ((error = xfs_alloc_lshift(cur, level, &i)))
				return error;
			if (i)
				optr = ptr = cur->bc_ptrs[level];
			else {
				/*
				 * Next, try splitting the current block in
				 * half. If this works we have to re-set our
				 * variables because we could be in a
				 * different block now.
				 */
				if ((error = xfs_alloc_split(cur, level, &nbno,
						&nkey, &ncur, &i)))
					return error;
				if (i) {
					bp = cur->bc_bufs[level];
					block = XFS_BUF_TO_ALLOC_BLOCK(bp);
#ifdef DEBUG
					if ((error =
						xfs_btree_check_sblock(cur,
							block, level, bp)))
						return error;
#endif
					ptr = cur->bc_ptrs[level];
					nrec.ar_startblock = nkey.ar_startblock; /* INT_: direct copy */
					nrec.ar_blockcount = nkey.ar_blockcount; /* INT_: direct copy */
				}
				/*
				 * Otherwise the insert fails.
				 */
				else {
					*stat = 0;
					return 0;
				}
			}
		}
	}
	/*
	 * At this point we know there's room for our new entry in the block
	 * we're pointing at.
	 */
	if (level > 0) {
		/*
		 * It's a non-leaf entry.  Make a hole for the new data
		 * in the key and ptr regions of the block.
		 */
		kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
		pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
#ifdef DEBUG
		for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
			if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level)))
				return error;
		}
#endif
		memmove(&kp[ptr], &kp[ptr - 1],
			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp)); /* INT_: copy */
		memmove(&pp[ptr], &pp[ptr - 1],
			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp)); /* INT_: copy */
#ifdef DEBUG
		if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
			return error;
#endif
		/*
		 * Now stuff the new data in, bump numrecs and log the new data.
		 */
		kp[ptr - 1] = key;
		INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
		xfs_alloc_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
		xfs_alloc_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
#ifdef DEBUG
		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT))
			xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
				kp + ptr);
#endif
	} else {
		/*
		 * It's a leaf entry.  Make a hole for the new record.
		 */
		rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
		memmove(&rp[ptr], &rp[ptr - 1],
			(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
		/*
		 * Now stuff the new record in, bump numrecs
		 * and log the new data.
		 */
		rp[ptr - 1] = *recp; /* INT_: struct copy */
		INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
		xfs_alloc_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
#ifdef DEBUG
		if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT))
			xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
				rp + ptr);
#endif
	}
	/*
	 * Log the new number of records in the btree header.
	 */
	xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
	/*
	 * If we inserted at the start of a block, update the parents' keys.
	 */
	if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
		return error;
	/*
	 * Look to see if the longest extent in the allocation group
	 * needs to be updated.
	 */

	agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
	if (level == 0 &&
	    cur->bc_btnum == XFS_BTNUM_CNT &&
	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
	    INT_GET(recp->ar_blockcount, ARCH_CONVERT) > INT_GET(agf->agf_longest, ARCH_CONVERT)) {
		/*
		 * If this is a leaf in the by-size btree and there
		 * is no right sibling block and this block is bigger
		 * than the previous longest block, update it.
		 */
		INT_COPY(agf->agf_longest, recp->ar_blockcount, ARCH_CONVERT);
		cur->bc_mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)].pagf_longest
			= INT_GET(recp->ar_blockcount, ARCH_CONVERT);
		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
			XFS_AGF_LONGEST);
	}
	/*
	 * Return the new block number, if any.
	 * If there is one, give back a record value and a cursor too.
	 */
	*bnop = nbno;
	if (nbno != NULLAGBLOCK) {
		*recp = nrec; /* INT_: struct copy */
		*curp = ncur; /* INT_: struct copy */
	}
	*stat = 1;
	return 0;
}

/*
 * Log header fields from a btree block.
 */
STATIC void
xfs_alloc_log_block(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*bp,	/* buffer containing btree block */
	int			fields)	/* mask of fields: XFS_BB_... */
{
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	static const short	offsets[] = {	/* table of offsets */
		offsetof(xfs_alloc_block_t, bb_magic),
		offsetof(xfs_alloc_block_t, bb_level),
		offsetof(xfs_alloc_block_t, bb_numrecs),
		offsetof(xfs_alloc_block_t, bb_leftsib),
		offsetof(xfs_alloc_block_t, bb_rightsib),
		sizeof(xfs_alloc_block_t)
	};

	xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, first, last);
}

/*
 * Log keys from a btree block (nonleaf).
 */
STATIC void
xfs_alloc_log_keys(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	xfs_buf_t		*bp,	/* buffer containing btree block */
	int			kfirst,	/* index of first key to log */
	int			klast)	/* index of last key to log */
{
	xfs_alloc_block_t	*block;	/* btree block to log from */
	int			first;	/* first byte offset logged */
	xfs_alloc_key_t		*kp;	/* key pointer in btree block */
	int			last;	/* last byte offset logged */

	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
	kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
	first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
	last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
}

/*
 * Log block pointer fields from a btree block (nonleaf).
 */
STATIC void
xfs_alloc_log_ptrs(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	xfs_buf_t		*bp,	/* buffer containing btree block */
	int			pfirst,	/* index of first pointer to log */
	int			plast)	/* index of last pointer to log */
{
	xfs_alloc_block_t	*block;	/* btree block to log from */
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	xfs_alloc_ptr_t		*pp;	/* block-pointer pointer in btree blk */

	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
	pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
	first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
	last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
}

/*
 * Log records from a btree block (leaf).
 */
STATIC void
xfs_alloc_log_recs(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	xfs_buf_t		*bp,	/* buffer containing btree block */
	int			rfirst,	/* index of first record to log */
	int			rlast)	/* index of last record to log */
{
	xfs_alloc_block_t	*block;	/* btree block to log from */
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	xfs_alloc_rec_t		*rp;	/* record pointer for btree block */


	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
	rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
#ifdef DEBUG
	{
		xfs_agf_t	*agf;
		xfs_alloc_rec_t	*p;

		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
		for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
			ASSERT(INT_GET(p->ar_startblock, ARCH_CONVERT) + INT_GET(p->ar_blockcount, ARCH_CONVERT) <=
			       INT_GET(agf->agf_length, ARCH_CONVERT));
	}
#endif
	first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
	last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
	xfs_trans_log_buf(cur->bc_tp, bp, first, last);
}

/*
 * Lookup the record.  The cursor is made to point to it, based on dir.
 * Return 0 if can't find any such record, 1 for success.
 */
STATIC int				/* error */
xfs_alloc_lookup(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	xfs_lookup_t		dir,	/* <=, ==, or >= */
	int			*stat)	/* success/failure */
{
	xfs_agblock_t		agbno;	/* a.g. relative btree block number */
	xfs_agnumber_t		agno;	/* allocation group number */
	xfs_alloc_block_t	*block=NULL;	/* current btree block */
	int			diff;	/* difference for the current key */
	int			error;	/* error return value */
	int			keyno=0;	/* current key number */
	int			level;	/* level in the btree */
	xfs_mount_t		*mp;	/* file system mount point */

	XFS_STATS_INC(xs_abt_lookup);
	/*
	 * Get the allocation group header, and the root block number.
	 */
	mp = cur->bc_mp;

	{
		xfs_agf_t	*agf;	/* a.g. freespace header */

		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
		agno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
		agbno = INT_GET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT);
	}
	/*
	 * Iterate over each level in the btree, starting at the root.
	 * For each level above the leaves, find the key we need, based
	 * on the lookup record, then follow the corresponding block
	 * pointer down to the next level.
	 */
	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
		xfs_buf_t	*bp;	/* buffer pointer for btree block */
		xfs_daddr_t	d;	/* disk address of btree block */

		/*
		 * Get the disk address we're looking for.
		 */
		d = XFS_AGB_TO_DADDR(mp, agno, agbno);
		/*
		 * If the old buffer at this level is for a different block,
		 * throw it away, otherwise just use it.
		 */
		bp = cur->bc_bufs[level];
		if (bp && XFS_BUF_ADDR(bp) != d)
			bp = (xfs_buf_t *)0;
		if (!bp) {
			/*
			 * Need to get a new buffer.  Read it, then
			 * set it in the cursor, releasing the old one.
			 */
			if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
					agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
				return error;
			xfs_btree_setbuf(cur, level, bp);
			/*
			 * Point to the btree block, now that we have the buffer
			 */
			block = XFS_BUF_TO_ALLOC_BLOCK(bp);
			if ((error = xfs_btree_check_sblock(cur, block, level,
					bp)))
				return error;
		} else
			block = XFS_BUF_TO_ALLOC_BLOCK(bp);
		/*
		 * If we already had a key match at a higher level, we know
		 * we need to use the first entry in this block.
		 */
		if (diff == 0)
			keyno = 1;
		/*
		 * Otherwise we need to search this block.  Do a binary search.
		 */
		else {
			int		high;	/* high entry number */
			xfs_alloc_key_t	*kkbase=NULL;/* base of keys in block */
			xfs_alloc_rec_t	*krbase=NULL;/* base of records in block */
			int		low;	/* low entry number */

			/*
			 * Get a pointer to keys or records.
			 */
			if (level > 0)
				kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
			else
				krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
			/*
			 * Set low and high entry numbers, 1-based.
			 */
			low = 1;
			if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
				/*
				 * If the block is empty, the tree must
				 * be an empty leaf.
				 */
				ASSERT(level == 0 && cur->bc_nlevels == 1);
				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
				*stat = 0;
				return 0;
			}
			/*
			 * Binary search the block.
			 */
			while (low <= high) {
				xfs_extlen_t	blockcount;	/* key value */
				xfs_agblock_t	startblock;	/* key value */

				XFS_STATS_INC(xs_abt_compare);
				/*
				 * keyno is average of low and high.
				 */
				keyno = (low + high) >> 1;
				/*
				 * Get startblock & blockcount.
				 */
				if (level > 0) {
					xfs_alloc_key_t	*kkp;

					kkp = kkbase + keyno - 1;
					startblock = INT_GET(kkp->ar_startblock, ARCH_CONVERT);
					blockcount = INT_GET(kkp->ar_blockcount, ARCH_CONVERT);
				} else {
					xfs_alloc_rec_t	*krp;

					krp = krbase + keyno - 1;
					startblock = INT_GET(krp->ar_startblock, ARCH_CONVERT);
					blockcount = INT_GET(krp->ar_blockcount, ARCH_CONVERT);
				}
				/*
				 * Compute difference to get next direction.
				 */
				if (cur->bc_btnum == XFS_BTNUM_BNO)
					diff = (int)startblock -
					       (int)cur->bc_rec.a.ar_startblock;
				else if (!(diff = (int)blockcount -
					    (int)cur->bc_rec.a.ar_blockcount))
					diff = (int)startblock -
					    (int)cur->bc_rec.a.ar_startblock;
				/*
				 * Less than, move right.
				 */
				if (diff < 0)
					low = keyno + 1;
				/*
				 * Greater than, move left.
				 */
				else if (diff > 0)
					high = keyno - 1;
				/*
				 * Equal, we're done.
				 */
				else
					break;
			}
		}
		/*
		 * If there are more levels, set up for the next level
		 * by getting the block number and filling in the cursor.
		 */
		if (level > 0) {
			/*
			 * If we moved left, need the previous key number,
			 * unless there isn't one.
			 */
			if (diff > 0 && --keyno < 1)
				keyno = 1;
			agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
#ifdef DEBUG
			if ((error = xfs_btree_check_sptr(cur, agbno, level)))
				return error;
#endif
			cur->bc_ptrs[level] = keyno;
		}
	}
	/*
	 * Done with the search.
	 * See if we need to adjust the results.
	 */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.
		 */
		if (dir == XFS_LOOKUP_GE &&
		    keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
		    INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
			int	i;

			cur->bc_ptrs[0] = keyno;
			if ((error = xfs_alloc_increment(cur, 0, &i)))
				return error;
			XFS_WANT_CORRUPTED_RETURN(i == 1);
			*stat = 1;
			return 0;
		}
	}
	else if (dir == XFS_LOOKUP_LE && diff > 0)
		keyno--;
	cur->bc_ptrs[0] = keyno;
	/*
	 * Return if we succeeded or not.
	 */
	if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
		*stat = 0;
	else
		*stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
	return 0;
}

/*
 * Move 1 record left from cur/level if possible.
 * Update cur to reflect the new path.
 */
STATIC int				/* error */
xfs_alloc_lshift(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level,	/* level to shift record on */
	int			*stat)	/* success/failure */
{
	int			error;	/* error return value */
#ifdef DEBUG
	int			i;	/* loop index */
#endif
	xfs_alloc_key_t		key;	/* key value for leaf level upward */
	xfs_buf_t		*lbp;	/* buffer for left neighbor block */
	xfs_alloc_block_t	*left;	/* left neighbor btree block */
	int			nrec;	/* new number of left block entries */
	xfs_buf_t		*rbp;	/* buffer for right (current) block */
	xfs_alloc_block_t	*right;	/* right (current) btree block */
	xfs_alloc_key_t		*rkp=NULL;	/* key pointer for right block */
	xfs_alloc_ptr_t		*rpp=NULL;	/* address pointer for right block */
	xfs_alloc_rec_t		*rrp=NULL;	/* record pointer for right block */

	/*
	 * Set up variables for this block as "right".
	 */
	rbp = cur->bc_bufs[level];
	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
		return error;
#endif
	/*
	 * If we've got no left sibling then we can't shift an entry left.
	 */
	if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}
	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	if (cur->bc_ptrs[level] <= 1) {
		*stat = 0;
		return 0;
	}
	/*
	 * Set up the left neighbor as "left".
	 */
	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
			cur->bc_private.a.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
			XFS_ALLOC_BTREE_REF)))
		return error;
	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
		return error;
	/*
	 * If it's full, it can't take another entry.
	 */
	if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
		*stat = 0;
		return 0;
	}
	nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
	/*
	 * If non-leaf, copy a key and a ptr to the left block.
	 */
	if (level > 0) {
		xfs_alloc_key_t	*lkp;	/* key pointer for left block */
		xfs_alloc_ptr_t	*lpp;	/* address pointer for left block */

		lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
		*lkp = *rkp;
		xfs_alloc_log_keys(cur, lbp, nrec, nrec);
		lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
#ifdef DEBUG
		if ((error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level)))
			return error;
#endif
		*lpp = *rpp; /* INT_: copy */
		xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
		xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
	}
	/*
	 * If leaf, copy a record to the left block.
	 */
	else {
		xfs_alloc_rec_t	*lrp;	/* record pointer for left block */

		lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
		*lrp = *rrp;
		xfs_alloc_log_recs(cur, lbp, nrec, nrec);
		xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
	}
	/*
	 * Bump and log left's numrecs, decrement and log right's numrecs.
	 */
	INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
	INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
	/*
	 * Slide the contents of right down one entry.
	 */
	if (level > 0) {
#ifdef DEBUG
		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
			if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
					level)))
				return error;
		}
#endif
		memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
		memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
	} else {
		memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
		rkp = &key;
	}
	/*
	 * Update the parent key values of right.
	 */
	if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
		return error;
	/*
	 * Slide the cursor value left one.
	 */
	cur->bc_ptrs[level]--;
	*stat = 1;
	return 0;
}

/*
 * Allocate a new root block, fill it in.
 */
STATIC int				/* error */
xfs_alloc_newroot(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			*stat)	/* success/failure */
{
	int			error;	/* error return value */
	xfs_agblock_t		lbno;	/* left block number */
	xfs_buf_t		*lbp;	/* left btree buffer */
	xfs_alloc_block_t	*left;	/* left btree block */
	xfs_mount_t		*mp;	/* mount structure */
	xfs_agblock_t		nbno;	/* new block number */
	xfs_buf_t		*nbp;	/* new (root) buffer */
	xfs_alloc_block_t	*new;	/* new (root) btree block */
	int			nptr;	/* new value for key index, 1 or 2 */
	xfs_agblock_t		rbno;	/* right block number */
	xfs_buf_t		*rbp;	/* right btree buffer */
	xfs_alloc_block_t	*right;	/* right btree block */

	mp = cur->bc_mp;

	ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
	/*
	 * Get a buffer from the freelist blocks, for the new root.
	 */
	if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
			&nbno)))
		return error;
	/*
	 * None available, we fail.
	 */
	if (nbno == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}
	xfs_trans_agbtree_delta(cur->bc_tp, 1);
	nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
		0);
	new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
	/*
	 * Set the root data in the a.g. freespace structure.
	 */
	{
		xfs_agf_t	*agf;	/* a.g. freespace header */
		xfs_agnumber_t	seqno;

		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
		INT_SET(agf->agf_roots[cur->bc_btnum], ARCH_CONVERT, nbno);
		INT_MOD(agf->agf_levels[cur->bc_btnum], ARCH_CONVERT, 1);
		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
		mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
			XFS_AGF_ROOTS | XFS_AGF_LEVELS);
	}
	/*
	 * At the previous root level there are now two blocks: the old
	 * root, and the new block generated when it was split.
	 * We don't know which one the cursor is pointing at, so we
	 * set up variables "left" and "right" for each case.
	 */
	lbp = cur->bc_bufs[cur->bc_nlevels - 1];
	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp)))
		return error;
#endif
	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
		/*
		 * Our block is left, pick up the right block.
		 */
		lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
		rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
				cur->bc_private.a.agno, rbno, 0, &rbp,
				XFS_ALLOC_BTREE_REF)))
			return error;
		right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
		if ((error = xfs_btree_check_sblock(cur, right,
				cur->bc_nlevels - 1, rbp)))
			return error;
		nptr = 1;
	} else {
		/*
		 * Our block is right, pick up the left block.
		 */
		rbp = lbp;
		right = left;
		rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
		lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
		if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
				cur->bc_private.a.agno, lbno, 0, &lbp,
				XFS_ALLOC_BTREE_REF)))
			return error;
		left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
		if ((error = xfs_btree_check_sblock(cur, left,
				cur->bc_nlevels - 1, lbp)))
			return error;
		nptr = 2;
	}
	/*
	 * Fill in the new block's btree header and log it.
	 */
	INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
	INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
	INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
	INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
	INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
	xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
	ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
	/*
	 * Fill in the key data in the new root.
	 */
	{
		xfs_alloc_key_t		*kp;	/* btree key pointer */

		kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
		if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
			kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */
			kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */
		} else {
			xfs_alloc_rec_t	*rp;	/* btree record pointer */

			rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
			kp[0].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
			kp[0].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
			rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
			kp[1].ar_startblock = rp->ar_startblock; /* INT_: direct copy */
			kp[1].ar_blockcount = rp->ar_blockcount; /* INT_: direct copy */
		}
	}
	xfs_alloc_log_keys(cur, nbp, 1, 2);
	/*
	 * Fill in the pointer data in the new root.
	 */
	{
		xfs_alloc_ptr_t		*pp;	/* btree address pointer */

		pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
		INT_SET(pp[0], ARCH_CONVERT, lbno);
		INT_SET(pp[1], ARCH_CONVERT, rbno);
	}
	xfs_alloc_log_ptrs(cur, nbp, 1, 2);
	/*
	 * Fix up the cursor.
	 */
	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
	cur->bc_ptrs[cur->bc_nlevels] = nptr;
	cur->bc_nlevels++;
	*stat = 1;
	return 0;
}

/*
 * Move 1 record right from cur/level if possible.
 * Update cur to reflect the new path.
 */
STATIC int				/* error */
xfs_alloc_rshift(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level,	/* level to shift record on */
	int			*stat)	/* success/failure */
{
	int			error;	/* error return value */
	int			i;	/* loop index */
	xfs_alloc_key_t		key;	/* key value for leaf level upward */
	xfs_buf_t		*lbp;	/* buffer for left (current) block */
	xfs_alloc_block_t	*left;	/* left (current) btree block */
	xfs_buf_t		*rbp;	/* buffer for right neighbor block */
	xfs_alloc_block_t	*right;	/* right neighbor btree block */
	xfs_alloc_key_t		*rkp;	/* key pointer for right block */
	xfs_btree_cur_t		*tcur;	/* temporary cursor */

	/*
	 * Set up variables for this block as "left".
	 */
	lbp = cur->bc_bufs[level];
	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
		return error;
#endif
	/*
	 * If we've got no right sibling then we can't shift an entry right.
	 */
	if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}
	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
		*stat = 0;
		return 0;
	}
	/*
	 * Set up the right neighbor as "right".
	 */
	if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
			cur->bc_private.a.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
			XFS_ALLOC_BTREE_REF)))
		return error;
	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
	if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
		return error;
	/*
	 * If it's full, it can't take another entry.
	 */
	if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
		*stat = 0;
		return 0;
	}
	/*
	 * Make a hole at the start of the right neighbor block, then
	 * copy the last left block entry to the hole.
	 */
	if (level > 0) {
		xfs_alloc_key_t	*lkp;	/* key pointer for left block */
		xfs_alloc_ptr_t	*lpp;	/* address pointer for left block */
		xfs_alloc_ptr_t	*rpp;	/* address pointer for right block */

		lkp = XFS_ALLOC_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
		lpp = XFS_ALLOC_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
#ifdef DEBUG
		for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
			if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
				return error;
		}
#endif
		memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
		memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
#ifdef DEBUG
		if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)))
			return error;
#endif
		*rkp = *lkp; /* INT_: copy */
		*rpp = *lpp; /* INT_: copy */
		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
		xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
	} else {
		xfs_alloc_rec_t	*lrp;	/* record pointer for left block */
		xfs_alloc_rec_t	*rrp;	/* record pointer for right block */

		lrp = XFS_ALLOC_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
		memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
		*rrp = *lrp;
		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
		key.ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
		key.ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
		rkp = &key;
		xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
	}
	/*
	 * Decrement and log left's numrecs, bump and log right's numrecs.
	 */
	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
	INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the right.
	 */
	if ((error = xfs_btree_dup_cursor(cur, &tcur)))
		return error;
	i = xfs_btree_lastrec(tcur, level);
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	if ((error = xfs_alloc_increment(tcur, level, &i)) ||
	    (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
		goto error0;
	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
	*stat = 1;
	return 0;
error0:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Split cur/level block in half.
 * Return new block number and its first record (to be inserted into parent).
 */
STATIC int				/* error */
xfs_alloc_split(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level,	/* level to split */
	xfs_agblock_t		*bnop,	/* output: block number allocated */
	xfs_alloc_key_t		*keyp,	/* output: first key of new block */
	xfs_btree_cur_t		**curp,	/* output: new cursor */
	int			*stat)	/* success/failure */
{
	int			error;	/* error return value */
	int			i;	/* loop index/record number */
	xfs_agblock_t		lbno;	/* left (current) block number */
	xfs_buf_t		*lbp;	/* buffer for left block */
	xfs_alloc_block_t	*left;	/* left (current) btree block */
	xfs_agblock_t		rbno;	/* right (new) block number */
	xfs_buf_t		*rbp;	/* buffer for right block */
	xfs_alloc_block_t	*right;	/* right (new) btree block */

	/*
	 * Allocate the new block from the freelist.
	 * If we can't do it, we're toast.  Give up.
	 */
	if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
			&rbno)))
		return error;
	if (rbno == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}
	xfs_trans_agbtree_delta(cur->bc_tp, 1);
	rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
		rbno, 0);
	/*
	 * Set up the new block as "right".
	 */
	right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
	/*
	 * "Left" is the current (according to the cursor) block.
	 */
	lbp = cur->bc_bufs[level];
	left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
		return error;
#endif
	/*
	 * Fill in the btree header for the new block.
	 */
	INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
	right->bb_level = left->bb_level; /* INT_: direct copy */
	INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
	/*
	 * Make sure that if there's an odd number of entries now, that
	 * each new block will have the same number of entries.
	 */
	if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
	    cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
		INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
	i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
	/*
	 * For non-leaf blocks, copy keys and addresses over to the new block.
	 */
	if (level > 0) {
		xfs_alloc_key_t	*lkp;	/* left btree key pointer */
		xfs_alloc_ptr_t	*lpp;	/* left btree address pointer */
		xfs_alloc_key_t	*rkp;	/* right btree key pointer */
		xfs_alloc_ptr_t	*rpp;	/* right btree address pointer */

		lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
		lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
		rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
		rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
#ifdef DEBUG
		for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
			if ((error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)))
				return error;
		}
#endif
		memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp)); /* INT_: copy */
		memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp)); /* INT_: copy */
		xfs_alloc_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
		xfs_alloc_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
		*keyp = *rkp;
	}
	/*
	 * For leaf blocks, copy records over to the new block.
	 */
	else {
		xfs_alloc_rec_t	*lrp;	/* left btree record pointer */
		xfs_alloc_rec_t	*rrp;	/* right btree record pointer */

		lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
		rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
		memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
		xfs_alloc_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
		keyp->ar_startblock = rrp->ar_startblock; /* INT_: direct copy */
		keyp->ar_blockcount = rrp->ar_blockcount; /* INT_: direct copy */
	}
	/*
	 * Find the left block number by looking in the buffer.
	 * Adjust numrecs, sibling pointers.
	 */
	lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
	INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
	right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
	INT_SET(left->bb_rightsib, ARCH_CONVERT, rbno);
	INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
	xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
	xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
	/*
	 * If there's a block to the new block's right, make that block
	 * point back to right instead of to left.
	 */
	if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
		xfs_alloc_block_t	*rrblock;	/* rr btree block */
		xfs_buf_t		*rrbp;		/* buffer for rrblock */

		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
				cur->bc_private.a.agno, INT_GET(right->bb_rightsib, ARCH_CONVERT), 0,
				&rrbp, XFS_ALLOC_BTREE_REF)))
			return error;
		rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
		if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
			return error;
		INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, rbno);
		xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
	}
	/*
	 * If the cursor is really in the right block, move it there.
	 * If it's just pointing past the last entry in left, then we'll
	 * insert there, so don't change anything in that case.
	 */
	if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
		xfs_btree_setbuf(cur, level, rbp);
		cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
	}
	/*
	 * If there are more levels, we'll need another cursor which refers to
	 * the right block, no matter where this cursor was.
	 */
	if (level + 1 < cur->bc_nlevels) {
		if ((error = xfs_btree_dup_cursor(cur, curp)))
			return error;
		(*curp)->bc_ptrs[level + 1]++;
	}
	*bnop = rbno;
	*stat = 1;
	return 0;
}

/*
 * Update keys at all levels from here to the root along the cursor's path.
 */
STATIC int				/* error */
xfs_alloc_updkey(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	xfs_alloc_key_t		*keyp,	/* new key value to update to */
	int			level)	/* starting level for update */
{
	int			ptr;	/* index of key in block */

	/*
	 * Go up the tree from this level toward the root.
	 * At each level, update the key value to the value input.
	 * Stop when we reach a level where the cursor isn't pointing
	 * at the first entry in the block.
	 */
	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
		xfs_alloc_block_t	*block;	/* btree block */
		xfs_buf_t		*bp;	/* buffer for block */
#ifdef DEBUG
		int			error;	/* error return value */
#endif
		xfs_alloc_key_t		*kp;	/* ptr to btree block keys */

		bp = cur->bc_bufs[level];
		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
#ifdef DEBUG
		if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
			return error;
#endif
		ptr = cur->bc_ptrs[level];
		kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
		*kp = *keyp;
		xfs_alloc_log_keys(cur, bp, ptr, ptr);
	}
	return 0;
}

/*
 * Externally visible routines.
 */

/*
 * Decrement cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 */
int					/* error */
xfs_alloc_decrement(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level,	/* level in btree, 0 is leaf */
	int			*stat)	/* success/failure */
{
	xfs_alloc_block_t	*block;	/* btree block */
	int			error;	/* error return value */
	int			lev;	/* btree level */

	ASSERT(level < cur->bc_nlevels);
	/*
	 * Read-ahead to the left at this level.
	 */
	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
	/*
	 * Decrement the ptr at this level.  If we're still in the block
	 * then we're done.
	 */
	if (--cur->bc_ptrs[level] > 0) {
		*stat = 1;
		return 0;
	}
	/*
	 * Get a pointer to the btree block.
	 */
	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, block, level,
			cur->bc_bufs[level])))
		return error;
#endif
	/*
	 * If we just went off the left edge of the tree, return failure.
	 */
	if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}
	/*
	 * March up the tree decrementing pointers.
	 * Stop when we don't go off the left edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_ptrs[lev] > 0)
			break;
		/*
		 * Read-ahead the left block, we're going to read it
		 * in the next loop.
		 */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}
	/*
	 * If we went off the root then we are seriously confused.
	 */
	ASSERT(lev < cur->bc_nlevels);
	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
		xfs_agblock_t	agbno;	/* block number of btree block */
		xfs_buf_t	*bp;	/* buffer pointer for block */

		agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
				cur->bc_private.a.agno, agbno, 0, &bp,
				XFS_ALLOC_BTREE_REF)))
			return error;
		lev--;
		xfs_btree_setbuf(cur, lev, bp);
		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
			return error;
		cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
	}
	*stat = 1;
	return 0;
}

/*
 * Delete the record pointed to by cur.
 * The cursor refers to the place where the record was (could be inserted)
 * when the operation returns.
 */
int					/* error */
xfs_alloc_delete(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		*stat)		/* success/failure */
{
	int		error;		/* error return value */
	int		i;		/* result code */
	int		level;		/* btree level */

	/*
	 * Go up the tree, starting at leaf level.
	 * If 2 is returned then a join was done; go to the next level.
	 * Otherwise we are done.
	 */
	for (level = 0, i = 2; i == 2; level++) {
		if ((error = xfs_alloc_delrec(cur, level, &i)))
			return error;
	}
	if (i == 0) {
		for (level = 1; level < cur->bc_nlevels; level++) {
			if (cur->bc_ptrs[level] == 0) {
				if ((error = xfs_alloc_decrement(cur, level, &i)))
					return error;
				break;
			}
		}
	}
	*stat = i;
	return 0;
}

/*
 * Get the data from the pointed-to record.
 */
int					/* error */
xfs_alloc_get_rec(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	xfs_alloc_block_t	*block;	/* btree block */
#ifdef DEBUG
	int			error;	/* error return value */
#endif
	int			ptr;	/* record number */

	ptr = cur->bc_ptrs[0];
	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
		return error;
#endif
	/*
	 * Off the right end or left end, return failure.
	 */
	if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
		*stat = 0;
		return 0;
	}
	/*
	 * Point to the record and extract its data.
	 */
	{
		xfs_alloc_rec_t		*rec;	/* record data */

		rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
		*bno = INT_GET(rec->ar_startblock, ARCH_CONVERT);
		*len = INT_GET(rec->ar_blockcount, ARCH_CONVERT);
	}
	*stat = 1;
	return 0;
}

/*
 * Increment cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 */
int					/* error */
xfs_alloc_increment(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level,	/* level in btree, 0 is leaf */
	int			*stat)	/* success/failure */
{
	xfs_alloc_block_t	*block;	/* btree block */
	xfs_buf_t		*bp;	/* tree block buffer */
	int			error;	/* error return value */
	int			lev;	/* btree level */

	ASSERT(level < cur->bc_nlevels);
	/*
	 * Read-ahead to the right at this level.
	 */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
	/*
	 * Get a pointer to the btree block.
	 */
	bp = cur->bc_bufs[level];
	block = XFS_BUF_TO_ALLOC_BLOCK(bp);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
		return error;
#endif
	/*
	 * Increment the ptr at this level.  If we're still in the block
	 * then we're done.
	 */
	if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
		*stat = 1;
		return 0;
	}
	/*
	 * If we just went off the right edge of the tree, return failure.
	 */
	if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}
	/*
	 * March up the tree incrementing pointers.
	 * Stop when we don't go off the right edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		bp = cur->bc_bufs[lev];
		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
#ifdef DEBUG
		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
			return error;
#endif
		if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
			break;
		/*
		 * Read-ahead the right block, we're going to read it
		 * in the next loop.
		 */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}
	/*
	 * If we went off the root then we are seriously confused.
	 */
	ASSERT(lev < cur->bc_nlevels);
	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
	     lev > level; ) {
		xfs_agblock_t	agbno;	/* block number of btree block */

		agbno = INT_GET(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
		if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
				cur->bc_private.a.agno, agbno, 0, &bp,
				XFS_ALLOC_BTREE_REF)))
			return error;
		lev--;
		xfs_btree_setbuf(cur, lev, bp);
		block = XFS_BUF_TO_ALLOC_BLOCK(bp);
		if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
			return error;
		cur->bc_ptrs[lev] = 1;
	}
	*stat = 1;
	return 0;
}

/*
 * Insert the current record at the point referenced by cur.
 * The cursor may be inconsistent on return if splits have been done.
 */
int					/* error */
xfs_alloc_insert(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		*stat)		/* success/failure */
{
	int		error;		/* error return value */
	int		i;		/* result value, 0 for failure */
	int		level;		/* current level number in btree */
	xfs_agblock_t	nbno;		/* new block number (split result) */
	xfs_btree_cur_t	*ncur;		/* new cursor (split result) */
	xfs_alloc_rec_t	nrec;		/* record being inserted this level */
	xfs_btree_cur_t	*pcur;		/* previous level's cursor */

	level = 0;
	nbno = NULLAGBLOCK;
	INT_SET(nrec.ar_startblock, ARCH_CONVERT, cur->bc_rec.a.ar_startblock);
	INT_SET(nrec.ar_blockcount, ARCH_CONVERT, cur->bc_rec.a.ar_blockcount);
	ncur = (xfs_btree_cur_t *)0;
	pcur = cur;
	/*
	 * Loop going up the tree, starting at the leaf level.
	 * Stop when we don't get a split block, that must mean that
	 * the insert is finished with this level.
	 */
	do {
		/*
		 * Insert nrec/nbno into this level of the tree.
		 * Note if we fail, nbno will be null.
		 */
		if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
				&i))) {
			if (pcur != cur)
				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
			return error;
		}
		/*
		 * See if the cursor we just used is trash.
		 * Can't trash the caller's cursor, but otherwise we should
		 * if ncur is a new cursor or we're about to be done.
		 */
		if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
			cur->bc_nlevels = pcur->bc_nlevels;
			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
		}
		/*
		 * If we got a new cursor, switch to it.
		 */
		if (ncur) {
			pcur = ncur;
			ncur = (xfs_btree_cur_t *)0;
		}
	} while (nbno != NULLAGBLOCK);
	*stat = i;
	return 0;
}

/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_eq(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	xfs_agblock_t	bno,		/* starting block of extent */
	xfs_extlen_t	len,		/* length of extent */
	int		*stat)		/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_ge(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	xfs_agblock_t	bno,		/* starting block of extent */
	xfs_extlen_t	len,		/* length of extent */
	int		*stat)		/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_le(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	xfs_agblock_t	bno,		/* starting block of extent */
	xfs_extlen_t	len,		/* length of extent */
	int		*stat)		/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Update the record referred to by cur, to the value given by [bno, len].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int					/* error */
xfs_alloc_update(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len)	/* length of extent */
{
	xfs_alloc_block_t	*block;	/* btree block to update */
	int			error;	/* error return value */
	int			ptr;	/* current record number (updating) */

	ASSERT(len > 0);
	/*
	 * Pick up the a.g. freelist struct and the current block.
	 */
	block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
#ifdef DEBUG
	if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
		return error;
#endif
	/*
	 * Get the address of the rec to be updated.
	 */
	ptr = cur->bc_ptrs[0];
	{
		xfs_alloc_rec_t		*rp;	/* pointer to updated record */

		rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
		/*
		 * Fill in the new contents and log them.
		 */
		INT_SET(rp->ar_startblock, ARCH_CONVERT, bno);
		INT_SET(rp->ar_blockcount, ARCH_CONVERT, len);
		xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
	}
	/*
	 * If it's the by-size btree and it's the last leaf block and
	 * it's the last record... then update the size of the longest
	 * extent in the a.g., which we cache in the a.g. freelist header.
	 */
	if (cur->bc_btnum == XFS_BTNUM_CNT &&
	    INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK &&
	    ptr == INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
		xfs_agf_t	*agf;	/* a.g. freespace header */
		xfs_agnumber_t	seqno;

		agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
		seqno = INT_GET(agf->agf_seqno, ARCH_CONVERT);
		cur->bc_mp->m_perag[seqno].pagf_longest = len;
		INT_SET(agf->agf_longest, ARCH_CONVERT, len);
		xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
			XFS_AGF_LONGEST);
	}
	/*
	 * Updating first record in leaf. Pass new key value up to our parent.
	 */
	if (ptr == 1) {
		xfs_alloc_key_t	key;	/* key containing [bno, len] */

		INT_SET(key.ar_startblock, ARCH_CONVERT, bno);
		INT_SET(key.ar_blockcount, ARCH_CONVERT, len);
		if ((error = xfs_alloc_updkey(cur, &key, 1)))
			return error;
	}
	return 0;
}