drivers/mmc/host/sdricoh_cs.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575

/*
 *  sdricoh_cs.c - driver for Ricoh Secure Digital Card Readers that can be
 *     found on some Ricoh RL5c476 II cardbus bridge
 *
 *  Copyright (C) 2006 - 2008 Sascha Sommer <saschasommer@freenet.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

/*
#define DEBUG
#define VERBOSE_DEBUG
*/
#include <linux/delay.h>
#include <linux/highmem.h>
#include <linux/pci.h>
#include <linux/ioport.h>
#include <linux/scatterlist.h>
#include <linux/version.h>

#include <pcmcia/cs_types.h>
#include <pcmcia/cs.h>
#include <pcmcia/cistpl.h>
#include <pcmcia/ds.h>
#include <linux/io.h>

#include <linux/mmc/host.h>

#define DRIVER_NAME "sdricoh_cs"

static unsigned int switchlocked;

/* i/o region */
#define SDRICOH_PCI_REGION 0
#define SDRICOH_PCI_REGION_SIZE 0x1000

/* registers */
#define R104_VERSION     0x104
#define R200_CMD         0x200
#define R204_CMD_ARG     0x204
#define R208_DATAIO      0x208
#define R20C_RESP        0x20c
#define R21C_STATUS      0x21c
#define R2E0_INIT        0x2e0
#define R2E4_STATUS_RESP 0x2e4
#define R2F0_RESET       0x2f0
#define R224_MODE        0x224
#define R226_BLOCKSIZE   0x226
#define R228_POWER       0x228
#define R230_DATA        0x230

/* flags for the R21C_STATUS register */
#define STATUS_CMD_FINISHED      0x00000001
#define STATUS_TRANSFER_FINISHED 0x00000004
#define STATUS_CARD_INSERTED     0x00000020
#define STATUS_CARD_LOCKED       0x00000080
#define STATUS_CMD_TIMEOUT       0x00400000
#define STATUS_READY_TO_READ     0x01000000
#define STATUS_READY_TO_WRITE    0x02000000
#define STATUS_BUSY              0x40000000

/* timeouts */
#define INIT_TIMEOUT      100
#define CMD_TIMEOUT       100000
#define TRANSFER_TIMEOUT  100000
#define BUSY_TIMEOUT      32767

/* list of supported pcmcia devices */
static struct pcmcia_device_id pcmcia_ids[] = {
	/* vendor and device strings followed by their crc32 hashes */
	PCMCIA_DEVICE_PROD_ID12("RICOH", "Bay1Controller", 0xd9f522ed,
				0xc3901202),
	PCMCIA_DEVICE_NULL,
};

MODULE_DEVICE_TABLE(pcmcia, pcmcia_ids);

/* mmc privdata */
struct sdricoh_host {
	struct device *dev;
	struct mmc_host *mmc;	/* MMC structure */
	unsigned char __iomem *iobase;
	struct pci_dev *pci_dev;
	int app_cmd;
};

/***************** register i/o helper functions *****************************/

static inline unsigned int sdricoh_readl(struct sdricoh_host *host,
					 unsigned int reg)
{
	unsigned int value = readl(host->iobase + reg);
	dev_vdbg(host->dev, "rl %x 0x%x\n", reg, value);
	return value;
}

static inline void sdricoh_writel(struct sdricoh_host *host, unsigned int reg,
				  unsigned int value)
{
	writel(value, host->iobase + reg);
	dev_vdbg(host->dev, "wl %x 0x%x\n", reg, value);

}

static inline unsigned int sdricoh_readw(struct sdricoh_host *host,
					 unsigned int reg)
{
	unsigned int value = readw(host->iobase + reg);
	dev_vdbg(host->dev, "rb %x 0x%x\n", reg, value);
	return value;
}

static inline void sdricoh_writew(struct sdricoh_host *host, unsigned int reg,
					 unsigned short value)
{
	writew(value, host->iobase + reg);
	dev_vdbg(host->dev, "ww %x 0x%x\n", reg, value);
}

static inline unsigned int sdricoh_readb(struct sdricoh_host *host,
					 unsigned int reg)
{
	unsigned int value = readb(host->iobase + reg);
	dev_vdbg(host->dev, "rb %x 0x%x\n", reg, value);
	return value;
}

static int sdricoh_query_status(struct sdricoh_host *host, unsigned int wanted,
				unsigned int timeout){
	unsigned int loop;
	unsigned int status = 0;
	struct device *dev = host->dev;
	for (loop = 0; loop < timeout; loop++) {
		status = sdricoh_readl(host, R21C_STATUS);
		sdricoh_writel(host, R2E4_STATUS_RESP, status);
		if (status & wanted)
			break;
	}

	if (loop == timeout) {
		dev_err(dev, "query_status: timeout waiting for %x\n", wanted);
		return -ETIMEDOUT;
	}

	/* do not do this check in the loop as some commands fail otherwise */
	if (status & 0x7F0000) {
		dev_err(dev, "waiting for status bit %x failed\n", wanted);
		return -EINVAL;
	}
	return 0;

}

static int sdricoh_mmc_cmd(struct sdricoh_host *host, unsigned char opcode,
			   unsigned int arg)
{
	unsigned int status;
	int result = 0;
	unsigned int loop = 0;
	/* reset status reg? */
	sdricoh_writel(host, R21C_STATUS, 0x18);
	/* fill parameters */
	sdricoh_writel(host, R204_CMD_ARG, arg);
	sdricoh_writel(host, R200_CMD, (0x10000 << 8) | opcode);
	/* wait for command completion */
	if (opcode) {
		for (loop = 0; loop < CMD_TIMEOUT; loop++) {
			status = sdricoh_readl(host, R21C_STATUS);
			sdricoh_writel(host, R2E4_STATUS_RESP, status);
			if (status  & STATUS_CMD_FINISHED)
				break;
		}
		/* don't check for timeout in the loop it is not always
		   reset correctly
		*/
		if (loop == CMD_TIMEOUT || status & STATUS_CMD_TIMEOUT)
			result = -ETIMEDOUT;

	}

	return result;

}

static int sdricoh_reset(struct sdricoh_host *host)
{
	dev_dbg(host->dev, "reset\n");
	sdricoh_writel(host, R2F0_RESET, 0x10001);
	sdricoh_writel(host, R2E0_INIT, 0x10000);
	if (sdricoh_readl(host, R2E0_INIT) != 0x10000)
		return -EIO;
	sdricoh_writel(host, R2E0_INIT, 0x10007);

	sdricoh_writel(host, R224_MODE, 0x2000000);
	sdricoh_writel(host, R228_POWER, 0xe0);


	/* status register ? */
	sdricoh_writel(host, R21C_STATUS, 0x18);

	return 0;
}

static int sdricoh_blockio(struct sdricoh_host *host, int read,
				u8 *buf, int len)
{
	int size;
	u32 data = 0;
	/* wait until the data is available */
	if (read) {
		if (sdricoh_query_status(host, STATUS_READY_TO_READ,
						TRANSFER_TIMEOUT))
			return -ETIMEDOUT;
		sdricoh_writel(host, R21C_STATUS, 0x18);
		/* read data */
		while (len) {
			data = sdricoh_readl(host, R230_DATA);
			size = min(len, 4);
			len -= size;
			while (size) {
				*buf = data & 0xFF;
				buf++;
				data >>= 8;
				size--;
			}
		}
	} else {
		if (sdricoh_query_status(host, STATUS_READY_TO_WRITE,
						TRANSFER_TIMEOUT))
			return -ETIMEDOUT;
		sdricoh_writel(host, R21C_STATUS, 0x18);
		/* write data */
		while (len) {
			size = min(len, 4);
			len -= size;
			while (size) {
				data >>= 8;
				data |= (u32)*buf << 24;
				buf++;
				size--;
			}
			sdricoh_writel(host, R230_DATA, data);
		}
	}

	if (len)
		return -EIO;

	return 0;
}

static void sdricoh_request(struct mmc_host *mmc, struct mmc_request *mrq)
{
	struct sdricoh_host *host = mmc_priv(mmc);
	struct mmc_command *cmd = mrq->cmd;
	struct mmc_data *data = cmd->data;
	struct device *dev = host->dev;
	unsigned char opcode = cmd->opcode;
	int i;

	dev_dbg(dev, "=============================\n");
	dev_dbg(dev, "sdricoh_request opcode=%i\n", opcode);

	sdricoh_writel(host, R21C_STATUS, 0x18);

	/* MMC_APP_CMDs need some special handling */
	if (host->app_cmd) {
		opcode |= 64;
		host->app_cmd = 0;
	} else if (opcode == 55)
		host->app_cmd = 1;

	/* read/write commands seem to require this */
	if (data) {
		sdricoh_writew(host, R226_BLOCKSIZE, data->blksz);
		sdricoh_writel(host, R208_DATAIO, 0);
	}

	cmd->error = sdricoh_mmc_cmd(host, opcode, cmd->arg);

	/* read response buffer */
	if (cmd->flags & MMC_RSP_PRESENT) {
		if (cmd->flags & MMC_RSP_136) {
			/* CRC is stripped so we need to do some shifting. */
			for (i = 0; i < 4; i++) {
				cmd->resp[i] =
				    sdricoh_readl(host,
						  R20C_RESP + (3 - i) * 4) << 8;
				if (i != 3)
					cmd->resp[i] |=
					    sdricoh_readb(host, R20C_RESP +
							  (3 - i) * 4 - 1);
			}
		} else
			cmd->resp[0] = sdricoh_readl(host, R20C_RESP);
	}

	/* transfer data */
	if (data && cmd->error == 0) {
		dev_dbg(dev, "transfer: blksz %i blocks %i sg_len %i "
			"sg length %i\n", data->blksz, data->blocks,
			data->sg_len, data->sg->length);

		/* enter data reading mode */
		sdricoh_writel(host, R21C_STATUS, 0x837f031e);
		for (i = 0; i < data->blocks; i++) {
			size_t len = data->blksz;
			u8 *buf;
			struct page *page;
			int result;
			page = sg_page(data->sg);

			buf = kmap(page) + data->sg->offset + (len * i);
			result =
				sdricoh_blockio(host,
					data->flags & MMC_DATA_READ, buf, len);
			kunmap(page);
			flush_dcache_page(page);
			if (result) {
				dev_err(dev, "sdricoh_request: cmd %i "
					"block transfer failed\n", cmd->opcode);
				cmd->error = result;
				break;
			} else
				data->bytes_xfered += len;
		}

		sdricoh_writel(host, R208_DATAIO, 1);

		if (sdricoh_query_status(host, STATUS_TRANSFER_FINISHED,
					TRANSFER_TIMEOUT)) {
			dev_err(dev, "sdricoh_request: transfer end error\n");
			cmd->error = -EINVAL;
		}
	}
	/* FIXME check busy flag */

	mmc_request_done(mmc, mrq);
	dev_dbg(dev, "=============================\n");
}

static void sdricoh_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
{
	struct sdricoh_host *host = mmc_priv(mmc);
	dev_dbg(host->dev, "set_ios\n");

	if (ios->power_mode == MMC_POWER_ON) {
		sdricoh_writel(host, R228_POWER, 0xc0e0);

		if (ios->bus_width == MMC_BUS_WIDTH_4) {
			sdricoh_writel(host, R224_MODE, 0x2000300);
			sdricoh_writel(host, R228_POWER, 0x40e0);
		} else {
			sdricoh_writel(host, R224_MODE, 0x2000340);
		}

	} else if (ios->power_mode == MMC_POWER_UP) {
		sdricoh_writel(host, R224_MODE, 0x2000320);
		sdricoh_writel(host, R228_POWER, 0xe0);
	}
}

static int sdricoh_get_ro(struct mmc_host *mmc)
{
	struct sdricoh_host *host = mmc_priv(mmc);
	unsigned int status;

	status = sdricoh_readl(host, R21C_STATUS);
	sdricoh_writel(host, R2E4_STATUS_RESP, status);

	/* some notebooks seem to have the locked flag switched */
	if (switchlocked)
		return !(status & STATUS_CARD_LOCKED);

	return (status & STATUS_CARD_LOCKED);
}

static struct mmc_host_ops sdricoh_ops = {
	.request = sdricoh_request,
	.set_ios = sdricoh_set_ios,
	.get_ro = sdricoh_get_ro,
};

/* initialize the control and register it to the mmc framework */
static int sdricoh_init_mmc(struct pci_dev *pci_dev,
			    struct pcmcia_device *pcmcia_dev)
{
	int result = 0;
	void __iomem *iobase = NULL;
	struct mmc_host *mmc = NULL;
	struct sdricoh_host *host = NULL;
	struct device *dev = &pcmcia_dev->dev;
	/* map iomem */
	if (pci_resource_len(pci_dev, SDRICOH_PCI_REGION) !=
	    SDRICOH_PCI_REGION_SIZE) {
		dev_dbg(dev, "unexpected pci resource len\n");
		return -ENODEV;
	}
	iobase =
	    pci_iomap(pci_dev, SDRICOH_PCI_REGION, SDRICOH_PCI_REGION_SIZE);
	if (!iobase) {
		dev_err(dev, "unable to map iobase\n");
		return -ENODEV;
	}
	/* check version? */
	if (readl(iobase + R104_VERSION) != 0x4000) {
		dev_dbg(dev, "no supported mmc controller found\n");
		result = -ENODEV;
		goto err;
	}
	/* allocate privdata */
	mmc = pcmcia_dev->priv =
	    mmc_alloc_host(sizeof(struct sdricoh_host), &pcmcia_dev->dev);
	if (!mmc) {
		dev_err(dev, "mmc_alloc_host failed\n");
		result = -ENOMEM;
		goto err;
	}
	host = mmc_priv(mmc);

	host->iobase = iobase;
	host->dev = dev;
	host->pci_dev = pci_dev;

	mmc->ops = &sdricoh_ops;

	/* FIXME: frequency and voltage handling is done by the controller
	 */
	mmc->f_min = 450000;
	mmc->f_max = 24000000;
	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
	mmc->caps |= MMC_CAP_4_BIT_DATA;

	mmc->max_seg_size = 1024 * 512;
	mmc->max_blk_size = 512;

	/* reset the controler */
	if (sdricoh_reset(host)) {
		dev_dbg(dev, "could not reset\n");
		result = -EIO;
		goto err;

	}

	result = mmc_add_host(mmc);

	if (!result) {
		dev_dbg(dev, "mmc host registered\n");
		return 0;
	}

err:
	if (iobase)
		iounmap(iobase);
	if (mmc)
		mmc_free_host(mmc);

	return result;
}

/* search for supported mmc controllers */
static int sdricoh_pcmcia_probe(struct pcmcia_device *pcmcia_dev)
{
	struct pci_dev *pci_dev = NULL;

	dev_info(&pcmcia_dev->dev, "Searching MMC controller for pcmcia device"
		" %s %s ...\n", pcmcia_dev->prod_id[0], pcmcia_dev->prod_id[1]);

	/* search pci cardbus bridge that contains the mmc controler */
	/* the io region is already claimed by yenta_socket... */
	while ((pci_dev =
		pci_get_device(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_RL5C476,
			       pci_dev))) {
		/* try to init the device */
		if (!sdricoh_init_mmc(pci_dev, pcmcia_dev)) {
			dev_info(&pcmcia_dev->dev, "MMC controller found\n");
			return 0;
		}

	}
	dev_err(&pcmcia_dev->dev, "No MMC controller was found.\n");
	return -ENODEV;
}

static void sdricoh_pcmcia_detach(struct pcmcia_device *link)
{
	struct mmc_host *mmc = link->priv;

	dev_dbg(&link->dev, "detach\n");

	/* remove mmc host */
	if (mmc) {
		struct sdricoh_host *host = mmc_priv(mmc);
		mmc_remove_host(mmc);
		pci_iounmap(host->pci_dev, host->iobase);
		pci_dev_put(host->pci_dev);
		mmc_free_host(mmc);
	}
	pcmcia_disable_device(link);

}

#ifdef CONFIG_PM
static int sdricoh_pcmcia_suspend(struct pcmcia_device *link)
{
	struct mmc_host *mmc = link->priv;
	dev_dbg(&link->dev, "suspend\n");
	mmc_suspend_host(mmc, PMSG_SUSPEND);
	return 0;
}

static int sdricoh_pcmcia_resume(struct pcmcia_device *link)
{
	struct mmc_host *mmc = link->priv;
	dev_dbg(&link->dev, "resume\n");
	sdricoh_reset(mmc_priv(mmc));
	mmc_resume_host(mmc);
	return 0;
}
#else
#define sdricoh_pcmcia_suspend NULL
#define sdricoh_pcmcia_resume NULL
#endif

static struct pcmcia_driver sdricoh_driver = {
	.drv = {
		.name = DRIVER_NAME,
		},
	.probe = sdricoh_pcmcia_probe,
	.remove = sdricoh_pcmcia_detach,
	.id_table = pcmcia_ids,
	.suspend = sdricoh_pcmcia_suspend,
	.resume = sdricoh_pcmcia_resume,
};

/*****************************************************************************\
 *                                                                           *
 * Driver init/exit                                                          *
 *                                                                           *
\*****************************************************************************/

static int __init sdricoh_drv_init(void)
{
	return pcmcia_register_driver(&sdricoh_driver);
}

static void __exit sdricoh_drv_exit(void)
{
	pcmcia_unregister_driver(&sdricoh_driver);
}

module_init(sdricoh_drv_init);
module_exit(sdricoh_drv_exit);

module_param(switchlocked, uint, 0444);

MODULE_AUTHOR("Sascha Sommer <saschasommer@freenet.de>");
MODULE_DESCRIPTION("Ricoh PCMCIA Secure Digital Interface driver");
MODULE_LICENSE("GPL");

MODULE_PARM_DESC(switchlocked, "Switch the cards locked status."
		"Use this when unlocked cards are shown readonly (default 0)");
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"

STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
STATIC int	xfs_uuid_mount(xfs_mount_t *);
STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
STATIC void	xfs_unmountfs_wait(xfs_mount_t *);


#ifdef HAVE_PERCPU_SB
STATIC void	xfs_icsb_destroy_counters(xfs_mount_t *);
STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
						int, int);
STATIC void	xfs_icsb_sync_counters(xfs_mount_t *);
STATIC int	xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
						int64_t, int);
STATIC int	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);

#else

#define xfs_icsb_destroy_counters(mp)			do { } while (0)
#define xfs_icsb_balance_counter(mp, a, b, c)		do { } while (0)
#define xfs_icsb_sync_counters(mp)			do { } while (0)
#define xfs_icsb_modify_counters(mp, a, b, c)		do { } while (0)

#endif

static const struct {
	short offset;
	short type;	/* 0 = integer
			 * 1 = binary / string (no translation)
			 */
} xfs_sb_info[] = {
    { offsetof(xfs_sb_t, sb_magicnum),   0 },
    { offsetof(xfs_sb_t, sb_blocksize),  0 },
    { offsetof(xfs_sb_t, sb_dblocks),    0 },
    { offsetof(xfs_sb_t, sb_rblocks),    0 },
    { offsetof(xfs_sb_t, sb_rextents),   0 },
    { offsetof(xfs_sb_t, sb_uuid),       1 },
    { offsetof(xfs_sb_t, sb_logstart),   0 },
    { offsetof(xfs_sb_t, sb_rootino),    0 },
    { offsetof(xfs_sb_t, sb_rbmino),     0 },
    { offsetof(xfs_sb_t, sb_rsumino),    0 },
    { offsetof(xfs_sb_t, sb_rextsize),   0 },
    { offsetof(xfs_sb_t, sb_agblocks),   0 },
    { offsetof(xfs_sb_t, sb_agcount),    0 },
    { offsetof(xfs_sb_t, sb_rbmblocks),  0 },
    { offsetof(xfs_sb_t, sb_logblocks),  0 },
    { offsetof(xfs_sb_t, sb_versionnum), 0 },
    { offsetof(xfs_sb_t, sb_sectsize),   0 },
    { offsetof(xfs_sb_t, sb_inodesize),  0 },
    { offsetof(xfs_sb_t, sb_inopblock),  0 },
    { offsetof(xfs_sb_t, sb_fname[0]),   1 },
    { offsetof(xfs_sb_t, sb_blocklog),   0 },
    { offsetof(xfs_sb_t, sb_sectlog),    0 },
    { offsetof(xfs_sb_t, sb_inodelog),   0 },
    { offsetof(xfs_sb_t, sb_inopblog),   0 },
    { offsetof(xfs_sb_t, sb_agblklog),   0 },
    { offsetof(xfs_sb_t, sb_rextslog),   0 },
    { offsetof(xfs_sb_t, sb_inprogress), 0 },
    { offsetof(xfs_sb_t, sb_imax_pct),   0 },
    { offsetof(xfs_sb_t, sb_icount),     0 },
    { offsetof(xfs_sb_t, sb_ifree),      0 },
    { offsetof(xfs_sb_t, sb_fdblocks),   0 },
    { offsetof(xfs_sb_t, sb_frextents),  0 },
    { offsetof(xfs_sb_t, sb_uquotino),   0 },
    { offsetof(xfs_sb_t, sb_gquotino),   0 },
    { offsetof(xfs_sb_t, sb_qflags),     0 },
    { offsetof(xfs_sb_t, sb_flags),      0 },
    { offsetof(xfs_sb_t, sb_shared_vn),  0 },
    { offsetof(xfs_sb_t, sb_inoalignmt), 0 },
    { offsetof(xfs_sb_t, sb_unit),	 0 },
    { offsetof(xfs_sb_t, sb_width),	 0 },
    { offsetof(xfs_sb_t, sb_dirblklog),	 0 },
    { offsetof(xfs_sb_t, sb_logsectlog), 0 },
    { offsetof(xfs_sb_t, sb_logsectsize),0 },
    { offsetof(xfs_sb_t, sb_logsunit),	 0 },
    { offsetof(xfs_sb_t, sb_features2),	 0 },
    { sizeof(xfs_sb_t),			 0 }
};

/*
 * Return a pointer to an initialized xfs_mount structure.
 */
xfs_mount_t *
xfs_mount_init(void)
{
	xfs_mount_t *mp;

	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);

	if (xfs_icsb_init_counters(mp)) {
		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
	}

	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
	spinlock_init(&mp->m_sb_lock, "xfs_sb");
	mutex_init(&mp->m_ilock);
	mutex_init(&mp->m_growlock);
	/*
	 * Initialize the AIL.
	 */
	xfs_trans_ail_init(mp);

	atomic_set(&mp->m_active_trans, 0);

	return mp;
}

/*
 * Free up the resources associated with a mount structure.  Assume that
 * the structure was initially zeroed, so we can tell which fields got
 * initialized.
 */
void
xfs_mount_free(
	xfs_mount_t	*mp)
{
	if (mp->m_perag) {
		int	agno;

		for (agno = 0; agno < mp->m_maxagi; agno++)
			if (mp->m_perag[agno].pagb_list)
				kmem_free(mp->m_perag[agno].pagb_list,
						sizeof(xfs_perag_busy_t) *
							XFS_PAGB_NUM_SLOTS);
		kmem_free(mp->m_perag,
			  sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
	}

	AIL_LOCK_DESTROY(&mp->m_ail_lock);
	spinlock_destroy(&mp->m_sb_lock);
	mutex_destroy(&mp->m_ilock);
	mutex_destroy(&mp->m_growlock);
	if (mp->m_quotainfo)
		XFS_QM_DONE(mp);

	if (mp->m_fsname != NULL)
		kmem_free(mp->m_fsname, mp->m_fsname_len);
	if (mp->m_rtname != NULL)
		kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
	if (mp->m_logname != NULL)
		kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);

	xfs_icsb_destroy_counters(mp);
}

/*
 * Check size of device based on the (data/realtime) block count.
 * Note: this check is used by the growfs code as well as mount.
 */
int
xfs_sb_validate_fsb_count(
	xfs_sb_t	*sbp,
	__uint64_t	nblocks)
{
	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
	ASSERT(sbp->sb_blocklog >= BBSHIFT);

#if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
	if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
		return E2BIG;
#else                  /* Limited by UINT_MAX of sectors */
	if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
		return E2BIG;
#endif
	return 0;
}

/*
 * Check the validity of the SB found.
 */
STATIC int
xfs_mount_validate_sb(
	xfs_mount_t	*mp,
	xfs_sb_t	*sbp,
	int		flags)
{
	/*
	 * If the log device and data device have the
	 * same device number, the log is internal.
	 * Consequently, the sb_logstart should be non-zero.  If
	 * we have a zero sb_logstart in this case, we may be trying to mount
	 * a volume filesystem in a non-volume manner.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		xfs_fs_mount_cmn_err(flags, "bad magic number");
		return XFS_ERROR(EWRONGFS);
	}

	if (!XFS_SB_GOOD_VERSION(sbp)) {
		xfs_fs_mount_cmn_err(flags, "bad version");
		return XFS_ERROR(EWRONGFS);
	}

	if (unlikely(
	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
		xfs_fs_mount_cmn_err(flags,
			"filesystem is marked as having an external log; "
			"specify logdev on the\nmount command line.");
		return XFS_ERROR(EINVAL);
	}

	if (unlikely(
	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
		xfs_fs_mount_cmn_err(flags,
			"filesystem is marked as having an internal log; "
			"do not specify logdev on\nthe mount command line.");
		return XFS_ERROR(EINVAL);
	}

	/*
	 * More sanity checking. These were stolen directly from
	 * xfs_repair.
	 */
	if (unlikely(
	    sbp->sb_agcount <= 0					||
	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE			||
	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
		xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Sanity check AG count, size fields against data size field
	 */
	if (unlikely(
	    sbp->sb_dblocks == 0 ||
	    sbp->sb_dblocks >
	     (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
	    sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
			      sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
		xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed");
		return XFS_ERROR(EFSCORRUPTED);
	}

	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
		xfs_fs_mount_cmn_err(flags,
			"file system too large to be mounted on this system.");
		return XFS_ERROR(E2BIG);
	}

	if (unlikely(sbp->sb_inprogress)) {
		xfs_fs_mount_cmn_err(flags, "file system busy");
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Version 1 directory format has never worked on Linux.
	 */
	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
		xfs_fs_mount_cmn_err(flags,
			"file system using version 1 directory format");
		return XFS_ERROR(ENOSYS);
	}

	/*
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
		xfs_fs_mount_cmn_err(flags,
			"file system with blocksize %d bytes",
			sbp->sb_blocksize);
		xfs_fs_mount_cmn_err(flags,
			"only pagesize (%ld) or less will currently work.",
			PAGE_SIZE);
		return XFS_ERROR(ENOSYS);
	}

	return 0;
}

STATIC void
xfs_initialize_perag_icache(
	xfs_perag_t	*pag)
{
	if (!pag->pag_ici_init) {
		rwlock_init(&pag->pag_ici_lock);
		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
		pag->pag_ici_init = 1;
	}
}

xfs_agnumber_t
xfs_initialize_perag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agcount)
{
	xfs_agnumber_t	index, max_metadata;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;
	xfs_ino_t	ino;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_ino_t	max_inum = XFS_MAXINUMBER_32;

	/* Check to see if the filesystem can overflow 32 bit inodes */
	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/* Clear the mount flag if no inode can overflow 32 bits
	 * on this filesystem, or if specifically requested..
	 */
	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) {
		mp->m_flags |= XFS_MOUNT_32BITINODES;
	} else {
		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
	}

	/* If we can overflow then setup the ag headers accordingly */
	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
		/* Calculate how much should be reserved for inodes to
		 * meet the max inode percentage.
		 */
		if (mp->m_maxicount) {
			__uint64_t	icount;

			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
			do_div(icount, 100);
			icount += sbp->sb_agblocks - 1;
			do_div(icount, sbp->sb_agblocks);
			max_metadata = icount;
		} else {
			max_metadata = agcount;
		}
		for (index = 0; index < agcount; index++) {
			ino = XFS_AGINO_TO_INO(mp, index, agino);
			if (ino > max_inum) {
				index++;
				break;
			}

			/* This ag is preferred for inodes */
			pag = &mp->m_perag[index];
			pag->pagi_inodeok = 1;
			if (index < max_metadata)
				pag->pagf_metadata = 1;
			xfs_initialize_perag_icache(pag);
		}
	} else {
		/* Setup default behavior for smaller filesystems */
		for (index = 0; index < agcount; index++) {
			pag = &mp->m_perag[index];
			pag->pagi_inodeok = 1;
			xfs_initialize_perag_icache(pag);
		}
	}
	return index;
}

void
xfs_sb_from_disk(
	xfs_sb_t	*to,
	xfs_dsb_t	*from)
{
	to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
	to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
	to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
	to->sb_rblocks = be64_to_cpu(from->sb_rblocks);
	to->sb_rextents = be64_to_cpu(from->sb_rextents);
	memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid));
	to->sb_logstart = be64_to_cpu(from->sb_logstart);
	to->sb_rootino = be64_to_cpu(from->sb_rootino);
	to->sb_rbmino = be64_to_cpu(from->sb_rbmino);
	to->sb_rsumino = be64_to_cpu(from->sb_rsumino);
	to->sb_rextsize = be32_to_cpu(from->sb_rextsize);
	to->sb_agblocks = be32_to_cpu(from->sb_agblocks);
	to->sb_agcount = be32_to_cpu(from->sb_agcount);
	to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks);
	to->sb_logblocks = be32_to_cpu(from->sb_logblocks);
	to->sb_versionnum = be16_to_cpu(from->sb_versionnum);
	to->sb_sectsize = be16_to_cpu(from->sb_sectsize);
	to->sb_inodesize = be16_to_cpu(from->sb_inodesize);
	to->sb_inopblock = be16_to_cpu(from->sb_inopblock);
	memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname));
	to->sb_blocklog = from->sb_blocklog;
	to->sb_sectlog = from->sb_sectlog;
	to->sb_inodelog = from->sb_inodelog;
	to->sb_inopblog = from->sb_inopblog;
	to->sb_agblklog = from->sb_agblklog;
	to->sb_rextslog = from->sb_rextslog;
	to->sb_inprogress = from->sb_inprogress;
	to->sb_imax_pct = from->sb_imax_pct;
	to->sb_icount = be64_to_cpu(from->sb_icount);
	to->sb_ifree = be64_to_cpu(from->sb_ifree);
	to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks);
	to->sb_frextents = be64_to_cpu(from->sb_frextents);
	to->sb_uquotino = be64_to_cpu(from->sb_uquotino);
	to->sb_gquotino = be64_to_cpu(from->sb_gquotino);
	to->sb_qflags = be16_to_cpu(from->sb_qflags);
	to->sb_flags = from->sb_flags;
	to->sb_shared_vn = from->sb_shared_vn;
	to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt);
	to->sb_unit = be32_to_cpu(from->sb_unit);
	to->sb_width = be32_to_cpu(from->sb_width);
	to->sb_dirblklog = from->sb_dirblklog;
	to->sb_logsectlog = from->sb_logsectlog;
	to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
	to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
	to->sb_features2 = be32_to_cpu(from->sb_features2);
}

/*
 * Copy in core superblock to ondisk one.
 *
 * The fields argument is mask of superblock fields to copy.
 */
void
xfs_sb_to_disk(
	xfs_dsb_t	*to,
	xfs_sb_t	*from,
	__int64_t	fields)
{
	xfs_caddr_t	to_ptr = (xfs_caddr_t)to;
	xfs_caddr_t	from_ptr = (xfs_caddr_t)from;
	xfs_sb_field_t	f;
	int		first;
	int		size;

	ASSERT(fields);
	if (!fields)
		return;

	while (fields) {
		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
		first = xfs_sb_info[f].offset;
		size = xfs_sb_info[f + 1].offset - first;

		ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);

		if (size == 1 || xfs_sb_info[f].type == 1) {
			memcpy(to_ptr + first, from_ptr + first, size);
		} else {
			switch (size) {
			case 2:
				*(__be16 *)(to_ptr + first) =
					cpu_to_be16(*(__u16 *)(from_ptr + first));
				break;
			case 4:
				*(__be32 *)(to_ptr + first) =
					cpu_to_be32(*(__u32 *)(from_ptr + first));
				break;
			case 8:
				*(__be64 *)(to_ptr + first) =
					cpu_to_be64(*(__u64 *)(from_ptr + first));
				break;
			default:
				ASSERT(0);
			}
		}

		fields &= ~(1LL << f);
	}
}

/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(xfs_mount_t *mp, int flags)
{
	unsigned int	sector_size;
	unsigned int	extra_flags;
	xfs_buf_t	*bp;
	int		error;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * Allocate a (locked) buffer to hold the superblock.
	 * This will be kept around at all times to optimize
	 * access to the superblock.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;

	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				BTOBB(sector_size), extra_flags);
	if (!bp || XFS_BUF_ISERROR(bp)) {
		xfs_fs_mount_cmn_err(flags, "SB read failed");
		error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
		goto fail;
	}
	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);

	/*
	 * Initialize the mount structure from the superblock.
	 * But first do some basic consistency checking.
	 */
	xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));

	error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
	if (error) {
		xfs_fs_mount_cmn_err(flags, "SB validate failed");
		goto fail;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
	if (sector_size > mp->m_sb.sb_sectsize) {
		xfs_fs_mount_cmn_err(flags,
			"device supports only %u byte sectors (not %u)",
			sector_size, mp->m_sb.sb_sectsize);
		error = ENOSYS;
		goto fail;
	}

	/*
	 * If device sector size is smaller than the superblock size,
	 * re-read the superblock so the buffer is correctly sized.
	 */
	if (sector_size < mp->m_sb.sb_sectsize) {
		XFS_BUF_UNMANAGE(bp);
		xfs_buf_relse(bp);
		sector_size = mp->m_sb.sb_sectsize;
		bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
					BTOBB(sector_size), extra_flags);
		if (!bp || XFS_BUF_ISERROR(bp)) {
			xfs_fs_mount_cmn_err(flags, "SB re-read failed");
			error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
			goto fail;
		}
		ASSERT(XFS_BUF_ISBUSY(bp));
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
	}

	/* Initialize per-cpu counters */
	xfs_icsb_reinit_counters(mp);

	mp->m_sb_bp = bp;
	xfs_buf_relse(bp);
	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
	return 0;

 fail:
	if (bp) {
		XFS_BUF_UNMANAGE(bp);
		xfs_buf_relse(bp);
	}
	return error;
}


/*
 * xfs_mount_common
 *
 * Mount initialization code establishing various mount
 * fields from the superblock associated with the given
 * mount structure
 */
STATIC void
xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
{
	int	i;

	mp->m_agfrotor = mp->m_agirotor = 0;
	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
	mp->m_maxagi = mp->m_sb.sb_agcount;
	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
	mp->m_litino = sbp->sb_inodesize -
		((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
	mp->m_blockmask = sbp->sb_blocksize - 1;
	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
	mp->m_blockwmask = mp->m_blockwsize - 1;
	INIT_LIST_HEAD(&mp->m_del_inodes);

	/*
	 * Setup for attributes, in case they get created.
	 * This value is for inodes getting attributes for the first time,
	 * the per-inode value is for old attribute values.
	 */
	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
	switch (sbp->sb_inodesize) {
	case 256:
		mp->m_attroffset = XFS_LITINO(mp) -
				   XFS_BMDR_SPACE_CALC(MINABTPTRS);
		break;
	case 512:
	case 1024:
	case 2048:
		mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
		break;
	default:
		ASSERT(0);
	}
	ASSERT(mp->m_attroffset < XFS_LITINO(mp));

	for (i = 0; i < 2; i++) {
		mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_alloc, i == 0);
		mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_alloc, i == 0);
	}
	for (i = 0; i < 2; i++) {
		mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_bmbt, i == 0);
		mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_bmbt, i == 0);
	}
	for (i = 0; i < 2; i++) {
		mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_inobt, i == 0);
		mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_inobt, i == 0);
	}

	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
					sbp->sb_inopblock);
	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
}

/*
 * xfs_initialize_perag_data
 *
 * Read in each per-ag structure so we can count up the number of
 * allocated inodes, free inodes and used filesystem blocks as this
 * information is no longer persistent in the superblock. Once we have
 * this information, write it into the in-core superblock structure.
 */
STATIC int
xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
{
	xfs_agnumber_t	index;
	xfs_perag_t	*pag;
	xfs_sb_t	*sbp = &mp->m_sb;
	uint64_t	ifree = 0;
	uint64_t	ialloc = 0;
	uint64_t	bfree = 0;
	uint64_t	bfreelst = 0;
	uint64_t	btree = 0;
	int		error;
	int		s;

	for (index = 0; index < agcount; index++) {
		/*
		 * read the agf, then the agi. This gets us
		 * all the inforamtion we need and populates the
		 * per-ag structures for us.
		 */
		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
		if (error)
			return error;

		error = xfs_ialloc_pagi_init(mp, NULL, index);
		if (error)
			return error;
		pag = &mp->m_perag[index];
		ifree += pag->pagi_freecount;
		ialloc += pag->pagi_count;
		bfree += pag->pagf_freeblks;
		bfreelst += pag->pagf_flcount;
		btree += pag->pagf_btreeblks;
	}
	/*
	 * Overwrite incore superblock counters with just-read data
	 */
	s = XFS_SB_LOCK(mp);
	sbp->sb_ifree = ifree;
	sbp->sb_icount = ialloc;
	sbp->sb_fdblocks = bfree + bfreelst + btree;
	XFS_SB_UNLOCK(mp, s);

	/* Fixup the per-cpu counters as well. */
	xfs_icsb_reinit_counters(mp);

	return 0;
}

/*
 * xfs_mountfs
 *
 * This function does the following on an initial mount of a file system:
 *	- reads the superblock from disk and init the mount struct
 *	- if we're a 32-bit kernel, do a size check on the superblock
 *		so we don't mount terabyte filesystems
 *	- init mount struct realtime fields
 *	- allocate inode hash table for fs
 *	- init directory manager
 *	- perform recovery and init the log manager
 */
int
xfs_mountfs(
	xfs_mount_t	*mp,
	int		mfsi_flags)
{
	xfs_buf_t	*bp;
	xfs_sb_t	*sbp = &(mp->m_sb);
	xfs_inode_t	*rip;
	bhv_vnode_t	*rvp = NULL;
	int		readio_log, writeio_log;
	xfs_daddr_t	d;
	__uint64_t	resblks;
	__int64_t	update_flags;
	uint		quotamount, quotaflags;
	int		agno;
	int		uuid_mounted = 0;
	int		error = 0;

	if (mp->m_sb_bp == NULL) {
		if ((error = xfs_readsb(mp, mfsi_flags))) {
			return error;
		}
	}
	xfs_mount_common(mp, sbp);

	/*
	 * Check if sb_agblocks is aligned at stripe boundary
	 * If sb_agblocks is NOT aligned turn off m_dalign since
	 * allocator alignment is within an ag, therefore ag has
	 * to be aligned at stripe boundary.
	 */
	update_flags = 0LL;
	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
		/*
		 * If stripe unit and stripe width are not multiples
		 * of the fs blocksize turn off alignment.
		 */
		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
			if (mp->m_flags & XFS_MOUNT_RETERR) {
				cmn_err(CE_WARN,
					"XFS: alignment check 1 failed");
				error = XFS_ERROR(EINVAL);
				goto error1;
			}
			mp->m_dalign = mp->m_swidth = 0;
		} else {
			/*
			 * Convert the stripe unit and width to FSBs.
			 */
			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				if (mp->m_flags & XFS_MOUNT_RETERR) {
					error = XFS_ERROR(EINVAL);
					goto error1;
				}
				xfs_fs_cmn_err(CE_WARN, mp,
"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
					mp->m_dalign, mp->m_swidth,
					sbp->sb_agblocks);

				mp->m_dalign = 0;
				mp->m_swidth = 0;
			} else if (mp->m_dalign) {
				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
			} else {
				if (mp->m_flags & XFS_MOUNT_RETERR) {
					xfs_fs_cmn_err(CE_WARN, mp,
"stripe alignment turned off: sunit(%d) less than bsize(%d)",
                                        	mp->m_dalign,
						mp->m_blockmask +1);
					error = XFS_ERROR(EINVAL);
					goto error1;
				}
				mp->m_swidth = 0;
			}
		}

		/*
		 * Update superblock with new values
		 * and log changes
		 */
		if (XFS_SB_VERSION_HASDALIGN(sbp)) {
			if (sbp->sb_unit != mp->m_dalign) {
				sbp->sb_unit = mp->m_dalign;
				update_flags |= XFS_SB_UNIT;
			}
			if (sbp->sb_width != mp->m_swidth) {
				sbp->sb_width = mp->m_swidth;
				update_flags |= XFS_SB_WIDTH;
			}
		}
	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
		    XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
			mp->m_dalign = sbp->sb_unit;
			mp->m_swidth = sbp->sb_width;
	}

	xfs_alloc_compute_maxlevels(mp);
	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
	xfs_ialloc_compute_maxlevels(mp);

	if (sbp->sb_imax_pct) {
		__uint64_t	icount;

		/* Make sure the maximum inode count is a multiple of the
		 * units we allocate inodes in.
		 */

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		do_div(icount, mp->m_ialloc_blks);
		mp->m_maxicount = (icount * mp->m_ialloc_blks)  <<
				   sbp->sb_inopblog;
	} else
		mp->m_maxicount = 0;

	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);

	/*
	 * XFS uses the uuid from the superblock as the unique
	 * identifier for fsid.  We can not use the uuid from the volume
	 * since a single partition filesystem is identical to a single
	 * partition volume/filesystem.
	 */
	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
		if (xfs_uuid_mount(mp)) {
			error = XFS_ERROR(EINVAL);
			goto error1;
		}
		uuid_mounted=1;
	}

	/*
	 * Set the default minimum read and write sizes unless
	 * already specified in a mount option.
	 * We use smaller I/O sizes when the file system
	 * is being used for NFS service (wsync mount option).
	 */
	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
		if (mp->m_flags & XFS_MOUNT_WSYNC) {
			readio_log = XFS_WSYNC_READIO_LOG;
			writeio_log = XFS_WSYNC_WRITEIO_LOG;
		} else {
			readio_log = XFS_READIO_LOG_LARGE;
			writeio_log = XFS_WRITEIO_LOG_LARGE;
		}
	} else {
		readio_log = mp->m_readio_log;
		writeio_log = mp->m_writeio_log;
	}

	if (sbp->sb_blocklog > readio_log) {
		mp->m_readio_log = sbp->sb_blocklog;
	} else {
		mp->m_readio_log = readio_log;
	}
	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
	if (sbp->sb_blocklog > writeio_log) {
		mp->m_writeio_log = sbp->sb_blocklog;
	} else {
		mp->m_writeio_log = writeio_log;
	}
	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);

	/*
	 * Set the inode cluster size.
	 * This may still be overridden by the file system
	 * block size if it is larger than the chosen cluster size.
	 */
	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;

	/*
	 * Set whether we're using inode alignment.
	 */
	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
	    mp->m_sb.sb_inoalignmt >=
	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
	else
		mp->m_inoalign_mask = 0;
	/*
	 * If we are using stripe alignment, check whether
	 * the stripe unit is a multiple of the inode alignment
	 */
	if (mp->m_dalign && mp->m_inoalign_mask &&
	    !(mp->m_dalign & mp->m_inoalign_mask))
		mp->m_sinoalign = mp->m_dalign;
	else
		mp->m_sinoalign = 0;
	/*
	 * Check that the data (and log if separate) are an ok size.
	 */
	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
		cmn_err(CE_WARN, "XFS: size check 1 failed");
		error = XFS_ERROR(E2BIG);
		goto error1;
	}
	error = xfs_read_buf(mp, mp->m_ddev_targp,
			     d - XFS_FSS_TO_BB(mp, 1),
			     XFS_FSS_TO_BB(mp, 1), 0, &bp);
	if (!error) {
		xfs_buf_relse(bp);
	} else {
		cmn_err(CE_WARN, "XFS: size check 2 failed");
		if (error == ENOSPC) {
			error = XFS_ERROR(E2BIG);
		}
		goto error1;
	}

	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
	    mp->m_logdev_targp != mp->m_ddev_targp) {
		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
			cmn_err(CE_WARN, "XFS: size check 3 failed");
			error = XFS_ERROR(E2BIG);
			goto error1;
		}
		error = xfs_read_buf(mp, mp->m_logdev_targp,
				     d - XFS_FSB_TO_BB(mp, 1),
				     XFS_FSB_TO_BB(mp, 1), 0, &bp);
		if (!error) {
			xfs_buf_relse(bp);
		} else {
			cmn_err(CE_WARN, "XFS: size check 3 failed");
			if (error == ENOSPC) {
				error = XFS_ERROR(E2BIG);
			}
			goto error1;
		}
	}

	/*
	 * Initialize realtime fields in the mount structure
	 */
	if ((error = xfs_rtmount_init(mp))) {
		cmn_err(CE_WARN, "XFS: RT mount failed");
		goto error1;
	}

	/*
	 * For client case we are done now
	 */
	if (mfsi_flags & XFS_MFSI_CLIENT) {
		return 0;
	}

	/*
	 *  Copies the low order bits of the timestamp and the randomly
	 *  set "sequence" number out of a UUID.
	 */
	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);

	mp->m_dmevmask = 0;	/* not persistent; set after each mount */

	xfs_dir_mount(mp);

	/*
	 * Initialize the attribute manager's entries.
	 */
	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;

	/*
	 * Initialize the precomputed transaction reservations values.
	 */
	xfs_trans_init(mp);

	/*
	 * Allocate and initialize the per-ag data.
	 */
	init_rwsem(&mp->m_peraglock);
	mp->m_perag =
		kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);

	mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);

	/*
	 * log's mount-time initialization. Perform 1st part recovery if needed
	 */
	if (likely(sbp->sb_logblocks > 0)) {	/* check for volume case */
		error = xfs_log_mount(mp, mp->m_logdev_targp,
				      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
		if (error) {
			cmn_err(CE_WARN, "XFS: log mount failed");
			goto error2;
		}
	} else {	/* No log has been defined */
		cmn_err(CE_WARN, "XFS: no log defined");
		XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
		error = XFS_ERROR(EFSCORRUPTED);
		goto error2;
	}

	/*
	 * Now the log is mounted, we know if it was an unclean shutdown or
	 * not. If it was, with the first phase of recovery has completed, we
	 * have consistent AG blocks on disk. We have not recovered EFIs yet,
	 * but they are recovered transactionally in the second recovery phase
	 * later.
	 *
	 * Hence we can safely re-initialise incore superblock counters from
	 * the per-ag data. These may not be correct if the filesystem was not
	 * cleanly unmounted, so we need to wait for recovery to finish before
	 * doing this.
	 *
	 * If the filesystem was cleanly unmounted, then we can trust the
	 * values in the superblock to be correct and we don't need to do
	 * anything here.
	 *
	 * If we are currently making the filesystem, the initialisation will
	 * fail as the perag data is in an undefined state.
	 */

	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
	     !mp->m_sb.sb_inprogress) {
		error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
		if (error) {
			goto error2;
		}
	}
	/*
	 * Get and sanity-check the root inode.
	 * Save the pointer to it in the mount structure.
	 */
	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
	if (error) {
		cmn_err(CE_WARN, "XFS: failed to read root inode");
		goto error3;
	}

	ASSERT(rip != NULL);
	rvp = XFS_ITOV(rip);

	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
		cmn_err(CE_WARN, "XFS: corrupted root inode");
		cmn_err(CE_WARN, "Device %s - root %llu is not a directory",
			XFS_BUFTARG_NAME(mp->m_ddev_targp),
			(unsigned long long)rip->i_ino);
		xfs_iunlock(rip, XFS_ILOCK_EXCL);
		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				 mp);
		error = XFS_ERROR(EFSCORRUPTED);
		goto error4;
	}
	mp->m_rootip = rip;	/* save it */

	xfs_iunlock(rip, XFS_ILOCK_EXCL);

	/*
	 * Initialize realtime inode pointers in the mount structure
	 */
	if ((error = xfs_rtmount_inodes(mp))) {
		/*
		 * Free up the root inode.
		 */
		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
		goto error4;
	}

	/*
	 * If fs is not mounted readonly, then update the superblock
	 * unit and width changes.
	 */
	if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY))
		xfs_mount_log_sbunit(mp, update_flags);

	/*
	 * Initialise the XFS quota management subsystem for this mount
	 */
	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
		goto error4;

	/*
	 * Finish recovering the file system.  This part needed to be
	 * delayed until after the root and real-time bitmap inodes
	 * were consistently read in.
	 */
	error = xfs_log_mount_finish(mp, mfsi_flags);
	if (error) {
		cmn_err(CE_WARN, "XFS: log mount finish failed");
		goto error4;
	}

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
		goto error4;

	/*
	 * Now we are mounted, reserve a small amount of unused space for
	 * privileged transactions. This is needed so that transaction
	 * space required for critical operations can dip into this pool
	 * when at ENOSPC. This is needed for operations like create with
	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
	 * are not allowed to use this reserved space.
	 *
	 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
	 * This may drive us straight to ENOSPC on mount, but that implies
	 * we were already there on the last unmount.
	 */
	resblks = mp->m_sb.sb_dblocks;
	do_div(resblks, 20);
	resblks = min_t(__uint64_t, resblks, 1024);
	xfs_reserve_blocks(mp, &resblks, NULL);

	return 0;

 error4:
	/*
	 * Free up the root inode.
	 */
	VN_RELE(rvp);
 error3:
	xfs_log_unmount_dealloc(mp);
 error2:
	for (agno = 0; agno < sbp->sb_agcount; agno++)
		if (mp->m_perag[agno].pagb_list)
			kmem_free(mp->m_perag[agno].pagb_list,
			  sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
	mp->m_perag = NULL;
	/* FALLTHROUGH */
 error1:
	if (uuid_mounted)
		xfs_uuid_unmount(mp);
	xfs_freesb(mp);
	return error;
}

/*
 * xfs_unmountfs
 *
 * This flushes out the inodes,dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
int
xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
{
	__uint64_t	resblks;

	/*
	 * We can potentially deadlock here if we have an inode cluster
	 * that has been freed has it's buffer still pinned in memory because
	 * the transaction is still sitting in a iclog. The stale inodes
	 * on that buffer will have their flush locks held until the
	 * transaction hits the disk and the callbacks run. the inode
	 * flush takes the flush lock unconditionally and with nothing to
	 * push out the iclog we will never get that unlocked. hence we
	 * need to force the log first.
	 */
	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
	xfs_iflush_all(mp);

	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);

	/*
	 * Flush out the log synchronously so that we know for sure
	 * that nothing is pinned.  This is important because bflush()
	 * will skip pinned buffers.
	 */
	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);

	xfs_binval(mp->m_ddev_targp);
	if (mp->m_rtdev_targp) {
		xfs_binval(mp->m_rtdev_targp);
	}

	/*
	 * Unreserve any blocks we have so that when we unmount we don't account
	 * the reserved free space as used. This is really only necessary for
	 * lazy superblock counting because it trusts the incore superblock
	 * counters to be aboslutely correct on clean unmount.
	 *
	 * We don't bother correcting this elsewhere for lazy superblock
	 * counting because on mount of an unclean filesystem we reconstruct the
	 * correct counter value and this is irrelevant.
	 *
	 * For non-lazy counter filesystems, this doesn't matter at all because
	 * we only every apply deltas to the superblock and hence the incore
	 * value does not matter....
	 */
	resblks = 0;
	xfs_reserve_blocks(mp, &resblks, NULL);

	xfs_log_sbcount(mp, 1);
	xfs_unmountfs_writesb(mp);
	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
	xfs_log_unmount(mp);			/* Done! No more fs ops. */

	xfs_freesb(mp);

	/*
	 * All inodes from this mount point should be freed.
	 */
	ASSERT(mp->m_inodes == NULL);

	xfs_unmountfs_close(mp, cr);
	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
		xfs_uuid_unmount(mp);

#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
	xfs_errortag_clearall(mp, 0);
#endif
	XFS_IODONE(mp);
	xfs_mount_free(mp);
	return 0;
}

void
xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
{
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_logdev_targp, 1);
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp, 1);
	xfs_free_buftarg(mp->m_ddev_targp, 0);
}

STATIC void
xfs_unmountfs_wait(xfs_mount_t *mp)
{
	if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_wait_buftarg(mp->m_logdev_targp);
	if (mp->m_rtdev_targp)
		xfs_wait_buftarg(mp->m_rtdev_targp);
	xfs_wait_buftarg(mp->m_ddev_targp);
}

int
xfs_fs_writable(xfs_mount_t *mp)
{
	return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) ||
		(mp->m_flags & XFS_MOUNT_RDONLY));
}

/*
 * xfs_log_sbcount
 *
 * Called either periodically to keep the on disk superblock values
 * roughly up to date or from unmount to make sure the values are
 * correct on a clean unmount.
 *
 * Note this code can be called during the process of freezing, so
 * we may need to use the transaction allocator which does not not
 * block when the transaction subsystem is in its frozen state.
 */
int
xfs_log_sbcount(
	xfs_mount_t	*mp,
	uint		sync)
{
	xfs_trans_t	*tp;
	int		error;

	if (!xfs_fs_writable(mp))
		return 0;

	xfs_icsb_sync_counters(mp);

	/*
	 * we don't need to do this if we are updating the superblock
	 * counters on every modification.
	 */
	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
		return 0;

	tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
					XFS_DEFAULT_LOG_COUNT);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
	if (sync)
		xfs_trans_set_sync(tp);
	xfs_trans_commit(tp, 0);

	return 0;
}

STATIC void
xfs_mark_shared_ro(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	xfs_dsb_t	*sb = XFS_BUF_TO_SBP(bp);
	__uint16_t	version;

	if (!(sb->sb_flags & XFS_SBF_READONLY))
		sb->sb_flags |= XFS_SBF_READONLY;

	version = be16_to_cpu(sb->sb_versionnum);
	if ((version & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4 ||
	    !(version & XFS_SB_VERSION_SHAREDBIT))
		version |= XFS_SB_VERSION_SHAREDBIT;
	sb->sb_versionnum = cpu_to_be16(version);
}

int
xfs_unmountfs_writesb(xfs_mount_t *mp)
{
	xfs_buf_t	*sbp;
	int		error = 0;

	/*
	 * skip superblock write if fs is read-only, or
	 * if we are doing a forced umount.
	 */
	if (!((mp->m_flags & XFS_MOUNT_RDONLY) ||
		XFS_FORCED_SHUTDOWN(mp))) {

		sbp = xfs_getsb(mp, 0);

		/*
		 * mark shared-readonly if desired
		 */
		if (mp->m_mk_sharedro)
			xfs_mark_shared_ro(mp, sbp);

		XFS_BUF_UNDONE(sbp);
		XFS_BUF_UNREAD(sbp);
		XFS_BUF_UNDELAYWRITE(sbp);
		XFS_BUF_WRITE(sbp);
		XFS_BUF_UNASYNC(sbp);
		ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
		xfsbdstrat(mp, sbp);
		/* Nevermind errors we might get here. */
		error = xfs_iowait(sbp);
		if (error)
			xfs_ioerror_alert("xfs_unmountfs_writesb",
					  mp, sbp, XFS_BUF_ADDR(sbp));
		if (error && mp->m_mk_sharedro)
			xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting.  Filesystem may not be marked shared readonly");
		xfs_buf_relse(sbp);
	}
	return error;
}

/*
 * xfs_mod_sb() can be used to copy arbitrary changes to the
 * in-core superblock into the superblock buffer to be logged.
 * It does not provide the higher level of locking that is
 * needed to protect the in-core superblock from concurrent
 * access.
 */
void
xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
{
	xfs_buf_t	*bp;
	int		first;
	int		last;
	xfs_mount_t	*mp;
	xfs_sb_field_t	f;

	ASSERT(fields);
	if (!fields)
		return;
	mp = tp->t_mountp;
	bp = xfs_trans_getsb(tp, mp, 0);
	first = sizeof(xfs_sb_t);
	last = 0;

	/* translate/copy */

	xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);

	/* find modified range */

	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
	first = xfs_sb_info[f].offset;

	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
	last = xfs_sb_info[f + 1].offset - 1;

	xfs_trans_log_buf(tp, bp, first, last);
}


/*
 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
 * a delta to a specified field in the in-core superblock.  Simply
 * switch on the field indicated and apply the delta to that field.
 * Fields are not allowed to dip below zero, so if the delta would
 * do this do not apply it and return EINVAL.
 *
 * The SB_LOCK must be held when this routine is called.
 */
int
xfs_mod_incore_sb_unlocked(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	int		scounter;	/* short counter for 32 bit fields */
	long long	lcounter;	/* long counter for 64 bit fields */
	long long	res_used, rem;

	/*
	 * With the in-core superblock spin lock held, switch
	 * on the indicated field.  Apply the delta to the
	 * proper field.  If the fields value would dip below
	 * 0, then do not apply the delta and return EINVAL.
	 */
	switch (field) {
	case XFS_SBS_ICOUNT:
		lcounter = (long long)mp->m_sb.sb_icount;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_icount = lcounter;
		return 0;
	case XFS_SBS_IFREE:
		lcounter = (long long)mp->m_sb.sb_ifree;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_ifree = lcounter;
		return 0;
	case XFS_SBS_FDBLOCKS:
		lcounter = (long long)
			mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

		if (delta > 0) {		/* Putting blocks back */
			if (res_used > delta) {
				mp->m_resblks_avail += delta;
			} else {
				rem = delta - res_used;
				mp->m_resblks_avail = mp->m_resblks;
				lcounter += rem;
			}
		} else {				/* Taking blocks away */

			lcounter += delta;

		/*
		 * If were out of blocks, use any available reserved blocks if
		 * were allowed to.
		 */

			if (lcounter < 0) {
				if (rsvd) {
					lcounter = (long long)mp->m_resblks_avail + delta;
					if (lcounter < 0) {
						return XFS_ERROR(ENOSPC);
					}
					mp->m_resblks_avail = lcounter;
					return 0;
				} else {	/* not reserved */
					return XFS_ERROR(ENOSPC);
				}
			}
		}

		mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
		return 0;
	case XFS_SBS_FREXTENTS:
		lcounter = (long long)mp->m_sb.sb_frextents;
		lcounter += delta;
		if (lcounter < 0) {
			return XFS_ERROR(ENOSPC);
		}
		mp->m_sb.sb_frextents = lcounter;
		return 0;
	case XFS_SBS_DBLOCKS:
		lcounter = (long long)mp->m_sb.sb_dblocks;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_dblocks = lcounter;
		return 0;
	case XFS_SBS_AGCOUNT:
		scounter = mp->m_sb.sb_agcount;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_agcount = scounter;
		return 0;
	case XFS_SBS_IMAX_PCT:
		scounter = mp->m_sb.sb_imax_pct;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_imax_pct = scounter;
		return 0;
	case XFS_SBS_REXTSIZE:
		scounter = mp->m_sb.sb_rextsize;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rextsize = scounter;
		return 0;
	case XFS_SBS_RBMBLOCKS:
		scounter = mp->m_sb.sb_rbmblocks;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rbmblocks = scounter;
		return 0;
	case XFS_SBS_RBLOCKS:
		lcounter = (long long)mp->m_sb.sb_rblocks;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rblocks = lcounter;
		return 0;
	case XFS_SBS_REXTENTS:
		lcounter = (long long)mp->m_sb.sb_rextents;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rextents = lcounter;
		return 0;
	case XFS_SBS_REXTSLOG:
		scounter = mp->m_sb.sb_rextslog;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rextslog = scounter;
		return 0;
	default:
		ASSERT(0);
		return XFS_ERROR(EINVAL);
	}
}

/*
 * xfs_mod_incore_sb() is used to change a field in the in-core
 * superblock structure by the specified delta.  This modification
 * is protected by the SB_LOCK.  Just use the xfs_mod_incore_sb_unlocked()
 * routine to do the work.
 */
int
xfs_mod_incore_sb(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	unsigned long	s;
	int	status;

	/* check for per-cpu counters */
	switch (field) {
#ifdef HAVE_PERCPU_SB
	case XFS_SBS_ICOUNT:
	case XFS_SBS_IFREE:
	case XFS_SBS_FDBLOCKS:
		if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
			status = xfs_icsb_modify_counters(mp, field,
							delta, rsvd);
			break;
		}
		/* FALLTHROUGH */
#endif
	default:
		s = XFS_SB_LOCK(mp);
		status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
		XFS_SB_UNLOCK(mp, s);
		break;
	}

	return status;
}

/*
 * xfs_mod_incore_sb_batch() is used to change more than one field
 * in the in-core superblock structure at a time.  This modification
 * is protected by a lock internal to this module.  The fields and
 * changes to those fields are specified in the array of xfs_mod_sb
 * structures passed in.
 *
 * Either all of the specified deltas will be applied or none of
 * them will.  If any modified field dips below 0, then all modifications
 * will be backed out and EINVAL will be returned.
 */
int
xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
{
	unsigned long	s;
	int		status=0;
	xfs_mod_sb_t	*msbp;

	/*
	 * Loop through the array of mod structures and apply each
	 * individually.  If any fail, then back out all those
	 * which have already been applied.  Do all of this within
	 * the scope of the SB_LOCK so that all of the changes will
	 * be atomic.
	 */
	s = XFS_SB_LOCK(mp);
	msbp = &msb[0];
	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
		/*
		 * Apply the delta at index n.  If it fails, break
		 * from the loop so we'll fall into the undo loop
		 * below.
		 */
		switch (msbp->msb_field) {
#ifdef HAVE_PERCPU_SB
		case XFS_SBS_ICOUNT:
		case XFS_SBS_IFREE:
		case XFS_SBS_FDBLOCKS:
			if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				XFS_SB_UNLOCK(mp, s);
				status = xfs_icsb_modify_counters(mp,
							msbp->msb_field,
							msbp->msb_delta, rsvd);
				s = XFS_SB_LOCK(mp);
				break;
			}
			/* FALLTHROUGH */
#endif
		default:
			status = xfs_mod_incore_sb_unlocked(mp,
						msbp->msb_field,
						msbp->msb_delta, rsvd);
			break;
		}

		if (status != 0) {
			break;
		}
	}

	/*
	 * If we didn't complete the loop above, then back out
	 * any changes made to the superblock.  If you add code
	 * between the loop above and here, make sure that you
	 * preserve the value of status. Loop back until
	 * we step below the beginning of the array.  Make sure
	 * we don't touch anything back there.
	 */
	if (status != 0) {
		msbp--;
		while (msbp >= msb) {
			switch (msbp->msb_field) {
#ifdef HAVE_PERCPU_SB
			case XFS_SBS_ICOUNT:
			case XFS_SBS_IFREE:
			case XFS_SBS_FDBLOCKS:
				if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
					XFS_SB_UNLOCK(mp, s);
					status = xfs_icsb_modify_counters(mp,
							msbp->msb_field,
							-(msbp->msb_delta),
							rsvd);
					s = XFS_SB_LOCK(mp);
					break;
				}
				/* FALLTHROUGH */
#endif
			default:
				status = xfs_mod_incore_sb_unlocked(mp,
							msbp->msb_field,
							-(msbp->msb_delta),
							rsvd);
				break;
			}
			ASSERT(status == 0);
			msbp--;
		}
	}
	XFS_SB_UNLOCK(mp, s);
	return status;
}

/*
 * xfs_getsb() is called to obtain the buffer for the superblock.
 * The buffer is returned locked and read in from disk.
 * The buffer should be released with a call to xfs_brelse().
 *
 * If the flags parameter is BUF_TRYLOCK, then we'll only return
 * the superblock buffer if it can be locked without sleeping.
 * If it can't then we'll return NULL.
 */
xfs_buf_t *
xfs_getsb(
	xfs_mount_t	*mp,
	int		flags)
{
	xfs_buf_t	*bp;

	ASSERT(mp->m_sb_bp != NULL);
	bp = mp->m_sb_bp;
	if (flags & XFS_BUF_TRYLOCK) {
		if (!XFS_BUF_CPSEMA(bp)) {
			return NULL;
		}
	} else {
		XFS_BUF_PSEMA(bp, PRIBIO);
	}
	XFS_BUF_HOLD(bp);
	ASSERT(XFS_BUF_ISDONE(bp));
	return bp;
}

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
	xfs_mount_t	*mp)
{
	xfs_buf_t	*bp;

	/*
	 * Use xfs_getsb() so that the buffer will be locked
	 * when we call xfs_buf_relse().
	 */
	bp = xfs_getsb(mp, 0);
	XFS_BUF_UNMANAGE(bp);
	xfs_buf_relse(bp);
	mp->m_sb_bp = NULL;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
	xfs_mount_t	*mp)
{
	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
		cmn_err(CE_WARN,
			"XFS: Filesystem %s has nil UUID - can't mount",
			mp->m_fsname);
		return -1;
	}
	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
		cmn_err(CE_WARN,
			"XFS: Filesystem %s has duplicate UUID - can't mount",
			mp->m_fsname);
		return -1;
	}
	return 0;
}

/*
 * Remove filesystem from the UUID table.
 */
STATIC void
xfs_uuid_unmount(
	xfs_mount_t	*mp)
{
	uuid_table_remove(&mp->m_sb.sb_uuid);
}

/*
 * Used to log changes to the superblock unit and width fields which could
 * be altered by the mount options. Only the first superblock is updated.
 */
STATIC void
xfs_mount_log_sbunit(
	xfs_mount_t	*mp,
	__int64_t	fields)
{
	xfs_trans_t	*tp;

	ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID));

	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				XFS_DEFAULT_LOG_COUNT)) {
		xfs_trans_cancel(tp, 0);
		return;
	}
	xfs_mod_sb(tp, fields);
	xfs_trans_commit(tp, 0);
}


#ifdef HAVE_PERCPU_SB
/*
 * Per-cpu incore superblock counters
 *
 * Simple concept, difficult implementation
 *
 * Basically, replace the incore superblock counters with a distributed per cpu
 * counter for contended fields (e.g.  free block count).
 *
 * Difficulties arise in that the incore sb is used for ENOSPC checking, and
 * hence needs to be accurately read when we are running low on space. Hence
 * there is a method to enable and disable the per-cpu counters based on how
 * much "stuff" is available in them.
 *
 * Basically, a counter is enabled if there is enough free resource to justify
 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
 * ENOSPC), then we disable the counters to synchronise all callers and
 * re-distribute the available resources.
 *
 * If, once we redistributed the available resources, we still get a failure,
 * we disable the per-cpu counter and go through the slow path.
 *
 * The slow path is the current xfs_mod_incore_sb() function.  This means that
 * when we disable a per-cpu counter, we need to drain it's resources back to
 * the global superblock. We do this after disabling the counter to prevent
 * more threads from queueing up on the counter.
 *
 * Essentially, this means that we still need a lock in the fast path to enable
 * synchronisation between the global counters and the per-cpu counters. This
 * is not a problem because the lock will be local to a CPU almost all the time
 * and have little contention except when we get to ENOSPC conditions.
 *
 * Basically, this lock becomes a barrier that enables us to lock out the fast
 * path while we do things like enabling and disabling counters and
 * synchronising the counters.
 *
 * Locking rules:
 *
 * 	1. XFS_SB_LOCK() before picking up per-cpu locks
 * 	2. per-cpu locks always picked up via for_each_online_cpu() order
 * 	3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
 * 	4. modifying per-cpu counters requires holding per-cpu lock
 * 	5. modifying global counters requires holding XFS_SB_LOCK
 *	6. enabling or disabling a counter requires holding the XFS_SB_LOCK
 *	   and _none_ of the per-cpu locks.
 *
 * Disabled counters are only ever re-enabled by a balance operation
 * that results in more free resources per CPU than a given threshold.
 * To ensure counters don't remain disabled, they are rebalanced when
 * the global resource goes above a higher threshold (i.e. some hysteresis
 * is present to prevent thrashing).
 */

#ifdef CONFIG_HOTPLUG_CPU
/*
 * hot-plug CPU notifier support.
 *
 * We need a notifier per filesystem as we need to be able to identify
 * the filesystem to balance the counters out. This is achieved by
 * having a notifier block embedded in the xfs_mount_t and doing pointer
 * magic to get the mount pointer from the notifier block address.
 */
STATIC int
xfs_icsb_cpu_notify(
	struct notifier_block *nfb,
	unsigned long action,
	void *hcpu)
{
	xfs_icsb_cnts_t *cntp;
	xfs_mount_t	*mp;
	int		s;

	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
	cntp = (xfs_icsb_cnts_t *)
			per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		/* Easy Case - initialize the area and locks, and
		 * then rebalance when online does everything else for us. */
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		xfs_icsb_lock(mp);
		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
		xfs_icsb_unlock(mp);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* Disable all the counters, then fold the dead cpu's
		 * count into the total on the global superblock and
		 * re-enable the counters. */
		xfs_icsb_lock(mp);
		s = XFS_SB_LOCK(mp);
		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);

		mp->m_sb.sb_icount += cntp->icsb_icount;
		mp->m_sb.sb_ifree += cntp->icsb_ifree;
		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;

		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));

		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT,
					 XFS_ICSB_SB_LOCKED, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE,
					 XFS_ICSB_SB_LOCKED, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
					 XFS_ICSB_SB_LOCKED, 0);
		XFS_SB_UNLOCK(mp, s);
		xfs_icsb_unlock(mp);
		break;
	}

	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */

int
xfs_icsb_init_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
	if (mp->m_sb_cnts == NULL)
		return -ENOMEM;

#ifdef CONFIG_HOTPLUG_CPU
	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
	mp->m_icsb_notifier.priority = 0;
	register_hotcpu_notifier(&mp->m_icsb_notifier);
#endif /* CONFIG_HOTPLUG_CPU */

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
	}

	mutex_init(&mp->m_icsb_mutex);

	/*
	 * start with all counters disabled so that the
	 * initial balance kicks us off correctly
	 */
	mp->m_icsb_counters = -1;
	return 0;
}

void
xfs_icsb_reinit_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_lock(mp);
	/*
	 * start with all counters disabled so that the
	 * initial balance kicks us off correctly
	 */
	mp->m_icsb_counters = -1;
	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
	xfs_icsb_unlock(mp);
}

STATIC void
xfs_icsb_destroy_counters(
	xfs_mount_t	*mp)
{
	if (mp->m_sb_cnts) {
		unregister_hotcpu_notifier(&mp->m_icsb_notifier);
		free_percpu(mp->m_sb_cnts);
	}
	mutex_destroy(&mp->m_icsb_mutex);
}

STATIC_INLINE void
xfs_icsb_lock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
		ndelay(1000);
	}
}

STATIC_INLINE void
xfs_icsb_unlock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
}


STATIC_INLINE void
xfs_icsb_lock_all_counters(
	xfs_mount_t	*mp)