aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jfs/jfs_metapage.c
blob: 6740d34cd82b802e948b8760fcf13954a8a12ad2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
/*
 *   Copyright (C) International Business Machines Corp., 2000-2005
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/seq_file.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
} mpStat;
#endif

#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
#define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag)

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit_unlock(META_locked, &mp->flag);
	wake_up(&mp->wait);
}

static inline void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);
	INCREMENT(mpStat.lockwait);
	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			unlock_page(mp->page);
			io_schedule();
			lock_page(mp->page);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

/*
 * Must have mp->page locked
 */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}

#define METAPOOL_MIN_PAGES 32
static struct kmem_cache *metapage_cache;
static mempool_t *metapage_mempool;

#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)

#if MPS_PER_PAGE > 1

struct meta_anchor {
	int mp_count;
	atomic_t io_count;
	struct metapage *mp[MPS_PER_PAGE];
};
#define mp_anchor(page) ((struct meta_anchor *)page_private(page))

static inline struct metapage *page_to_mp(struct page *page, int offset)
{
	if (!PagePrivate(page))
		return NULL;
	return mp_anchor(page)->mp[offset >> L2PSIZE];
}

static inline int insert_metapage(struct page *page, struct metapage *mp)
{
	struct meta_anchor *a;
	int index;
	int l2mp_blocks;	/* log2 blocks per metapage */

	if (PagePrivate(page))
		a = mp_anchor(page);
	else {
		a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS);
		if (!a)
			return -ENOMEM;
		set_page_private(page, (unsigned long)a);
		SetPagePrivate(page);
		kmap(page);
	}

	if (mp) {
		l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
		index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
		a->mp_count++;
		a->mp[index] = mp;
	}

	return 0;
}

static inline void remove_metapage(struct page *page, struct metapage *mp)
{
	struct meta_anchor *a = mp_anchor(page);
	int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
	int index;

	index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);

	BUG_ON(a->mp[index] != mp);

	a->mp[index] = NULL;
	if (--a->mp_count == 0) {
		kfree(a);
		set_page_private(page, 0);
		ClearPagePrivate(page);
		kunmap(page);
	}
}

static inline void inc_io(struct page *page)
{
	atomic_inc(&mp_anchor(page)->io_count);
}

static inline void dec_io(struct page *page, void (*handler) (struct page *))
{
	if (atomic_dec_and_test(&mp_anchor(page)->io_count))
		handler(page);
}

#else
static inline struct metapage *page_to_mp(struct page *page, int offset)
{
	return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
}

static inline int insert_metapage(struct page *page, struct metapage *mp)
{
	if (mp) {
		set_page_private(page, (unsigned long)mp);
		SetPagePrivate(page);
		kmap(page);
	}
	return 0;
}

static inline void remove_metapage(struct page *page, struct metapage *mp)
{
	set_page_private(page, 0);
	ClearPagePrivate(page);
	kunmap(page);
}

#define inc_io(page) do {} while(0)
#define dec_io(page, handler) handler(page)

#endif

static void init_once(void *foo)
{
	struct metapage *mp = (struct metapage *)foo;

	mp->lid = 0;
	mp->lsn = 0;
	mp->flag = 0;
	mp->data = NULL;
	mp->clsn = 0;
	mp->log = NULL;
	set_bit(META_free, &mp->flag);
	init_waitqueue_head(&mp->wait);
}

static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
{
	return mempool_alloc(metapage_mempool, gfp_mask);
}

static inline void free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	mempool_free(mp, metapage_mempool);
}

int __init metapage_init(void)
{
	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once);
	if (metapage_cache == NULL)
		return -ENOMEM;

	metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES,
						    metapage_cache);

	if (metapage_mempool == NULL) {
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}

	return 0;
}

void metapage_exit(void)
{
	mempool_destroy(metapage_mempool);
	kmem_cache_destroy(metapage_cache);
}

static inline void drop_metapage(struct page *page, struct metapage *mp)
{
	if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
	    test_bit(META_io, &mp->flag))
		return;
	remove_metapage(page, mp);
	INCREMENT(mpStat.pagefree);
	free_metapage(mp);
}

/*
 * Metapage address space operations
 */

static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
				    int *len)
{
	int rc = 0;
	int xflag;
	s64 xaddr;
	sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
			       inode->i_blkbits;

	if (lblock >= file_blocks)
		return 0;
	if (lblock + *len > file_blocks)
		*len = file_blocks - lblock;

	if (inode->i_ino) {
		rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
		if ((rc == 0) && *len)
			lblock = (sector_t)xaddr;
		else
			lblock = 0;
	} /* else no mapping */

	return lblock;
}

static void last_read_complete(struct page *page)
{
	if (!PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
}

static void metapage_read_end_io(struct bio *bio, int err)
{
	struct page *page = bio->bi_private;

	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
		printk(KERN_ERR "metapage_read_end_io: I/O error\n");
		SetPageError(page);
	}

	dec_io(page, last_read_complete);
	bio_put(bio);
}

static void remove_from_logsync(struct metapage *mp)
{
	struct jfs_log *log = mp->log;
	unsigned long flags;
/*
 * This can race.  Recheck that log hasn't been set to null, and after
 * acquiring logsync lock, recheck lsn
 */
	if (!log)
		return;

	LOGSYNC_LOCK(log, flags);
	if (mp->lsn) {
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
	}
	LOGSYNC_UNLOCK(log, flags);
}

static void last_write_complete(struct page *page)
{
	struct metapage *mp;
	unsigned int offset;

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);
		if (mp && test_bit(META_io, &mp->flag)) {
			if (mp->lsn)
				remove_from_logsync(mp);
			clear_bit(META_io, &mp->flag);
		}
		/*
		 * I'd like to call drop_metapage here, but I don't think it's
		 * safe unless I have the page locked
		 */
	}
	end_page_writeback(page);
}

static void metapage_write_end_io(struct bio *bio, int err)
{
	struct page *page = bio->bi_private;

	BUG_ON(!PagePrivate(page));

	if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
		printk(KERN_ERR "metapage_write_end_io: I/O error\n");
		SetPageError(page);
	}
	dec_io(page, last_write_complete);
	bio_put(bio);
}

static int metapage_writepage(struct page *page, struct writeback_control *wbc)
{
	struct bio *bio = NULL;
	int block_offset;	/* block offset of mp within page */
	struct inode *inode = page->mapping->host;
	int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
	int len;
	int xlen;
	struct metapage *mp;
	int redirty = 0;
	sector_t lblock;
	int nr_underway = 0;
	sector_t pblock;
	sector_t next_block = 0;
	sector_t page_start;
	unsigned long bio_bytes = 0;
	unsigned long bio_offset = 0;
	int offset;
	int bad_blocks = 0;

	page_start = (sector_t)page->index <<
		     (PAGE_CACHE_SHIFT - inode->i_blkbits);
	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);

		if (!mp || !test_bit(META_dirty, &mp->flag))
			continue;

		if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
			redirty = 1;
			/*
			 * Make sure this page isn't blocked indefinitely.
			 * If the journal isn't undergoing I/O, push it
			 */
			if (mp->log && !(mp->log->cflag & logGC_PAGEOUT))
				jfs_flush_journal(mp->log, 0);
			continue;
		}

		clear_bit(META_dirty, &mp->flag);
		set_bit(META_io, &mp->flag);
		block_offset = offset >> inode->i_blkbits;
		lblock = page_start + block_offset;
		if (bio) {
			if (xlen && lblock == next_block) {
				/* Contiguous, in memory & on disk */
				len = min(xlen, blocks_per_mp);
				xlen -= len;
				bio_bytes += len << inode->i_blkbits;
				continue;
			}
			/* Not contiguous */
			if (bio_add_page(bio, page, bio_bytes, bio_offset) <
			    bio_bytes)
				goto add_failed;
			/*
			 * Increment counter before submitting i/o to keep
			 * count from hitting zero before we're through
			 */
			inc_io(page);
			if (!bio->bi_size)
				goto dump_bio;
			submit_bio(WRITE, bio);
			nr_underway++;
			bio = NULL;
		} else
			inc_io(page);
		xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
		pblock = metapage_get_blocks(inode, lblock, &xlen);
		if (!pblock) {
			printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
			/*
			 * We already called inc_io(), but can't cancel it
			 * with dec_io() until we're done with the page
			 */
			bad_blocks++;
			continue;
		}
		len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);

		bio = bio_alloc(GFP_NOFS, 1);
		bio->bi_bdev = inode->i_sb->s_bdev;
		bio->bi_sector = pblock << (inode->i_blkbits - 9);
		bio->bi_end_io = metapage_write_end_io;
		bio->bi_private = page;

		/* Don't call bio_add_page yet, we may add to this vec */
		bio_offset = offset;
		bio_bytes = len << inode->i_blkbits;

		xlen -= len;
		next_block = lblock + len;
	}
	if (bio) {
		if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
				goto add_failed;
		if (!bio->bi_size)
			goto dump_bio;

		submit_bio(WRITE, bio);
		nr_underway++;
	}
	if (redirty)
		redirty_page_for_writepage(wbc, page);

	unlock_page(page);

	if (bad_blocks)
		goto err_out;

	if (nr_underway == 0)
		end_page_writeback(page);

	return 0;
add_failed:
	/* We should never reach here, since we're only adding one vec */
	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
	goto skip;
dump_bio:
	print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
		       4, bio, sizeof(*bio), 0);
skip:
	bio_put(bio);
	unlock_page(page);
	dec_io(page, last_write_complete);
err_out:
	while (bad_blocks--)
		dec_io(page, last_write_complete);
	return -EIO;
}

static int metapage_readpage(struct file *fp, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct bio *bio = NULL;
	int block_offset;
	int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
	sector_t page_start;	/* address of page in fs blocks */
	sector_t pblock;
	int xlen;
	unsigned int len;
	int offset;

	BUG_ON(!PageLocked(page));
	page_start = (sector_t)page->index <<
		     (PAGE_CACHE_SHIFT - inode->i_blkbits);

	block_offset = 0;
	while (block_offset < blocks_per_page) {
		xlen = blocks_per_page - block_offset;
		pblock = metapage_get_blocks(inode, page_start + block_offset,
					     &xlen);
		if (pblock) {
			if (!PagePrivate(page))
				insert_metapage(page, NULL);
			inc_io(page);
			if (bio)
				submit_bio(READ, bio);

			bio = bio_alloc(GFP_NOFS, 1);
			bio->bi_bdev = inode->i_sb->s_bdev;
			bio->bi_sector = pblock << (inode->i_blkbits - 9);
			bio->bi_end_io = metapage_read_end_io;
			bio->bi_private = page;
			len = xlen << inode->i_blkbits;
			offset = block_offset << inode->i_blkbits;
			if (bio_add_page(bio, page, len, offset) < len)
				goto add_failed;
			block_offset += xlen;
		} else
			block_offset++;
	}
	if (bio)
		submit_bio(READ, bio);
	else
		unlock_page(page);

	return 0;

add_failed:
	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
	bio_put(bio);
	dec_io(page, last_read_complete);
	return -EIO;
}

static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct metapage *mp;
	int ret = 1;
	int offset;

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);

		if (!mp)
			continue;

		jfs_info("metapage_releasepage: mp = 0x%p", mp);
		if (mp->count || mp->nohomeok ||
		    test_bit(META_dirty, &mp->flag)) {
			jfs_info("count = %ld, nohomeok = %d", mp->count,
				 mp->nohomeok);
			ret = 0;
			continue;
		}
		if (mp->lsn)
			remove_from_logsync(mp);
		remove_metapage(page, mp);
		INCREMENT(mpStat.pagefree);
		free_metapage(mp);
	}
	return ret;
}

static void metapage_invalidatepage(struct page *page, unsigned long offset)
{
	BUG_ON(offset);

	BUG_ON(PageWriteback(page));

	metapage_releasepage(page, 0);
}

const struct address_space_operations jfs_metapage_aops = {
	.readpage	= metapage_readpage,
	.writepage	= metapage_writepage,
	.releasepage	= metapage_releasepage,
	.invalidatepage	= metapage_invalidatepage,
	.set_page_dirty	= __set_page_dirty_nobuffers,
};

struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	int l2BlocksPerPage;
	int l2bsize;
	struct address_space *mapping;
	struct metapage *mp = NULL;
	struct page *page;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
		 inode->i_ino, lblock, absolute);

	l2bsize = inode->i_blkbits;
	l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	page_index = lblock >> l2BlocksPerPage;
	page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
	if ((page_offset + size) > PAGE_CACHE_SIZE) {
		jfs_err("MetaData crosses page boundary!!");
		jfs_err("lblock = %lx, size  = %d", lblock, size);
		dump_stack();
		return NULL;
	}
	if (absolute)
		mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	if (new && (PSIZE == PAGE_CACHE_SIZE)) {
		page = grab_cache_page(mapping, page_index);
		if (!page) {
			jfs_err("grab_cache_page failed!");
			return NULL;
		}
		SetPageUptodate(page);
	} else {
		page = read_mapping_page(mapping, page_index, NULL);
		if (IS_ERR(page) || !PageUptodate(page)) {
			jfs_err("read_mapping_page failed!");
			return NULL;
		}
		lock_page(page);
	}

	mp = page_to_mp(page, page_offset);
	if (mp) {
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			jfs_err("logical_size = %d, size = %d",
				mp->logical_size, size);
			dump_stack();
			goto unlock;
		}
		mp->count++;
		lock_metapage(mp);
		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				discard_metapage(mp);
				goto unlock;
			}
			clear_bit(META_discard, &mp->flag);
		}
	} else {
		INCREMENT(mpStat.pagealloc);
		mp = alloc_metapage(GFP_NOFS);
		mp->page = page;
		mp->flag = 0;
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		mp->nohomeok = 0;
		mp->logical_size = size;
		mp->data = page_address(page) + page_offset;
		mp->index = lblock;
		if (unlikely(insert_metapage(page, mp))) {
			free_metapage(mp);
			goto unlock;
		}
		lock_metapage(mp);
	}

	if (new) {
		jfs_info("zeroing mp = 0x%p", mp);
		memset(mp->data, 0, PSIZE);
	}

	unlock_page(page);
	jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
	return mp;

unlock:
	unlock_page(page);
	return NULL;
}

void grab_metapage(struct metapage * mp)
{
	jfs_info("grab_metapage: mp = 0x%p", mp);
	page_cache_get(mp->page);
	lock_page(mp->page);
	mp->count++;
	lock_metapage(mp);
	unlock_page(mp->page);
}

void force_metapage(struct metapage *mp)
{
	struct page *page = mp->page;
	jfs_info("force_metapage: mp = 0x%p", mp);
	set_bit(META_forcewrite, &mp->flag);
	clear_bit(META_sync, &mp->flag);
	page_cache_get(page);
	lock_page(page);
	set_page_dirty(page);
	write_one_page(page, 1);
	clear_bit(META_forcewrite, &mp->flag);
	page_cache_release(page);
}

void hold_metapage(struct metapage *mp)
{
	lock_page(mp->page);
}

void put_metapage(struct metapage *mp)
{
	if (mp->count || mp->nohomeok) {
		/* Someone else will release this */
		unlock_page(mp->page);
		return;
	}
	page_cache_get(mp->page);
	mp->count++;
	lock_metapage(mp);
	unlock_page(mp->page);
	release_metapage(mp);
}

void release_metapage(struct metapage * mp)
{
	struct page *page = mp->page;
	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	BUG_ON(!page);

	lock_page(page);
	unlock_metapage(mp);

	assert(mp->count);
	if (--mp->count || mp->nohomeok) {
		unlock_page(page);
		page_cache_release(page);
		return;
	}

	if (test_bit(META_dirty, &mp->flag)) {
		set_page_dirty(page);
		if (test_bit(META_sync, &mp->flag)) {
			clear_bit(META_sync, &mp->flag);
			write_one_page(page, 1);
			lock_page(page); /* write_one_page unlocks the page */
		}
	} else if (mp->lsn)	/* discard_metapage doesn't remove it */
		remove_from_logsync(mp);

	/* Try to keep metapages from using up too much memory */
	drop_metapage(page, mp);

	unlock_page(page);
	page_cache_release(page);
}

void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	sector_t lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	int BlocksPerPage = 1 << l2BlocksPerPage;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping =
		JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
	struct metapage *mp;
	struct page *page;
	unsigned int offset;

	/*
	 * Mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
	     lblock += BlocksPerPage) {
		page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
		if (!page)
			continue;
		for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
			mp = page_to_mp(page, offset);
			if (!mp)
				continue;
			if (mp->index < addr)
				continue;
			if (mp->index >= addr + len)
				break;

			clear_bit(META_dirty, &mp->flag);
			set_bit(META_discard, &mp->flag);
			if (mp->lsn)
				remove_from_logsync(mp);
		}
		unlock_page(page);
		page_cache_release(page);
	}
}

#ifdef CONFIG_JFS_STATISTICS
static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait);
	return 0;
}

static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, jfs_mpstat_proc_show, NULL);
}

const struct file_operations jfs_mpstat_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= jfs_mpstat_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif
chael@ellerman.id.au> 2007-03-22 06:51:34 -0400 committer Greg Kroah-Hartman <gregkh@suse.de> 2007-05-02 22:02:36 -0400 MSI: Consolidate BUG_ON()s.' href='/cgit/cgit.cgi/litmus-rt-budgetable-locks.git/.git/commit/drivers/pci/msi.c?h=update_litmus_2019&id=7ede9c1fa50e01a8222217d4606bcbc44cd68f1a'>7ede9c1fa50e
b3b7cc7b4138



1da177e4c3f4
032de8e2fe3c
1da177e4c3f4
032de8e2fe3c

032de8e2fe3c


78b7611c4a1e


032de8e2fe3c


1da177e4c3f4




1da177e4c3f4


70549ad9cf07
1ce03373a7f4
1da177e4c3f4

1ce03373a7f4
1da177e4c3f4

1ce03373a7f4
1da177e4c3f4
1ce03373a7f4
1da177e4c3f4



92db6d10bc1b
ded86d8d3773
1da177e4c3f4
1da177e4c3f4
c9953a73e92d
1da177e4c3f4

c9953a73e92d



b64c05e7de60
1da177e4c3f4
1da177e4c3f4












ded86d8d3773
7bd007e48067
1ce03373a7f4
b1cbf4e4dddd
80ccba1186d4

1da177e4c3f4

1da177e4c3f4
1da177e4c3f4

4cc086fa5b64
1da177e4c3f4
fc4afc7b2bdd
1da177e4c3f4
032de8e2fe3c
fc4afc7b2bdd

d52877c7b1af
fc4afc7b2bdd
128bc5fced23
ded86d8d3773

b1cbf4e4dddd
ba698ad4b7e4
b1cbf4e4dddd
d52877c7b1af






7bd007e48067
fc4afc7b2bdd
1da177e4c3f4
4cc086fa5b64
1da177e4c3f4

1ce03373a7f4
1da177e4c3f4

eaae4b3a84a3
1ce03373a7f4
1da177e4c3f4




1da177e4c3f4


032de8e2fe3c

1da177e4c3f4
fc4afc7b2bdd

1da177e4c3f4

309e57df7b76



c9953a73e92d
07ae95f988a3






d389fec6a2ae
07ae95f988a3
d389fec6a2ae
07ae95f988a3
d389fec6a2ae
07ae95f988a3
d389fec6a2ae
07ae95f988a3
d389fec6a2ae
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799







                                                         
                      



                            
                         

                          
                      
                      


                      



                
                              
 












































                                                                   
                                                                      
 

                    








                                                                         




                                                                                














                                                                          

                                                          



                                             

 
                                                    


                               
                                       

















                                                                               








                                                                            
                                                                       


                               
                                       
                                      

                                         
                                                

                                      


                                                                           

                                                 
                                                                           
                        
                                 
                 
                      




                                                                               
                                                 


                      
                      

                      
                                          
                 

 
                                                                  
 
                                                        














                                                                               
                                                                               


















                                                                                 
 
                                                        
 







                                                                   


                                         













                                                                               



                             








                                                                         


                      
                      
         
                          
 
 






                                                         
                                   
 



                                                 

 
                                     
 



                                                 

 
                                              
 



                                             
                                                             


                            

                                     




                          





                                                                   
                                                        
 
                
                    
                               
 


                              

                                      
 
                                 
                               
                                             


                                                                        
                                                            
         

                                                                 

                                        
                                                                 


                                                         
 
                
                               
                    
 


                               
                             
                                 
                                
 
                                                          
                                                                
                                                       
                                                                     
         
 

                                                                      
                                    



                                                                  
 





                                               
                                         
 



                                                                     
                                                                      
                                                                
                                                                     
                                                                   



                                                   
                     

                    

                                                                           


                                                                  


                                  
 
                                                
                                                            

                                                                 
                                     
                                                                             
                                    

                                        




                                                                       
                                                                     
                                                            
                                                                      
                                 
                                                            
                                                       
         
                                                    
 
                                                
                                                          
                  
                                   
                           
         
 
                                   
                                 

                               
 
                              





                                                                       

                                                             
  
                                                                 

                                                                         



                                                                     
                               
                                       

                                



                           

                                                                            



                                                                  

                                                                              
                                                          

                                                                 





                                                                            

                                          
                              

                                     
                                                         
                                            

                                               
                                             
                                                         
                                            

                                        
 
                                                            
         






                                                                  
                         
                 
 

                                   


                                                                   

                                    
                             
         






                                                          
                                    
                                 

                                




                 
                                                                      
                                                                     
                                             
                                            
  
                                                                 

                                                                    
    
                                                                        

                            
                
 
                                                                      


                                                   







                                                            





                                                                      



                                                          



                                                     


                                            



                 



                                                                     
                                                                

                                                                     
                                 


                                       
                   
 


                                                              
 
                                    
 
                                                                   
                                

                                                       
                               

                                          

                      
                              
 
                                          

                               
 
                                                         

                       
                               
                                 
                             
 

                                                                      


                                                             

                                                              
         
                                                                    
                       

                                                               















                                                                      
 
                               
 
                                             
 
                                     
 



                                                           
 
                                    
 

                                                                    


                                                                               


                                                                       


                                       




                 


                                                                       
                                                 
                                                                        

                                                                          
                                                                 

                                                                       
                                                                      
                                                                           
                                                                     



                                                                              
                                    
                 
                    
 
                     

                               



                                                                  
                                                        
                                                                  












                                                                     
                                     
 
                                                                
                               

                                                         

                               
                                                          

                      
                               
 
                                                   
 
                           

 
                                           
 
                                                          

                       
                                
                                 
                              






                                                          
 
                                
 
                                

   
                                                                   

                                                                        
                                                                     
                                                           




                                                                     


                                    

                                   
 

                                        

 



                           
 






                                                                       
 
                              
 
                               
 
                                              
 
                                       
 
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/msi.h>
#include <linux/smp.h>

#include <asm/errno.h>
#include <asm/io.h>

#include "pci.h"
#include "msi.h"

static int pci_msi_enable = 1;

/* Arch hooks */

int __attribute__ ((weak))
arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
	return 0;
}

int __attribute__ ((weak))
arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry)
{
	return 0;
}

int __attribute__ ((weak))
arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct msi_desc *entry;
	int ret;

	list_for_each_entry(entry, &dev->msi_list, list) {
		ret = arch_setup_msi_irq(dev, entry);
		if (ret)
			return ret;
	}

	return 0;
}

void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq)
{
	return;
}

void __attribute__ ((weak))
arch_teardown_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
		if (entry->irq != 0)
			arch_teardown_msi_irq(entry->irq);
	}
}

static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
{
	u16 control;

	if (pos) {
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
		control &= ~PCI_MSI_FLAGS_ENABLE;
		if (enable)
			control |= PCI_MSI_FLAGS_ENABLE;
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
	}
}

static void msi_set_enable(struct pci_dev *dev, int enable)
{
	__msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable);
}

static void msix_set_enable(struct pci_dev *dev, int enable)
{
	int pos;
	u16 control;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos) {
		pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
		control &= ~PCI_MSIX_FLAGS_ENABLE;
		if (enable)
			control |= PCI_MSIX_FLAGS_ENABLE;
		pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
	}
}

static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	/* Don't shift by >= width of type */
	if (x >= 5)
		return 0xffffffff;
	return (1 << (1 << x)) - 1;
}

static void msix_flush_writes(struct irq_desc *desc)
{
	struct msi_desc *entry;

	entry = get_irq_desc_msi(desc);
	BUG_ON(!entry || !entry->dev);
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
		/* nothing to do */
		break;
	case PCI_CAP_ID_MSIX:
	{
		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
		readl(entry->mask_base + offset);
		break;
	}
	default:
		BUG();
		break;
	}
}

/*
 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
 * mask all MSI interrupts by clearing the MSI enable bit does not work
 * reliably as devices without an INTx disable bit will then generate a
 * level IRQ which will never be cleared.
 *
 * Returns 1 if it succeeded in masking the interrupt and 0 if the device
 * doesn't support MSI masking.
 */
static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag)
{
	struct msi_desc *entry;

	entry = get_irq_desc_msi(desc);
	BUG_ON(!entry || !entry->dev);
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
		if (entry->msi_attrib.maskbit) {
			int pos;
			u32 mask_bits;

			pos = (long)entry->mask_base;
			pci_read_config_dword(entry->dev, pos, &mask_bits);
			mask_bits &= ~(mask);
			mask_bits |= flag & mask;
			pci_write_config_dword(entry->dev, pos, mask_bits);
		} else {
			return 0;
		}
		break;
	case PCI_CAP_ID_MSIX:
	{
		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
		writel(flag, entry->mask_base + offset);
		readl(entry->mask_base + offset);
		break;
	}
	default:
		BUG();
		break;
	}
	entry->msi_attrib.masked = !!flag;
	return 1;
}

void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
	struct msi_desc *entry = get_irq_desc_msi(desc);
	switch(entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;
		u16 data;

		pci_read_config_dword(dev, msi_lower_address_reg(pos),
					&msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, msi_upper_address_reg(pos),
						&msg->address_hi);
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
		}
		msg->data = data;
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
 		break;
 	}
 	default:
		BUG();
	}
}

void read_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct irq_desc *desc = irq_to_desc(irq);

	read_msi_msg_desc(desc, msg);
}

void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
	struct msi_desc *entry = get_irq_desc_msi(desc);
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;

		pci_write_config_dword(dev, msi_lower_address_reg(pos),
					msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, msi_upper_address_reg(pos),
						msg->address_hi);
			pci_write_config_word(dev, msi_data_reg(pos, 1),
						msg->data);
		} else {
			pci_write_config_word(dev, msi_data_reg(pos, 0),
						msg->data);
		}
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		writel(msg->address_lo,
			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		writel(msg->address_hi,
			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
		break;
	}
	default:
		BUG();
	}
	entry->msg = *msg;
}

void write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct irq_desc *desc = irq_to_desc(irq);

	write_msi_msg_desc(desc, msg);
}

void mask_msi_irq(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);

	msi_set_mask_bits(desc, 1, 1);
	msix_flush_writes(desc);
}

void unmask_msi_irq(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);

	msi_set_mask_bits(desc, 1, 0);
	msix_flush_writes(desc);
}

static int msi_free_irqs(struct pci_dev* dev);

static struct msi_desc* alloc_msi_entry(void)
{
	struct msi_desc *entry;

	entry = kzalloc(sizeof(struct msi_desc), GFP_KERNEL);
	if (!entry)
		return NULL;

	INIT_LIST_HEAD(&entry->list);
	entry->irq = 0;
	entry->dev = NULL;

	return entry;
}

static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}

static void __pci_restore_msi_state(struct pci_dev *dev)
{
	int pos;
	u16 control;
	struct msi_desc *entry;

	if (!dev->msi_enabled)
		return;

	entry = get_irq_msi(dev->irq);
	pos = entry->msi_attrib.pos;

	pci_intx_for_msi(dev, 0);
	msi_set_enable(dev, 0);
	write_msi_msg(dev->irq, &entry->msg);
	if (entry->msi_attrib.maskbit) {
		struct irq_desc *desc = irq_to_desc(dev->irq);
		msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask,
				  entry->msi_attrib.masked);
	}

	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
}

static void __pci_restore_msix_state(struct pci_dev *dev)
{
	int pos;
	struct msi_desc *entry;
	u16 control;

	if (!dev->msix_enabled)
		return;

	/* route the table */
	pci_intx_for_msi(dev, 0);
	msix_set_enable(dev, 0);

	list_for_each_entry(entry, &dev->msi_list, list) {
		struct irq_desc *desc = irq_to_desc(entry->irq);
		write_msi_msg(entry->irq, &entry->msg);
		msi_set_mask_bits(desc, 1, entry->msi_attrib.masked);
	}

	BUG_ON(list_empty(&dev->msi_list));
	entry = list_entry(dev->msi_list.next, struct msi_desc, list);
	pos = entry->msi_attrib.pos;
	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
	control &= ~PCI_MSIX_FLAGS_MASKALL;
	control |= PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
}

void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Setup the MSI capability structure of device function with a single
 * MSI irq, regardless of device function is capable of handling
 * multiple messages. A return of zero indicates the successful setup
 * of an entry zero with the new MSI irq or non-zero for otherwise.
 **/
static int msi_capability_init(struct pci_dev *dev)
{
	struct msi_desc *entry;
	int pos, ret;
	u16 control;

	msi_set_enable(dev, 0);	/* Ensure msi is disabled as I set it up */

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	/* MSI Entry Initialization */
	entry = alloc_msi_entry();
	if (!entry)
		return -ENOMEM;

	entry->msi_attrib.type = PCI_CAP_ID_MSI;
	entry->msi_attrib.is_64 = is_64bit_address(control);
	entry->msi_attrib.entry_nr = 0;
	entry->msi_attrib.maskbit = is_mask_bit_support(control);
	entry->msi_attrib.masked = 1;
	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
	entry->msi_attrib.pos = pos;
	entry->dev = dev;
	if (entry->msi_attrib.maskbit) {
		unsigned int base, maskbits, temp;

		base = msi_mask_bits_reg(pos, entry->msi_attrib.is_64);
		entry->mask_base = (void __iomem *)(long)base;

		/* All MSIs are unmasked by default, Mask them all */
		pci_read_config_dword(dev, base, &maskbits);
		temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1);
		maskbits |= temp;
		pci_write_config_dword(dev, base, maskbits);
		entry->msi_attrib.maskbits_mask = temp;
	}
	list_add_tail(&entry->list, &dev->msi_list);

	/* Configure MSI capability structure */
	ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI);
	if (ret) {
		msi_free_irqs(dev);
		return ret;
	}

	/* Set MSI enabled bits	 */
	pci_intx_for_msi(dev, 0);
	msi_set_enable(dev, 1);
	dev->msi_enabled = 1;

	dev->irq = entry->irq;
	return 0;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 *
 * Setup the MSI-X capability structure of device function with a
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	struct msi_desc *entry;
	int pos, i, j, nr_entries, ret;
	unsigned long phys_addr;
	u32 table_offset;
 	u16 control;
	u8 bir;
	void __iomem *base;

	msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	/* Request & Map MSI-X table region */
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);

 	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
	phys_addr = pci_resource_start (dev, bir) + table_offset;
	base = ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
	if (base == NULL)
		return -ENOMEM;

	/* MSI-X Table Initialization */
	for (i = 0; i < nvec; i++) {
		entry = alloc_msi_entry();
		if (!entry)
			break;

 		j = entries[i].entry;
		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
		entry->msi_attrib.is_64 = 1;
		entry->msi_attrib.entry_nr = j;
		entry->msi_attrib.maskbit = 1;
		entry->msi_attrib.masked = 1;
		entry->msi_attrib.default_irq = dev->irq;
		entry->msi_attrib.pos = pos;
		entry->dev = dev;
		entry->mask_base = base;

		list_add_tail(&entry->list, &dev->msi_list);
	}

	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret) {
		int avail = 0;
		list_for_each_entry(entry, &dev->msi_list, list) {
			if (entry->irq != 0) {
				avail++;
			}
		}

		msi_free_irqs(dev);

		/* If we had some success report the number of irqs
		 * we succeeded in setting up.
		 */
		if (avail == 0)
			avail = ret;
		return avail;
	}

	i = 0;
	list_for_each_entry(entry, &dev->msi_list, list) {
		entries[i].vector = entry->irq;
		set_irq_msi(entry->irq, entry);
		i++;
	}
	/* Set MSI-X enabled bits */
	pci_intx_for_msi(dev, 0);
	msix_set_enable(dev, 1);
	dev->msix_enabled = 1;

	return 0;
}

/**
 * pci_msi_check_device - check whether MSI may be enabled on a device
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: how many MSIs have been requested ?
 * @type: are we checking for MSI or MSI-X ?
 *
 * Look at global flags, the device itself, and its parent busses
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 0, else return an error code.
 **/
static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
{
	struct pci_bus *bus;
	int ret;

	/* MSI must be globally enabled and supported by the device */
	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return -ERANGE;

	/* Any bridge which does NOT route MSI transactions from it's
	 * secondary bus to it's primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller driver
	 * or quirks for specific PCI bridges to be setting NO_MSI.
	 */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

	ret = arch_msi_check_device(dev, nvec, type);
	if (ret)
		return ret;

	if (!pci_find_capability(dev, type))
		return -EINVAL;

	return 0;
}

/**
 * pci_enable_msi - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Setup the MSI capability structure of device function with
 * a single MSI irq upon its software driver call to request for
 * MSI mode enabled on its hardware device function. A return of zero
 * indicates the successful setup of an entry zero with the new MSI
 * irq or non-zero for otherwise.
 **/
int pci_enable_msi(struct pci_dev* dev)
{
	int status;

	status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
	if (status)
		return status;

	WARN_ON(!!dev->msi_enabled);

	/* Check whether driver already requested for MSI-X irqs */
	if (dev->msix_enabled) {
		dev_info(&dev->dev, "can't enable MSI "
			 "(MSI-X already enabled)\n");
		return -EINVAL;
	}
	status = msi_capability_init(dev);
	return status;
}
EXPORT_SYMBOL(pci_enable_msi);

void pci_msi_shutdown(struct pci_dev* dev)
{
	struct msi_desc *entry;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	msi_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msi_enabled = 0;

	BUG_ON(list_empty(&dev->msi_list));
	entry = list_entry(dev->msi_list.next, struct msi_desc, list);
	/* Return the the pci reset with msi irqs unmasked */
	if (entry->msi_attrib.maskbit) {
		u32 mask = entry->msi_attrib.maskbits_mask;
		struct irq_desc *desc = irq_to_desc(dev->irq);
		msi_set_mask_bits(desc, mask, ~mask);
	}
	if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
		return;

	/* Restore dev->irq to its default pin-assertion irq */
	dev->irq = entry->msi_attrib.default_irq;
}
void pci_disable_msi(struct pci_dev* dev)
{
	struct msi_desc *entry;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);

	entry = list_entry(dev->msi_list.next, struct msi_desc, list);
	if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
		return;

	msi_free_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msi);

static int msi_free_irqs(struct pci_dev* dev)
{
	struct msi_desc *entry, *tmp;

	list_for_each_entry(entry, &dev->msi_list, list) {
		if (entry->irq)
			BUG_ON(irq_has_action(entry->irq));
	}

	arch_teardown_msi_irqs(dev);

	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
		if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) {
			writel(1, entry->mask_base + entry->msi_attrib.entry_nr
				  * PCI_MSIX_ENTRY_SIZE
				  + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

			if (list_is_last(&entry->list, &dev->msi_list))
				iounmap(entry->mask_base);
		}
		list_del(&entry->list);
		kfree(entry);
	}

	return 0;
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @nvec: number of MSI-X irqs requested for allocation by device driver
 *
 * Setup the MSI-X capability structure of device function with the number
 * of requested irqs upon its software driver call to request for
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
 * Or a return of > 0 indicates that driver request is exceeding the number
 * of irqs available. Driver should use the returned value to re-send
 * its request.
 **/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
	int status, pos, nr_entries;
	int i, j;
	u16 control;

	if (!entries)
 		return -EINVAL;

	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
	if (status)
		return status;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
	if (nvec > nr_entries)
		return -EINVAL;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
	WARN_ON(!!dev->msix_enabled);

	/* Check whether driver already requested for MSI irq */
   	if (dev->msi_enabled) {
		dev_info(&dev->dev, "can't enable MSI-X "
		       "(MSI IRQ already assigned)\n");
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}
EXPORT_SYMBOL(pci_enable_msix);

static void msix_free_all_irqs(struct pci_dev *dev)
{
	msi_free_irqs(dev);
}

void pci_msix_shutdown(struct pci_dev* dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	msix_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msix_enabled = 0;
}
void pci_disable_msix(struct pci_dev* dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	pci_msix_shutdown(dev);

	msix_free_all_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msix);

/**
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
 * Being called during hotplug remove, from which the device function
 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
 * allocated for this device function, are reclaimed to unused state,
 * which may be used later on.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev* dev)
{
	if (!pci_msi_enable || !dev)
 		return;

	if (dev->msi_enabled)
		msi_free_irqs(dev);

	if (dev->msix_enabled)
		msix_free_all_irqs(dev);
}

void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
{
	return pci_msi_enable;
}
EXPORT_SYMBOL(pci_msi_enabled);

void pci_msi_init_pci_dev(struct pci_dev *dev)
{
	INIT_LIST_HEAD(&dev->msi_list);
}