about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/staging/ramzswap
diff options
context:
space:
mode:
author Nitin Gupta <ngupta@vflare.org> 2010-05-13 04:54:21 -0400
committer Greg Kroah-Hartman <gregkh@suse.de> 2010-05-14 16:25:28 -0400
commit 97a0638218fc504d645c45ff03256dc4520efa72 (patch)
tree 8f525b16a03525be5fa08a754a120f8cd693d3e4 /drivers/staging/ramzswap
parent 3cdec5540d622e6b910a1fe91f544630a8ba8099 (diff)
Staging: ramzswap: Remove backing swap support
Currently, each ramzswap device can be assigned a separate 'backing swap' file/partition. The ramzswap driver forwards swap I/O requests to this backing swap whenever an incompressible page is found. This feature adds nearly 700 lines of code and it also duplicates much of the swapon() functionality (for example, finding swap extents and so on). Removing this code makes the driver much simpler and should help its transition from staging to stable drivers area (drivers/block/). Similar functionality may be implemented if we can implement migrating pages across swap devices but the details have not yet been worked out. Support for _partitions_ as backing swap could be retained as it requires a few lines of code only. This part can be re-introduced later if above swap migration method turns out to be infeasible. More cleanups and code comments will be added soon. Signed-off-by: Nitin Gupta <ngupta@vflare.org> Acked-by: Pekka Enberg <penberg@cs.helsinki.fi> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'drivers/staging/ramzswap')
-rw-r--r-- drivers/staging/ramzswap/ramzswap_drv.c 645
-rw-r--r-- drivers/staging/ramzswap/ramzswap_drv.h 51
-rw-r--r-- drivers/staging/ramzswap/ramzswap_ioctl.h 14
3 files changed, 25 insertions, 685 deletions
diff --git a/drivers/staging/ramzswap/ramzswap_drv.c b/drivers/staging/ramzswap/ramzswap_drv.c
index ee5eb12b928..167f8d17a80 100644
--- a/drivers/staging/ramzswap/ramzswap_drv.c
+++ b/drivers/staging/ramzswap/ramzswap_drv.c
@@ -36,13 +36,6 @@
36static int ramzswap_major; 36static int ramzswap_major;
37static struct ramzswap *devices; 37static struct ramzswap *devices;
38 38
39/*
40 * Pages that compress to larger than this size are
41 * forwarded to backing swap, if present or stored
42 * uncompressed in memory otherwise.
43 */
44static unsigned int max_zpage_size;
45
46/* Module params (documentation at end) */ 39/* Module params (documentation at end) */
47static unsigned int num_devices; 40static unsigned int num_devices;
48 41
@@ -79,52 +72,6 @@ static int page_zero_filled(void *ptr)
79 return 1; 72 return 1;
80} 73}
81 74
82/*
83 * memlimit cannot be greater than backing disk size.
84 */
85static void ramzswap_set_memlimit(struct ramzswap *rzs, size_t totalram_bytes)
86{
87 int memlimit_valid = 1;
88
89 if (!rzs->memlimit) {
90 pr_info("Memory limit not set.\n");
91 memlimit_valid = 0;
92 }
93
94 if (rzs->memlimit > rzs->disksize) {
95 pr_info("Memory limit cannot be greater than "
96 "disksize: limit=%zu, disksize=%zu\n",
97 rzs->memlimit, rzs->disksize);
98 memlimit_valid = 0;
99 }
100
101 if (!memlimit_valid) {
102 size_t mempart, disksize;
103 pr_info("Using default: smaller of (%u%% of RAM) and "
104 "(backing disk size).\n",
105 default_memlimit_perc_ram);
106 mempart = default_memlimit_perc_ram * (totalram_bytes / 100);
107 disksize = rzs->disksize;
108 rzs->memlimit = mempart > disksize ? disksize : mempart;
109 }
110
111 if (rzs->memlimit > totalram_bytes / 2) {
112 pr_info(
113 "Its not advisable setting limit more than half of "
114 "size of memory since we expect a 2:1 compression ratio. "
115 "Limit represents amount of *compressed* data we can keep "
116 "in memory!\n"
117 "\tMemory Size: %zu kB\n"
118 "\tLimit you selected: %zu kB\n"
119 "Continuing anyway ...\n",
120 totalram_bytes >> 10, rzs->memlimit >> 10
121 );
122 }
123
124 rzs->memlimit &= PAGE_MASK;
125 BUG_ON(!rzs->memlimit);
126}
127
128static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes) 75static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
129{ 76{
130 if (!rzs->disksize) { 77 if (!rzs->disksize) {
@@ -156,80 +103,22 @@ static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
156 103
157/* 104/*
158 * Swap header (1st page of swap device) contains information 105 * Swap header (1st page of swap device) contains information
159 * to indentify it as a swap partition. Prepare such a header 106 * about a swap file/partition. Prepare such a header for the
160 * for ramzswap device (ramzswap0) so that swapon can identify 107 * given ramzswap device so that swapon can identify it as a
161 * it as swap partition. In case backing swap device is provided, 108 * swap partition.
162 * copy its swap header.
163 */ 109 */
164static int setup_swap_header(struct ramzswap *rzs, union swap_header *s) 110static void setup_swap_header(struct ramzswap *rzs, union swap_header *s)
165{ 111{
166 int ret = 0; 112 s->info.version = 1;
167 struct page *page;
168 struct address_space *mapping;
169 union swap_header *backing_swap_header;
170
171 /*
172 * There is no backing swap device. Create a swap header
173 * that is acceptable by swapon.
174 */
175 if (!rzs->backing_swap) {
176 s->info.version = 1;
177 s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
178 s->info.nr_badpages = 0;
179 memcpy(s->magic.magic, "SWAPSPACE2", 10);
180 return 0;
181 }
182
183 /*
184 * We have a backing swap device. Copy its swap header
185 * to ramzswap device header. If this header contains
186 * invalid information (backing device not a swap
187 * partition, etc.), swapon will fail for ramzswap
188 * which is correct behavior - we don't want to swap
189 * over filesystem partition!
190 */
191
192 /* Read the backing swap header (code from sys_swapon) */
193 mapping = rzs->swap_file->f_mapping;
194 if (!mapping->a_ops->readpage) {
195 ret = -EINVAL;
196 goto out;
197 }
198
199 page = read_mapping_page(mapping, 0, rzs->swap_file);
200 if (IS_ERR(page)) {
201 ret = PTR_ERR(page);
202 goto out;
203 }
204
205 backing_swap_header = kmap(page);
206 memcpy(s, backing_swap_header, sizeof(*s));
207 if (s->info.nr_badpages) {
208 pr_info("Cannot use backing swap with bad pages (%u)\n",
209 s->info.nr_badpages);
210 ret = -EINVAL;
211 }
212 /*
213 * ramzswap disksize equals number of usable pages in backing
214 * swap. Set last_page in swap header to match this disksize
215 * ('last_page' means 0-based index of last usable swap page).
216 */
217 s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1; 113 s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
218 kunmap(page); 114 s->info.nr_badpages = 0;
219 115 memcpy(s->magic.magic, "SWAPSPACE2", 10);
220out:
221 return ret;
222} 116}
223 117
224static void ramzswap_ioctl_get_stats(struct ramzswap *rzs, 118static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
225 struct ramzswap_ioctl_stats *s) 119 struct ramzswap_ioctl_stats *s)
226{ 120{
227 strncpy(s->backing_swap_name, rzs->backing_swap_name,
228 MAX_SWAP_NAME_LEN - 1);
229 s->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
230
231 s->disksize = rzs->disksize; 121 s->disksize = rzs->disksize;
232 s->memlimit = rzs->memlimit;
233 122
234#if defined(CONFIG_RAMZSWAP_STATS) 123#if defined(CONFIG_RAMZSWAP_STATS)
235 { 124 {
@@ -265,333 +154,10 @@ static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
265 s->orig_data_size = rs->pages_stored << PAGE_SHIFT; 154 s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
266 s->compr_data_size = rs->compr_size; 155 s->compr_data_size = rs->compr_size;
267 s->mem_used_total = mem_used; 156 s->mem_used_total = mem_used;
268
269 s->bdev_num_reads = rzs_stat64_read(rzs, &rs->bdev_num_reads);
270 s->bdev_num_writes = rzs_stat64_read(rzs, &rs->bdev_num_writes);
271 } 157 }
272#endif /* CONFIG_RAMZSWAP_STATS */ 158#endif /* CONFIG_RAMZSWAP_STATS */
273} 159}
274 160
275static int add_backing_swap_extent(struct ramzswap *rzs,
276 pgoff_t phy_pagenum,
277 pgoff_t num_pages)
278{
279 unsigned int idx;
280 struct list_head *head;
281 struct page *curr_page, *new_page;
282 unsigned int extents_per_page = PAGE_SIZE /
283 sizeof(struct ramzswap_backing_extent);
284
285 idx = rzs->num_extents % extents_per_page;
286 if (!idx) {
287 new_page = alloc_page(__GFP_ZERO);
288 if (!new_page)
289 return -ENOMEM;
290
291 if (rzs->num_extents) {
292 curr_page = virt_to_page(rzs->curr_extent);
293 head = &curr_page->lru;
294 } else {
295 head = &rzs->backing_swap_extent_list;
296 }
297
298 list_add(&new_page->lru, head);
299 rzs->curr_extent = page_address(new_page);
300 }
301
302 rzs->curr_extent->phy_pagenum = phy_pagenum;
303 rzs->curr_extent->num_pages = num_pages;
304
305 pr_debug("add_extent: idx=%u, phy_pgnum=%lu, num_pgs=%lu, "
306 "pg_last=%lu, curr_ext=%p\n", idx, phy_pagenum, num_pages,
307 phy_pagenum + num_pages - 1, rzs->curr_extent);
308
309 if (idx != extents_per_page - 1)
310 rzs->curr_extent++;
311
312 return 0;
313}
314
315static int setup_backing_swap_extents(struct ramzswap *rzs,
316 struct inode *inode, unsigned long *num_pages)
317{
318 int ret = 0;
319 unsigned blkbits;
320 unsigned blocks_per_page;
321 pgoff_t contig_pages = 0, total_pages = 0;
322 pgoff_t pagenum = 0, prev_pagenum = 0;
323 sector_t probe_block = 0;
324 sector_t last_block;
325
326 blkbits = inode->i_blkbits;
327 blocks_per_page = PAGE_SIZE >> blkbits;
328
329 last_block = i_size_read(inode) >> blkbits;
330 while (probe_block + blocks_per_page <= last_block) {
331 unsigned block_in_page;
332 sector_t first_block;
333
334 first_block = bmap(inode, probe_block);
335 if (first_block == 0)
336 goto bad_bmap;
337
338 /* It must be PAGE_SIZE aligned on-disk */
339 if (first_block & (blocks_per_page - 1)) {
340 probe_block++;
341 goto probe_next;
342 }
343
344 /* All blocks within this page must be contiguous on disk */
345 for (block_in_page = 1; block_in_page < blocks_per_page;
346 block_in_page++) {
347 sector_t block;
348
349 block = bmap(inode, probe_block + block_in_page);
350 if (block == 0)
351 goto bad_bmap;
352 if (block != first_block + block_in_page) {
353 /* Discontiguity */
354 probe_block++;
355 goto probe_next;
356 }
357 }
358
359 /*
360 * We found a PAGE_SIZE length, PAGE_SIZE aligned
361 * run of blocks.
362 */
363 pagenum = first_block >> (PAGE_SHIFT - blkbits);
364
365 if (total_pages && (pagenum != prev_pagenum + 1)) {
366 ret = add_backing_swap_extent(rzs, prev_pagenum -
367 (contig_pages - 1), contig_pages);
368 if (ret < 0)
369 goto out;
370 rzs->num_extents++;
371 contig_pages = 0;
372 }
373 total_pages++;
374 contig_pages++;
375 prev_pagenum = pagenum;
376 probe_block += blocks_per_page;
377
378probe_next:
379 continue;
380 }
381
382 if (contig_pages) {
383 pr_debug("adding last extent: pagenum=%lu, "
384 "contig_pages=%lu\n", pagenum, contig_pages);
385 ret = add_backing_swap_extent(rzs,
386 prev_pagenum - (contig_pages - 1), contig_pages);
387 if (ret < 0)
388 goto out;
389 rzs->num_extents++;
390 }
391 if (!rzs->num_extents) {
392 pr_err("No swap extents found!\n");
393 ret = -EINVAL;
394 }
395
396 if (!ret) {
397 *num_pages = total_pages;
398 pr_info("Found %lu extents containing %luk\n",
399 rzs->num_extents, *num_pages << (PAGE_SHIFT - 10));
400 }
401 goto out;
402
403bad_bmap:
404 pr_err("Backing swapfile has holes\n");
405 ret = -EINVAL;
406out:
407 while (ret && !list_empty(&rzs->backing_swap_extent_list)) {
408 struct page *page;
409 struct list_head *entry = rzs->backing_swap_extent_list.next;
410 page = list_entry(entry, struct page, lru);
411 list_del(entry);
412 __free_page(page);
413 }
414 return ret;
415}
416
417static void map_backing_swap_extents(struct ramzswap *rzs)
418{
419 struct ramzswap_backing_extent *se;
420 struct page *table_page, *se_page;
421 unsigned long num_pages, num_table_pages, entry;
422 unsigned long se_idx, span;
423 unsigned entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
424 unsigned extents_per_page = PAGE_SIZE / sizeof(*se);
425
426 /* True for block device */
427 if (!rzs->num_extents)
428 return;
429
430 se_page = list_entry(rzs->backing_swap_extent_list.next,
431 struct page, lru);
432 se = page_address(se_page);
433 span = se->num_pages;
434 num_pages = rzs->disksize >> PAGE_SHIFT;
435 num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
436 PAGE_SIZE);
437
438 entry = 0;
439 se_idx = 0;
440 while (num_table_pages--) {
441 table_page = vmalloc_to_page(&rzs->table[entry]);
442 while (span <= entry) {
443 se_idx++;
444 if (se_idx == rzs->num_extents)
445 BUG();
446
447 if (!(se_idx % extents_per_page)) {
448 se_page = list_entry(se_page->lru.next,
449 struct page, lru);
450 se = page_address(se_page);
451 } else
452 se++;
453
454 span += se->num_pages;
455 }
456 table_page->mapping = (struct address_space *)se;
457 table_page->private = se->num_pages - (span - entry);
458 pr_debug("map_table: entry=%lu, span=%lu, map=%p, priv=%lu\n",
459 entry, span, table_page->mapping, table_page->private);
460 entry += entries_per_page;
461 }
462}
463
464/*
465 * Check if value of backing_swap module param is sane.
466 * Claim this device and set ramzswap size equal to
467 * size of this block device.
468 */
469static int setup_backing_swap(struct ramzswap *rzs)
470{
471 int ret = 0;
472 size_t disksize;
473 unsigned long num_pages = 0;
474 struct inode *inode;
475 struct file *swap_file;
476 struct address_space *mapping;
477 struct block_device *bdev = NULL;
478
479 if (!rzs->backing_swap_name[0]) {
480 pr_debug("backing_swap param not given\n");
481 goto out;
482 }
483
484 pr_info("Using backing swap device: %s\n", rzs->backing_swap_name);
485
486 swap_file = filp_open(rzs->backing_swap_name,
487 O_RDWR | O_LARGEFILE, 0);
488 if (IS_ERR(swap_file)) {
489 pr_err("Error opening backing device: %s\n",
490 rzs->backing_swap_name);
491 ret = -EINVAL;
492 goto out;
493 }
494
495 mapping = swap_file->f_mapping;
496 inode = mapping->host;
497
498 if (S_ISBLK(inode->i_mode)) {
499 bdev = I_BDEV(inode);
500 ret = bd_claim(bdev, setup_backing_swap);
501 if (ret < 0) {
502 bdev = NULL;
503 goto bad_param;
504 }
505 disksize = i_size_read(inode);
506 /*
507 * Can happen if user gives an extended partition as
508 * backing swap or simply a bad disk.
509 */
510 if (!disksize) {
511 pr_err("Error reading backing swap size.\n");
512 goto bad_param;
513 }
514 } else if (S_ISREG(inode->i_mode)) {
515 bdev = inode->i_sb->s_bdev;
516 if (IS_SWAPFILE(inode)) {
517 ret = -EBUSY;
518 goto bad_param;
519 }
520 ret = setup_backing_swap_extents(rzs, inode, &num_pages);
521 if (ret < 0)
522 goto bad_param;
523 disksize = num_pages << PAGE_SHIFT;
524 } else {
525 goto bad_param;
526 }
527
528 rzs->swap_file = swap_file;
529 rzs->backing_swap = bdev;
530 rzs->disksize = disksize;
531
532 return 0;
533
534bad_param:
535 if (bdev)
536 bd_release(bdev);
537 filp_close(swap_file, NULL);
538
539out:
540 rzs->backing_swap = NULL;
541 return ret;
542}
543
544/*
545 * Map logical page number 'pagenum' to physical page number
546 * on backing swap device. For block device, this is a nop.
547 */
548static u32 map_backing_swap_page(struct ramzswap *rzs, u32 pagenum)
549{
550 u32 skip_pages, entries_per_page;
551 size_t delta, se_offset, skipped;
552 struct page *table_page, *se_page;
553 struct ramzswap_backing_extent *se;
554
555 if (!rzs->num_extents)
556 return pagenum;
557
558 entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
559
560 table_page = vmalloc_to_page(&rzs->table[pagenum]);
561 se = (struct ramzswap_backing_extent *)table_page->mapping;
562 se_page = virt_to_page(se);
563
564 skip_pages = pagenum - (pagenum / entries_per_page * entries_per_page);
565 se_offset = table_page->private + skip_pages;
566
567 if (se_offset < se->num_pages)
568 return se->phy_pagenum + se_offset;
569
570 skipped = se->num_pages - table_page->private;
571 do {
572 struct ramzswap_backing_extent *se_base;
573 u32 se_entries_per_page = PAGE_SIZE / sizeof(*se);
574
575 /* Get next swap extent */
576 se_base = (struct ramzswap_backing_extent *)
577 page_address(se_page);
578 if (se - se_base == se_entries_per_page - 1) {
579 se_page = list_entry(se_page->lru.next,
580 struct page, lru);
581 se = page_address(se_page);
582 } else {
583 se++;
584 }
585
586 skipped += se->num_pages;
587 } while (skipped < skip_pages);
588
589 delta = skipped - skip_pages;
590 se_offset = se->num_pages - delta;
591
592 return se->phy_pagenum + se_offset;
593}
594
595static void ramzswap_free_page(struct ramzswap *rzs, size_t index) 161static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
596{ 162{
597 u32 clen; 163 u32 clen;
@@ -678,38 +244,12 @@ static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio)
678 244
679/* 245/*
680 * Called when request page is not present in ramzswap. 246 * Called when request page is not present in ramzswap.
681 * Its either in backing swap device (if present) or 247 * This is an attempt to read before any previous write
682 * this is an attempt to read before any previous write
683 * to this location - this happens due to readahead when 248 * to this location - this happens due to readahead when
684 * swap device is read from user-space (e.g. during swapon) 249 * swap device is read from user-space (e.g. during swapon)
685 */ 250 */
686static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio) 251static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio)
687{ 252{
688 /*
689 * Always forward such requests to backing swap
690 * device (if present)
691 */
692 if (rzs->backing_swap) {
693 u32 pagenum;
694 rzs_stat64_dec(rzs, &rzs->stats.num_reads);
695 rzs_stat64_inc(rzs, &rzs->stats.bdev_num_reads);
696 bio->bi_bdev = rzs->backing_swap;
697
698 /*
699 * In case backing swap is a file, find the right offset within
700 * the file corresponding to logical position 'index'. For block
701 * device, this is a nop.
702 */
703 pagenum = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
704 bio->bi_sector = map_backing_swap_page(rzs, pagenum)
705 << SECTORS_PER_PAGE_SHIFT;
706 return 1;
707 }
708
709 /*
710 * Its unlikely event in case backing dev is
711 * not present
712 */
713 pr_debug("Read before write on swap device: " 253 pr_debug("Read before write on swap device: "
714 "sector=%lu, size=%u, offset=%u\n", 254 "sector=%lu, size=%u, offset=%u\n",
715 (ulong)(bio->bi_sector), bio->bi_size, 255 (ulong)(bio->bi_sector), bio->bi_size,
@@ -781,7 +321,7 @@ out:
781 321
782static int ramzswap_write(struct ramzswap *rzs, struct bio *bio) 322static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
783{ 323{
784 int ret, fwd_write_request = 0; 324 int ret;
785 u32 offset, index; 325 u32 offset, index;
786 size_t clen; 326 size_t clen;
787 struct zobj_header *zheader; 327 struct zobj_header *zheader;
@@ -817,14 +357,6 @@ static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
817 return 0; 357 return 0;
818 } 358 }
819 359
820 if (rzs->backing_swap &&
821 (rzs->stats.compr_size > rzs->memlimit - PAGE_SIZE)) {
822 kunmap_atomic(user_mem, KM_USER0);
823 mutex_unlock(&rzs->lock);
824 fwd_write_request = 1;
825 goto out;
826 }
827
828 ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen, 360 ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
829 rzs->compress_workmem); 361 rzs->compress_workmem);
830 362
@@ -838,18 +370,11 @@ static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
838 } 370 }
839 371
840 /* 372 /*
841 * Page is incompressible. Forward it to backing swap 373 * Page is incompressible. Store it as-is (uncompressed)
842 * if present. Otherwise, store it as-is (uncompressed)
843 * since we do not want to return too many swap write 374 * since we do not want to return too many swap write
844 * errors which has side effect of hanging the system. 375 * errors which has side effect of hanging the system.
845 */ 376 */
846 if (unlikely(clen > max_zpage_size)) { 377 if (unlikely(clen > max_zpage_size)) {
847 if (rzs->backing_swap) {
848 mutex_unlock(&rzs->lock);
849 fwd_write_request = 1;
850 goto out;
851 }
852
853 clen = PAGE_SIZE; 378 clen = PAGE_SIZE;
854 page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM); 379 page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
855 if (unlikely(!page_store)) { 380 if (unlikely(!page_store)) {
@@ -875,8 +400,6 @@ static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
875 pr_info("Error allocating memory for compressed " 400 pr_info("Error allocating memory for compressed "
876 "page: %u, size=%zu\n", index, clen); 401 "page: %u, size=%zu\n", index, clen);
877 rzs_stat64_inc(rzs, &rzs->stats.failed_writes); 402 rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
878 if (rzs->backing_swap)
879 fwd_write_request = 1;
880 goto out; 403 goto out;
881 } 404 }
882 405
@@ -914,31 +437,6 @@ memstore:
914 return 0; 437 return 0;
915 438
916out: 439out:
917 if (fwd_write_request) {
918 rzs_stat64_inc(rzs, &rzs->stats.bdev_num_writes);
919 bio->bi_bdev = rzs->backing_swap;
920#if 0
921 /*
922 * TODO: We currently have linear mapping of ramzswap and
923 * backing swap sectors. This is not desired since we want
924 * to optimize writes to backing swap to minimize disk seeks
925 * or have effective wear leveling (for SSDs). Also, a
926 * non-linear mapping is required to implement compressed
927 * on-disk swapping.
928 */
929 bio->bi_sector = get_backing_swap_page()
930 << SECTORS_PER_PAGE_SHIFT;
931#endif
932 /*
933 * In case backing swap is a file, find the right offset within
934 * the file corresponding to logical position 'index'. For block
935 * device, this is a nop.
936 */
937 bio->bi_sector = map_backing_swap_page(rzs, index)
938 << SECTORS_PER_PAGE_SHIFT;
939 return 1;
940 }
941
942 bio_io_error(bio); 440 bio_io_error(bio);
943 return 0; 441 return 0;
944} 442}
@@ -996,19 +494,11 @@ static int ramzswap_make_request(struct request_queue *queue, struct bio *bio)
996 494
997static void reset_device(struct ramzswap *rzs) 495static void reset_device(struct ramzswap *rzs)
998{ 496{
999 int is_backing_blkdev = 0; 497 size_t index;
1000 size_t index, num_pages;
1001 unsigned entries_per_page;
1002 unsigned long num_table_pages, entry = 0;
1003 498
1004 /* Do not accept any new I/O request */ 499 /* Do not accept any new I/O request */
1005 rzs->init_done = 0; 500 rzs->init_done = 0;
1006 501
1007 if (rzs->backing_swap && !rzs->num_extents)
1008 is_backing_blkdev = 1;
1009
1010 num_pages = rzs->disksize >> PAGE_SHIFT;
1011
1012 /* Free various per-device buffers */ 502 /* Free various per-device buffers */
1013 kfree(rzs->compress_workmem); 503 kfree(rzs->compress_workmem);
1014 free_pages((unsigned long)rzs->compress_buffer, 1); 504 free_pages((unsigned long)rzs->compress_buffer, 1);
@@ -1017,7 +507,7 @@ static void reset_device(struct ramzswap *rzs)
1017 rzs->compress_buffer = NULL; 507 rzs->compress_buffer = NULL;
1018 508
1019 /* Free all pages that are still in this ramzswap device */ 509 /* Free all pages that are still in this ramzswap device */
1020 for (index = 0; index < num_pages; index++) { 510 for (index = 0; index < rzs->disksize >> PAGE_SHIFT; index++) {
1021 struct page *page; 511 struct page *page;
1022 u16 offset; 512 u16 offset;
1023 513
@@ -1033,51 +523,16 @@ static void reset_device(struct ramzswap *rzs)
1033 xv_free(rzs->mem_pool, page, offset); 523 xv_free(rzs->mem_pool, page, offset);
1034 } 524 }
1035 525
1036 entries_per_page = PAGE_SIZE / sizeof(*rzs->table);
1037 num_table_pages = DIV_ROUND_UP(num_pages * sizeof(*rzs->table),
1038 PAGE_SIZE);
1039 /*
1040 * Set page->mapping to NULL for every table page.
1041 * Otherwise, we will hit bad_page() during free.
1042 */
1043 while (rzs->num_extents && num_table_pages--) {
1044 struct page *page;
1045 page = vmalloc_to_page(&rzs->table[entry]);
1046 page->mapping = NULL;
1047 entry += entries_per_page;
1048 }
1049 vfree(rzs->table); 526 vfree(rzs->table);
1050 rzs->table = NULL; 527 rzs->table = NULL;
1051 528
1052 xv_destroy_pool(rzs->mem_pool); 529 xv_destroy_pool(rzs->mem_pool);
1053 rzs->mem_pool = NULL; 530 rzs->mem_pool = NULL;
1054 531
1055 /* Free all swap extent pages */
1056 while (!list_empty(&rzs->backing_swap_extent_list)) {
1057 struct page *page;
1058 struct list_head *entry;
1059 entry = rzs->backing_swap_extent_list.next;
1060 page = list_entry(entry, struct page, lru);
1061 list_del(entry);
1062 __free_page(page);
1063 }
1064 INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
1065 rzs->num_extents = 0;
1066
1067 /* Close backing swap device, if present */
1068 if (rzs->backing_swap) {
1069 if (is_backing_blkdev)
1070 bd_release(rzs->backing_swap);
1071 filp_close(rzs->swap_file, NULL);
1072 rzs->backing_swap = NULL;
1073 memset(rzs->backing_swap_name, 0, MAX_SWAP_NAME_LEN);
1074 }
1075
1076 /* Reset stats */ 532 /* Reset stats */
1077 memset(&rzs->stats, 0, sizeof(rzs->stats)); 533 memset(&rzs->stats, 0, sizeof(rzs->stats));
1078 534
1079 rzs->disksize = 0; 535 rzs->disksize = 0;
1080 rzs->memlimit = 0;
1081} 536}
1082 537
1083static int ramzswap_ioctl_init_device(struct ramzswap *rzs) 538static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
@@ -1092,14 +547,7 @@ static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
1092 return -EBUSY; 547 return -EBUSY;
1093 } 548 }
1094 549
1095 ret = setup_backing_swap(rzs); 550 ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);
1096 if (ret)
1097 goto fail;
1098
1099 if (rzs->backing_swap)
1100 ramzswap_set_memlimit(rzs, totalram_pages << PAGE_SHIFT);
1101 else
1102 ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);
1103 551
1104 rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); 552 rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
1105 if (!rzs->compress_workmem) { 553 if (!rzs->compress_workmem) {
@@ -1126,8 +574,6 @@ static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
1126 } 574 }
1127 memset(rzs->table, 0, num_pages * sizeof(*rzs->table)); 575 memset(rzs->table, 0, num_pages * sizeof(*rzs->table));
1128 576
1129 map_backing_swap_extents(rzs);
1130
1131 page = alloc_page(__GFP_ZERO); 577 page = alloc_page(__GFP_ZERO);
1132 if (!page) { 578 if (!page) {
1133 pr_err("Error allocating swap header page\n"); 579 pr_err("Error allocating swap header page\n");
@@ -1138,23 +584,13 @@ static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
1138 rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED); 584 rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED);
1139 585
1140 swap_header = kmap(page); 586 swap_header = kmap(page);
1141 ret = setup_swap_header(rzs, swap_header); 587 setup_swap_header(rzs, swap_header);
1142 kunmap(page); 588 kunmap(page);
1143 if (ret) {
1144 pr_err("Error setting swap header\n");
1145 goto fail;
1146 }
1147 589
1148 set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT); 590 set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);
1149 591
1150 /* 592 /* ramzswap devices sort of resembles non-rotational disks */
1151 * We have ident mapping of sectors for ramzswap and 593 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);
1152 * and the backing swap device. So, this queue flag
1153 * should be according to backing dev.
1154 */
1155 if (!rzs->backing_swap ||
1156 blk_queue_nonrot(rzs->backing_swap->bd_disk->queue))
1157 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);
1158 594
1159 rzs->mem_pool = xv_create_pool(); 595 rzs->mem_pool = xv_create_pool();
1160 if (!rzs->mem_pool) { 596 if (!rzs->mem_pool) {
@@ -1163,17 +599,6 @@ static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
1163 goto fail; 599 goto fail;
1164 } 600 }
1165 601
1166 /*
1167 * Pages that compress to size greater than this are forwarded
1168 * to physical swap disk (if backing dev is provided)
1169 * TODO: make this configurable
1170 */
1171 if (rzs->backing_swap)
1172 max_zpage_size = max_zpage_size_bdev;
1173 else
1174 max_zpage_size = max_zpage_size_nobdev;
1175 pr_debug("Max compressed page size: %u bytes\n", max_zpage_size);
1176
1177 rzs->init_done = 1; 602 rzs->init_done = 1;
1178 603
1179 pr_debug("Initialization done!\n"); 604 pr_debug("Initialization done!\n");
@@ -1198,7 +623,7 @@ static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
1198 unsigned int cmd, unsigned long arg) 623 unsigned int cmd, unsigned long arg)
1199{ 624{
1200 int ret = 0; 625 int ret = 0;
1201 size_t disksize_kb, memlimit_kb; 626 size_t disksize_kb;
1202 627
1203 struct ramzswap *rzs = bdev->bd_disk->private_data; 628 struct ramzswap *rzs = bdev->bd_disk->private_data;
1204 629
@@ -1217,36 +642,6 @@ static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
1217 pr_info("Disk size set to %zu kB\n", disksize_kb); 642 pr_info("Disk size set to %zu kB\n", disksize_kb);
1218 break; 643 break;
1219 644
1220 case RZSIO_SET_MEMLIMIT_KB:
1221 if (rzs->init_done) {
1222 /* TODO: allow changing memlimit */
1223 ret = -EBUSY;
1224 goto out;
1225 }
1226 if (copy_from_user(&memlimit_kb, (void *)arg,
1227 _IOC_SIZE(cmd))) {
1228 ret = -EFAULT;
1229 goto out;
1230 }
1231 rzs->memlimit = memlimit_kb << 10;
1232 pr_info("Memory limit set to %zu kB\n", memlimit_kb);
1233 break;
1234
1235 case RZSIO_SET_BACKING_SWAP:
1236 if (rzs->init_done) {
1237 ret = -EBUSY;
1238 goto out;
1239 }
1240
1241 if (copy_from_user(&rzs->backing_swap_name, (void *)arg,
1242 _IOC_SIZE(cmd))) {
1243 ret = -EFAULT;
1244 goto out;
1245 }
1246 rzs->backing_swap_name[MAX_SWAP_NAME_LEN - 1] = '\0';
1247 pr_info("Backing swap set to %s\n", rzs->backing_swap_name);
1248 break;
1249
1250 case RZSIO_GET_STATS: 645 case RZSIO_GET_STATS:
1251 { 646 {
1252 struct ramzswap_ioctl_stats *stats; 647 struct ramzswap_ioctl_stats *stats;
@@ -1306,7 +701,6 @@ static int create_device(struct ramzswap *rzs, int device_id)
1306 701
1307 mutex_init(&rzs->lock); 702 mutex_init(&rzs->lock);
1308 spin_lock_init(&rzs->stat64_lock); 703 spin_lock_init(&rzs->stat64_lock);
1309 INIT_LIST_HEAD(&rzs->backing_swap_extent_list);
1310 704
1311 rzs->queue = blk_alloc_queue(GFP_KERNEL); 705 rzs->queue = blk_alloc_queue(GFP_KERNEL);
1312 if (!rzs->queue) { 706 if (!rzs->queue) {
@@ -1336,10 +730,7 @@ static int create_device(struct ramzswap *rzs, int device_id)
1336 rzs->disk->private_data = rzs; 730 rzs->disk->private_data = rzs;
1337 snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id); 731 snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);
1338 732
1339 /* 733 /* Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl */
1340 * Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl
1341 * or set equal to backing swap device (if provided)
1342 */
1343 set_capacity(rzs->disk, 0); 734 set_capacity(rzs->disk, 0);
1344 735
1345 blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE); 736 blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
diff --git a/drivers/staging/ramzswap/ramzswap_drv.h b/drivers/staging/ramzswap/ramzswap_drv.h
index c7e0e767c22..63c30420df2 100644
--- a/drivers/staging/ramzswap/ramzswap_drv.h
+++ b/drivers/staging/ramzswap/ramzswap_drv.h
@@ -31,8 +31,7 @@ static const unsigned max_num_devices = 32;
31 * Stored at beginning of each compressed object. 31 * Stored at beginning of each compressed object.
32 * 32 *
33 * It stores back-reference to table entry which points to this 33 * It stores back-reference to table entry which points to this
34 * object. This is required to support memory defragmentation or 34 * object. This is required to support memory defragmentation.
35 * migrating compressed pages to backing swap disk.
36 */ 35 */
37struct zobj_header { 36struct zobj_header {
38#if 0 37#if 0
@@ -44,27 +43,17 @@ struct zobj_header {
44 43
45/* Default ramzswap disk size: 25% of total RAM */ 44/* Default ramzswap disk size: 25% of total RAM */
46static const unsigned default_disksize_perc_ram = 25; 45static const unsigned default_disksize_perc_ram = 25;
47static const unsigned default_memlimit_perc_ram = 15;
48 46
49/* 47/*
50 * Max compressed page size when backing device is provided.
51 * Pages that compress to size greater than this are sent to
52 * physical swap disk.
53 */
54static const unsigned max_zpage_size_bdev = PAGE_SIZE / 2;
55
56/*
57 * Max compressed page size when there is no backing dev.
58 * Pages that compress to size greater than this are stored 48 * Pages that compress to size greater than this are stored
59 * uncompressed in memory. 49 * uncompressed in memory.
60 */ 50 */
61static const unsigned max_zpage_size_nobdev = PAGE_SIZE / 4 * 3; 51static const unsigned max_zpage_size = PAGE_SIZE / 4 * 3;
62 52
63/* 53/*
64 * NOTE: max_zpage_size_{bdev,nobdev} sizes must be 54 * NOTE: max_zpage_size must be less than or equal to:
65 * less than or equal to:
66 * XV_MAX_ALLOC_SIZE - sizeof(struct zobj_header) 55 * XV_MAX_ALLOC_SIZE - sizeof(struct zobj_header)
67 * since otherwise xv_malloc would always return failure. 56 * otherwise, xv_malloc() would always return failure.
68 */ 57 */
69 58
70/*-- End of configurable params */ 59/*-- End of configurable params */
@@ -98,15 +87,6 @@ struct table {
98 u8 flags; 87 u8 flags;
99} __attribute__((aligned(4))); 88} __attribute__((aligned(4)));
100 89
101/*
102 * Swap extent information in case backing swap is a regular
103 * file. These extent entries must fit exactly in a page.
104 */
105struct ramzswap_backing_extent {
106 pgoff_t phy_pagenum;
107 pgoff_t num_pages;
108} __attribute__((aligned(4)));
109
110struct ramzswap_stats { 90struct ramzswap_stats {
111 /* basic stats */ 91 /* basic stats */
112 size_t compr_size; /* compressed size of pages stored - 92 size_t compr_size; /* compressed size of pages stored -
@@ -123,8 +103,6 @@ struct ramzswap_stats {
123 u32 pages_stored; /* no. of pages currently stored */ 103 u32 pages_stored; /* no. of pages currently stored */
124 u32 good_compress; /* % of pages with compression ratio<=50% */ 104 u32 good_compress; /* % of pages with compression ratio<=50% */
125 u32 pages_expand; /* % of incompressible pages */ 105 u32 pages_expand; /* % of incompressible pages */
126 u64 bdev_num_reads; /* no. of reads on backing dev */
127 u64 bdev_num_writes; /* no. of writes on backing dev */
128#endif 106#endif
129}; 107};
130 108
@@ -139,11 +117,6 @@ struct ramzswap {
139 struct gendisk *disk; 117 struct gendisk *disk;
140 int init_done; 118 int init_done;
141 /* 119 /*
142 * This is limit on compressed data size (stats.compr_size)
143 * Its applicable only when backing swap device is present.
144 */
145 size_t memlimit; /* bytes */
146 /*
147 * This is limit on amount of *uncompressed* worth of data 120 * This is limit on amount of *uncompressed* worth of data
148 * we can hold. When backing swap device is provided, it is 121 * we can hold. When backing swap device is provided, it is
149 * set equal to device size. 122 * set equal to device size.
@@ -151,14 +124,6 @@ struct ramzswap {
151 size_t disksize; /* bytes */ 124 size_t disksize; /* bytes */
152 125
153 struct ramzswap_stats stats; 126 struct ramzswap_stats stats;
154
155 /* backing swap device info */
156 struct ramzswap_backing_extent *curr_extent;
157 struct list_head backing_swap_extent_list;
158 unsigned long num_extents;
159 char backing_swap_name[MAX_SWAP_NAME_LEN];
160 struct block_device *backing_swap;
161 struct file *swap_file;
162}; 127};
163 128
164/*-- */ 129/*-- */
@@ -182,13 +147,6 @@ static void rzs_stat64_inc(struct ramzswap *rzs, u64 *v)
182 spin_unlock(&rzs->stat64_lock); 147 spin_unlock(&rzs->stat64_lock);
183} 148}
184 149
185static void rzs_stat64_dec(struct ramzswap *rzs, u64 *v)
186{
187 spin_lock(&rzs->stat64_lock);
188 *v = *v - 1;
189 spin_unlock(&rzs->stat64_lock);
190}
191
192static u64 rzs_stat64_read(struct ramzswap *rzs, u64 *v) 150static u64 rzs_stat64_read(struct ramzswap *rzs, u64 *v)
193{ 151{
194 u64 val; 152 u64 val;
@@ -203,7 +161,6 @@ static u64 rzs_stat64_read(struct ramzswap *rzs, u64 *v)
203#define rzs_stat_inc(v) 161#define rzs_stat_inc(v)
204#define rzs_stat_dec(v) 162#define rzs_stat_dec(v)
205#define rzs_stat64_inc(r, v) 163#define rzs_stat64_inc(r, v)
206#define rzs_stat64_dec(r, v)
207#define rzs_stat64_read(r, v) 164#define rzs_stat64_read(r, v)
208#endif /* CONFIG_RAMZSWAP_STATS */ 165#endif /* CONFIG_RAMZSWAP_STATS */
209 166
diff --git a/drivers/staging/ramzswap/ramzswap_ioctl.h b/drivers/staging/ramzswap/ramzswap_ioctl.h
index d26076d41bd..db94bcb4296 100644
--- a/drivers/staging/ramzswap/ramzswap_ioctl.h
+++ b/drivers/staging/ramzswap/ramzswap_ioctl.h
@@ -15,11 +15,7 @@
15#ifndef _RAMZSWAP_IOCTL_H_ 15#ifndef _RAMZSWAP_IOCTL_H_
16#define _RAMZSWAP_IOCTL_H_ 16#define _RAMZSWAP_IOCTL_H_
17 17
18#define MAX_SWAP_NAME_LEN 128
19
20struct ramzswap_ioctl_stats { 18struct ramzswap_ioctl_stats {
21 char backing_swap_name[MAX_SWAP_NAME_LEN];
22 u64 memlimit; /* only applicable if backing swap present */
23 u64 disksize; /* user specified or equal to backing swap 19 u64 disksize; /* user specified or equal to backing swap
24 * size (if present) */ 20 * size (if present) */
25 u64 num_reads; /* failed + successful */ 21 u64 num_reads; /* failed + successful */
@@ -36,15 +32,11 @@ struct ramzswap_ioctl_stats {
36 u64 orig_data_size; 32 u64 orig_data_size;
37 u64 compr_data_size; 33 u64 compr_data_size;
38 u64 mem_used_total; 34 u64 mem_used_total;
39 u64 bdev_num_reads; /* no. of reads on backing dev */
40 u64 bdev_num_writes; /* no. of writes on backing dev */
41} __attribute__ ((packed, aligned(4))); 35} __attribute__ ((packed, aligned(4)));
42 36
43#define RZSIO_SET_DISKSIZE_KB _IOW('z', 0, size_t) 37#define RZSIO_SET_DISKSIZE_KB _IOW('z', 0, size_t)
44#define RZSIO_SET_MEMLIMIT_KB _IOW('z', 1, size_t) 38#define RZSIO_GET_STATS _IOR('z', 1, struct ramzswap_ioctl_stats)
45#define RZSIO_SET_BACKING_SWAP _IOW('z', 2, unsigned char[MAX_SWAP_NAME_LEN]) 39#define RZSIO_INIT _IO('z', 2)
46#define RZSIO_GET_STATS _IOR('z', 3, struct ramzswap_ioctl_stats) 40#define RZSIO_RESET _IO('z', 3)
47#define RZSIO_INIT _IO('z', 4)
48#define RZSIO_RESET _IO('z', 5)
49 41
50#endif 42#endif