// drivers/gpu/nvgpu/os/linux/swap.h
#include <linux/scatterlist.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/list.h>
//#include <nvgpu/bug.h>

// Callback for completion of the I/O chain
static void complete_swap_io(struct bio *bio) {
  struct nvgpu_mapped_buf *m = bio->bi_private;
  bio_put(bio);
  complete(&m->os_priv.swap_io_done);
}

// Queue a chain of bios to copy an SGT to or from disk (direction set by op)
// TODO: Cache bdev
// TODO: Don't hardcode sector 0
// TODO: Figure out if submit_bio() can fail, and what to do then
int copy(struct sg_table *sgt, int op, struct nvgpu_mapped_buf *m) {
  unsigned int i;
  struct scatterlist *sg;
  struct bio *bio;
  int err = 0;
  int sg_cnt = sgt->nents;
  sector_t sector = 0; // XXX: For testing
  // Find and open the block device
  struct block_device *bdev = blkdev_get_by_path("/dev/nvme0n1", FMODE_READ | FMODE_WRITE, copy);
  if (unlikely(IS_ERR(bdev))) {
    printk(KERN_WARNING "Unabled to find `nvme0`, err %ld!\n", PTR_ERR(bdev));
    return -ENODEV;
  }
  // Reset the .done variable in the completion
  reinit_completion(&m->os_priv.swap_io_done);
  // bio_alloc() will never fail when allocating <= BIO_MAX_PAGES
  bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
  bio->bi_bdev = bdev; // Switch to bio_set_dev(bdev) in newer kernels
  bio->bi_iter.bi_sector = sector;
  bio_set_op_attrs(bio, op, REQ_SYNC); // REQ_SYNC is identical to WRITE_ODIRECT
  bio->bi_private = m;
  bio->bi_end_io = complete_swap_io;
  // Copy the scatter-gather table (sgt) into a block I/O vector (bio vec)
  // bio_chain() approach borrowed from drivers/nvme/target/io-cmd.c:nvmet_execute_rw()
  for_each_sg(sgt->sgl, sg, sgt->nents, i) {
    // On most iterations this inner loop body won't run at all; it only
    // executes when the bio fills up and bio_add_page() cannot add the
    // full length of an SGL entry.
    while (bio_add_page(bio, sg_page(sg), sg_dma_len(sg), sg->offset) != sg_dma_len(sg)) {
      // Uh oh! We ran out of space in the bio. Allocate a new one and chain it...
      struct bio *prev = bio;
      bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
      bio->bi_bdev = bdev; // Switch to bio_set_dev(bdev) in newer kernels
      bio->bi_iter.bi_sector = sector;
      bio_set_op_attrs(bio, op, op == REQ_OP_WRITE ? WRITE_ODIRECT : 0);
      bio_chain(bio, prev);
      // Get the I/O started
      submit_bio(prev);
      // No need to call bio_put() as that's automatically managed for chained bios
    }
    sector += sg_dma_len(sg) >> 9;
    sg_cnt--;
  }

  // Async submit. Caller should wait_for_completion_io(&m->os_priv.swap_io_done);
  submit_bio(bio);

  // Release our block device handle
  blkdev_put(bdev, FMODE_WRITE | FMODE_READ); // Is this safe while chained bios may still be in flight?
  return err;
}
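
// Illustrative sketch (not part of the driver) of the caller-side contract
// noted above: copy() submits the bio chain asynchronously, so the caller
// must block on the per-buffer completion before touching the pages again.
// Assumes the caller already holds a valid nvgpu_mapped_buf whose
// os_priv.swap_io_done was set up with init_completion(), plus an sg_table
// describing the buffer's backing pages.
#if 0
static int example_swap_out_and_wait(struct sg_table *sgt,
                                     struct nvgpu_mapped_buf *m) {
  // Equivalent to copy_out(sgt, m), defined at the bottom of this file
  int err = copy(sgt, REQ_OP_WRITE, m);
  if (err)
    return err;
  // Block until complete_swap_io() fires on the last bio in the chain
  wait_for_completion_io(&m->os_priv.swap_io_done);
  return 0;
}
#endif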

// Patterned off how __nvgpu_vm_find_mapped_buf_reverse() works in vm.c
// Needs struct nvgpu_rbtree_node *node, struct nvgpu_rbtree_node *root,
// and struct nvgpu_mapped_buf *m.
// Steps until end of rbtree OR !m
#define for_each_buffer(node, root, m) \
  for (nvgpu_rbtree_enum_start(0, &node, root); \
       node && (uintptr_t)(m = mapped_buffer_from_rbtree_node(node)); \
       nvgpu_rbtree_enum_next(&node, node))

// New, fast replacement for searching with the above macro to find a match
struct nvgpu_mapped_buf* dmabuf_to_mapped_buf(struct dma_buf *dmabuf) {
  struct list_head *nvmap_priv = nvmap_get_priv_list(dmabuf);
  struct nvgpu_mapped_buf *mapped_buffer;
  struct nvgpu_mapped_buf_priv *priv;

  if (IS_ERR(nvmap_priv))
    return ERR_PTR(-EOPNOTSUPP);

  priv = list_first_entry_or_null(nvmap_priv, struct nvgpu_mapped_buf_priv, nvmap_priv_entry);
  if (unlikely(!priv)) {
    printk(KERN_ERR "nvgpu: State tracking error for fast reverse lookups. Have unattached dmabuf!");
    return ERR_PTR(-ENOTRECOVERABLE);
  }

  mapped_buffer = container_of(priv, struct nvgpu_mapped_buf, os_priv);
  if (unlikely(mapped_buffer->os_priv.dmabuf != dmabuf)) {
    printk(KERN_ERR "nvgpu: dmabuf_to_mapped_buf mapping inconsistent! BUG!\n");
    return ERR_PTR(-ENOTRECOVERABLE);
  }
  if (!list_is_singular(&priv->nvmap_priv_entry)) {
    printk(KERN_WARNING "nvgpu: Requesting paging on memory with multiple mappings! Aborting...\n");
    return ERR_PTR(-EOPNOTSUPP);
  }
  return mapped_buffer;
}
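
// Illustrative sketch (not part of the driver) of how a caller is expected
// to consume dmabuf_to_mapped_buf(): the return value is either a valid
// mapped buffer or an ERR_PTR-encoded errno, so check it with IS_ERR()
// before dereferencing. The dma_buf pointer is assumed to come from the
// caller's context.
#if 0
static int example_reverse_lookup(struct dma_buf *dmabuf) {
  struct nvgpu_mapped_buf *m = dmabuf_to_mapped_buf(dmabuf);
  if (IS_ERR(m))
    return PTR_ERR(m); // -EOPNOTSUPP or -ENOTRECOVERABLE from the lookup
  // ...m can now be handed to copy_out()/copy_in() below...
  return 0;
}
#endif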

int copy_all(struct vm_gk20a *vm) {
  struct nvgpu_rbtree_node *node;
  struct nvgpu_mapped_buf *m;

  for_each_buffer(node, vm->mapped_buffers, m) {
    // TODO
    continue;
  }
  return 0;
}

// Thin wrappers around copy(): copy_out() writes a buffer's backing pages
// out to disk, and copy_in() reads them back.
int copy_out(struct sg_table *sgt, struct nvgpu_mapped_buf *m) {
  return copy(sgt, REQ_OP_WRITE, m);
}

int copy_in(struct sg_table *sgt, struct nvgpu_mapped_buf *m) {
  return copy(sgt, REQ_OP_READ, m);
}