summaryrefslogtreecommitdiff
path: root/drivers/block/brd.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/brd.c')
-rw-r--r--drivers/block/brd.c356
1 files changed, 153 insertions, 203 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 970bd6ff38c4..b1be6c510372 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -29,10 +29,7 @@
/*
* Each block ramdisk device has a xarray brd_pages of pages that stores
- * the pages containing the block device's contents. A brd page's ->index is
- * its offset in PAGE_SIZE units. This is similar to, but in no way connected
- * with, the kernel's pagecache or buffer cache (which sit above our block
- * device).
+ * the pages containing the block device's contents.
*/
struct brd_device {
int brd_number;
@@ -51,53 +48,39 @@ struct brd_device {
*/
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
{
- pgoff_t idx;
- struct page *page;
-
- idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
- page = xa_load(&brd->brd_pages, idx);
-
- BUG_ON(page && page->index != idx);
-
- return page;
+ return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
}
/*
* Insert a new page for a given sector, if one does not already exist.
*/
-static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
+static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
+ blk_opf_t opf)
+ __releases(rcu)
+ __acquires(rcu)
{
- pgoff_t idx;
- struct page *page, *cur;
- int ret = 0;
-
- page = brd_lookup_page(brd, sector);
- if (page)
- return 0;
+ gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
+ struct page *page, *ret;
+ rcu_read_unlock();
page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
+ rcu_read_lock();
if (!page)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
xa_lock(&brd->brd_pages);
-
- idx = sector >> PAGE_SECTORS_SHIFT;
- page->index = idx;
-
- cur = __xa_cmpxchg(&brd->brd_pages, idx, NULL, page, gfp);
-
- if (unlikely(cur)) {
+ ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
+ page, gfp);
+ if (ret) {
+ xa_unlock(&brd->brd_pages);
__free_page(page);
- ret = xa_err(cur);
- if (!ret && (cur->index != idx))
- ret = -EIO;
- } else {
- brd->brd_nr_pages++;
+ if (xa_is_err(ret))
+ return ERR_PTR(xa_err(ret));
+ return ret;
}
-
+ brd->brd_nr_pages++;
xa_unlock(&brd->brd_pages);
-
- return ret;
+ return page;
}
/*
@@ -118,156 +101,97 @@ static void brd_free_pages(struct brd_device *brd)
}
/*
- * copy_to_brd_setup must be called before copy_to_brd. It may sleep.
- */
-static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
- gfp_t gfp)
-{
- unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
- size_t copy;
- int ret;
-
- copy = min_t(size_t, n, PAGE_SIZE - offset);
- ret = brd_insert_page(brd, sector, gfp);
- if (ret)
- return ret;
- if (copy < n) {
- sector += copy >> SECTOR_SHIFT;
- ret = brd_insert_page(brd, sector, gfp);
- }
- return ret;
-}
-
-/*
- * Copy n bytes from src to the brd starting at sector. Does not sleep.
+ * Process a single segment. The segment is capped to not cross page boundaries
+ * in both the bio and the brd backing memory.
*/
-static void copy_to_brd(struct brd_device *brd, const void *src,
- sector_t sector, size_t n)
+static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
{
+ struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
+ sector_t sector = bio->bi_iter.bi_sector;
+ u32 offset = (sector & (PAGE_SECTORS - 1)) << SECTOR_SHIFT;
+ blk_opf_t opf = bio->bi_opf;
struct page *page;
- void *dst;
- unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
- size_t copy;
+ void *kaddr;
+
+ bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
- copy = min_t(size_t, n, PAGE_SIZE - offset);
+ rcu_read_lock();
page = brd_lookup_page(brd, sector);
- BUG_ON(!page);
-
- dst = kmap_atomic(page);
- memcpy(dst + offset, src, copy);
- kunmap_atomic(dst);
-
- if (copy < n) {
- src += copy;
- sector += copy >> SECTOR_SHIFT;
- copy = n - copy;
- page = brd_lookup_page(brd, sector);
- BUG_ON(!page);
-
- dst = kmap_atomic(page);
- memcpy(dst, src, copy);
- kunmap_atomic(dst);
+ if (!page && op_is_write(opf)) {
+ page = brd_insert_page(brd, sector, opf);
+ if (IS_ERR(page))
+ goto out_error;
+ }
+
+ kaddr = bvec_kmap_local(&bv);
+ if (op_is_write(opf)) {
+ memcpy_to_page(page, offset, kaddr, bv.bv_len);
+ } else {
+ if (page)
+ memcpy_from_page(kaddr, page, offset, bv.bv_len);
+ else
+ memset(kaddr, 0, bv.bv_len);
}
+ kunmap_local(kaddr);
+ rcu_read_unlock();
+
+ bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
+ return true;
+
+out_error:
+ rcu_read_unlock();
+ if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
+ return false;
}
-/*
- * Copy n bytes to dst from the brd starting at sector. Does not sleep.
- */
-static void copy_from_brd(void *dst, struct brd_device *brd,
- sector_t sector, size_t n)
+static void brd_free_one_page(struct rcu_head *head)
{
- struct page *page;
- void *src;
- unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
- size_t copy;
+ struct page *page = container_of(head, struct page, rcu_head);
- copy = min_t(size_t, n, PAGE_SIZE - offset);
- page = brd_lookup_page(brd, sector);
- if (page) {
- src = kmap_atomic(page);
- memcpy(dst, src + offset, copy);
- kunmap_atomic(src);
- } else
- memset(dst, 0, copy);
-
- if (copy < n) {
- dst += copy;
- sector += copy >> SECTOR_SHIFT;
- copy = n - copy;
- page = brd_lookup_page(brd, sector);
- if (page) {
- src = kmap_atomic(page);
- memcpy(dst, src, copy);
- kunmap_atomic(src);
- } else
- memset(dst, 0, copy);
- }
+ __free_page(page);
}
-/*
- * Process a single bvec of a bio.
- */
-static int brd_do_bvec(struct brd_device *brd, struct page *page,
- unsigned int len, unsigned int off, blk_opf_t opf,
- sector_t sector)
+static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
{
- void *mem;
- int err = 0;
-
- if (op_is_write(opf)) {
- /*
- * Must use NOIO because we don't want to recurse back into the
- * block or filesystem layers from page reclaim.
- */
- gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
+ sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
+ sector_t aligned_end = round_down(
+ sector + (size >> SECTOR_SHIFT), PAGE_SECTORS);
+ struct page *page;
- err = copy_to_brd_setup(brd, sector, len, gfp);
- if (err)
- goto out;
- }
+ if (aligned_end <= aligned_sector)
+ return;
- mem = kmap_atomic(page);
- if (!op_is_write(opf)) {
- copy_from_brd(mem + off, brd, sector, len);
- flush_dcache_page(page);
- } else {
- flush_dcache_page(page);
- copy_to_brd(brd, mem + off, sector, len);
+ xa_lock(&brd->brd_pages);
+ while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
+ page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
+ if (page) {
+ call_rcu(&page->rcu_head, brd_free_one_page);
+ brd->brd_nr_pages--;
+ }
+ aligned_sector += PAGE_SECTORS;
}
- kunmap_atomic(mem);
-
-out:
- return err;
+ xa_unlock(&brd->brd_pages);
}
static void brd_submit_bio(struct bio *bio)
{
struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
- sector_t sector = bio->bi_iter.bi_sector;
- struct bio_vec bvec;
- struct bvec_iter iter;
-
- bio_for_each_segment(bvec, bio, iter) {
- unsigned int len = bvec.bv_len;
- int err;
-
- /* Don't support un-aligned buffer */
- WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
- (len & (SECTOR_SIZE - 1)));
-
- err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
- bio->bi_opf, sector);
- if (err) {
- if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
- bio_wouldblock_error(bio);
- return;
- }
- bio_io_error(bio);
- return;
- }
- sector += len >> SECTOR_SHIFT;
+
+ if (unlikely(op_is_discard(bio->bi_opf))) {
+ brd_do_discard(brd, bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size);
+ bio_endio(bio);
+ return;
}
+ do {
+ if (!brd_rw_bvec(brd, bio))
+ return;
+ } while (bio->bi_iter.bi_size);
+
bio_endio(bio);
}
@@ -291,6 +215,7 @@ static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
+MODULE_DESCRIPTION("Ram backed block device driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
@@ -310,23 +235,65 @@ __setup("ramdisk_size=", ramdisk_size);
* (should share code eventually).
*/
static LIST_HEAD(brd_devices);
+static DEFINE_MUTEX(brd_devices_mutex);
static struct dentry *brd_debugfs_dir;
+static struct brd_device *brd_find_or_alloc_device(int i)
+{
+ struct brd_device *brd;
+
+ mutex_lock(&brd_devices_mutex);
+ list_for_each_entry(brd, &brd_devices, brd_list) {
+ if (brd->brd_number == i) {
+ mutex_unlock(&brd_devices_mutex);
+ return ERR_PTR(-EEXIST);
+ }
+ }
+
+ brd = kzalloc(sizeof(*brd), GFP_KERNEL);
+ if (!brd) {
+ mutex_unlock(&brd_devices_mutex);
+ return ERR_PTR(-ENOMEM);
+ }
+ brd->brd_number = i;
+ list_add_tail(&brd->brd_list, &brd_devices);
+ mutex_unlock(&brd_devices_mutex);
+ return brd;
+}
+
+static void brd_free_device(struct brd_device *brd)
+{
+ mutex_lock(&brd_devices_mutex);
+ list_del(&brd->brd_list);
+ mutex_unlock(&brd_devices_mutex);
+ kfree(brd);
+}
+
static int brd_alloc(int i)
{
struct brd_device *brd;
struct gendisk *disk;
char buf[DISK_NAME_LEN];
int err = -ENOMEM;
-
- list_for_each_entry(brd, &brd_devices, brd_list)
- if (brd->brd_number == i)
- return -EEXIST;
- brd = kzalloc(sizeof(*brd), GFP_KERNEL);
- if (!brd)
- return -ENOMEM;
- brd->brd_number = i;
- list_add_tail(&brd->brd_list, &brd_devices);
+ struct queue_limits lim = {
+ /*
+ * This is so fdisk will align partitions on 4k, because of
+ * direct_access API needing 4k alignment, returning a PFN
+ * (This is only a problem on very small devices <= 4M,
+ * otherwise fdisk will align on 1M. Regardless this call
+ * is harmless)
+ */
+ .physical_block_size = PAGE_SIZE,
+ .max_hw_discard_sectors = UINT_MAX,
+ .max_discard_segments = 1,
+ .discard_granularity = PAGE_SIZE,
+ .features = BLK_FEAT_SYNCHRONOUS |
+ BLK_FEAT_NOWAIT,
+ };
+
+ brd = brd_find_or_alloc_device(i);
+ if (IS_ERR(brd))
+ return PTR_ERR(brd);
xa_init(&brd->brd_pages);
@@ -335,10 +302,11 @@ static int brd_alloc(int i)
debugfs_create_u64(buf, 0444, brd_debugfs_dir,
&brd->brd_nr_pages);
- disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!disk)
+ disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(disk)) {
+ err = PTR_ERR(disk);
goto out_free_dev;
-
+ }
disk->major = RAMDISK_MAJOR;
disk->first_minor = i * max_part;
disk->minors = max_part;
@@ -347,19 +315,6 @@ static int brd_alloc(int i)
strscpy(disk->disk_name, buf, DISK_NAME_LEN);
set_capacity(disk, rd_size * 2);
- /*
- * This is so fdisk will align partitions on 4k, because of
- * direct_access API needing 4k alignment, returning a PFN
- * (This is only a problem on very small devices <= 4M,
- * otherwise fdisk will align on 1M. Regardless this call
- * is harmless)
- */
- blk_queue_physical_block_size(disk->queue, PAGE_SIZE);
-
- /* Tell the block layer that this is not a rotational device */
- blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
- blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
@@ -369,8 +324,7 @@ static int brd_alloc(int i)
out_cleanup_disk:
put_disk(disk);
out_free_dev:
- list_del(&brd->brd_list);
- kfree(brd);
+ brd_free_device(brd);
return err;
}
@@ -389,8 +343,7 @@ static void brd_cleanup(void)
del_gendisk(brd->brd_disk);
put_disk(brd->brd_disk);
brd_free_pages(brd);
- list_del(&brd->brd_list);
- kfree(brd);
+ brd_free_device(brd);
}
}
@@ -417,16 +370,6 @@ static int __init brd_init(void)
{
int err, i;
- brd_check_and_reset_par();
-
- brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
-
- for (i = 0; i < rd_nr; i++) {
- err = brd_alloc(i);
- if (err)
- goto out_free;
- }
-
/*
* brd module now has a feature to instantiate underlying device
* structure on-demand, provided that there is an access dev node.
@@ -442,11 +385,18 @@ static int __init brd_init(void)
* dynamically.
*/
+ brd_check_and_reset_par();
+
+ brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
+
if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
err = -EIO;
goto out_free;
}
+ for (i = 0; i < rd_nr; i++)
+ brd_alloc(i);
+
pr_info("brd: module loaded\n");
return 0;