summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/cgroups/blkio-controller.txt27
-rw-r--r--block/blk-cgroup.c4
-rw-r--r--block/blk-core.c40
-rw-r--r--block/blk-ioc.c5
-rw-r--r--block/blk-merge.c3
-rw-r--r--block/cfq-iosched.c112
-rw-r--r--block/genhd.c550
-rw-r--r--block/ioctl.c5
-rw-r--r--drivers/block/drbd/drbd_int.h2
-rw-r--r--drivers/block/drbd/drbd_main.c7
-rw-r--r--drivers/block/drbd/drbd_nl.c103
-rw-r--r--drivers/block/loop.c6
-rw-r--r--drivers/block/pktcdvd.c22
-rw-r--r--drivers/cdrom/cdrom.c56
-rw-r--r--drivers/char/raw.c14
-rw-r--r--drivers/md/dm-table.c20
-rw-r--r--drivers/md/dm.c6
-rw-r--r--drivers/md/md.c16
-rw-r--r--drivers/mtd/devices/block2mtd.c10
-rw-r--r--drivers/s390/block/dasd_genhd.c2
-rw-r--r--drivers/scsi/scsi_lib.c13
-rw-r--r--drivers/scsi/sd.c10
-rw-r--r--drivers/scsi/sr.c174
-rw-r--r--drivers/scsi/sr.h3
-rw-r--r--drivers/scsi/sr_ioctl.c2
-rw-r--r--drivers/usb/gadget/storage_common.c7
-rw-r--r--fs/bio-integrity.c7
-rw-r--r--fs/block_dev.c741
-rw-r--r--fs/btrfs/volumes.c28
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/ext3/super.c12
-rw-r--r--fs/ext4/super.c12
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/jfs/jfs_logmgr.c17
-rw-r--r--fs/logfs/dev_bdev.c7
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nilfs2/super.c8
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/partitions/check.c106
-rw-r--r--fs/reiserfs/journal.c21
-rw-r--r--fs/splice.c43
-rw-r--r--fs/super.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c5
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/cdrom.h6
-rw-r--r--include/linux/fs.h26
-rw-r--r--include/linux/genhd.h45
-rw-r--r--include/scsi/scsi.h1
-rw-r--r--include/trace/events/block.h12
-rw-r--r--kernel/power/swap.c5
-rw-r--r--kernel/trace/blktrace.c37
-rw-r--r--mm/swapfile.c7
53 files changed, 1323 insertions, 1085 deletions
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
index d6da611f8f63..4ed7b5ceeed2 100644
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -89,6 +89,33 @@ Throttling/Upper Limit policy
Limits for writes can be put using blkio.write_bps_device file.
+Hierarchical Cgroups
+====================
+- Currently none of the IO control policy supports hierarhical groups. But
+ cgroup interface does allow creation of hierarhical cgroups and internally
+ IO policies treat them as flat hierarchy.
+
+ So this patch will allow creation of cgroup hierarhcy but at the backend
+ everything will be treated as flat. So if somebody created a hierarchy like
+ as follows.
+
+ root
+ / \
+ test1 test2
+ |
+ test3
+
+ CFQ and throttling will practically treat all groups at same level.
+
+ pivot
+ / | \ \
+ root test1 test2 test3
+
+ Down the line we can implement hierarchical accounting/control support
+ and also introduce a new cgroup file "use_hierarchy" which will control
+ whether cgroup hierarchy is viewed as flat or hierarchical by the policy..
+ This is how memory controller also has implemented the things.
+
Various user visible config options
===================================
CONFIG_BLK_CGROUP
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b1febd0f6d2a..455768a3eb9e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
goto done;
}
- /* Currently we do not support hierarchy deeper than two level (0,1) */
- if (parent != cgroup->top_cgroup)
- return ERR_PTR(-EPERM);
-
blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
if (!blkcg)
return ERR_PTR(-ENOMEM);
diff --git a/block/blk-core.c b/block/blk-core.c
index 4ce953f1b390..2f4002f79a24 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,7 +33,7 @@
#include "blk.h"
-EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io)
return;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
- if (!new_io)
+ if (!new_io) {
+ part = rq->part;
part_stat_inc(cpu, part, merges[rw]);
- else {
+ } else {
+ part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+ if (!hd_struct_try_get(part)) {
+ /*
+ * The partition is already being removed,
+ * the request will be accounted on the disk only
+ *
+ * We take a reference on disk->part0 although that
+ * partition will never be deleted, so we can treat
+ * it as any other partition.
+ */
+ part = &rq->rq_disk->part0;
+ hd_struct_get(part);
+ }
part_round_stats(cpu, part);
part_inc_in_flight(part, rw);
+ rq->part = part;
}
part_stat_unlock();
@@ -128,6 +142,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->ref_count = 1;
rq->start_time = jiffies;
set_start_time_ns(rq);
+ rq->part = NULL;
}
EXPORT_SYMBOL(blk_rq_init);
@@ -1329,9 +1344,9 @@ static inline void blk_partition_remap(struct bio *bio)
bio->bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;
- trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
- bdev->bd_dev,
- bio->bi_sector - p->start_sect);
+ trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
+ bdev->bd_dev,
+ bio->bi_sector - p->start_sect);
}
}
@@ -1500,7 +1515,7 @@ static inline void __generic_make_request(struct bio *bio)
goto end_io;
if (old_sector != -1)
- trace_block_remap(q, bio, old_dev, old_sector);
+ trace_block_bio_remap(q, bio, old_dev, old_sector);
old_sector = bio->bi_sector;
old_dev = bio->bi_bdev->bd_dev;
@@ -1776,7 +1791,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
int cpu;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+ part = req->part;
part_stat_add(cpu, part, sectors[rw], bytes >> 9);
part_stat_unlock();
}
@@ -1796,13 +1811,14 @@ static void blk_account_io_done(struct request *req)
int cpu;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+ part = req->part;
part_stat_inc(cpu, part, ios[rw]);
part_stat_add(cpu, part, ticks[rw], duration);
part_round_stats(cpu, part);
part_dec_in_flight(part, rw);
+ hd_struct_put(part);
part_stat_unlock();
}
}
@@ -2606,7 +2622,9 @@ int __init blk_dev_init(void)
BUILD_BUG_ON(__REQ_NR_BITS > 8 *
sizeof(((struct request *)0)->cmd_flags));
- kblockd_workqueue = create_workqueue("kblockd");
+ /* used for unplugging and affects IO latency/throughput - HIGHPRI */
+ kblockd_workqueue = alloc_workqueue("kblockd",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 3c7a339fe381..b791022beef3 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -64,7 +64,7 @@ static void cfq_exit(struct io_context *ioc)
rcu_read_unlock();
}
-/* Called by the exitting task */
+/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
struct io_context *ioc;
@@ -74,10 +74,9 @@ void exit_io_context(struct task_struct *task)
task->io_context = NULL;
task_unlock(task);
- if (atomic_dec_and_test(&ioc->nr_tasks)) {
+ if (atomic_dec_and_test(&ioc->nr_tasks))
cfq_exit(ioc);
- }
put_io_context(ioc);
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 74bc4a768f32..ea85e20d5e94 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -351,11 +351,12 @@ static void blk_account_io_merge(struct request *req)
int cpu;
cpu = part_stat_lock();
- part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
+ part = req->part;
part_round_stats(cpu, part);
part_dec_in_flight(part, rq_data_dir(req));
+ hd_struct_put(part);
part_stat_unlock();
}
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 78ee4b1d4e85..8427697c5437 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -87,7 +87,6 @@ struct cfq_rb_root {
unsigned count;
unsigned total_weight;
u64 min_vdisktime;
- struct rb_node *active;
};
#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, .left = NULL, \
.count = 0, .min_vdisktime = 0, }
@@ -97,7 +96,7 @@ struct cfq_rb_root {
*/
struct cfq_queue {
/* reference count */
- atomic_t ref;
+ int ref;
/* various state flags, see below */
unsigned int flags;
/* parent cfq_data */
@@ -180,7 +179,6 @@ struct cfq_group {
/* group service_tree key */
u64 vdisktime;
unsigned int weight;
- bool on_st;
/* number of cfqq currently on this group */
int nr_cfqq;
@@ -209,7 +207,7 @@ struct cfq_group {
struct blkio_group blkg;
#ifdef CONFIG_CFQ_GROUP_IOSCHED
struct hlist_node cfqd_node;
- atomic_t ref;
+ int ref;
#endif
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
@@ -563,11 +561,6 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
u64 vdisktime = st->min_vdisktime;
struct cfq_group *cfqg;
- if (st->active) {
- cfqg = rb_entry_cfqg(st->active);
- vdisktime = cfqg->vdisktime;
- }
-
if (st->left) {
cfqg = rb_entry_cfqg(st->left);
vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime);
@@ -646,11 +639,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
static inline bool cfq_slice_used(struct cfq_queue *cfqq)
{
if (cfq_cfqq_slice_new(cfqq))
- return 0;
+ return false;
if (time_before(jiffies, cfqq->slice_end))
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
@@ -869,7 +862,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
struct rb_node *n;
cfqg->nr_cfqq++;
- if (cfqg->on_st)
+ if (!RB_EMPTY_NODE(&cfqg->rb_node))
return;
/*
@@ -885,7 +878,6 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfqg->vdisktime = st->min_vdisktime;
__cfq_group_service_tree_add(st, cfqg);
- cfqg->on_st = true;
st->total_weight += cfqg->weight;
}
@@ -894,9 +886,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
struct cfq_rb_root *st = &cfqd->grp_service_tree;
- if (st->active == &cfqg->rb_node)
- st->active = NULL;
-
BUG_ON(cfqg->nr_cfqq < 1);
cfqg->nr_cfqq--;
@@ -905,7 +894,6 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
return;
cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
- cfqg->on_st = false;
st->total_weight -= cfqg->weight;
if (!RB_EMPTY_NODE(&cfqg->rb_node))
cfq_rb_erase(&cfqg->rb_node, st);
@@ -1026,7 +1014,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
* elevator which will be dropped by either elevator exit
* or cgroup deletion path depending on who is exiting first.
*/
- atomic_set(&cfqg->ref, 1);
+ cfqg->ref = 1;
/*
* Add group onto cgroup list. It might happen that bdi->dev is
@@ -1071,7 +1059,7 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
{
- atomic_inc(&cfqg->ref);
+ cfqg->ref++;
return cfqg;
}
@@ -1083,7 +1071,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
cfqq->cfqg = cfqg;
/* cfqq reference on cfqg */
- atomic_inc(&cfqq->cfqg->ref);
+ cfqq->cfqg->ref++;
}
static void cfq_put_cfqg(struct cfq_group *cfqg)
@@ -1091,11 +1079,12 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
struct cfq_rb_root *st;
int i, j;
- BUG_ON(atomic_read(&cfqg->ref) <= 0);
- if (!atomic_dec_and_test(&cfqg->ref))
+ BUG_ON(cfqg->ref <= 0);
+ cfqg->ref--;
+ if (cfqg->ref)
return;
for_each_cfqg_st(cfqg, i, j, st)
- BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+ BUG_ON(!RB_EMPTY_ROOT(&st->rb));
kfree(cfqg);
}
@@ -1200,7 +1189,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfq_group_service_tree_del(cfqd, cfqq->cfqg);
cfqq->orig_cfqg = cfqq->cfqg;
cfqq->cfqg = &cfqd->root_group;
- atomic_inc(&cfqd->root_group.ref);
+ cfqd->root_group.ref++;
group_changed = 1;
} else if (!cfqd->cfq_group_isolation
&& cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) {
@@ -1687,9 +1676,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (cfqq == cfqd->active_queue)
cfqd->active_queue = NULL;
- if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
- cfqd->grp_service_tree.active = NULL;
-
if (cfqd->active_cic) {
put_io_context(cfqd->active_cic->ioc);
cfqd->active_cic = NULL;
@@ -1901,10 +1887,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* in their service tree.
*/
if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
- return 1;
+ return true;
cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
service_tree->count);
- return 0;
+ return false;
}
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -2040,7 +2026,7 @@ static int cfqq_process_refs(struct cfq_queue *cfqq)
int process_refs, io_refs;
io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
- process_refs = atomic_read(&cfqq->ref) - io_refs;
+ process_refs = cfqq->ref - io_refs;
BUG_ON(process_refs < 0);
return process_refs;
}
@@ -2080,10 +2066,10 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
*/
if (new_process_refs >= process_refs) {
cfqq->new_cfqq = new_cfqq;
- atomic_add(process_refs, &new_cfqq->ref);
+ new_cfqq->ref += process_refs;
} else {
new_cfqq->new_cfqq = cfqq;
- atomic_add(new_process_refs, &cfqq->ref);
+ cfqq->ref += new_process_refs;
}
}
@@ -2116,12 +2102,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
unsigned count;
struct cfq_rb_root *st;
unsigned group_slice;
-
- if (!cfqg) {
- cfqd->serving_prio = IDLE_WORKLOAD;
- cfqd->workload_expires = jiffies + 1;
- return;
- }
+ enum wl_prio_t original_prio = cfqd->serving_prio;
/* Choose next priority. RT > BE > IDLE */
if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -2134,6 +2115,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
return;
}
+ if (original_prio != cfqd->serving_prio)
+ goto new_workload;
+
/*
* For RT and BE, we have to choose also the type
* (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
@@ -2148,6 +2132,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
if (count && !time_after(jiffies, cfqd->workload_expires))
return;
+new_workload:
/* otherwise select new workload type */
cfqd->serving_type =
cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
@@ -2199,7 +2184,6 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
if (RB_EMPTY_ROOT(&st->rb))
return NULL;
cfqg = cfq_rb_first_group(st);
- st->active = &cfqg->rb_node;
update_min_vdisktime(st);
return cfqg;
}
@@ -2293,6 +2277,17 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
goto keep_queue;
}
+ /*
+ * This is a deep seek queue, but the device is much faster than
+ * the queue can deliver, don't idle
+ **/
+ if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
+ (cfq_cfqq_slice_new(cfqq) ||
+ (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+ cfq_clear_cfqq_deep(cfqq);
+ cfq_clear_cfqq_idle_window(cfqq);
+ }
+
if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
cfqq = NULL;
goto keep_queue;
@@ -2367,12 +2362,12 @@ static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
{
/* the queue hasn't finished any request, can't estimate */
if (cfq_cfqq_slice_new(cfqq))
- return 1;
+ return true;
if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
cfqq->slice_end))
- return 1;
+ return true;
- return 0;
+ return false;
}
static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -2538,9 +2533,10 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
struct cfq_data *cfqd = cfqq->cfqd;
struct cfq_group *cfqg, *orig_cfqg;
- BUG_ON(atomic_read(&cfqq->ref) <= 0);
+ BUG_ON(cfqq->ref <= 0);
- if (!atomic_dec_and_test(&cfqq->ref))
+ cfqq->ref--;
+ if (cfqq->ref)
return;
cfq_log_cfqq(cfqd, cfqq, "put_queue");
@@ -2843,7 +2839,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
RB_CLEAR_NODE(&cfqq->p_node);
INIT_LIST_HEAD(&cfqq->fifo);
- atomic_set(&cfqq->ref, 0);
+ cfqq->ref = 0;
cfqq->cfqd = cfqd;
cfq_mark_cfqq_prio_changed(cfqq);
@@ -2979,11 +2975,11 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
* pin the queue now that it's allocated, scheduler exit will prune it
*/
if (!is_sync && !(*async_cfqq)) {
- atomic_inc(&cfqq->ref);
+ cfqq->ref++;
*async_cfqq = cfqq;
}
- atomic_inc(&cfqq->ref);
+ cfqq->ref++;
return cfqq;
}
@@ -3265,6 +3261,10 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
return true;
+ /* An idle queue should not be idle now for some reason */
+ if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
+ return true;
+
if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
return false;
@@ -3681,13 +3681,13 @@ new_queue:
}
cfqq->allocated[rw]++;
- atomic_inc(&cfqq->ref);
-
- spin_unlock_irqrestore(q->queue_lock, flags);
-
+ cfqq->ref++;
rq->elevator_private = cic;
rq->elevator_private2 = cfqq;
rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
+
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
return 0;
queue_fail:
@@ -3862,6 +3862,10 @@ static void *cfq_init_queue(struct request_queue *q)
if (!cfqd)
return NULL;
+ /*
+ * Don't need take queue_lock in the routine, since we are
+ * initializing the ioscheduler, and nobody is using cfqd
+ */
cfqd->cic_index = i;
/* Init root service tree */
@@ -3881,7 +3885,7 @@ static void *cfq_init_queue(struct request_queue *q)
* Take a reference to root group which we never drop. This is just
* to make sure that cfq_put_cfqg() does not try to kfree root group
*/
- atomic_set(&cfqg->ref, 1);
+ cfqg->ref = 1;
rcu_read_lock();
cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
(void *)cfqd, 0);
@@ -3901,7 +3905,7 @@ static void *cfq_init_queue(struct request_queue *q)
* will not attempt to free it.
*/
cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
- atomic_inc(&cfqd->oom_cfqq.ref);
+ cfqd->oom_cfqq.ref++;
cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
INIT_LIST_HEAD(&cfqd->cic_list);
diff --git a/block/genhd.c b/block/genhd.c
index 5fa2b44a72ff..6a5b772aa201 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -18,6 +18,7 @@
#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/idr.h>
+#include <linux/log2.h>
#include "blk.h"
@@ -35,6 +36,10 @@ static DEFINE_IDR(ext_devt_idr);
static struct device_type disk_type;
+static void disk_add_events(struct gendisk *disk);
+static void disk_del_events(struct gendisk *disk);
+static void disk_release_events(struct gendisk *disk);
+
/**
* disk_get_part - get partition
* @disk: disk to look partition from
@@ -239,7 +244,7 @@ static struct blk_major_name {
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
/* index in the above - for now: assume no multimajor ranges */
-static inline int major_to_index(int major)
+static inline int major_to_index(unsigned major)
{
return major % BLKDEV_MAJOR_HASH_SIZE;
}
@@ -502,6 +507,64 @@ static int exact_lock(dev_t devt, void *data)
return 0;
}
+void register_disk(struct gendisk *disk)
+{
+ struct device *ddev = disk_to_dev(disk);
+ struct block_device *bdev;
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+ int err;
+
+ ddev->parent = disk->driverfs_dev;
+
+ dev_set_name(ddev, disk->disk_name);
+
+ /* delay uevents, until we scanned partition table */
+ dev_set_uevent_suppress(ddev, 1);
+
+ if (device_add(ddev))
+ return;
+ if (!sysfs_deprecated) {
+ err = sysfs_create_link(block_depr, &ddev->kobj,
+ kobject_name(&ddev->kobj));
+ if (err) {
+ device_del(ddev);
+ return;
+ }
+ }
+ disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
+ disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
+
+ /* No minors to use for partitions */
+ if (!disk_partitionable(disk))
+ goto exit;
+
+ /* No such device (e.g., media were just removed) */
+ if (!get_capacity(disk))
+ goto exit;
+
+ bdev = bdget_disk(disk, 0);
+ if (!bdev)
+ goto exit;
+
+ bdev->bd_invalidated = 1;
+ err = blkdev_get(bdev, FMODE_READ, NULL);
+ if (err < 0)
+ goto exit;
+ blkdev_put(bdev, FMODE_READ);
+
+exit:
+ /* announce disk after possible partitions are created */
+ dev_set_uevent_suppress(ddev, 0);
+ kobject_uevent(&ddev->kobj, KOBJ_ADD);
+
+ /* announce possible partitions */
+ disk_part_iter_init(&piter, disk, 0);
+ while ((part = disk_part_iter_next(&piter)))
+ kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
+ disk_part_iter_exit(&piter);
+}
+
/**
* add_disk - add partitioning information to kernel list
* @disk: per-device partitioning information
@@ -551,18 +614,48 @@ void add_disk(struct gendisk *disk)
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
"bdi");
WARN_ON(retval);
-}
+ disk_add_events(disk);
+}
EXPORT_SYMBOL(add_disk);
-EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
-void unlink_gendisk(struct gendisk *disk)
+void del_gendisk(struct gendisk *disk)
{
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+
+ disk_del_events(disk);
+
+ /* invalidate stuff */
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
+ while ((part = disk_part_iter_next(&piter))) {
+ invalidate_partition(disk, part->partno);
+ delete_partition(disk, part->partno);
+ }
+ disk_part_iter_exit(&piter);
+
+ invalidate_partition(disk, 0);
+ blk_free_devt(disk_to_dev(disk)->devt);
+ set_capacity(disk, 0);
+ disk->flags &= ~GENHD_FL_UP;
+
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
bdi_unregister(&disk->queue->backing_dev_info);
blk_unregister_queue(disk);
blk_unregister_region(disk_devt(disk), disk->minors);
+
+ part_stat_set_all(&disk->part0, 0);
+ disk->part0.stamp = 0;
+
+ kobject_put(disk->part0.holder_dir);
+ kobject_put(disk->slave_dir);
+ disk->driverfs_dev = NULL;
+ if (!sysfs_deprecated)
+ sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
+ device_del(disk_to_dev(disk));
}
+EXPORT_SYMBOL(del_gendisk);
/**
* get_gendisk - get partitioning information for a given device
@@ -735,7 +828,7 @@ static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
static void *p;
p = disk_seqf_start(seqf, pos);
- if (!IS_ERR(p) && p && !*pos)
+ if (!IS_ERR_OR_NULL(p) && !*pos)
seq_puts(seqf, "major minor #blocks name\n\n");
return p;
}
@@ -1005,6 +1098,7 @@ static void disk_release(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
+ disk_release_events(disk);
kfree(disk->random);
disk_replace_part_tbl(disk, NULL);
free_part_stats(&disk->part0);
@@ -1110,29 +1204,6 @@ static int __init proc_genhd_init(void)
module_init(proc_genhd_init);
#endif /* CONFIG_PROC_FS */
-static void media_change_notify_thread(struct work_struct *work)
-{
- struct gendisk *gd = container_of(work, struct gendisk, async_notify);
- char event[] = "MEDIA_CHANGE=1";
- char *envp[] = { event, NULL };
-
- /*
- * set enviroment vars to indicate which event this is for
- * so that user space will know to go check the media status.
- */
- kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
- put_device(gd->driverfs_dev);
-}
-
-#if 0
-void genhd_media_change_notify(struct gendisk *disk)
-{
- get_device(disk->driverfs_dev);
- schedule_work(&disk->async_notify);
-}
-EXPORT_SYMBOL_GPL(genhd_media_change_notify);
-#endif /* 0 */
-
dev_t blk_lookup_devt(const char *name, int partno)
{
dev_t devt = MKDEV(0, 0);
@@ -1193,13 +1264,13 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
}
disk->part_tbl->part[0] = &disk->part0;
+ hd_ref_init(&disk->part0);
+
disk->minors = minors;
rand_initialize_disk(disk);
disk_to_dev(disk)->class = &block_class;
disk_to_dev(disk)->type = &disk_type;
device_initialize(disk_to_dev(disk));
- INIT_WORK(&disk->async_notify,
- media_change_notify_thread);
}
return disk;
}
@@ -1291,3 +1362,422 @@ int invalidate_partition(struct gendisk *disk, int partno)
}
EXPORT_SYMBOL(invalidate_partition);
+
+/*
+ * Disk events - monitor disk events like media change and eject request.
+ */
+struct disk_events {
+ struct list_head node; /* all disk_event's */
+ struct gendisk *disk; /* the associated disk */
+ spinlock_t lock;
+
+ int block; /* event blocking depth */
+ unsigned int pending; /* events already sent out */
+ unsigned int clearing; /* events being cleared */
+
+ long poll_msecs; /* interval, -1 for default */
+ struct delayed_work dwork;
+};
+
+static const char *disk_events_strs[] = {
+ [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
+ [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
+};
+
+static char *disk_uevents[] = {
+ [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
+ [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
+};
+
+/* list of all disk_events */
+static DEFINE_MUTEX(disk_events_mutex);
+static LIST_HEAD(disk_events);
+
+/* disable in-kernel polling by default */
+static unsigned long disk_events_dfl_poll_msecs = 0;
+
+static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
+{
+ struct disk_events *ev = disk->ev;
+ long intv_msecs = 0;
+
+ /*
+ * If device-specific poll interval is set, always use it. If
+ * the default is being used, poll iff there are events which
+ * can't be monitored asynchronously.
+ */
+ if (ev->poll_msecs >= 0)
+ intv_msecs = ev->poll_msecs;
+ else if (disk->events & ~disk->async_events)
+ intv_msecs = disk_events_dfl_poll_msecs;
+
+ return msecs_to_jiffies(intv_msecs);
+}
+
+static void __disk_block_events(struct gendisk *disk, bool sync)
+{
+ struct disk_events *ev = disk->ev;
+ unsigned long flags;
+ bool cancel;
+
+ spin_lock_irqsave(&ev->lock, flags);
+ cancel = !ev->block++;
+ spin_unlock_irqrestore(&ev->lock, flags);
+
+ if (cancel) {
+ if (sync)
+ cancel_delayed_work_sync(&disk->ev->dwork);
+ else
+ cancel_delayed_work(&disk->ev->dwork);
+ }
+}
+
+static void __disk_unblock_events(struct gendisk *disk, bool check_now)
+{
+ struct disk_events *ev = disk->ev;
+ unsigned long intv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ev->lock, flags);
+
+ if (WARN_ON_ONCE(ev->block <= 0))
+ goto out_unlock;
+
+ if (--ev->block)
+ goto out_unlock;
+
+ /*
+ * Not exactly a latency critical operation, set poll timer
+ * slack to 25% and kick event check.
+ */
+ intv = disk_events_poll_jiffies(disk);
+ set_timer_slack(&ev->dwork.timer, intv / 4);
+ if (check_now)
+ queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+ else if (intv)
+ queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+out_unlock:
+ spin_unlock_irqrestore(&ev->lock, flags);
+}
+
+/**
+ * disk_block_events - block and flush disk event checking
+ * @disk: disk to block events for
+ *
+ * On return from this function, it is guaranteed that event checking
+ * isn't in progress and won't happen until unblocked by
+ * disk_unblock_events(). Events blocking is counted and the actual
+ * unblocking happens after the matching number of unblocks are done.
+ *
+ * Note that this intentionally does not block event checking from
+ * disk_clear_events().
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void disk_block_events(struct gendisk *disk)
+{
+ if (disk->ev)
+ __disk_block_events(disk, true);
+}
+
+/**
+ * disk_unblock_events - unblock disk event checking
+ * @disk: disk to unblock events for
+ *
+ * Undo disk_block_events(). When the block count reaches zero, it
+ * starts events polling if configured.
+ *
+ * CONTEXT:
+ * Don't care. Safe to call from irq context.
+ */
+void disk_unblock_events(struct gendisk *disk)
+{
+ if (disk->ev)
+ __disk_unblock_events(disk, true);
+}
+
+/**
+ * disk_check_events - schedule immediate event checking
+ * @disk: disk to check events for
+ *
+ * Schedule immediate event checking on @disk if not blocked.
+ *
+ * CONTEXT:
+ * Don't care. Safe to call from irq context.
+ */
+void disk_check_events(struct gendisk *disk)
+{
+ if (disk->ev) {
+ __disk_block_events(disk, false);
+ __disk_unblock_events(disk, true);
+ }
+}
+EXPORT_SYMBOL_GPL(disk_check_events);
+
+/**
+ * disk_clear_events - synchronously check, clear and return pending events
+ * @disk: disk to fetch and clear events from
+ * @mask: mask of events to be fetched and clearted
+ *
+ * Disk events are synchronously checked and pending events in @mask
+ * are cleared and returned. This ignores the block count.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
+{
+ const struct block_device_operations *bdops = disk->fops;
+ struct disk_events *ev = disk->ev;
+ unsigned int pending;
+
+ if (!ev) {
+ /* for drivers still using the old ->media_changed method */
+ if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
+ bdops->media_changed && bdops->media_changed(disk))
+ return DISK_EVENT_MEDIA_CHANGE;
+ return 0;
+ }
+
+ /* tell the workfn about the events being cleared */
+ spin_lock_irq(&ev->lock);
+ ev->clearing |= mask;
+ spin_unlock_irq(&ev->lock);
+
+ /* uncondtionally schedule event check and wait for it to finish */
+ __disk_block_events(disk, true);
+ queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
+ flush_delayed_work(&ev->dwork);
+ __disk_unblock_events(disk, false);
+
+ /* then, fetch and clear pending events */
+ spin_lock_irq(&ev->lock);
+ WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
+ pending = ev->pending & mask;
+ ev->pending &= ~mask;
+ spin_unlock_irq(&ev->lock);
+
+ return pending;
+}
+
+static void disk_events_workfn(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
+ struct gendisk *disk = ev->disk;
+ char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
+ unsigned int clearing = ev->clearing;
+ unsigned int events;
+ unsigned long intv;
+ int nr_events = 0, i;
+
+ /* check events */
+ events = disk->fops->check_events(disk, clearing);
+
+ /* accumulate pending events and schedule next poll if necessary */
+ spin_lock_irq(&ev->lock);
+
+ events &= ~ev->pending;
+ ev->pending |= events;
+ ev->clearing &= ~clearing;
+
+ intv = disk_events_poll_jiffies(disk);
+ if (!ev->block && intv)
+ queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
+
+ spin_unlock_irq(&ev->lock);
+
+ /* tell userland about new events */
+ for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
+ if (events & (1 << i))
+ envp[nr_events++] = disk_uevents[i];
+
+ if (nr_events)
+ kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
+}
+
+/*
+ * A disk events enabled device has the following sysfs nodes under
+ * its /sys/block/X/ directory.
+ *
+ * events : list of all supported events
+ * events_async : list of events which can be detected w/o polling
+ * events_poll_msecs : polling interval, 0: disable, -1: system default
+ */
+static ssize_t __disk_events_show(unsigned int events, char *buf)
+{
+ const char *delim = "";
+ ssize_t pos = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
+ if (events & (1 << i)) {
+ pos += sprintf(buf + pos, "%s%s",
+ delim, disk_events_strs[i]);
+ delim = " ";
+ }
+ if (pos)
+ pos += sprintf(buf + pos, "\n");
+ return pos;
+}
+
+static ssize_t disk_events_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return __disk_events_show(disk->events, buf);
+}
+
+static ssize_t disk_events_async_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return __disk_events_show(disk->async_events, buf);
+}
+
+static ssize_t disk_events_poll_msecs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+
+ return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
+}
+
+static ssize_t disk_events_poll_msecs_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ long intv;
+
+ if (!count || !sscanf(buf, "%ld", &intv))
+ return -EINVAL;
+
+ if (intv < 0 && intv != -1)
+ return -EINVAL;
+
+ __disk_block_events(disk, true);
+ disk->ev->poll_msecs = intv;
+ __disk_unblock_events(disk, true);
+
+ return count;
+}
+
+static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
+static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
+static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
+ disk_events_poll_msecs_show,
+ disk_events_poll_msecs_store);
+
+static const struct attribute *disk_events_attrs[] = {
+ &dev_attr_events.attr,
+ &dev_attr_events_async.attr,
+ &dev_attr_events_poll_msecs.attr,
+ NULL,
+};
+
+/*
+ * The default polling interval can be specified by the kernel
+ * parameter block.events_dfl_poll_msecs which defaults to 0
+ * (disable). This can also be modified runtime by writing to
+ * /sys/module/block/events_dfl_poll_msecs.
+ */
+static int disk_events_set_dfl_poll_msecs(const char *val,
+ const struct kernel_param *kp)
+{
+ struct disk_events *ev;
+ int ret;
+
+ ret = param_set_ulong(val, kp);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&disk_events_mutex);
+
+ list_for_each_entry(ev, &disk_events, node)
+ disk_check_events(ev->disk);
+
+ mutex_unlock(&disk_events_mutex);
+
+ return 0;
+}
+
+static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
+ .set = disk_events_set_dfl_poll_msecs,
+ .get = param_get_ulong,
+};
+
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "block."
+
+module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
+ &disk_events_dfl_poll_msecs, 0644);
+
+/*
+ * disk_{add|del|release}_events - initialize and destroy disk_events.
+ */
+static void disk_add_events(struct gendisk *disk)
+{
+ struct disk_events *ev;
+
+ if (!disk->fops->check_events || !(disk->events | disk->async_events))
+ return;
+
+ ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+ if (!ev) {
+ pr_warn("%s: failed to initialize events\n", disk->disk_name);
+ return;
+ }
+
+ if (sysfs_create_files(&disk_to_dev(disk)->kobj,
+ disk_events_attrs) < 0) {
+ pr_warn("%s: failed to create sysfs files for events\n",
+ disk->disk_name);
+ kfree(ev);
+ return;
+ }
+
+ disk->ev = ev;
+
+ INIT_LIST_HEAD(&ev->node);
+ ev->disk = disk;
+ spin_lock_init(&ev->lock);
+ ev->block = 1;
+ ev->poll_msecs = -1;
+ INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
+
+ mutex_lock(&disk_events_mutex);
+ list_add_tail(&ev->node, &disk_events);
+ mutex_unlock(&disk_events_mutex);
+
+ /*
+ * Block count is initialized to 1 and the following initial
+ * unblock kicks it into action.
+ */
+ __disk_unblock_events(disk, true);
+}
+
+static void disk_del_events(struct gendisk *disk)
+{
+ if (!disk->ev)
+ return;
+
+ __disk_block_events(disk, true);
+
+ mutex_lock(&disk_events_mutex);
+ list_del_init(&disk->ev->node);
+ mutex_unlock(&disk_events_mutex);
+
+ sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
+}
+
+static void disk_release_events(struct gendisk *disk)
+{
+ /* the block count should be 1 from disk_del_events() */
+ WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
+ kfree(disk->ev);
+}
diff --git a/block/ioctl.c b/block/ioctl.c
index a9a302eba01e..9049d460fa89 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -294,11 +294,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return -EINVAL;
if (get_user(n, (int __user *) arg))
return -EFAULT;
- if (!(mode & FMODE_EXCL) && bd_claim(bdev, &bdev) < 0)
+ if (!(mode & FMODE_EXCL) &&
+ blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
return -EBUSY;
ret = set_blocksize(bdev, n);
if (!(mode & FMODE_EXCL))
- bd_release(bdev);
+ blkdev_put(bdev, mode | FMODE_EXCL);
return ret;
case BLKPG:
ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 1ea1a34e78b2..3803a0348937 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -911,8 +911,6 @@ struct drbd_md {
struct drbd_backing_dev {
struct block_device *backing_bdev;
struct block_device *md_bdev;
- struct file *lo_file;
- struct file *md_file;
struct drbd_md md;
struct disk_conf dc; /* The user provided config... */
sector_t known_size; /* last known size of that backing device */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6be5401d0e88..29cd0dc9fe4f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3372,11 +3372,8 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
if (ldev == NULL)
return;
- bd_release(ldev->backing_bdev);
- bd_release(ldev->md_bdev);
-
- fput(ldev->lo_file);
- fput(ldev->md_file);
+ blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
kfree(ldev);
}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 29e5c70e4e26..8cbfaa687d72 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -855,7 +855,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
sector_t max_possible_sectors;
sector_t min_md_device_sectors;
struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
- struct inode *inode, *inode2;
+ struct block_device *bdev;
struct lru_cache *resync_lru = NULL;
union drbd_state ns, os;
unsigned int max_seg_s;
@@ -907,46 +907,40 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
}
}
- nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
- if (IS_ERR(nbc->lo_file)) {
+ bdev = blkdev_get_by_path(nbc->dc.backing_dev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
+ if (IS_ERR(bdev)) {
dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
- PTR_ERR(nbc->lo_file));
- nbc->lo_file = NULL;
+ PTR_ERR(bdev));
retcode = ERR_OPEN_DISK;
goto fail;
}
+ nbc->backing_bdev = bdev;
- inode = nbc->lo_file->f_dentry->d_inode;
-
- if (!S_ISBLK(inode->i_mode)) {
- retcode = ERR_DISK_NOT_BDEV;
- goto fail;
- }
-
- nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0);
- if (IS_ERR(nbc->md_file)) {
+ /*
+ * meta_dev_idx >= 0: external fixed size, possibly multiple
+ * drbd sharing one meta device. TODO in that case, paranoia
+ * check that [md_bdev, meta_dev_idx] is not yet used by some
+ * other drbd minor! (if you use drbd.conf + drbdadm, that
+ * should check it for you already; but if you don't, or
+ * someone fooled it, we need to double check here)
+ */
+ bdev = blkdev_get_by_path(nbc->dc.meta_dev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL,
+ (nbc->dc.meta_dev_idx < 0) ?
+ (void *)mdev : (void *)drbd_m_holder);
+ if (IS_ERR(bdev)) {
dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
- PTR_ERR(nbc->md_file));
- nbc->md_file = NULL;
+ PTR_ERR(bdev));
retcode = ERR_OPEN_MD_DISK;
goto fail;
}
+ nbc->md_bdev = bdev;
- inode2 = nbc->md_file->f_dentry->d_inode;
-
- if (!S_ISBLK(inode2->i_mode)) {
- retcode = ERR_MD_NOT_BDEV;
- goto fail;
- }
-
- nbc->backing_bdev = inode->i_bdev;
- if (bd_claim(nbc->backing_bdev, mdev)) {
- printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n",
- nbc->backing_bdev, mdev,
- nbc->backing_bdev->bd_holder,
- nbc->backing_bdev->bd_contains->bd_holder,
- nbc->backing_bdev->bd_holders);
- retcode = ERR_BDCLAIM_DISK;
+ if ((nbc->backing_bdev == nbc->md_bdev) !=
+ (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
+ nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
+ retcode = ERR_MD_IDX_INVALID;
goto fail;
}
@@ -955,28 +949,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
offsetof(struct bm_extent, lce));
if (!resync_lru) {
retcode = ERR_NOMEM;
- goto release_bdev_fail;
- }
-
- /* meta_dev_idx >= 0: external fixed size,
- * possibly multiple drbd sharing one meta device.
- * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is
- * not yet used by some other drbd minor!
- * (if you use drbd.conf + drbdadm,
- * that should check it for you already; but if you don't, or someone
- * fooled it, we need to double check here) */
- nbc->md_bdev = inode2->i_bdev;
- if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev
- : (void *) drbd_m_holder)) {
- retcode = ERR_BDCLAIM_MD_DISK;
- goto release_bdev_fail;
- }
-
- if ((nbc->backing_bdev == nbc->md_bdev) !=
- (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
- nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
- retcode = ERR_MD_IDX_INVALID;
- goto release_bdev2_fail;
+ goto fail;
}
/* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
@@ -987,7 +960,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
(unsigned long long) drbd_get_max_capacity(nbc),
(unsigned long long) nbc->dc.disk_size);
retcode = ERR_DISK_TO_SMALL;
- goto release_bdev2_fail;
+ goto fail;
}
if (nbc->dc.meta_dev_idx < 0) {
@@ -1004,7 +977,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
dev_warn(DEV, "refusing attach: md-device too small, "
"at least %llu sectors needed for this meta-disk type\n",
(unsigned long long) min_md_device_sectors);
- goto release_bdev2_fail;
+ goto fail;
}
/* Make sure the new disk is big enough
@@ -1012,7 +985,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
if (drbd_get_max_capacity(nbc) <
drbd_get_capacity(mdev->this_bdev)) {
retcode = ERR_DISK_TO_SMALL;
- goto release_bdev2_fail;
+ goto fail;
}
nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
@@ -1035,7 +1008,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
drbd_resume_io(mdev);
if (retcode < SS_SUCCESS)
- goto release_bdev2_fail;
+ goto fail;
if (!get_ldev_if_state(mdev, D_ATTACHING))
goto force_diskless;
@@ -1269,18 +1242,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
force_diskless:
drbd_force_state(mdev, NS(disk, D_FAILED));
drbd_md_sync(mdev);
- release_bdev2_fail:
- if (nbc)
- bd_release(nbc->md_bdev);
- release_bdev_fail:
- if (nbc)
- bd_release(nbc->backing_bdev);
fail:
if (nbc) {
- if (nbc->lo_file)
- fput(nbc->lo_file);
- if (nbc->md_file)
- fput(nbc->md_file);
+ if (nbc->backing_bdev)
+ blkdev_put(nbc->backing_bdev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ if (nbc->md_bdev)
+ blkdev_put(nbc->md_bdev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL);
kfree(nbc);
}
lc_destroy(resync_lru);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 7ea0bea2f7e3..44e18c073c44 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -395,11 +395,7 @@ lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct loop_device *lo = p->lo;
struct page *page = buf->page;
sector_t IV;
- int size, ret;
-
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
+ int size;
IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
(buf->offset >> 9);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 19b3568e9326..77d70eebb6b2 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2296,15 +2296,12 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
* so bdget() can't fail.
*/
bdget(pd->bdev->bd_dev);
- if ((ret = blkdev_get(pd->bdev, FMODE_READ)))
+ if ((ret = blkdev_get(pd->bdev, FMODE_READ | FMODE_EXCL, pd)))
goto out;
- if ((ret = bd_claim(pd->bdev, pd)))
- goto out_putdev;
-
if ((ret = pkt_get_last_written(pd, &lba))) {
printk(DRIVER_NAME": pkt_get_last_written failed\n");
- goto out_unclaim;
+ goto out_putdev;
}
set_capacity(pd->disk, lba << 2);
@@ -2314,7 +2311,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
q = bdev_get_queue(pd->bdev);
if (write) {
if ((ret = pkt_open_write(pd)))
- goto out_unclaim;
+ goto out_putdev;
/*
* Some CDRW drives can not handle writes larger than one packet,
* even if the size is a multiple of the packet size.
@@ -2329,23 +2326,21 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
}
if ((ret = pkt_set_segment_merging(pd, q)))
- goto out_unclaim;
+ goto out_putdev;
if (write) {
if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
printk(DRIVER_NAME": not enough memory for buffers\n");
ret = -ENOMEM;
- goto out_unclaim;
+ goto out_putdev;
}
printk(DRIVER_NAME": %lukB available on disc\n", lba << 1);
}
return 0;
-out_unclaim:
- bd_release(pd->bdev);
out_putdev:
- blkdev_put(pd->bdev, FMODE_READ);
+ blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
out:
return ret;
}
@@ -2362,8 +2357,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
pkt_lock_door(pd, 0);
pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
- bd_release(pd->bdev);
- blkdev_put(pd->bdev, FMODE_READ);
+ blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
pkt_shrink_pktlist(pd);
}
@@ -2733,7 +2727,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
bdev = bdget(dev);
if (!bdev)
return -ENOMEM;
- ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY);
+ ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
if (ret)
return ret;
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index af13c62dc473..14033a36bcd0 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1348,7 +1348,10 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
if (!CDROM_CAN(CDC_SELECT_DISC))
return -EDRIVE_CANT_DO_THIS;
- (void) cdi->ops->media_changed(cdi, slot);
+ if (cdi->ops->check_events)
+ cdi->ops->check_events(cdi, 0, slot);
+ else
+ cdi->ops->media_changed(cdi, slot);
if (slot == CDSL_NONE) {
/* set media changed bits, on both queues */
@@ -1392,6 +1395,42 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
return slot;
}
+/*
+ * As cdrom implements an extra ioctl consumer for media changed
+ * event, it needs to buffer ->check_events() output, such that event
+ * is not lost for both the usual VFS and ioctl paths.
+ * cdi->{vfs|ioctl}_events are used to buffer pending events for each
+ * path.
+ *
+ * XXX: Locking is non-existent. cdi->ops->check_events() can be
+ * called in parallel and buffering fields are accessed without any
+ * exclusion. The original media_changed code had the same problem.
+ * It might be better to simply deprecate CDROM_MEDIA_CHANGED ioctl
+ * and remove this cruft altogether. It doesn't have much usefulness
+ * at this point.
+ */
+static void cdrom_update_events(struct cdrom_device_info *cdi,
+ unsigned int clearing)
+{
+ unsigned int events;
+
+ events = cdi->ops->check_events(cdi, clearing, CDSL_CURRENT);
+ cdi->vfs_events |= events;
+ cdi->ioctl_events |= events;
+}
+
+unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
+ unsigned int clearing)
+{
+ unsigned int events;
+
+ cdrom_update_events(cdi, clearing);
+ events = cdi->vfs_events;
+ cdi->vfs_events = 0;
+ return events;
+}
+EXPORT_SYMBOL(cdrom_check_events);
+
/* We want to make media_changed accessible to the user through an
* ioctl. The main problem now is that we must double-buffer the
* low-level implementation, to assure that the VFS and the user both
@@ -1403,15 +1442,26 @@ int media_changed(struct cdrom_device_info *cdi, int queue)
{
unsigned int mask = (1 << (queue & 1));
int ret = !!(cdi->mc_flags & mask);
+ bool changed;
if (!CDROM_CAN(CDC_MEDIA_CHANGED))
- return ret;
+ return ret;
+
/* changed since last call? */
- if (cdi->ops->media_changed(cdi, CDSL_CURRENT)) {
+ if (cdi->ops->check_events) {
+ BUG_ON(!queue); /* shouldn't be called from VFS path */
+ cdrom_update_events(cdi, DISK_EVENT_MEDIA_CHANGE);
+ changed = cdi->ioctl_events & DISK_EVENT_MEDIA_CHANGE;
+ cdi->ioctl_events = 0;
+ } else
+ changed = cdi->ops->media_changed(cdi, CDSL_CURRENT);
+
+ if (changed) {
cdi->mc_flags = 0x3; /* set bit on both queues */
ret |= 1;
cdi->media_written = 0;
}
+
cdi->mc_flags &= ~mask; /* clear bit */
return ret;
}
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index bfe25ea9766b..b4b9d5a47885 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -65,15 +65,12 @@ static int raw_open(struct inode *inode, struct file *filp)
if (!bdev)
goto out;
igrab(bdev->bd_inode);
- err = blkdev_get(bdev, filp->f_mode);
+ err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open);
if (err)
goto out;
- err = bd_claim(bdev, raw_open);
- if (err)
- goto out1;
err = set_blocksize(bdev, bdev_logical_block_size(bdev));
if (err)
- goto out2;
+ goto out1;
filp->f_flags |= O_DIRECT;
filp->f_mapping = bdev->bd_inode->i_mapping;
if (++raw_devices[minor].inuse == 1)
@@ -83,10 +80,8 @@ static int raw_open(struct inode *inode, struct file *filp)
mutex_unlock(&raw_mutex);
return 0;
-out2:
- bd_release(bdev);
out1:
- blkdev_put(bdev, filp->f_mode);
+ blkdev_put(bdev, filp->f_mode | FMODE_EXCL);
out:
mutex_unlock(&raw_mutex);
return err;
@@ -110,8 +105,7 @@ static int raw_release(struct inode *inode, struct file *filp)
}
mutex_unlock(&raw_mutex);
- bd_release(bdev);
- blkdev_put(bdev, filp->f_mode);
+ blkdev_put(bdev, filp->f_mode | FMODE_EXCL);
return 0;
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4d705cea0f8c..985c20a4f30e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -325,15 +325,18 @@ static int open_dev(struct dm_dev_internal *d, dev_t dev,
BUG_ON(d->dm_dev.bdev);
- bdev = open_by_devnum(dev, d->dm_dev.mode);
+ bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
- r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
- if (r)
- blkdev_put(bdev, d->dm_dev.mode);
- else
- d->dm_dev.bdev = bdev;
- return r;
+
+ r = bd_link_disk_holder(bdev, dm_disk(md));
+ if (r) {
+ blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL);
+ return r;
+ }
+
+ d->dm_dev.bdev = bdev;
+ return 0;
}
/*
@@ -344,8 +347,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
if (!d->dm_dev.bdev)
return;
- bd_release_from_disk(d->dm_dev.bdev, dm_disk(md));
- blkdev_put(d->dm_dev.bdev, d->dm_dev.mode);
+ blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL);
d->dm_dev.bdev = NULL;
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7cb1352f7e7a..f48a2f359ac4 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -630,7 +630,7 @@ static void dec_pending(struct dm_io *io, int error)
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
- trace_block_bio_complete(md->queue, bio);
+ trace_block_bio_complete(md->queue, bio, io_error);
bio_endio(bio, io_error);
}
}
@@ -990,8 +990,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
if (r == DM_MAPIO_REMAPPED) {
/* the bio has been remapped so dispatch it */
- trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
- tio->io->bio->bi_bdev->bd_dev, sector);
+ trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
+ tio->io->bio->bi_bdev->bd_dev, sector);
generic_make_request(clone);
} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 175c424f201f..7fc090ac9e28 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1879,7 +1879,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
list_add_rcu(&rdev->same_set, &mddev->disks);
- bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+ bd_link_disk_holder(rdev->bdev, mddev->gendisk);
/* May as well allow recovery to be retried once */
mddev->recovery_disabled = 0;
@@ -1906,7 +1906,6 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
MD_BUG();
return;
}
- bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
list_del_rcu(&rdev->same_set);
printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
@@ -1934,19 +1933,13 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
struct block_device *bdev;
char b[BDEVNAME_SIZE];
- bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+ shared ? (mdk_rdev_t *)lock_rdev : rdev);
if (IS_ERR(bdev)) {
printk(KERN_ERR "md: could not open %s.\n",
__bdevname(dev, b));
return PTR_ERR(bdev);
}
- err = bd_claim(bdev, shared ? (mdk_rdev_t *)lock_rdev : rdev);
- if (err) {
- printk(KERN_ERR "md: could not bd_claim %s.\n",
- bdevname(bdev, b));
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
- return err;
- }
if (!shared)
set_bit(AllReserved, &rdev->flags);
rdev->bdev = bdev;
@@ -1959,8 +1952,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
rdev->bdev = NULL;
if (!bdev)
MD_BUG();
- bd_release(bdev);
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
void md_autodetect_dev(dev_t dev);
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 2cf0cc6a4189..f29a6f9df6e7 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -224,7 +224,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
if (dev->blkdev) {
invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping,
0, -1);
- close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(dev->blkdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
kfree(dev);
@@ -234,6 +234,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
/* FIXME: ensure that mtd->size % erase_size == 0 */
static struct block2mtd_dev *add_device(char *devname, int erase_size)
{
+ const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
struct block_device *bdev;
struct block2mtd_dev *dev;
char *name;
@@ -246,7 +247,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
return NULL;
/* Get a handle on the device */
- bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, NULL);
+ bdev = blkdev_get_by_path(devname, mode, dev);
#ifndef MODULE
if (IS_ERR(bdev)) {
@@ -254,9 +255,8 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
to resolve the device name by other means. */
dev_t devt = name_to_dev_t(devname);
- if (devt) {
- bdev = open_by_devnum(devt, FMODE_WRITE | FMODE_READ);
- }
+ if (devt)
+ bdev = blkdev_get_by_dev(devt, mode, dev);
}
#endif
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 30a1ca3d08b7..5505bc07e1e7 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -103,7 +103,7 @@ int dasd_scan_partitions(struct dasd_block *block)
struct block_device *bdev;
bdev = bdget_disk(block->gdp, 0);
- if (!bdev || blkdev_get(bdev, FMODE_READ) < 0)
+ if (!bdev || blkdev_get(bdev, FMODE_READ, NULL) < 0)
return -ENODEV;
/*
* See fs/partition/check.c:register_disk,rescan_partitions
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 501f67bef719..9045c52abd25 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1977,8 +1977,7 @@ EXPORT_SYMBOL(scsi_mode_sense);
* in.
*
* Returns zero if unsuccessful or an error if TUR failed. For
- * removable media, a return of NOT_READY or UNIT_ATTENTION is
- * translated to success, with the ->changed flag updated.
+ * removable media, UNIT_ATTENTION sets ->changed flag.
**/
int
scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
@@ -2005,16 +2004,6 @@ scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
} while (scsi_sense_valid(sshdr) &&
sshdr->sense_key == UNIT_ATTENTION && --retries);
- if (!sshdr)
- /* could not allocate sense buffer, so can't process it */
- return result;
-
- if (sdev->removable && scsi_sense_valid(sshdr) &&
- (sshdr->sense_key == UNIT_ATTENTION ||
- sshdr->sense_key == NOT_READY)) {
- sdev->changed = 1;
- result = 0;
- }
if (!sshdr_external)
kfree(sshdr);
return result;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 365024b0c407..b65e65aa07eb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1043,15 +1043,7 @@ static int sd_media_changed(struct gendisk *disk)
sshdr);
}
- /*
- * Unable to test, unit probably not ready. This usually
- * means there is no disc in the drive. Mark as changed,
- * and we will figure it out later once the drive is
- * available again.
- */
- if (retval || (scsi_sense_valid(sshdr) &&
- /* 0x3a is medium not present */
- sshdr->asc == 0x3a)) {
+ if (retval) {
set_media_not_present(sdkp);
goto out;
}
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index d7b383c96d5d..be6baf8ad704 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -104,14 +104,15 @@ static void sr_release(struct cdrom_device_info *);
static void get_sectorsize(struct scsi_cd *);
static void get_capabilities(struct scsi_cd *);
-static int sr_media_change(struct cdrom_device_info *, int);
+static unsigned int sr_check_events(struct cdrom_device_info *cdi,
+ unsigned int clearing, int slot);
static int sr_packet(struct cdrom_device_info *, struct packet_command *);
static struct cdrom_device_ops sr_dops = {
.open = sr_open,
.release = sr_release,
.drive_status = sr_drive_status,
- .media_changed = sr_media_change,
+ .check_events = sr_check_events,
.tray_move = sr_tray_move,
.lock_door = sr_lock_door,
.select_speed = sr_select_speed,
@@ -165,90 +166,96 @@ static void scsi_cd_put(struct scsi_cd *cd)
mutex_unlock(&sr_ref_mutex);
}
-/* identical to scsi_test_unit_ready except that it doesn't
- * eat the NOT_READY returns for removable media */
-int sr_test_unit_ready(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr)
+static unsigned int sr_get_events(struct scsi_device *sdev)
{
- int retries = MAX_RETRIES;
- int the_result;
- u8 cmd[] = {TEST_UNIT_READY, 0, 0, 0, 0, 0 };
+ u8 buf[8];
+ u8 cmd[] = { GET_EVENT_STATUS_NOTIFICATION,
+ 1, /* polled */
+ 0, 0, /* reserved */
+ 1 << 4, /* notification class: media */
+ 0, 0, /* reserved */
+ 0, sizeof(buf), /* allocation length */
+ 0, /* control */
+ };
+ struct event_header *eh = (void *)buf;
+ struct media_event_desc *med = (void *)(buf + 4);
+ struct scsi_sense_hdr sshdr;
+ int result;
- /* issue TEST_UNIT_READY until the initial startup UNIT_ATTENTION
- * conditions are gone, or a timeout happens
- */
- do {
- the_result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL,
- 0, sshdr, SR_TIMEOUT,
- retries--, NULL);
- if (scsi_sense_valid(sshdr) &&
- sshdr->sense_key == UNIT_ATTENTION)
- sdev->changed = 1;
-
- } while (retries > 0 &&
- (!scsi_status_is_good(the_result) ||
- (scsi_sense_valid(sshdr) &&
- sshdr->sense_key == UNIT_ATTENTION)));
- return the_result;
+ result = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, sizeof(buf),
+ &sshdr, SR_TIMEOUT, MAX_RETRIES, NULL);
+ if (scsi_sense_valid(&sshdr) && sshdr.sense_key == UNIT_ATTENTION)
+ return DISK_EVENT_MEDIA_CHANGE;
+
+ if (result || be16_to_cpu(eh->data_len) < sizeof(*med))
+ return 0;
+
+ if (eh->nea || eh->notification_class != 0x4)
+ return 0;
+
+ if (med->media_event_code == 1)
+ return DISK_EVENT_EJECT_REQUEST;
+ else if (med->media_event_code == 2)
+ return DISK_EVENT_MEDIA_CHANGE;
+ return 0;
}
/*
- * This function checks to see if the media has been changed in the
- * CDROM drive. It is possible that we have already sensed a change,
- * or the drive may have sensed one and not yet reported it. We must
- * be ready for either case. This function always reports the current
- * value of the changed bit. If flag is 0, then the changed bit is reset.
- * This function could be done as an ioctl, but we would need to have
- * an inode for that to work, and we do not always have one.
+ * This function checks to see if the media has been changed or eject
+ * button has been pressed. It is possible that we have already
+ * sensed a change, or the drive may have sensed one and not yet
+ * reported it. The past events are accumulated in sdev->changed and
+ * returned together with the current state.
*/
-
-static int sr_media_change(struct cdrom_device_info *cdi, int slot)
+static unsigned int sr_check_events(struct cdrom_device_info *cdi,
+ unsigned int clearing, int slot)
{
struct scsi_cd *cd = cdi->handle;
- int retval;
- struct scsi_sense_hdr *sshdr;
+ bool last_present;
+ struct scsi_sense_hdr sshdr;
+ unsigned int events;
+ int ret;
- if (CDSL_CURRENT != slot) {
- /* no changer support */
- return -EINVAL;
- }
+ /* no changer support */
+ if (CDSL_CURRENT != slot)
+ return 0;
- sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL);
- retval = sr_test_unit_ready(cd->device, sshdr);
- if (retval || (scsi_sense_valid(sshdr) &&
- /* 0x3a is medium not present */
- sshdr->asc == 0x3a)) {
- /* Media not present or unable to test, unit probably not
- * ready. This usually means there is no disc in the drive.
- * Mark as changed, and we will figure it out later once
- * the drive is available again.
- */
- cd->device->changed = 1;
- /* This will force a flush, if called from check_disk_change */
- retval = 1;
- goto out;
- };
+ events = sr_get_events(cd->device);
+ /*
+ * GET_EVENT_STATUS_NOTIFICATION is enough unless MEDIA_CHANGE
+ * is being cleared. Note that there are devices which hang
+ * if asked to execute TUR repeatedly.
+ */
+ if (!(clearing & DISK_EVENT_MEDIA_CHANGE))
+ goto skip_tur;
+
+ /* let's see whether the media is there with TUR */
+ last_present = cd->media_present;
+ ret = scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
+
+ /*
+ * Media is considered to be present if TUR succeeds or fails with
+ * sense data indicating something other than media-not-present
+ * (ASC 0x3a).
+ */
+ cd->media_present = scsi_status_is_good(ret) ||
+ (scsi_sense_valid(&sshdr) && sshdr.asc != 0x3a);
- retval = cd->device->changed;
- cd->device->changed = 0;
- /* If the disk changed, the capacity will now be different,
- * so we force a re-read of this information */
- if (retval) {
- /* check multisession offset etc */
- sr_cd_check(cdi);
- get_sectorsize(cd);
+ if (last_present != cd->media_present)
+ events |= DISK_EVENT_MEDIA_CHANGE;
+skip_tur:
+ if (cd->device->changed) {
+ events |= DISK_EVENT_MEDIA_CHANGE;
+ cd->device->changed = 0;
}
-out:
- /* Notify userspace, that media has changed. */
- if (retval != cd->previous_state)
+ /* for backward compatibility */
+ if (events & DISK_EVENT_MEDIA_CHANGE)
sdev_evt_send_simple(cd->device, SDEV_EVT_MEDIA_CHANGE,
GFP_KERNEL);
- cd->previous_state = retval;
- kfree(sshdr);
-
- return retval;
+ return events;
}
-
+
/*
* sr_done is the interrupt routine for the device driver.
*
@@ -533,10 +540,25 @@ out:
return ret;
}
-static int sr_block_media_changed(struct gendisk *disk)
+static unsigned int sr_block_check_events(struct gendisk *disk,
+ unsigned int clearing)
{
struct scsi_cd *cd = scsi_cd(disk);
- return cdrom_media_changed(&cd->cdi);
+ return cdrom_check_events(&cd->cdi, clearing);
+}
+
+static int sr_block_revalidate_disk(struct gendisk *disk)
+{
+ struct scsi_cd *cd = scsi_cd(disk);
+ struct scsi_sense_hdr sshdr;
+
+ /* if the unit is not ready, nothing more to do */
+ if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
+ return 0;
+
+ sr_cd_check(&cd->cdi);
+ get_sectorsize(cd);
+ return 0;
}
static const struct block_device_operations sr_bdops =
@@ -545,7 +567,8 @@ static const struct block_device_operations sr_bdops =
.open = sr_block_open,
.release = sr_block_release,
.ioctl = sr_block_ioctl,
- .media_changed = sr_block_media_changed,
+ .check_events = sr_block_check_events,
+ .revalidate_disk = sr_block_revalidate_disk,
/*
* No compat_ioctl for now because sr_block_ioctl never
* seems to pass arbitary ioctls down to host drivers.
@@ -618,6 +641,7 @@ static int sr_probe(struct device *dev)
sprintf(disk->disk_name, "sr%d", minor);
disk->fops = &sr_bdops;
disk->flags = GENHD_FL_CD;
+ disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
@@ -627,7 +651,7 @@ static int sr_probe(struct device *dev)
cd->disk = disk;
cd->capacity = 0x1fffff;
cd->device->changed = 1; /* force recheck CD type */
- cd->previous_state = 1;
+ cd->media_present = 1;
cd->use = 1;
cd->readcd_known = 0;
cd->readcd_cdda = 0;
@@ -780,7 +804,7 @@ static void get_capabilities(struct scsi_cd *cd)
}
/* eat unit attentions */
- sr_test_unit_ready(cd->device, &sshdr);
+ scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
/* ask for mode page 0x2a */
rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128,
diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h
index 1e144dfdbd4b..e036f1dc83c8 100644
--- a/drivers/scsi/sr.h
+++ b/drivers/scsi/sr.h
@@ -40,7 +40,7 @@ typedef struct scsi_cd {
unsigned xa_flag:1; /* CD has XA sectors ? */
unsigned readcd_known:1; /* drive supports READ_CD (0xbe) */
unsigned readcd_cdda:1; /* reading audio data using READ_CD */
- unsigned previous_state:1; /* media has changed */
+ unsigned media_present:1; /* media is present */
struct cdrom_device_info cdi;
/* We hold gendisk and scsi_device references on probe and use
* the refs on this kref to decide when to release them */
@@ -61,7 +61,6 @@ int sr_select_speed(struct cdrom_device_info *cdi, int speed);
int sr_audio_ioctl(struct cdrom_device_info *, unsigned int, void *);
int sr_is_xa(Scsi_CD *);
-int sr_test_unit_ready(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr);
/* sr_vendor.c */
void sr_vendor_init(Scsi_CD *);
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index 3cd8ffbad577..8be30554119b 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -307,7 +307,7 @@ int sr_drive_status(struct cdrom_device_info *cdi, int slot)
/* we have no changer support */
return -EINVAL;
}
- if (0 == sr_test_unit_ready(cd->device, &sshdr))
+ if (!scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
return CDS_DISC_OK;
/* SK/ASC/ASCQ of 2/4/1 means "unit is becoming ready" */
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index 3b513bafaf2a..b015561fd602 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -543,7 +543,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
ro = curlun->initially_ro;
if (!ro) {
filp = filp_open(filename, O_RDWR | O_LARGEFILE, 0);
- if (-EROFS == PTR_ERR(filp))
+ if (PTR_ERR(filp) == -EROFS || PTR_ERR(filp) == -EACCES)
ro = 1;
}
if (ro)
@@ -558,10 +558,7 @@ static int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
if (filp->f_path.dentry)
inode = filp->f_path.dentry->d_inode;
- if (inode && S_ISBLK(inode->i_mode)) {
- if (bdev_read_only(inode->i_bdev))
- ro = 1;
- } else if (!inode || !S_ISREG(inode->i_mode)) {
+ if (!inode || (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) {
LINFO(curlun, "invalid file type: %s\n", filename);
goto out;
}
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 4d0ff5ee27b8..e49cce234c65 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -782,7 +782,12 @@ void __init bio_integrity_init(void)
{
unsigned int i;
- kintegrityd_wq = create_workqueue("kintegrityd");
+ /*
+ * kintegrityd won't block much but may burn a lot of CPU cycles.
+ * Make it highpri CPU intensive wq with max concurrency of 1.
+ */
+ kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
+ WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
if (!kintegrityd_wq)
panic("Failed to create kintegrityd\n");
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 88da70355aa3..fe3f59c14a02 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -432,9 +432,6 @@ static void init_once(void *foo)
mutex_init(&bdev->bd_mutex);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
-#ifdef CONFIG_SYSFS
- INIT_LIST_HEAD(&bdev->bd_holder_list);
-#endif
inode_init_once(&ei->vfs_inode);
/* Initialize mutex for freeze. */
mutex_init(&bdev->bd_fsfreeze_mutex);
@@ -669,7 +666,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
else if (bdev->bd_contains == bdev)
return true; /* is a whole device which isn't held */
- else if (whole->bd_holder == bd_claim)
+ else if (whole->bd_holder == bd_may_claim)
return true; /* is a partition of a device that is being partitioned */
else if (whole->bd_holder != NULL)
return false; /* is a partition of a held device */
@@ -781,439 +778,87 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
}
}
-/* releases bdev_lock */
-static void __bd_abort_claiming(struct block_device *whole, void *holder)
-{
- BUG_ON(whole->bd_claiming != holder);
- whole->bd_claiming = NULL;
- wake_up_bit(&whole->bd_claiming, 0);
-
- spin_unlock(&bdev_lock);
- bdput(whole);
-}
-
-/**
- * bd_abort_claiming - abort claiming a block device
- * @whole: whole block device returned by bd_start_claiming()
- * @holder: holder trying to claim @bdev
- *
- * Abort a claiming block started by bd_start_claiming(). Note that
- * @whole is not the block device to be claimed but the whole device
- * returned by bd_start_claiming().
- *
- * CONTEXT:
- * Grabs and releases bdev_lock.
- */
-static void bd_abort_claiming(struct block_device *whole, void *holder)
-{
- spin_lock(&bdev_lock);
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
-}
-
-/* increment holders when we have a legitimate claim. requires bdev_lock */
-static void __bd_claim(struct block_device *bdev, struct block_device *whole,
- void *holder)
-{
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
-}
-
-/**
- * bd_finish_claiming - finish claiming a block device
- * @bdev: block device of interest (passed to bd_start_claiming())
- * @whole: whole block device returned by bd_start_claiming()
- * @holder: holder trying to claim @bdev
- *
- * Finish a claiming block started by bd_start_claiming().
- *
- * CONTEXT:
- * Grabs and releases bdev_lock.
- */
-static void bd_finish_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder)
-{
- spin_lock(&bdev_lock);
- BUG_ON(!bd_may_claim(bdev, whole, holder));
- __bd_claim(bdev, whole, holder);
- __bd_abort_claiming(whole, holder); /* not actually an abort */
-}
-
-/**
- * bd_claim - claim a block device
- * @bdev: block device to claim
- * @holder: holder trying to claim @bdev
- *
- * Try to claim @bdev which must have been opened successfully.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * 0 if successful, -EBUSY if @bdev is already claimed.
- */
-int bd_claim(struct block_device *bdev, void *holder)
-{
- struct block_device *whole = bdev->bd_contains;
- int res;
-
- might_sleep();
-
- spin_lock(&bdev_lock);
- res = bd_prepare_to_claim(bdev, whole, holder);
- if (res == 0)
- __bd_claim(bdev, whole, holder);
- spin_unlock(&bdev_lock);
-
- return res;
-}
-EXPORT_SYMBOL(bd_claim);
-
-void bd_release(struct block_device *bdev)
-{
- spin_lock(&bdev_lock);
- if (!--bdev->bd_contains->bd_holders)
- bdev->bd_contains->bd_holder = NULL;
- if (!--bdev->bd_holders)
- bdev->bd_holder = NULL;
- spin_unlock(&bdev_lock);
-}
-
-EXPORT_SYMBOL(bd_release);
-
#ifdef CONFIG_SYSFS
-/*
- * Functions for bd_claim_by_kobject / bd_release_from_kobject
- *
- * If a kobject is passed to bd_claim_by_kobject()
- * and the kobject has a parent directory,
- * following symlinks are created:
- * o from the kobject to the claimed bdev
- * o from "holders" directory of the bdev to the parent of the kobject
- * bd_release_from_kobject() removes these symlinks.
- *
- * Example:
- * If /dev/dm-0 maps to /dev/sda, kobject corresponding to
- * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
- * /sys/block/dm-0/slaves/sda --> /sys/block/sda
- * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
- */
-
static int add_symlink(struct kobject *from, struct kobject *to)
{
- if (!from || !to)
- return 0;
return sysfs_create_link(from, to, kobject_name(to));
}
static void del_symlink(struct kobject *from, struct kobject *to)
{
- if (!from || !to)
- return;
sysfs_remove_link(from, kobject_name(to));
}
-/*
- * 'struct bd_holder' contains pointers to kobjects symlinked by
- * bd_claim_by_kobject.
- * It's connected to bd_holder_list which is protected by bdev->bd_sem.
- */
-struct bd_holder {
- struct list_head list; /* chain of holders of the bdev */
- int count; /* references from the holder */
- struct kobject *sdir; /* holder object, e.g. "/block/dm-0/slaves" */
- struct kobject *hdev; /* e.g. "/block/dm-0" */
- struct kobject *hdir; /* e.g. "/block/sda/holders" */
- struct kobject *sdev; /* e.g. "/block/sda" */
-};
-
-/*
- * Get references of related kobjects at once.
- * Returns 1 on success. 0 on failure.
- *
- * Should call bd_holder_release_dirs() after successful use.
- */
-static int bd_holder_grab_dirs(struct block_device *bdev,
- struct bd_holder *bo)
-{
- if (!bdev || !bo)
- return 0;
-
- bo->sdir = kobject_get(bo->sdir);
- if (!bo->sdir)
- return 0;
-
- bo->hdev = kobject_get(bo->sdir->parent);
- if (!bo->hdev)
- goto fail_put_sdir;
-
- bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
- if (!bo->sdev)
- goto fail_put_hdev;
-
- bo->hdir = kobject_get(bdev->bd_part->holder_dir);
- if (!bo->hdir)
- goto fail_put_sdev;
-
- return 1;
-
-fail_put_sdev:
- kobject_put(bo->sdev);
-fail_put_hdev:
- kobject_put(bo->hdev);
-fail_put_sdir:
- kobject_put(bo->sdir);
-
- return 0;
-}
-
-/* Put references of related kobjects at once. */
-static void bd_holder_release_dirs(struct bd_holder *bo)
-{
- kobject_put(bo->hdir);
- kobject_put(bo->sdev);
- kobject_put(bo->hdev);
- kobject_put(bo->sdir);
-}
-
-static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
-{
- struct bd_holder *bo;
-
- bo = kzalloc(sizeof(*bo), GFP_KERNEL);
- if (!bo)
- return NULL;
-
- bo->count = 1;
- bo->sdir = kobj;
-
- return bo;
-}
-
-static void free_bd_holder(struct bd_holder *bo)
-{
- kfree(bo);
-}
-
/**
- * find_bd_holder - find matching struct bd_holder from the block device
+ * bd_link_disk_holder - create symlinks between holding disk and slave bdev
+ * @bdev: the claimed slave bdev
+ * @disk: the holding disk
*
- * @bdev: struct block device to be searched
- * @bo: target struct bd_holder
- *
- * Returns matching entry with @bo in @bdev->bd_holder_list.
- * If found, increment the reference count and return the pointer.
- * If not found, returns NULL.
- */
-static struct bd_holder *find_bd_holder(struct block_device *bdev,
- struct bd_holder *bo)
-{
- struct bd_holder *tmp;
-
- list_for_each_entry(tmp, &bdev->bd_holder_list, list)
- if (tmp->sdir == bo->sdir) {
- tmp->count++;
- return tmp;
- }
-
- return NULL;
-}
-
-/**
- * add_bd_holder - create sysfs symlinks for bd_claim() relationship
- *
- * @bdev: block device to be bd_claimed
- * @bo: preallocated and initialized by alloc_bd_holder()
- *
- * Add @bo to @bdev->bd_holder_list, create symlinks.
- *
- * Returns 0 if symlinks are created.
- * Returns -ve if something fails.
- */
-static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
-{
- int err;
-
- if (!bo)
- return -EINVAL;
-
- if (!bd_holder_grab_dirs(bdev, bo))
- return -EBUSY;
-
- err = add_symlink(bo->sdir, bo->sdev);
- if (err)
- return err;
-
- err = add_symlink(bo->hdir, bo->hdev);
- if (err) {
- del_symlink(bo->sdir, bo->sdev);
- return err;
- }
-
- list_add_tail(&bo->list, &bdev->bd_holder_list);
- return 0;
-}
-
-/**
- * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
+ * This functions creates the following sysfs symlinks.
*
- * @bdev: block device to be bd_claimed
- * @kobj: holder's kobject
+ * - from "slaves" directory of the holder @disk to the claimed @bdev
+ * - from "holders" directory of the @bdev to the holder @disk
*
- * If there is matching entry with @kobj in @bdev->bd_holder_list
- * and no other bd_claim() from the same kobject,
- * remove the struct bd_holder from the list, delete symlinks for it.
+ * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
+ * passed to bd_link_disk_holder(), then:
*
- * Returns a pointer to the struct bd_holder when it's removed from the list
- * and ready to be freed.
- * Returns NULL if matching claim isn't found or there is other bd_claim()
- * by the same kobject.
- */
-static struct bd_holder *del_bd_holder(struct block_device *bdev,
- struct kobject *kobj)
-{
- struct bd_holder *bo;
-
- list_for_each_entry(bo, &bdev->bd_holder_list, list) {
- if (bo->sdir == kobj) {
- bo->count--;
- BUG_ON(bo->count < 0);
- if (!bo->count) {
- list_del(&bo->list);
- del_symlink(bo->sdir, bo->sdev);
- del_symlink(bo->hdir, bo->hdev);
- bd_holder_release_dirs(bo);
- return bo;
- }
- break;
- }
- }
-
- return NULL;
-}
-
-/**
- * bd_claim_by_kobject - bd_claim() with additional kobject signature
+ * /sys/block/dm-0/slaves/sda --> /sys/block/sda
+ * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
*
- * @bdev: block device to be claimed
- * @holder: holder's signature
- * @kobj: holder's kobject
+ * The caller must have claimed @bdev before calling this function and
+ * ensure that both @bdev and @disk are valid during the creation and
+ * lifetime of these symlinks.
*
- * Do bd_claim() and if it succeeds, create sysfs symlinks between
- * the bdev and the holder's kobject.
- * Use bd_release_from_kobject() when relesing the claimed bdev.
+ * CONTEXT:
+ * Might sleep.
*
- * Returns 0 on success. (same as bd_claim())
- * Returns errno on failure.
+ * RETURNS:
+ * 0 on success, -errno on failure.
*/
-static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
- struct kobject *kobj)
+int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
- int err;
- struct bd_holder *bo, *found;
-
- if (!kobj)
- return -EINVAL;
-
- bo = alloc_bd_holder(kobj);
- if (!bo)
- return -ENOMEM;
+ int ret = 0;
mutex_lock(&bdev->bd_mutex);
- err = bd_claim(bdev, holder);
- if (err)
- goto fail;
+ WARN_ON_ONCE(!bdev->bd_holder || bdev->bd_holder_disk);
- found = find_bd_holder(bdev, bo);
- if (found)
- goto fail;
+ /* FIXME: remove the following once add_disk() handles errors */
+ if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
+ goto out_unlock;
- err = add_bd_holder(bdev, bo);
- if (err)
- bd_release(bdev);
- else
- bo = NULL;
-fail:
- mutex_unlock(&bdev->bd_mutex);
- free_bd_holder(bo);
- return err;
-}
+ ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ if (ret)
+ goto out_unlock;
-/**
- * bd_release_from_kobject - bd_release() with additional kobject signature
- *
- * @bdev: block device to be released
- * @kobj: holder's kobject
- *
- * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
- */
-static void bd_release_from_kobject(struct block_device *bdev,
- struct kobject *kobj)
-{
- if (!kobj)
- return;
+ ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
+ if (ret) {
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ goto out_unlock;
+ }
- mutex_lock(&bdev->bd_mutex);
- bd_release(bdev);
- free_bd_holder(del_bd_holder(bdev, kobj));
+ bdev->bd_holder_disk = disk;
+out_unlock:
mutex_unlock(&bdev->bd_mutex);
+ return ret;
}
+EXPORT_SYMBOL_GPL(bd_link_disk_holder);
-/**
- * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
- *
- * @bdev: block device to be claimed
- * @holder: holder's signature
- * @disk: holder's gendisk
- *
- * Call bd_claim_by_kobject() with getting @disk->slave_dir.
- */
-int bd_claim_by_disk(struct block_device *bdev, void *holder,
- struct gendisk *disk)
+static void bd_unlink_disk_holder(struct block_device *bdev)
{
- return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
-}
-EXPORT_SYMBOL_GPL(bd_claim_by_disk);
+ struct gendisk *disk = bdev->bd_holder_disk;
-/**
- * bd_release_from_disk - wrapper function for bd_release_from_kobject()
- *
- * @bdev: block device to be claimed
- * @disk: holder's gendisk
- *
- * Call bd_release_from_kobject() and put @disk->slave_dir.
- */
-void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
-{
- bd_release_from_kobject(bdev, disk->slave_dir);
- kobject_put(disk->slave_dir);
-}
-EXPORT_SYMBOL_GPL(bd_release_from_disk);
-#endif
+ bdev->bd_holder_disk = NULL;
+ if (!disk)
+ return;
-/*
- * Tries to open block device by device number. Use it ONLY if you
- * really do not have anything better - i.e. when you are behind a
- * truly sucky interface and all you are given is a device number. _Never_
- * to be used for internal purposes. If you ever need it - reconsider
- * your API.
- */
-struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
-{
- struct block_device *bdev = bdget(dev);
- int err = -ENOMEM;
- if (bdev)
- err = blkdev_get(bdev, mode);
- return err ? ERR_PTR(err) : bdev;
+ del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
+ del_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
}
-
-EXPORT_SYMBOL(open_by_devnum);
+#else
+static inline void bd_unlink_disk_holder(struct block_device *bdev)
+{ }
+#endif
/**
* flush_disk - invalidates all buffer-cache entries on a disk
@@ -1309,10 +954,11 @@ int check_disk_change(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
const struct block_device_operations *bdops = disk->fops;
+ unsigned int events;
- if (!bdops->media_changed)
- return 0;
- if (!bdops->media_changed(bdev->bd_disk))
+ events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
+ DISK_EVENT_EJECT_REQUEST);
+ if (!(events & DISK_EVENT_MEDIA_CHANGE))
return 0;
flush_disk(bdev);
@@ -1475,17 +1121,171 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
return ret;
}
-int blkdev_get(struct block_device *bdev, fmode_t mode)
+/**
+ * blkdev_get - open a block device
+ * @bdev: block_device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is
+ * open with exclusive access. Specifying %FMODE_EXCL with %NULL
+ * @holder is invalid. Exclusive opens may nest for the same @holder.
+ *
+ * On success, the reference count of @bdev is unchanged. On failure,
+ * @bdev is put.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
- return __blkdev_get(bdev, mode, 0);
+ struct block_device *whole = NULL;
+ int res;
+
+ WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
+
+ if ((mode & FMODE_EXCL) && holder) {
+ whole = bd_start_claiming(bdev, holder);
+ if (IS_ERR(whole)) {
+ bdput(bdev);
+ return PTR_ERR(whole);
+ }
+ }
+
+ res = __blkdev_get(bdev, mode, 0);
+
+ /* __blkdev_get() may alter read only status, check it afterwards */
+ if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ __blkdev_put(bdev, mode, 0);
+ res = -EACCES;
+ }
+
+ if (whole) {
+ /* finish claiming */
+ mutex_lock(&bdev->bd_mutex);
+ spin_lock(&bdev_lock);
+
+ if (!res) {
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ /*
+ * Note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder
+ * will be set to bd_may_claim before being
+ * set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_may_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+ }
+
+ /* tell others that we're done */
+ BUG_ON(whole->bd_claiming != holder);
+ whole->bd_claiming = NULL;
+ wake_up_bit(&whole->bd_claiming, 0);
+
+ spin_unlock(&bdev_lock);
+
+ /*
+ * Block event polling for write claims. Any write
+ * holder makes the write_holder state stick until all
+ * are released. This is good enough and tracking
+ * individual writeable reference is too fragile given
+ * the way @mode is used in blkdev_get/put().
+ */
+ if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+ bdev->bd_write_holder = true;
+ disk_block_events(bdev->bd_disk);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(whole);
+ }
+
+ return res;
}
EXPORT_SYMBOL(blkdev_get);
+/**
+ * blkdev_get_by_path - open a block device by name
+ * @path: path to the block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the blockdevice described by the device file at @path. @mode
+ * and @holder are identical to blkdev_get().
+ *
+ * On success, the returned block_device has reference count of one.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ */
+struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder)
+{
+ struct block_device *bdev;
+ int err;
+
+ bdev = lookup_bdev(path);
+ if (IS_ERR(bdev))
+ return bdev;
+
+ err = blkdev_get(bdev, mode, holder);
+ if (err)
+ return ERR_PTR(err);
+
+ return bdev;
+}
+EXPORT_SYMBOL(blkdev_get_by_path);
+
+/**
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the blockdevice described by device number @dev. @mode and
+ * @holder are identical to blkdev_get().
+ *
+ * Use it ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a
+ * device number. _Never_ to be used for internal purposes. If you
+ * ever need it - reconsider your API.
+ *
+ * On success, the returned block_device has reference count of one.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Pointer to block_device on success, ERR_PTR(-errno) on failure.
+ */
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+{
+ struct block_device *bdev;
+ int err;
+
+ bdev = bdget(dev);
+ if (!bdev)
+ return ERR_PTR(-ENOMEM);
+
+ err = blkdev_get(bdev, mode, holder);
+ if (err)
+ return ERR_PTR(err);
+
+ return bdev;
+}
+EXPORT_SYMBOL(blkdev_get_by_dev);
+
static int blkdev_open(struct inode * inode, struct file * filp)
{
- struct block_device *whole = NULL;
struct block_device *bdev;
- int res;
/*
* Preserve backwards compatibility and allow large file access
@@ -1506,26 +1306,9 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if (bdev == NULL)
return -ENOMEM;
- if (filp->f_mode & FMODE_EXCL) {
- whole = bd_start_claiming(bdev, filp);
- if (IS_ERR(whole)) {
- bdput(bdev);
- return PTR_ERR(whole);
- }
- }
-
filp->f_mapping = bdev->bd_inode->i_mapping;
- res = blkdev_get(bdev, filp->f_mode);
-
- if (whole) {
- if (res == 0)
- bd_finish_claiming(bdev, whole, filp);
- else
- bd_abort_claiming(whole, filp);
- }
-
- return res;
+ return blkdev_get(bdev, filp->f_mode, filp);
}
static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
@@ -1539,6 +1322,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_part_count--;
if (!--bdev->bd_openers) {
+ WARN_ON_ONCE(bdev->bd_holders);
sync_blockdev(bdev);
kill_bdev(bdev);
}
@@ -1569,6 +1353,45 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
int blkdev_put(struct block_device *bdev, fmode_t mode)
{
+ if (mode & FMODE_EXCL) {
+ bool bdev_free;
+
+ /*
+ * Release a claim on the device. The holder fields
+ * are protected with bdev_lock. bd_mutex is to
+ * synchronize disk_holder unlinking.
+ */
+ mutex_lock(&bdev->bd_mutex);
+ spin_lock(&bdev_lock);
+
+ WARN_ON_ONCE(--bdev->bd_holders < 0);
+ WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
+
+ /* bd_contains might point to self, check in a separate step */
+ if ((bdev_free = !bdev->bd_holders))
+ bdev->bd_holder = NULL;
+ if (!bdev->bd_contains->bd_holders)
+ bdev->bd_contains->bd_holder = NULL;
+
+ spin_unlock(&bdev_lock);
+
+ /*
+ * If this was the last claim, remove holder link and
+ * unblock evpoll if it was a write holder.
+ */
+ if (bdev_free) {
+ bd_unlink_disk_holder(bdev);
+ if (bdev->bd_write_holder) {
+ disk_unblock_events(bdev->bd_disk);
+ bdev->bd_write_holder = false;
+ } else
+ disk_check_events(bdev->bd_disk);
+ }
+
+ mutex_unlock(&bdev->bd_mutex);
+ } else
+ disk_check_events(bdev->bd_disk);
+
return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);
@@ -1576,8 +1399,7 @@ EXPORT_SYMBOL(blkdev_put);
static int blkdev_close(struct inode * inode, struct file * filp)
{
struct block_device *bdev = I_BDEV(filp->f_mapping->host);
- if (bdev->bd_holder == filp)
- bd_release(bdev);
+
return blkdev_put(bdev, filp->f_mode);
}
@@ -1722,67 +1544,6 @@ fail:
}
EXPORT_SYMBOL(lookup_bdev);
-/**
- * open_bdev_exclusive - open a block device by name and set it up for use
- *
- * @path: special file representing the block device
- * @mode: FMODE_... combination to pass be used
- * @holder: owner for exclusion
- *
- * Open the blockdevice described by the special file at @path, claim it
- * for the @holder.
- */
-struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
-{
- struct block_device *bdev, *whole;
- int error;
-
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
- return bdev;
-
- whole = bd_start_claiming(bdev, holder);
- if (IS_ERR(whole)) {
- bdput(bdev);
- return whole;
- }
-
- error = blkdev_get(bdev, mode);
- if (error)
- goto out_abort_claiming;
-
- error = -EACCES;
- if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
- goto out_blkdev_put;
-
- bd_finish_claiming(bdev, whole, holder);
- return bdev;
-
-out_blkdev_put:
- blkdev_put(bdev, mode);
-out_abort_claiming:
- bd_abort_claiming(whole, holder);
- return ERR_PTR(error);
-}
-
-EXPORT_SYMBOL(open_bdev_exclusive);
-
-/**
- * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
- *
- * @bdev: blockdevice to close
- * @mode: mode, must match that used to open.
- *
- * This is the counterpart to open_bdev_exclusive().
- */
-void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
-{
- bd_release(bdev);
- blkdev_put(bdev, mode);
-}
-
-EXPORT_SYMBOL(close_bdev_exclusive);
-
int __invalidate_device(struct block_device *bdev)
{
struct super_block *sb = get_super(bdev);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6b9884507837..1718e1a5c320 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -493,7 +493,7 @@ again:
continue;
if (device->bdev) {
- close_bdev_exclusive(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
device->bdev = NULL;
fs_devices->open_devices--;
}
@@ -527,7 +527,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (device->bdev) {
- close_bdev_exclusive(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
fs_devices->open_devices--;
}
if (device->writeable) {
@@ -584,13 +584,15 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
int seeding = 1;
int ret = 0;
+ flags |= FMODE_EXCL;
+
list_for_each_entry(device, head, dev_list) {
if (device->bdev)
continue;
if (!device->name)
continue;
- bdev = open_bdev_exclusive(device->name, flags, holder);
+ bdev = blkdev_get_by_path(device->name, flags, holder);
if (IS_ERR(bdev)) {
printk(KERN_INFO "open %s failed\n", device->name);
goto error;
@@ -642,7 +644,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
error_brelse:
brelse(bh);
error_close:
- close_bdev_exclusive(bdev, FMODE_READ);
+ blkdev_put(bdev, flags);
error:
continue;
}
@@ -688,7 +690,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
mutex_lock(&uuid_mutex);
- bdev = open_bdev_exclusive(path, flags, holder);
+ flags |= FMODE_EXCL;
+ bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
@@ -720,7 +723,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
brelse(bh);
error_close:
- close_bdev_exclusive(bdev, flags);
+ blkdev_put(bdev, flags);
error:
mutex_unlock(&uuid_mutex);
return ret;
@@ -1183,8 +1186,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
goto out;
}
} else {
- bdev = open_bdev_exclusive(device_path, FMODE_READ,
- root->fs_info->bdev_holder);
+ bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
+ root->fs_info->bdev_holder);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
goto out;
@@ -1251,7 +1254,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
root->fs_info->fs_devices->latest_bdev = next_device->bdev;
if (device->bdev) {
- close_bdev_exclusive(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
device->bdev = NULL;
device->fs_devices->open_devices--;
}
@@ -1294,7 +1297,7 @@ error_brelse:
brelse(bh);
error_close:
if (bdev)
- close_bdev_exclusive(bdev, FMODE_READ);
+ blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
out:
mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
@@ -1446,7 +1449,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
return -EINVAL;
- bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
+ bdev = blkdev_get_by_path(device_path, FMODE_EXCL,
+ root->fs_info->bdev_holder);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
@@ -1572,7 +1576,7 @@ out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
error:
- close_bdev_exclusive(bdev, 0);
+ blkdev_put(bdev, FMODE_EXCL);
if (seeding_dev) {
mutex_unlock(&uuid_mutex);
up_write(&sb->s_umount);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2740db49eb04..1be781079450 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -50,7 +50,7 @@ struct btrfs_device {
struct block_device *bdev;
- /* the mode sent to open_bdev_exclusive */
+ /* the mode sent to blkdev_get */
fmode_t mode;
char *name;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 6e99b9ddd4e9..dca9e5e0f73b 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -59,7 +59,7 @@ static struct char_device_struct {
} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
/* index in the above */
-static inline int major_to_index(int major)
+static inline int major_to_index(unsigned major)
{
return major % CHRDEV_MAJOR_HASH_SIZE;
}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b7d0554631e4..7aa767d4f06f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -364,7 +364,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
struct block_device *bdev;
char b[BDEVNAME_SIZE];
- bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
if (IS_ERR(bdev))
goto fail;
return bdev;
@@ -381,8 +381,7 @@ fail:
*/
static int ext3_blkdev_put(struct block_device *bdev)
{
- bd_release(bdev);
- return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
@@ -2162,13 +2161,6 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
if (bdev == NULL)
return NULL;
- if (bd_claim(bdev, sb)) {
- ext3_msg(sb, KERN_ERR,
- "error: failed to claim external journal device");
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
- return NULL;
- }
-
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 29c80f6d8b27..cb10a06775e4 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -657,7 +657,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
struct block_device *bdev;
char b[BDEVNAME_SIZE];
- bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
if (IS_ERR(bdev))
goto fail;
return bdev;
@@ -673,8 +673,7 @@ fail:
*/
static int ext4_blkdev_put(struct block_device *bdev)
{
- bd_release(bdev);
- return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
@@ -3778,13 +3777,6 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
if (bdev == NULL)
return NULL;
- if (bd_claim(bdev, sb)) {
- ext4_msg(sb, KERN_ERR,
- "failed to claim external journal device");
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
- return NULL;
- }
-
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 693f4470a2df..777927ce6f79 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1268,7 +1268,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
{
struct block_device *bdev;
struct super_block *s;
- fmode_t mode = FMODE_READ;
+ fmode_t mode = FMODE_READ | FMODE_EXCL;
int error;
struct gfs2_args args;
struct gfs2_sbd *sdp;
@@ -1276,7 +1276,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
- bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+ bdev = blkdev_get_by_path(dev_name, mode, fs_type);
if (IS_ERR(bdev))
return ERR_CAST(bdev);
@@ -1298,7 +1298,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
goto error_bdev;
if (s->s_root)
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
@@ -1342,7 +1342,7 @@ error_super:
deactivate_locked_super(s);
return ERR_PTR(error);
error_bdev:
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
return ERR_PTR(error);
}
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index e1b8493b9aaa..278e3fb40b71 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1120,16 +1120,13 @@ int lmLogOpen(struct super_block *sb)
* file systems to log may have n-to-1 relationship;
*/
- bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE);
+ bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+ log);
if (IS_ERR(bdev)) {
rc = -PTR_ERR(bdev);
goto free;
}
- if ((rc = bd_claim(bdev, log))) {
- goto close;
- }
-
log->bdev = bdev;
memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid));
@@ -1137,7 +1134,7 @@ int lmLogOpen(struct super_block *sb)
* initialize log:
*/
if ((rc = lmLogInit(log)))
- goto unclaim;
+ goto close;
list_add(&log->journal_list, &jfs_external_logs);
@@ -1163,11 +1160,8 @@ journal_found:
list_del(&log->journal_list);
lbmLogShutdown(log);
- unclaim:
- bd_release(bdev);
-
close: /* close external log device */
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
free: /* free log descriptor */
mutex_unlock(&jfs_log_mutex);
@@ -1512,8 +1506,7 @@ int lmLogClose(struct super_block *sb)
bdev = log->bdev;
rc = lmLogShutdown(log);
- bd_release(bdev);
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
kfree(log);
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 92ca6fbe09bd..723bc5bca09a 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -300,7 +300,7 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
static void bdev_put_device(struct logfs_super *s)
{
- close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -325,13 +325,14 @@ int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
{
struct block_device *bdev;
- bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, type);
+ bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+ type);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
int mtdnr = MINOR(bdev->bd_dev);
- close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
return logfs_get_sb_mtd(p, mtdnr);
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 3a359023c9f7..230b79fbf005 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -845,11 +845,6 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
struct page *page = buf->page;
size_t size;
- int ret;
-
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
size = sd->len;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 70dfdd532b83..0994f6a76c07 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1163,14 +1163,14 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
{
struct nilfs_super_data sd;
struct super_block *s;
- fmode_t mode = FMODE_READ;
+ fmode_t mode = FMODE_READ | FMODE_EXCL;
struct dentry *root_dentry;
int err, s_new = false;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
- sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+ sd.bdev = blkdev_get_by_path(dev_name, mode, fs_type);
if (IS_ERR(sd.bdev))
return ERR_CAST(sd.bdev);
@@ -1249,7 +1249,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
}
if (!s_new)
- close_bdev_exclusive(sd.bdev, mode);
+ blkdev_put(sd.bdev, mode);
return root_dentry;
@@ -1258,7 +1258,7 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
failed:
if (!s_new)
- close_bdev_exclusive(sd.bdev, mode);
+ blkdev_put(sd.bdev, mode);
return ERR_PTR(err);
}
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a6cc05302e9f..b108e863d8f6 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1729,7 +1729,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
goto out;
reg->hr_bdev = I_BDEV(filp->f_mapping->host);
- ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ);
+ ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
if (ret) {
reg->hr_bdev = NULL;
goto out;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 0a8b0ad0c7e2..9c21119512b9 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -237,6 +237,13 @@ ssize_t part_size_show(struct device *dev,
return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
}
+ssize_t part_ro_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+ return sprintf(buf, "%d\n", p->policy ? 1 : 0);
+}
+
ssize_t part_alignment_offset_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -312,6 +319,7 @@ ssize_t part_fail_store(struct device *dev,
static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
+static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
NULL);
@@ -326,6 +334,7 @@ static struct attribute *part_attrs[] = {
&dev_attr_partition.attr,
&dev_attr_start.attr,
&dev_attr_size.attr,
+ &dev_attr_ro.attr,
&dev_attr_alignment_offset.attr,
&dev_attr_discard_alignment.attr,
&dev_attr_stat.attr,
@@ -372,6 +381,11 @@ static void delete_partition_rcu_cb(struct rcu_head *head)
put_device(part_to_dev(part));
}
+void __delete_partition(struct hd_struct *part)
+{
+ call_rcu(&part->rcu_head, delete_partition_rcu_cb);
+}
+
void delete_partition(struct gendisk *disk, int partno)
{
struct disk_part_tbl *ptbl = disk->part_tbl;
@@ -390,7 +404,7 @@ void delete_partition(struct gendisk *disk, int partno)
kobject_put(part->holder_dir);
device_del(part_to_dev(part));
- call_rcu(&part->rcu_head, delete_partition_rcu_cb);
+ hd_struct_put(part);
}
static ssize_t whole_disk_show(struct device *dev,
@@ -489,6 +503,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
if (!dev_get_uevent_suppress(ddev))
kobject_uevent(&pdev->kobj, KOBJ_ADD);
+ hd_ref_init(p);
return p;
out_free_info:
@@ -507,65 +522,6 @@ out_put:
return ERR_PTR(err);
}
-/* Not exported, helper to add_disk(). */
-void register_disk(struct gendisk *disk)
-{
- struct device *ddev = disk_to_dev(disk);
- struct block_device *bdev;
- struct disk_part_iter piter;
- struct hd_struct *part;
- int err;
-
- ddev->parent = disk->driverfs_dev;
-
- dev_set_name(ddev, disk->disk_name);
-
- /* delay uevents, until we scanned partition table */
- dev_set_uevent_suppress(ddev, 1);
-
- if (device_add(ddev))
- return;
- if (!sysfs_deprecated) {
- err = sysfs_create_link(block_depr, &ddev->kobj,
- kobject_name(&ddev->kobj));
- if (err) {
- device_del(ddev);
- return;
- }
- }
- disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
- disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
-
- /* No minors to use for partitions */
- if (!disk_partitionable(disk))
- goto exit;
-
- /* No such device (e.g., media were just removed) */
- if (!get_capacity(disk))
- goto exit;
-
- bdev = bdget_disk(disk, 0);
- if (!bdev)
- goto exit;
-
- bdev->bd_invalidated = 1;
- err = blkdev_get(bdev, FMODE_READ);
- if (err < 0)
- goto exit;
- blkdev_put(bdev, FMODE_READ);
-
-exit:
- /* announce disk after possible partitions are created */
- dev_set_uevent_suppress(ddev, 0);
- kobject_uevent(&ddev->kobj, KOBJ_ADD);
-
- /* announce possible partitions */
- disk_part_iter_init(&piter, disk, 0);
- while ((part = disk_part_iter_next(&piter)))
- kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
- disk_part_iter_exit(&piter);
-}
-
static bool disk_unlock_native_capacity(struct gendisk *disk)
{
const struct block_device_operations *bdops = disk->fops;
@@ -728,33 +684,3 @@ fail:
}
EXPORT_SYMBOL(read_dev_sector);
-
-void del_gendisk(struct gendisk *disk)
-{
- struct disk_part_iter piter;
- struct hd_struct *part;
-
- /* invalidate stuff */
- disk_part_iter_init(&piter, disk,
- DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
- while ((part = disk_part_iter_next(&piter))) {
- invalidate_partition(disk, part->partno);
- delete_partition(disk, part->partno);
- }
- disk_part_iter_exit(&piter);
-
- invalidate_partition(disk, 0);
- blk_free_devt(disk_to_dev(disk)->devt);
- set_capacity(disk, 0);
- disk->flags &= ~GENHD_FL_UP;
- unlink_gendisk(disk);
- part_stat_set_all(&disk->part0, 0);
- disk->part0.stamp = 0;
-
- kobject_put(disk->part0.holder_dir);
- kobject_put(disk->slave_dir);
- disk->driverfs_dev = NULL;
- if (!sysfs_deprecated)
- sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
- device_del(disk_to_dev(disk));
-}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index d31bce1a9f90..3eea859e6990 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2551,8 +2551,6 @@ static int release_journal_dev(struct super_block *super,
result = 0;
if (journal->j_dev_bd != NULL) {
- if (journal->j_dev_bd->bd_dev != super->s_dev)
- bd_release(journal->j_dev_bd);
result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
journal->j_dev_bd = NULL;
}
@@ -2570,7 +2568,7 @@ static int journal_init_dev(struct super_block *super,
{
int result;
dev_t jdev;
- fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
+ fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
char b[BDEVNAME_SIZE];
result = 0;
@@ -2584,7 +2582,10 @@ static int journal_init_dev(struct super_block *super,
/* there is no "jdev" option and journal is on separate device */
if ((!jdev_name || !jdev_name[0])) {
- journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
+ if (jdev == super->s_dev)
+ blkdev_mode &= ~FMODE_EXCL;
+ journal->j_dev_bd = blkdev_get_by_dev(jdev, blkdev_mode,
+ journal);
journal->j_dev_mode = blkdev_mode;
if (IS_ERR(journal->j_dev_bd)) {
result = PTR_ERR(journal->j_dev_bd);
@@ -2593,22 +2594,14 @@ static int journal_init_dev(struct super_block *super,
"cannot init journal device '%s': %i",
__bdevname(jdev, b), result);
return result;
- } else if (jdev != super->s_dev) {
- result = bd_claim(journal->j_dev_bd, journal);
- if (result) {
- blkdev_put(journal->j_dev_bd, blkdev_mode);
- return result;
- }
-
+ } else if (jdev != super->s_dev)
set_blocksize(journal->j_dev_bd, super->s_blocksize);
- }
return 0;
}
journal->j_dev_mode = blkdev_mode;
- journal->j_dev_bd = open_bdev_exclusive(jdev_name,
- blkdev_mode, journal);
+ journal->j_dev_bd = blkdev_get_by_path(jdev_name, blkdev_mode, journal);
if (IS_ERR(journal->j_dev_bd)) {
result = PTR_ERR(journal->j_dev_bd);
journal->j_dev_bd = NULL;
diff --git a/fs/splice.c b/fs/splice.c
index ce2f02579e35..50a5d978da16 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -682,19 +682,14 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
{
struct file *file = sd->u.file;
loff_t pos = sd->pos;
- int ret, more;
-
- ret = buf->ops->confirm(pipe, buf);
- if (!ret) {
- more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
- if (file->f_op && file->f_op->sendpage)
- ret = file->f_op->sendpage(file, buf->page, buf->offset,
- sd->len, &pos, more);
- else
- ret = -EINVAL;
- }
+ int more;
- return ret;
+ if (!likely(file->f_op && file->f_op->sendpage))
+ return -EINVAL;
+
+ more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
+ return file->f_op->sendpage(file, buf->page, buf->offset,
+ sd->len, &pos, more);
}
/*
@@ -727,13 +722,6 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
void *fsdata;
int ret;
- /*
- * make sure the data in this buffer is uptodate
- */
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
-
offset = sd->pos & ~PAGE_CACHE_MASK;
this_len = sd->len;
@@ -805,12 +793,17 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
if (sd->len > sd->total_len)
sd->len = sd->total_len;
- ret = actor(pipe, buf, sd);
- if (ret <= 0) {
+ ret = buf->ops->confirm(pipe, buf);
+ if (unlikely(ret)) {
if (ret == -ENODATA)
ret = 0;
return ret;
}
+
+ ret = actor(pipe, buf, sd);
+ if (ret <= 0)
+ return ret;
+
buf->offset += ret;
buf->len -= ret;
@@ -1044,10 +1037,6 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
int ret;
void *data;
- ret = buf->ops->confirm(pipe, buf);
- if (ret)
- return ret;
-
data = buf->ops->map(pipe, buf, 0);
ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
buf->ops->unmap(pipe, buf, data);
@@ -1495,10 +1484,6 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
char *src;
int ret;
- ret = buf->ops->confirm(pipe, buf);
- if (unlikely(ret))
- return ret;
-
/*
* See if we can use the atomic maps, by prefaulting in the
* pages and doing an atomic copy
diff --git a/fs/super.c b/fs/super.c
index 823e061faa87..4f6a3571a634 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -767,13 +767,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
{
struct block_device *bdev;
struct super_block *s;
- fmode_t mode = FMODE_READ;
+ fmode_t mode = FMODE_READ | FMODE_EXCL;
int error = 0;
if (!(flags & MS_RDONLY))
mode |= FMODE_WRITE;
- bdev = open_bdev_exclusive(dev_name, mode, fs_type);
+ bdev = blkdev_get_by_path(dev_name, mode, fs_type);
if (IS_ERR(bdev))
return ERR_CAST(bdev);
@@ -802,13 +802,13 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
/*
* s_umount nests inside bd_mutex during
- * __invalidate_device(). close_bdev_exclusive()
- * acquires bd_mutex and can't be called under
- * s_umount. Drop s_umount temporarily. This is safe
- * as we're holding an active reference.
+ * __invalidate_device(). blkdev_put() acquires
+ * bd_mutex and can't be called under s_umount. Drop
+ * s_umount temporarily. This is safe as we're
+ * holding an active reference.
*/
up_write(&s->s_umount);
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
down_write(&s->s_umount);
} else {
char b[BDEVNAME_SIZE];
@@ -832,7 +832,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
error_s:
error = PTR_ERR(s);
error_bdev:
- close_bdev_exclusive(bdev, mode);
+ blkdev_put(bdev, mode);
error:
return ERR_PTR(error);
}
@@ -863,7 +863,8 @@ void kill_block_super(struct super_block *sb)
bdev->bd_super = NULL;
generic_shutdown_super(sb);
sync_blockdev(bdev);
- close_bdev_exclusive(bdev, mode);
+ WARN_ON_ONCE(!(mode & FMODE_EXCL));
+ blkdev_put(bdev, mode | FMODE_EXCL);
}
EXPORT_SYMBOL(kill_block_super);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a10f6416e563..bd07f7339366 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -606,7 +606,8 @@ xfs_blkdev_get(
{
int error = 0;
- *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp);
+ *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+ mp);
if (IS_ERR(*bdevp)) {
error = PTR_ERR(*bdevp);
printk("XFS: Invalid device [%s], error=%d\n", name, error);
@@ -620,7 +621,7 @@ xfs_blkdev_put(
struct block_device *bdev)
{
if (bdev)
- close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 36ab42c9bb99..4d18ff34670a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -115,6 +115,7 @@ struct request {
void *elevator_private3;
struct gendisk *rq_disk;
+ struct hd_struct *part;
unsigned long start_time;
#ifdef CONFIG_BLK_CGROUP
unsigned long long start_time_ns;
@@ -646,7 +647,6 @@ static inline void rq_flush_dcache_pages(struct request *rq)
extern int blk_register_queue(struct gendisk *disk);
extern void blk_unregister_queue(struct gendisk *disk);
-extern void register_disk(struct gendisk *dev);
extern void generic_make_request(struct bio *bio);
extern void blk_rq_init(struct request_queue *q, struct request *rq);
extern void blk_put_request(struct request *);
@@ -1256,6 +1256,9 @@ struct block_device_operations {
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*direct_access) (struct block_device *, sector_t,
void **, unsigned long *);
+ unsigned int (*check_events) (struct gendisk *disk,
+ unsigned int clearing);
+ /* ->media_changed() is DEPRECATED, use ->check_events() instead */
int (*media_changed) (struct gendisk *);
void (*unlock_native_capacity) (struct gendisk *);
int (*revalidate_disk) (struct gendisk *);
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 78e904796622..35eae4b67503 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -946,6 +946,8 @@ struct cdrom_device_info {
/* device-related storage */
unsigned int options : 30; /* options flags */
unsigned mc_flags : 2; /* media change buffer flags */
+ unsigned int vfs_events; /* cached events for vfs path */
+ unsigned int ioctl_events; /* cached events for ioctl path */
int use_count; /* number of times device opened */
char name[20]; /* name of the device type */
/* per-device flags */
@@ -965,6 +967,8 @@ struct cdrom_device_ops {
int (*open) (struct cdrom_device_info *, int);
void (*release) (struct cdrom_device_info *);
int (*drive_status) (struct cdrom_device_info *, int);
+ unsigned int (*check_events) (struct cdrom_device_info *cdi,
+ unsigned int clearing, int slot);
int (*media_changed) (struct cdrom_device_info *, int);
int (*tray_move) (struct cdrom_device_info *, int);
int (*lock_door) (struct cdrom_device_info *, int);
@@ -993,6 +997,8 @@ extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
extern void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode);
extern int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
fmode_t mode, unsigned int cmd, unsigned long arg);
+extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi,
+ unsigned int clearing);
extern int cdrom_media_changed(struct cdrom_device_info *);
extern int register_cdrom(struct cdrom_device_info *cdi);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c0701288d204..3984f2358d1f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -664,8 +664,9 @@ struct block_device {
void * bd_claiming;
void * bd_holder;
int bd_holders;
+ bool bd_write_holder;
#ifdef CONFIG_SYSFS
- struct list_head bd_holder_list;
+ struct gendisk * bd_holder_disk; /* for sysfs slave linkng */
#endif
struct block_device * bd_contains;
unsigned bd_block_size;
@@ -2019,7 +2020,6 @@ extern struct block_device *bdgrab(struct block_device *bdev);
extern void bd_set_size(struct block_device *, loff_t size);
extern void bd_forget(struct inode *inode);
extern void bdput(struct block_device *);
-extern struct block_device *open_by_devnum(dev_t, fmode_t);
extern void invalidate_bdev(struct block_device *);
extern int sync_blockdev(struct block_device *bdev);
extern struct super_block *freeze_bdev(struct block_device *);
@@ -2050,16 +2050,20 @@ extern const struct file_operations def_fifo_fops;
extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-extern int blkdev_get(struct block_device *, fmode_t);
-extern int blkdev_put(struct block_device *, fmode_t);
-extern int bd_claim(struct block_device *, void *);
-extern void bd_release(struct block_device *);
+extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
+extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder);
+extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
+ void *holder);
+extern int blkdev_put(struct block_device *bdev, fmode_t mode);
#ifdef CONFIG_SYSFS
-extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *);
-extern void bd_release_from_disk(struct block_device *, struct gendisk *);
+extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
#else
-#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder)
-#define bd_release_from_disk(bdev, disk) bd_release(bdev)
+static inline int bd_link_disk_holder(struct block_device *bdev,
+ struct gendisk *disk)
+{
+ return 0;
+}
#endif
#endif
@@ -2095,8 +2099,6 @@ static inline void unregister_chrdev(unsigned int major, const char *name)
extern const char *__bdevname(dev_t, char *buffer);
extern const char *bdevname(struct block_device *bdev, char *buffer);
extern struct block_device *lookup_bdev(const char *);
-extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *);
-extern void close_bdev_exclusive(struct block_device *, fmode_t);
extern void blkdev_show(struct seq_file *,off_t);
#else
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7a7b9c1644e4..c0d5f6945c1e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -115,6 +115,7 @@ struct hd_struct {
#else
struct disk_stats dkstats;
#endif
+ atomic_t ref;
struct rcu_head rcu_head;
};
@@ -127,6 +128,11 @@ struct hd_struct {
#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
#define GENHD_FL_NATIVE_CAPACITY 128
+enum {
+ DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */
+ DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */
+};
+
#define BLK_SCSI_MAX_CMDS (256)
#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
@@ -143,6 +149,8 @@ struct disk_part_tbl {
struct hd_struct __rcu *part[];
};
+struct disk_events;
+
struct gendisk {
/* major, first_minor and minors are input parameters only,
* don't use directly. Use disk_devt() and disk_max_parts().
@@ -154,6 +162,10 @@ struct gendisk {
char disk_name[DISK_NAME_LEN]; /* name of major driver */
char *(*devnode)(struct gendisk *gd, mode_t *mode);
+
+ unsigned int events; /* supported events */
+ unsigned int async_events; /* async events, subset of all */
+
/* Array of pointers to partitions indexed by partno.
* Protected with matching bdev lock but stat and other
* non-critical accesses use RCU. Always access through
@@ -171,9 +183,8 @@ struct gendisk {
struct kobject *slave_dir;
struct timer_rand_state *random;
-
atomic_t sync_io; /* RAID */
- struct work_struct async_notify;
+ struct disk_events *ev;
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity *integrity;
#endif
@@ -395,7 +406,6 @@ extern void part_round_stats(int cpu, struct hd_struct *part);
/* block/genhd.c */
extern void add_disk(struct gendisk *disk);
extern void del_gendisk(struct gendisk *gp);
-extern void unlink_gendisk(struct gendisk *gp);
extern struct gendisk *get_gendisk(dev_t dev, int *partno);
extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
@@ -407,6 +417,11 @@ static inline int get_disk_ro(struct gendisk *disk)
return disk->part0.policy;
}
+extern void disk_block_events(struct gendisk *disk);
+extern void disk_unblock_events(struct gendisk *disk);
+extern void disk_check_events(struct gendisk *disk);
+extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
+
/* drivers/char/random.c */
extern void add_disk_randomness(struct gendisk *disk);
extern void rand_initialize_disk(struct gendisk *disk);
@@ -583,6 +598,7 @@ extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
sector_t len, int flags,
struct partition_meta_info
*info);
+extern void __delete_partition(struct hd_struct *);
extern void delete_partition(struct gendisk *, int);
extern void printk_all_partitions(void);
@@ -611,6 +627,29 @@ extern ssize_t part_fail_store(struct device *dev,
const char *buf, size_t count);
#endif /* CONFIG_FAIL_MAKE_REQUEST */
+static inline void hd_ref_init(struct hd_struct *part)
+{
+ atomic_set(&part->ref, 1);
+ smp_mb();
+}
+
+static inline void hd_struct_get(struct hd_struct *part)
+{
+ atomic_inc(&part->ref);
+ smp_mb__after_atomic_inc();
+}
+
+static inline int hd_struct_try_get(struct hd_struct *part)
+{
+ return atomic_inc_not_zero(&part->ref);
+}
+
+static inline void hd_struct_put(struct hd_struct *part)
+{
+ if (atomic_dec_and_test(&part->ref))
+ __delete_partition(part);
+}
+
#else /* CONFIG_BLOCK */
static inline void printk_all_partitions(void) { }
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 1651fef18831..648d23358038 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -104,6 +104,7 @@ struct scsi_cmnd;
#define UNMAP 0x42
#define READ_TOC 0x43
#define READ_HEADER 0x44
+#define GET_EVENT_STATUS_NOTIFICATION 0x4a
#define LOG_SELECT 0x4c
#define LOG_SENSE 0x4d
#define XDWRITEREAD_10 0x53
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index d8ce278515c3..aba421d68f6f 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -206,15 +206,16 @@ TRACE_EVENT(block_bio_bounce,
* block_bio_complete - completed all work on the block operation
* @q: queue holding the block operation
* @bio: block operation completed
+ * @error: io error value
*
* This tracepoint indicates there is no further work to do on this
* block IO operation @bio.
*/
TRACE_EVENT(block_bio_complete,
- TP_PROTO(struct request_queue *q, struct bio *bio),
+ TP_PROTO(struct request_queue *q, struct bio *bio, int error),
- TP_ARGS(q, bio),
+ TP_ARGS(q, bio, error),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -228,6 +229,7 @@ TRACE_EVENT(block_bio_complete,
__entry->dev = bio->bi_bdev->bd_dev;
__entry->sector = bio->bi_sector;
__entry->nr_sector = bio->bi_size >> 9;
+ __entry->error = error;
blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
),
@@ -486,16 +488,16 @@ TRACE_EVENT(block_split,
);
/**
- * block_remap - map request for a partition to the raw device
+ * block_bio_remap - map request for a logical device to the raw device
* @q: queue holding the operation
* @bio: revised operation
* @dev: device for the operation
* @from: original sector for the operation
*
- * An operation for a partition on a block device has been mapped to the
+ * An operation for a logical device has been mapped to the
* raw block device.
*/
-TRACE_EVENT(block_remap,
+TRACE_EVENT(block_bio_remap,
TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
sector_t from),
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 69425889bd40..7c97c3a0eee3 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -224,7 +224,7 @@ static int swsusp_swap_check(void)
return res;
root_swap = res;
- res = blkdev_get(hib_resume_bdev, FMODE_WRITE);
+ res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
if (res)
return res;
@@ -930,7 +930,8 @@ int swsusp_check(void)
{
int error;
- hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
+ hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
+ FMODE_READ, NULL);
if (!IS_ERR(hib_resume_bdev)) {
set_blocksize(hib_resume_bdev, PAGE_SIZE);
clear_page(swsusp_header);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7b8ec0281548..153562d0b93c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -758,53 +758,58 @@ static void blk_add_trace_rq_complete(void *ignore,
* @q: queue the io is for
* @bio: the source bio
* @what: the action
+ * @error: error, if any
*
* Description:
* Records an action against a bio. Will log the bio offset + size.
*
**/
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
- u32 what)
+ u32 what, int error)
{
struct blk_trace *bt = q->blk_trace;
if (likely(!bt))
return;
+ if (!error && !bio_flagged(bio, BIO_UPTODATE))
+ error = EIO;
+
__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
- !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
+ error, 0, NULL);
}
static void blk_add_trace_bio_bounce(void *ignore,
struct request_queue *q, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
+ blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
}
static void blk_add_trace_bio_complete(void *ignore,
- struct request_queue *q, struct bio *bio)
+ struct request_queue *q, struct bio *bio,
+ int error)
{
- blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
+ blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
}
static void blk_add_trace_bio_backmerge(void *ignore,
struct request_queue *q,
struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+ blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
}
static void blk_add_trace_bio_frontmerge(void *ignore,
struct request_queue *q,
struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+ blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
}
static void blk_add_trace_bio_queue(void *ignore,
struct request_queue *q, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+ blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
}
static void blk_add_trace_getrq(void *ignore,
@@ -812,7 +817,7 @@ static void blk_add_trace_getrq(void *ignore,
struct bio *bio, int rw)
{
if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
+ blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
else {
struct blk_trace *bt = q->blk_trace;
@@ -827,7 +832,7 @@ static void blk_add_trace_sleeprq(void *ignore,
struct bio *bio, int rw)
{
if (bio)
- blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
+ blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
else {
struct blk_trace *bt = q->blk_trace;
@@ -887,7 +892,7 @@ static void blk_add_trace_split(void *ignore,
}
/**
- * blk_add_trace_remap - Add a trace for a remap operation
+ * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
* @ignore: trace callback data parameter (not used)
* @q: queue the io is for
* @bio: the source bio
@@ -899,9 +904,9 @@ static void blk_add_trace_split(void *ignore,
* it spans a stripe (or similar). Add a trace for that action.
*
**/
-static void blk_add_trace_remap(void *ignore,
- struct request_queue *q, struct bio *bio,
- dev_t dev, sector_t from)
+static void blk_add_trace_bio_remap(void *ignore,
+ struct request_queue *q, struct bio *bio,
+ dev_t dev, sector_t from)
{
struct blk_trace *bt = q->blk_trace;
struct blk_io_trace_remap r;
@@ -1016,7 +1021,7 @@ static void blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_split(blk_add_trace_split, NULL);
WARN_ON(ret);
- ret = register_trace_block_remap(blk_add_trace_remap, NULL);
+ ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
WARN_ON(ret);
ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
WARN_ON(ret);
@@ -1025,7 +1030,7 @@ static void blk_register_tracepoints(void)
static void blk_unregister_tracepoints(void)
{
unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
- unregister_trace_block_remap(blk_add_trace_remap, NULL);
+ unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
unregister_trace_block_split(blk_add_trace_split, NULL);
unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 67ddaaf98c74..b6adcfbf6f48 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1677,7 +1677,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
if (S_ISBLK(inode->i_mode)) {
struct block_device *bdev = I_BDEV(inode);
set_blocksize(bdev, p->old_block_size);
- bd_release(bdev);
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
} else {
mutex_lock(&inode->i_mutex);
inode->i_flags &= ~S_SWAPFILE;
@@ -1939,7 +1939,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = -EINVAL;
if (S_ISBLK(inode->i_mode)) {
bdev = I_BDEV(inode);
- error = bd_claim(bdev, sys_swapon);
+ error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
+ sys_swapon);
if (error < 0) {
bdev = NULL;
error = -EINVAL;
@@ -2136,7 +2137,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
bad_swap:
if (bdev) {
set_blocksize(bdev, p->old_block_size);
- bd_release(bdev);
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}
destroy_swap_extents(p);
swap_cgroup_swapoff(type);