mirror of
https://gitee.com/bianbu-linux/linux-6.6
synced 2025-04-26 14:17:26 -04:00
dm thin: add read only and fail io modes
Add read-only and fail-io modes to thin provisioning. If a transaction commit fails, the pool's metadata device will transition to "read-only" mode. If a commit fails while the pool is already in read-only mode, the pool transitions to "fail-io" mode. Once in fail-io mode, the pool and all associated thin devices will report a status of "Fail". Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
This commit is contained in:
parent
da105ed5fd
commit
e49e582965
2 changed files with 347 additions and 99 deletions
|
@ -231,6 +231,9 @@ i) Constructor
|
||||||
no_discard_passdown: Don't pass discards down to the underlying
|
no_discard_passdown: Don't pass discards down to the underlying
|
||||||
data device, but just remove the mapping.
|
data device, but just remove the mapping.
|
||||||
|
|
||||||
|
read_only: Don't allow any changes to be made to the pool
|
||||||
|
metadata.
|
||||||
|
|
||||||
Data block size must be between 64KB (128 sectors) and 1GB
|
Data block size must be between 64KB (128 sectors) and 1GB
|
||||||
(2097152 sectors) inclusive.
|
(2097152 sectors) inclusive.
|
||||||
|
|
||||||
|
@ -239,7 +242,7 @@ ii) Status
|
||||||
|
|
||||||
<transaction id> <used metadata blocks>/<total metadata blocks>
|
<transaction id> <used metadata blocks>/<total metadata blocks>
|
||||||
<used data blocks>/<total data blocks> <held metadata root>
|
<used data blocks>/<total data blocks> <held metadata root>
|
||||||
|
[no_]discard_passdown ro|rw
|
||||||
|
|
||||||
transaction id:
|
transaction id:
|
||||||
A 64-bit number used by userspace to help synchronise with metadata
|
A 64-bit number used by userspace to help synchronise with metadata
|
||||||
|
@ -257,6 +260,21 @@ ii) Status
|
||||||
held root. This feature is not yet implemented so '-' is
|
held root. This feature is not yet implemented so '-' is
|
||||||
always returned.
|
always returned.
|
||||||
|
|
||||||
|
discard_passdown|no_discard_passdown
|
||||||
|
Whether or not discards are actually being passed down to the
|
||||||
|
underlying device. Even if this is enabled when loading the table,
|
||||||
|
it can get disabled if the underlying device doesn't support it.
|
||||||
|
|
||||||
|
ro|rw
|
||||||
|
If the pool encounters certain types of device failures it will
|
||||||
|
drop into a read-only metadata mode in which no changes to
|
||||||
|
the pool metadata (like allocating new blocks) are permitted.
|
||||||
|
|
||||||
|
In serious cases where even a read-only mode is deemed unsafe
|
||||||
|
no further I/O will be permitted and the status will just
|
||||||
|
contain the string 'Fail'. The userspace recovery tools
|
||||||
|
should then be used.
|
||||||
|
|
||||||
iii) Messages
|
iii) Messages
|
||||||
|
|
||||||
create_thin <dev id>
|
create_thin <dev id>
|
||||||
|
@ -329,3 +347,7 @@ regain some space then send the 'trim' message to the pool.
|
||||||
ii) Status
|
ii) Status
|
||||||
|
|
||||||
<nr mapped sectors> <highest mapped sector>
|
<nr mapped sectors> <highest mapped sector>
|
||||||
|
|
||||||
|
If the pool has encountered device errors and failed, the status
|
||||||
|
will just contain the string 'Fail'. The userspace recovery
|
||||||
|
tools should then be used.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2011 Red Hat UK.
|
* Copyright (C) 2011-2012 Red Hat UK.
|
||||||
*
|
*
|
||||||
* This file is released under the GPL.
|
* This file is released under the GPL.
|
||||||
*/
|
*/
|
||||||
|
@ -496,12 +496,27 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
|
||||||
*/
|
*/
|
||||||
struct dm_thin_new_mapping;
|
struct dm_thin_new_mapping;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The pool runs in 3 modes, ordered from least to most degraded so that modes can be compared.
|
||||||
|
*/
|
||||||
|
enum pool_mode {
|
||||||
|
PM_WRITE, /* metadata may be changed */
|
||||||
|
PM_READ_ONLY, /* metadata may not be changed */
|
||||||
|
PM_FAIL, /* all I/O fails */
|
||||||
|
};
|
||||||
|
|
||||||
struct pool_features {
|
struct pool_features {
|
||||||
|
enum pool_mode mode;
|
||||||
|
|
||||||
unsigned zero_new_blocks:1;
|
unsigned zero_new_blocks:1;
|
||||||
unsigned discard_enabled:1;
|
unsigned discard_enabled:1;
|
||||||
unsigned discard_passdown:1;
|
unsigned discard_passdown:1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct thin_c;
|
||||||
|
typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio);
|
||||||
|
typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m);
|
||||||
|
|
||||||
struct pool {
|
struct pool {
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
struct dm_target *ti; /* Only set if a pool target is bound */
|
struct dm_target *ti; /* Only set if a pool target is bound */
|
||||||
|
@ -542,8 +557,17 @@ struct pool {
|
||||||
struct dm_thin_new_mapping *next_mapping;
|
struct dm_thin_new_mapping *next_mapping;
|
||||||
mempool_t *mapping_pool;
|
mempool_t *mapping_pool;
|
||||||
mempool_t *endio_hook_pool;
|
mempool_t *endio_hook_pool;
|
||||||
|
|
||||||
|
process_bio_fn process_bio;
|
||||||
|
process_bio_fn process_discard;
|
||||||
|
|
||||||
|
process_mapping_fn process_prepared_mapping;
|
||||||
|
process_mapping_fn process_prepared_discard;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static enum pool_mode get_pool_mode(struct pool *pool);
|
||||||
|
static void set_pool_mode(struct pool *pool, enum pool_mode mode);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Target context for a pool.
|
* Target context for a pool.
|
||||||
*/
|
*/
|
||||||
|
@ -718,16 +742,28 @@ static void issue(struct thin_c *tc, struct bio *bio)
|
||||||
struct pool *pool = tc->pool;
|
struct pool *pool = tc->pool;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
/*
|
if (!bio_triggers_commit(tc, bio)) {
|
||||||
* Batch together any FUA/FLUSH bios we find and then issue
|
|
||||||
* a single commit for them in process_deferred_bios().
|
|
||||||
*/
|
|
||||||
if (bio_triggers_commit(tc, bio)) {
|
|
||||||
spin_lock_irqsave(&pool->lock, flags);
|
|
||||||
bio_list_add(&pool->deferred_flush_bios, bio);
|
|
||||||
spin_unlock_irqrestore(&pool->lock, flags);
|
|
||||||
} else
|
|
||||||
generic_make_request(bio);
|
generic_make_request(bio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Complete bio with an error if earlier I/O caused changes to
|
||||||
|
* the metadata that can't be committed, e.g., due to I/O errors
|
||||||
|
* on the metadata device.
|
||||||
|
*/
|
||||||
|
if (dm_thin_aborted_changes(tc->td)) {
|
||||||
|
bio_io_error(bio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Batch together any bios that trigger commits and then issue a
|
||||||
|
* single commit for them in process_deferred_bios().
|
||||||
|
*/
|
||||||
|
spin_lock_irqsave(&pool->lock, flags);
|
||||||
|
bio_list_add(&pool->deferred_flush_bios, bio);
|
||||||
|
spin_unlock_irqrestore(&pool->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
|
static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
|
||||||
|
@ -864,6 +900,14 @@ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell
|
||||||
wake_worker(pool);
|
wake_worker(pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
|
||||||
|
{
|
||||||
|
if (m->bio)
|
||||||
|
m->bio->bi_end_io = m->saved_bi_end_io;
|
||||||
|
cell_error(m->cell);
|
||||||
|
list_del(&m->list);
|
||||||
|
mempool_free(m, m->tc->pool->mapping_pool);
|
||||||
|
}
|
||||||
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
|
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
|
||||||
{
|
{
|
||||||
struct thin_c *tc = m->tc;
|
struct thin_c *tc = m->tc;
|
||||||
|
@ -908,18 +952,20 @@ out:
|
||||||
mempool_free(m, tc->pool->mapping_pool);
|
mempool_free(m, tc->pool->mapping_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void process_prepared_discard(struct dm_thin_new_mapping *m)
|
static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
|
||||||
{
|
{
|
||||||
int r;
|
|
||||||
struct thin_c *tc = m->tc;
|
struct thin_c *tc = m->tc;
|
||||||
|
|
||||||
r = dm_thin_remove_block(tc->td, m->virt_block);
|
bio_io_error(m->bio);
|
||||||
if (r)
|
cell_defer_except(tc, m->cell);
|
||||||
DMERR("dm_thin_remove_block() failed");
|
cell_defer_except(tc, m->cell2);
|
||||||
|
mempool_free(m, tc->pool->mapping_pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
|
||||||
|
{
|
||||||
|
struct thin_c *tc = m->tc;
|
||||||
|
|
||||||
/*
|
|
||||||
* Pass the discard down to the underlying device?
|
|
||||||
*/
|
|
||||||
if (m->pass_discard)
|
if (m->pass_discard)
|
||||||
remap_and_issue(tc, m->bio, m->data_block);
|
remap_and_issue(tc, m->bio, m->data_block);
|
||||||
else
|
else
|
||||||
|
@ -930,8 +976,20 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m)
|
||||||
mempool_free(m, tc->pool->mapping_pool);
|
mempool_free(m, tc->pool->mapping_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void process_prepared_discard(struct dm_thin_new_mapping *m)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
struct thin_c *tc = m->tc;
|
||||||
|
|
||||||
|
r = dm_thin_remove_block(tc->td, m->virt_block);
|
||||||
|
if (r)
|
||||||
|
DMERR("dm_thin_remove_block() failed");
|
||||||
|
|
||||||
|
process_prepared_discard_passdown(m);
|
||||||
|
}
|
||||||
|
|
||||||
static void process_prepared(struct pool *pool, struct list_head *head,
|
static void process_prepared(struct pool *pool, struct list_head *head,
|
||||||
void (*fn)(struct dm_thin_new_mapping *))
|
process_mapping_fn *fn)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
struct list_head maps;
|
struct list_head maps;
|
||||||
|
@ -943,7 +1001,7 @@ static void process_prepared(struct pool *pool, struct list_head *head,
|
||||||
spin_unlock_irqrestore(&pool->lock, flags);
|
spin_unlock_irqrestore(&pool->lock, flags);
|
||||||
|
|
||||||
list_for_each_entry_safe(m, tmp, &maps, list)
|
list_for_each_entry_safe(m, tmp, &maps, list)
|
||||||
fn(m);
|
(*fn)(m);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1109,6 +1167,35 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int commit(struct pool *pool)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
|
||||||
|
r = dm_pool_commit_metadata(pool->pmd);
|
||||||
|
if (r)
|
||||||
|
DMERR("commit failed, error = %d", r);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A non-zero return indicates read_only or fail_io mode.
|
||||||
|
* Many callers don't care about the return value.
|
||||||
|
*/
|
||||||
|
static int commit_or_fallback(struct pool *pool)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
|
||||||
|
if (get_pool_mode(pool) != PM_WRITE)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
r = commit(pool);
|
||||||
|
if (r)
|
||||||
|
set_pool_mode(pool, PM_READ_ONLY);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
|
static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
|
||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
|
@ -1137,12 +1224,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
|
||||||
* Try to commit to see if that will free up some
|
* Try to commit to see if that will free up some
|
||||||
* more space.
|
* more space.
|
||||||
*/
|
*/
|
||||||
r = dm_pool_commit_metadata(pool->pmd);
|
(void) commit_or_fallback(pool);
|
||||||
if (r) {
|
|
||||||
DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
|
|
||||||
__func__, r);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
|
r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
|
||||||
if (r)
|
if (r)
|
||||||
|
@ -1373,6 +1455,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
|
||||||
|
|
||||||
default:
|
default:
|
||||||
DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
|
DMERR("%s: alloc_data_block() failed, error = %d", __func__, r);
|
||||||
|
set_pool_mode(tc->pool, PM_READ_ONLY);
|
||||||
cell_error(cell);
|
cell_error(cell);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1430,6 +1513,49 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
int rw = bio_data_dir(bio);
|
||||||
|
dm_block_t block = get_bio_block(tc, bio);
|
||||||
|
struct dm_thin_lookup_result lookup_result;
|
||||||
|
|
||||||
|
r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
|
||||||
|
switch (r) {
|
||||||
|
case 0:
|
||||||
|
if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
|
||||||
|
bio_io_error(bio);
|
||||||
|
else
|
||||||
|
remap_and_issue(tc, bio, lookup_result.block);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case -ENODATA:
|
||||||
|
if (rw != READ) {
|
||||||
|
bio_io_error(bio);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tc->origin_dev) {
|
||||||
|
remap_to_origin_and_issue(tc, bio);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
zero_fill_bio(bio);
|
||||||
|
bio_endio(bio, 0);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
DMERR("dm_thin_find_block() failed, error = %d", r);
|
||||||
|
bio_io_error(bio);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void process_bio_fail(struct thin_c *tc, struct bio *bio)
|
||||||
|
{
|
||||||
|
bio_io_error(bio);
|
||||||
|
}
|
||||||
|
|
||||||
static int need_commit_due_to_time(struct pool *pool)
|
static int need_commit_due_to_time(struct pool *pool)
|
||||||
{
|
{
|
||||||
return jiffies < pool->last_commit_jiffies ||
|
return jiffies < pool->last_commit_jiffies ||
|
||||||
|
@ -1441,7 +1567,6 @@ static void process_deferred_bios(struct pool *pool)
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
struct bio_list bios;
|
struct bio_list bios;
|
||||||
int r;
|
|
||||||
|
|
||||||
bio_list_init(&bios);
|
bio_list_init(&bios);
|
||||||
|
|
||||||
|
@ -1468,9 +1593,9 @@ static void process_deferred_bios(struct pool *pool)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bio->bi_rw & REQ_DISCARD)
|
if (bio->bi_rw & REQ_DISCARD)
|
||||||
process_discard(tc, bio);
|
pool->process_discard(tc, bio);
|
||||||
else
|
else
|
||||||
process_bio(tc, bio);
|
pool->process_bio(tc, bio);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1486,10 +1611,7 @@ static void process_deferred_bios(struct pool *pool)
|
||||||
if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
|
if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
r = dm_pool_commit_metadata(pool->pmd);
|
if (commit_or_fallback(pool)) {
|
||||||
if (r) {
|
|
||||||
DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
|
|
||||||
__func__, r);
|
|
||||||
while ((bio = bio_list_pop(&bios)))
|
while ((bio = bio_list_pop(&bios)))
|
||||||
bio_io_error(bio);
|
bio_io_error(bio);
|
||||||
return;
|
return;
|
||||||
|
@ -1504,8 +1626,8 @@ static void do_worker(struct work_struct *ws)
|
||||||
{
|
{
|
||||||
struct pool *pool = container_of(ws, struct pool, worker);
|
struct pool *pool = container_of(ws, struct pool, worker);
|
||||||
|
|
||||||
process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping);
|
process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping);
|
||||||
process_prepared(pool, &pool->prepared_discards, process_prepared_discard);
|
process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
|
||||||
process_deferred_bios(pool);
|
process_deferred_bios(pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1522,6 +1644,52 @@ static void do_waker(struct work_struct *ws)
|
||||||
|
|
||||||
/*----------------------------------------------------------------*/
|
/*----------------------------------------------------------------*/
|
||||||
|
|
||||||
|
static enum pool_mode get_pool_mode(struct pool *pool)
|
||||||
|
{
|
||||||
|
return pool->pf.mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void set_pool_mode(struct pool *pool, enum pool_mode mode)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
|
||||||
|
pool->pf.mode = mode;
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
case PM_FAIL:
|
||||||
|
DMERR("switching pool to failure mode");
|
||||||
|
pool->process_bio = process_bio_fail;
|
||||||
|
pool->process_discard = process_bio_fail;
|
||||||
|
pool->process_prepared_mapping = process_prepared_mapping_fail;
|
||||||
|
pool->process_prepared_discard = process_prepared_discard_fail;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PM_READ_ONLY:
|
||||||
|
DMERR("switching pool to read-only mode");
|
||||||
|
r = dm_pool_abort_metadata(pool->pmd);
|
||||||
|
if (r) {
|
||||||
|
DMERR("aborting transaction failed");
|
||||||
|
set_pool_mode(pool, PM_FAIL);
|
||||||
|
} else {
|
||||||
|
dm_pool_metadata_read_only(pool->pmd);
|
||||||
|
pool->process_bio = process_bio_read_only;
|
||||||
|
pool->process_discard = process_discard;
|
||||||
|
pool->process_prepared_mapping = process_prepared_mapping_fail;
|
||||||
|
pool->process_prepared_discard = process_prepared_discard_passdown;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PM_WRITE:
|
||||||
|
pool->process_bio = process_bio;
|
||||||
|
pool->process_discard = process_discard;
|
||||||
|
pool->process_prepared_mapping = process_prepared_mapping;
|
||||||
|
pool->process_prepared_discard = process_prepared_discard;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*----------------------------------------------------------------*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mapping functions.
|
* Mapping functions.
|
||||||
*/
|
*/
|
||||||
|
@ -1567,6 +1735,12 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
|
||||||
struct dm_thin_lookup_result result;
|
struct dm_thin_lookup_result result;
|
||||||
|
|
||||||
map_context->ptr = thin_hook_bio(tc, bio);
|
map_context->ptr = thin_hook_bio(tc, bio);
|
||||||
|
|
||||||
|
if (get_pool_mode(tc->pool) == PM_FAIL) {
|
||||||
|
bio_io_error(bio);
|
||||||
|
return DM_MAPIO_SUBMITTED;
|
||||||
|
}
|
||||||
|
|
||||||
if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
|
if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
|
||||||
thin_defer_bio(tc, bio);
|
thin_defer_bio(tc, bio);
|
||||||
return DM_MAPIO_SUBMITTED;
|
return DM_MAPIO_SUBMITTED;
|
||||||
|
@ -1603,14 +1777,35 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case -ENODATA:
|
case -ENODATA:
|
||||||
|
if (get_pool_mode(tc->pool) == PM_READ_ONLY) {
|
||||||
|
/*
|
||||||
|
* This block isn't provisioned, and we have no way
|
||||||
|
* of doing so. Just error it.
|
||||||
|
*/
|
||||||
|
bio_io_error(bio);
|
||||||
|
r = DM_MAPIO_SUBMITTED;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* fall through */
|
||||||
|
|
||||||
|
case -EWOULDBLOCK:
|
||||||
/*
|
/*
|
||||||
* In future, the failed dm_thin_find_block above could
|
* In future, the failed dm_thin_find_block above could
|
||||||
* provide the hint to load the metadata into cache.
|
* provide the hint to load the metadata into cache.
|
||||||
*/
|
*/
|
||||||
case -EWOULDBLOCK:
|
|
||||||
thin_defer_bio(tc, bio);
|
thin_defer_bio(tc, bio);
|
||||||
r = DM_MAPIO_SUBMITTED;
|
r = DM_MAPIO_SUBMITTED;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
/*
|
||||||
|
* Must always call bio_io_error on failure.
|
||||||
|
* dm_thin_find_block can fail with -EINVAL if the
|
||||||
|
* pool is switched to fail-io mode.
|
||||||
|
*/
|
||||||
|
bio_io_error(bio);
|
||||||
|
r = DM_MAPIO_SUBMITTED;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
|
@ -1647,15 +1842,26 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
|
||||||
{
|
{
|
||||||
struct pool_c *pt = ti->private;
|
struct pool_c *pt = ti->private;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We want to make sure that degraded pools are never upgraded.
|
||||||
|
*/
|
||||||
|
enum pool_mode old_mode = pool->pf.mode;
|
||||||
|
enum pool_mode new_mode = pt->pf.mode;
|
||||||
|
|
||||||
|
if (old_mode > new_mode)
|
||||||
|
new_mode = old_mode;
|
||||||
|
|
||||||
pool->ti = ti;
|
pool->ti = ti;
|
||||||
pool->low_water_blocks = pt->low_water_blocks;
|
pool->low_water_blocks = pt->low_water_blocks;
|
||||||
pool->pf = pt->pf;
|
pool->pf = pt->pf;
|
||||||
|
set_pool_mode(pool, new_mode);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If discard_passdown was enabled verify that the data device
|
* If discard_passdown was enabled verify that the data device
|
||||||
* supports discards. Disable discard_passdown if not; otherwise
|
* supports discards. Disable discard_passdown if not; otherwise
|
||||||
* -EOPNOTSUPP will be returned.
|
* -EOPNOTSUPP will be returned.
|
||||||
*/
|
*/
|
||||||
|
/* FIXME: pull this out into a separate function. */
|
||||||
if (pt->pf.discard_passdown) {
|
if (pt->pf.discard_passdown) {
|
||||||
struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
|
struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
|
||||||
if (!q || !blk_queue_discard(q)) {
|
if (!q || !blk_queue_discard(q)) {
|
||||||
|
@ -1681,6 +1887,7 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti)
|
||||||
/* Initialize pool features. */
|
/* Initialize pool features. */
|
||||||
static void pool_features_init(struct pool_features *pf)
|
static void pool_features_init(struct pool_features *pf)
|
||||||
{
|
{
|
||||||
|
pf->mode = PM_WRITE;
|
||||||
pf->zero_new_blocks = 1;
|
pf->zero_new_blocks = 1;
|
||||||
pf->discard_enabled = 1;
|
pf->discard_enabled = 1;
|
||||||
pf->discard_passdown = 1;
|
pf->discard_passdown = 1;
|
||||||
|
@ -1711,14 +1918,16 @@ static struct kmem_cache *_endio_hook_cache;
|
||||||
|
|
||||||
static struct pool *pool_create(struct mapped_device *pool_md,
|
static struct pool *pool_create(struct mapped_device *pool_md,
|
||||||
struct block_device *metadata_dev,
|
struct block_device *metadata_dev,
|
||||||
unsigned long block_size, char **error)
|
unsigned long block_size,
|
||||||
|
int read_only, char **error)
|
||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
void *err_p;
|
void *err_p;
|
||||||
struct pool *pool;
|
struct pool *pool;
|
||||||
struct dm_pool_metadata *pmd;
|
struct dm_pool_metadata *pmd;
|
||||||
|
bool format_device = read_only ? false : true;
|
||||||
|
|
||||||
pmd = dm_pool_metadata_open(metadata_dev, block_size, true);
|
pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device);
|
||||||
if (IS_ERR(pmd)) {
|
if (IS_ERR(pmd)) {
|
||||||
*error = "Error creating metadata object";
|
*error = "Error creating metadata object";
|
||||||
return (struct pool *)pmd;
|
return (struct pool *)pmd;
|
||||||
|
@ -1835,8 +2044,8 @@ static void __pool_dec(struct pool *pool)
|
||||||
|
|
||||||
static struct pool *__pool_find(struct mapped_device *pool_md,
|
static struct pool *__pool_find(struct mapped_device *pool_md,
|
||||||
struct block_device *metadata_dev,
|
struct block_device *metadata_dev,
|
||||||
unsigned long block_size, char **error,
|
unsigned long block_size, int read_only,
|
||||||
int *created)
|
char **error, int *created)
|
||||||
{
|
{
|
||||||
struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
|
struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
|
||||||
|
|
||||||
|
@ -1857,7 +2066,7 @@ static struct pool *__pool_find(struct mapped_device *pool_md,
|
||||||
__pool_inc(pool);
|
__pool_inc(pool);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
pool = pool_create(pool_md, metadata_dev, block_size, error);
|
pool = pool_create(pool_md, metadata_dev, block_size, read_only, error);
|
||||||
*created = 1;
|
*created = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1908,19 +2117,23 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
|
||||||
arg_name = dm_shift_arg(as);
|
arg_name = dm_shift_arg(as);
|
||||||
argc--;
|
argc--;
|
||||||
|
|
||||||
if (!strcasecmp(arg_name, "skip_block_zeroing")) {
|
if (!strcasecmp(arg_name, "skip_block_zeroing"))
|
||||||
pf->zero_new_blocks = 0;
|
pf->zero_new_blocks = 0;
|
||||||
continue;
|
|
||||||
} else if (!strcasecmp(arg_name, "ignore_discard")) {
|
|
||||||
pf->discard_enabled = 0;
|
|
||||||
continue;
|
|
||||||
} else if (!strcasecmp(arg_name, "no_discard_passdown")) {
|
|
||||||
pf->discard_passdown = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
ti->error = "Unrecognised pool feature requested";
|
else if (!strcasecmp(arg_name, "ignore_discard"))
|
||||||
r = -EINVAL;
|
pf->discard_enabled = 0;
|
||||||
|
|
||||||
|
else if (!strcasecmp(arg_name, "no_discard_passdown"))
|
||||||
|
pf->discard_passdown = 0;
|
||||||
|
|
||||||
|
else if (!strcasecmp(arg_name, "read_only"))
|
||||||
|
pf->mode = PM_READ_ONLY;
|
||||||
|
|
||||||
|
else {
|
||||||
|
ti->error = "Unrecognised pool feature requested";
|
||||||
|
r = -EINVAL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
|
@ -2013,7 +2226,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev,
|
pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev,
|
||||||
block_size, &ti->error, &pool_created);
|
block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created);
|
||||||
if (IS_ERR(pool)) {
|
if (IS_ERR(pool)) {
|
||||||
r = PTR_ERR(pool);
|
r = PTR_ERR(pool);
|
||||||
goto out_free_pt;
|
goto out_free_pt;
|
||||||
|
@ -2146,15 +2359,12 @@ static int pool_preresume(struct dm_target *ti)
|
||||||
r = dm_pool_resize_data_dev(pool->pmd, data_size);
|
r = dm_pool_resize_data_dev(pool->pmd, data_size);
|
||||||
if (r) {
|
if (r) {
|
||||||
DMERR("failed to resize data device");
|
DMERR("failed to resize data device");
|
||||||
|
/* FIXME Stricter than necessary: Rollback transaction instead here */
|
||||||
|
set_pool_mode(pool, PM_READ_ONLY);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = dm_pool_commit_metadata(pool->pmd);
|
(void) commit_or_fallback(pool);
|
||||||
if (r) {
|
|
||||||
DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
|
|
||||||
__func__, r);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -2177,19 +2387,12 @@ static void pool_resume(struct dm_target *ti)
|
||||||
|
|
||||||
static void pool_postsuspend(struct dm_target *ti)
|
static void pool_postsuspend(struct dm_target *ti)
|
||||||
{
|
{
|
||||||
int r;
|
|
||||||
struct pool_c *pt = ti->private;
|
struct pool_c *pt = ti->private;
|
||||||
struct pool *pool = pt->pool;
|
struct pool *pool = pt->pool;
|
||||||
|
|
||||||
cancel_delayed_work(&pool->waker);
|
cancel_delayed_work(&pool->waker);
|
||||||
flush_workqueue(pool->wq);
|
flush_workqueue(pool->wq);
|
||||||
|
(void) commit_or_fallback(pool);
|
||||||
r = dm_pool_commit_metadata(pool->pmd);
|
|
||||||
if (r < 0) {
|
|
||||||
DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
|
|
||||||
__func__, r);
|
|
||||||
/* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int check_arg_count(unsigned argc, unsigned args_required)
|
static int check_arg_count(unsigned argc, unsigned args_required)
|
||||||
|
@ -2323,12 +2526,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
r = dm_pool_commit_metadata(pool->pmd);
|
(void) commit_or_fallback(pool);
|
||||||
if (r) {
|
|
||||||
DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
|
|
||||||
__func__, r);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
r = dm_pool_reserve_metadata_snap(pool->pmd);
|
r = dm_pool_reserve_metadata_snap(pool->pmd);
|
||||||
if (r)
|
if (r)
|
||||||
|
@ -2389,16 +2587,32 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
|
||||||
else
|
else
|
||||||
DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
|
DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
|
||||||
|
|
||||||
if (!r) {
|
if (!r)
|
||||||
r = dm_pool_commit_metadata(pool->pmd);
|
(void) commit_or_fallback(pool);
|
||||||
if (r)
|
|
||||||
DMERR("%s message: dm_pool_commit_metadata() failed, error = %d",
|
|
||||||
argv[0], r);
|
|
||||||
}
|
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void emit_flags(struct pool_features *pf, char *result,
|
||||||
|
unsigned sz, unsigned maxlen)
|
||||||
|
{
|
||||||
|
unsigned count = !pf->zero_new_blocks + !pf->discard_enabled +
|
||||||
|
!pf->discard_passdown + (pf->mode == PM_READ_ONLY);
|
||||||
|
DMEMIT("%u ", count);
|
||||||
|
|
||||||
|
if (!pf->zero_new_blocks)
|
||||||
|
DMEMIT("skip_block_zeroing ");
|
||||||
|
|
||||||
|
if (!pf->discard_enabled)
|
||||||
|
DMEMIT("ignore_discard ");
|
||||||
|
|
||||||
|
if (!pf->discard_passdown)
|
||||||
|
DMEMIT("no_discard_passdown ");
|
||||||
|
|
||||||
|
if (pf->mode == PM_READ_ONLY)
|
||||||
|
DMEMIT("read_only ");
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Status line is:
|
* Status line is:
|
||||||
* <transaction id> <used metadata sectors>/<total metadata sectors>
|
* <transaction id> <used metadata sectors>/<total metadata sectors>
|
||||||
|
@ -2407,7 +2621,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
|
||||||
static int pool_status(struct dm_target *ti, status_type_t type,
|
static int pool_status(struct dm_target *ti, status_type_t type,
|
||||||
char *result, unsigned maxlen)
|
char *result, unsigned maxlen)
|
||||||
{
|
{
|
||||||
int r, count;
|
int r;
|
||||||
unsigned sz = 0;
|
unsigned sz = 0;
|
||||||
uint64_t transaction_id;
|
uint64_t transaction_id;
|
||||||
dm_block_t nr_free_blocks_data;
|
dm_block_t nr_free_blocks_data;
|
||||||
|
@ -2422,6 +2636,11 @@ static int pool_status(struct dm_target *ti, status_type_t type,
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case STATUSTYPE_INFO:
|
case STATUSTYPE_INFO:
|
||||||
|
if (get_pool_mode(pool) == PM_FAIL) {
|
||||||
|
DMEMIT("Fail");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
r = dm_pool_get_metadata_transaction_id(pool->pmd,
|
r = dm_pool_get_metadata_transaction_id(pool->pmd,
|
||||||
&transaction_id);
|
&transaction_id);
|
||||||
if (r)
|
if (r)
|
||||||
|
@ -2457,9 +2676,19 @@ static int pool_status(struct dm_target *ti, status_type_t type,
|
||||||
(unsigned long long)nr_blocks_data);
|
(unsigned long long)nr_blocks_data);
|
||||||
|
|
||||||
if (held_root)
|
if (held_root)
|
||||||
DMEMIT("%llu", held_root);
|
DMEMIT("%llu ", held_root);
|
||||||
else
|
else
|
||||||
DMEMIT("-");
|
DMEMIT("- ");
|
||||||
|
|
||||||
|
if (pool->pf.mode == PM_READ_ONLY)
|
||||||
|
DMEMIT("ro ");
|
||||||
|
else
|
||||||
|
DMEMIT("rw ");
|
||||||
|
|
||||||
|
if (pool->pf.discard_enabled && pool->pf.discard_passdown)
|
||||||
|
DMEMIT("discard_passdown");
|
||||||
|
else
|
||||||
|
DMEMIT("no_discard_passdown");
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -2469,20 +2698,7 @@ static int pool_status(struct dm_target *ti, status_type_t type,
|
||||||
format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
|
format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
|
||||||
(unsigned long)pool->sectors_per_block,
|
(unsigned long)pool->sectors_per_block,
|
||||||
(unsigned long long)pt->low_water_blocks);
|
(unsigned long long)pt->low_water_blocks);
|
||||||
|
emit_flags(&pt->pf, result, sz, maxlen);
|
||||||
count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled +
|
|
||||||
!pt->pf.discard_passdown;
|
|
||||||
DMEMIT("%u ", count);
|
|
||||||
|
|
||||||
if (!pool->pf.zero_new_blocks)
|
|
||||||
DMEMIT("skip_block_zeroing ");
|
|
||||||
|
|
||||||
if (!pool->pf.discard_enabled)
|
|
||||||
DMEMIT("ignore_discard ");
|
|
||||||
|
|
||||||
if (!pt->pf.discard_passdown)
|
|
||||||
DMEMIT("no_discard_passdown ");
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2542,7 +2758,7 @@ static struct target_type pool_target = {
|
||||||
.name = "thin-pool",
|
.name = "thin-pool",
|
||||||
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
|
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
|
||||||
DM_TARGET_IMMUTABLE,
|
DM_TARGET_IMMUTABLE,
|
||||||
.version = {1, 2, 0},
|
.version = {1, 3, 0},
|
||||||
.module = THIS_MODULE,
|
.module = THIS_MODULE,
|
||||||
.ctr = pool_ctr,
|
.ctr = pool_ctr,
|
||||||
.dtr = pool_dtr,
|
.dtr = pool_dtr,
|
||||||
|
@ -2647,6 +2863,11 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
||||||
}
|
}
|
||||||
__pool_inc(tc->pool);
|
__pool_inc(tc->pool);
|
||||||
|
|
||||||
|
if (get_pool_mode(tc->pool) == PM_FAIL) {
|
||||||
|
ti->error = "Couldn't open thin device, Pool is in fail mode";
|
||||||
|
goto bad_thin_open;
|
||||||
|
}
|
||||||
|
|
||||||
r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
|
r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
|
||||||
if (r) {
|
if (r) {
|
||||||
ti->error = "Couldn't open thin internal device";
|
ti->error = "Couldn't open thin internal device";
|
||||||
|
@ -2755,6 +2976,11 @@ static int thin_status(struct dm_target *ti, status_type_t type,
|
||||||
char buf[BDEVNAME_SIZE];
|
char buf[BDEVNAME_SIZE];
|
||||||
struct thin_c *tc = ti->private;
|
struct thin_c *tc = ti->private;
|
||||||
|
|
||||||
|
if (get_pool_mode(tc->pool) == PM_FAIL) {
|
||||||
|
DMEMIT("Fail");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (!tc->td)
|
if (!tc->td)
|
||||||
DMEMIT("-");
|
DMEMIT("-");
|
||||||
else {
|
else {
|
||||||
|
@ -2823,7 +3049,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||||
|
|
||||||
static struct target_type thin_target = {
|
static struct target_type thin_target = {
|
||||||
.name = "thin",
|
.name = "thin",
|
||||||
.version = {1, 2, 0},
|
.version = {1, 3, 0},
|
||||||
.module = THIS_MODULE,
|
.module = THIS_MODULE,
|
||||||
.ctr = thin_ctr,
|
.ctr = thin_ctr,
|
||||||
.dtr = thin_dtr,
|
.dtr = thin_dtr,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue