ia64/linux-2.6.18-xen.hg

view drivers/md/dm-crypt.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 3e8752eb6d9c
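
In rough outline, the retry-on-timer approach described above might look like
the sketch below. This is an illustration only, using the 2.6.18-era timer and
workqueue interfaces; balloon_process(), increase_reservation() and the page
counters are placeholder names, not the actual drivers/xen/balloon code.

/*
 * Illustrative sketch only -- not the actual balloon driver code.
 * On a failed or partial attempt to increase the reservation, keep the
 * progress made and re-arm a timer instead of recording a "hard limit".
 */
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static void balloon_alarm(unsigned long unused);
static void balloon_process(void *unused);

static DEFINE_TIMER(balloon_timer, balloon_alarm, 0, 0);
static DECLARE_WORK(balloon_worker, balloon_process, NULL);

static unsigned long current_pages;     /* pages the guest currently holds */
static unsigned long target_pages;      /* reservation asked for by the toolstack */

/* Placeholder for the real hypercall-based helper: ask the hypervisor for
 * nr more pages and return how many were actually granted (possibly fewer
 * under host memory pressure). */
static unsigned long increase_reservation(unsigned long nr)
{
        return 0;
}

static void balloon_alarm(unsigned long unused)
{
        schedule_work(&balloon_worker);
}

static void balloon_process(void *unused)
{
        if (current_pages >= target_pages)
                return;

        /* Keep whatever pages we did get... */
        current_pages += increase_reservation(target_pages - current_pages);

        /* ...and simply retry later if we are still short, in case the
         * temporary memory pressure in the host has eased. */
        if (current_pages < target_pages)
                mod_timer(&balloon_timer, jiffies + HZ);
}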
/*
 * Copyright (C) 2003 Christophe Saout <christophe@saout.de>
 * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org>
 *
 * This file is released under the GPL.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/crypto.h>
#include <linux/workqueue.h>
#include <asm/atomic.h>
#include <linux/scatterlist.h>
#include <asm/page.h>

#include "dm.h"

#define DM_MSG_PREFIX "crypt"

/*
 * per bio private data
 */
struct crypt_io {
        struct dm_target *target;
        struct bio *bio;
        struct bio *first_clone;
        struct work_struct work;
        atomic_t pending;
        int error;
};

/*
 * context holding the current state of a multi-part conversion
 */
struct convert_context {
        struct bio *bio_in;
        struct bio *bio_out;
        unsigned int offset_in;
        unsigned int offset_out;
        unsigned int idx_in;
        unsigned int idx_out;
        sector_t sector;
        int write;
};

struct crypt_config;

struct crypt_iv_operations {
        int (*ctr)(struct crypt_config *cc, struct dm_target *ti,
                   const char *opts);
        void (*dtr)(struct crypt_config *cc);
        const char *(*status)(struct crypt_config *cc);
        int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector);
};

/*
 * Crypt: maps a linear range of a block device
 * and encrypts / decrypts at the same time.
 */
struct crypt_config {
        struct dm_dev *dev;
        sector_t start;

        /*
         * pool for per bio private data and
         * for encryption buffer pages
         */
        mempool_t *io_pool;
        mempool_t *page_pool;

        /*
         * crypto related data
         */
        struct crypt_iv_operations *iv_gen_ops;
        char *iv_mode;
        void *iv_gen_private;
        sector_t iv_offset;
        unsigned int iv_size;

        struct crypto_tfm *tfm;
        unsigned int key_size;
        u8 key[0];
};

#define MIN_IOS        256
#define MIN_POOL_PAGES 32
#define MIN_BIO_PAGES  8

static kmem_cache_t *_crypt_io_pool;

/*
 * Different IV generation algorithms:
 *
 * plain: the initial vector is the 32-bit little-endian version of the sector
 *        number, padded with zeros if necessary.
 *
 * essiv: "encrypted sector|salt initial vector", the sector number is
 *        encrypted with the bulk cipher using a salt as key. The salt
 *        should be derived from the bulk cipher's key via hashing.
 *
 * plumb: unimplemented, see:
 *   http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454
 */
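
/*
 * Illustrative summary (informal notation) of the generators implemented
 * below: with bulk key K, digest H and the bulk block cipher E (run in ECB
 * mode for ESSIV),
 *
 *   plain: IV(sector) = le32(sector), zero-padded to the IV size
 *   essiv: IV(sector) = E_{H(K)}(le64(sector)), the sector number being
 *          zero-padded to the cipher block size before encryption
 */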
static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
{
        memset(iv, 0, cc->iv_size);
        *(u32 *)iv = cpu_to_le32(sector & 0xffffffff);

        return 0;
}

static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
                              const char *opts)
{
        struct crypto_tfm *essiv_tfm;
        struct crypto_tfm *hash_tfm;
        struct scatterlist sg;
        unsigned int saltsize;
        u8 *salt;

        if (opts == NULL) {
                ti->error = "Digest algorithm missing for ESSIV mode";
                return -EINVAL;
        }

        /* Hash the cipher key with the given hash algorithm */
        hash_tfm = crypto_alloc_tfm(opts, CRYPTO_TFM_REQ_MAY_SLEEP);
        if (hash_tfm == NULL) {
                ti->error = "Error initializing ESSIV hash";
                return -EINVAL;
        }

        if (crypto_tfm_alg_type(hash_tfm) != CRYPTO_ALG_TYPE_DIGEST) {
                ti->error = "Expected digest algorithm for ESSIV hash";
                crypto_free_tfm(hash_tfm);
                return -EINVAL;
        }

        saltsize = crypto_tfm_alg_digestsize(hash_tfm);
        salt = kmalloc(saltsize, GFP_KERNEL);
        if (salt == NULL) {
                ti->error = "Error kmallocing salt storage in ESSIV";
                crypto_free_tfm(hash_tfm);
                return -ENOMEM;
        }

        sg_set_buf(&sg, cc->key, cc->key_size);
        crypto_digest_digest(hash_tfm, &sg, 1, salt);
        crypto_free_tfm(hash_tfm);

        /* Setup the essiv_tfm with the given salt */
        essiv_tfm = crypto_alloc_tfm(crypto_tfm_alg_name(cc->tfm),
                                     CRYPTO_TFM_MODE_ECB |
                                     CRYPTO_TFM_REQ_MAY_SLEEP);
        if (essiv_tfm == NULL) {
                ti->error = "Error allocating crypto tfm for ESSIV";
                kfree(salt);
                return -EINVAL;
        }
        if (crypto_tfm_alg_blocksize(essiv_tfm)
            != crypto_tfm_alg_ivsize(cc->tfm)) {
                ti->error = "Block size of ESSIV cipher does "
                            "not match IV size of block cipher";
                crypto_free_tfm(essiv_tfm);
                kfree(salt);
                return -EINVAL;
        }
        if (crypto_cipher_setkey(essiv_tfm, salt, saltsize) < 0) {
                ti->error = "Failed to set key for ESSIV cipher";
                crypto_free_tfm(essiv_tfm);
                kfree(salt);
                return -EINVAL;
        }
        kfree(salt);

        cc->iv_gen_private = (void *)essiv_tfm;
        return 0;
}

static void crypt_iv_essiv_dtr(struct crypt_config *cc)
{
        crypto_free_tfm((struct crypto_tfm *)cc->iv_gen_private);
        cc->iv_gen_private = NULL;
}

static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
{
        struct scatterlist sg;

        memset(iv, 0, cc->iv_size);
        *(u64 *)iv = cpu_to_le64(sector);

        sg_set_buf(&sg, iv, cc->iv_size);
        crypto_cipher_encrypt((struct crypto_tfm *)cc->iv_gen_private,
                              &sg, &sg, cc->iv_size);

        return 0;
}

static struct crypt_iv_operations crypt_iv_plain_ops = {
        .generator = crypt_iv_plain_gen
};

static struct crypt_iv_operations crypt_iv_essiv_ops = {
        .ctr       = crypt_iv_essiv_ctr,
        .dtr       = crypt_iv_essiv_dtr,
        .generator = crypt_iv_essiv_gen
};
static int
crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out,
                          struct scatterlist *in, unsigned int length,
                          int write, sector_t sector)
{
        u8 iv[cc->iv_size];
        int r;

        if (cc->iv_gen_ops) {
                r = cc->iv_gen_ops->generator(cc, iv, sector);
                if (r < 0)
                        return r;

                if (write)
                        r = crypto_cipher_encrypt_iv(cc->tfm, out, in, length, iv);
                else
                        r = crypto_cipher_decrypt_iv(cc->tfm, out, in, length, iv);
        } else {
                if (write)
                        r = crypto_cipher_encrypt(cc->tfm, out, in, length);
                else
                        r = crypto_cipher_decrypt(cc->tfm, out, in, length);
        }

        return r;
}

static void
crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx,
                   struct bio *bio_out, struct bio *bio_in,
                   sector_t sector, int write)
{
        ctx->bio_in = bio_in;
        ctx->bio_out = bio_out;
        ctx->offset_in = 0;
        ctx->offset_out = 0;
        ctx->idx_in = bio_in ? bio_in->bi_idx : 0;
        ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
        ctx->sector = sector + cc->iv_offset;
        ctx->write = write;
}

/*
 * Encrypt / decrypt data from one bio to another one (can be the same one)
 */
static int crypt_convert(struct crypt_config *cc,
                         struct convert_context *ctx)
{
        int r = 0;

        while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
              ctx->idx_out < ctx->bio_out->bi_vcnt) {
                struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
                struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
                struct scatterlist sg_in = {
                        .page = bv_in->bv_page,
                        .offset = bv_in->bv_offset + ctx->offset_in,
                        .length = 1 << SECTOR_SHIFT
                };
                struct scatterlist sg_out = {
                        .page = bv_out->bv_page,
                        .offset = bv_out->bv_offset + ctx->offset_out,
                        .length = 1 << SECTOR_SHIFT
                };

                ctx->offset_in += sg_in.length;
                if (ctx->offset_in >= bv_in->bv_len) {
                        ctx->offset_in = 0;
                        ctx->idx_in++;
                }

                ctx->offset_out += sg_out.length;
                if (ctx->offset_out >= bv_out->bv_len) {
                        ctx->offset_out = 0;
                        ctx->idx_out++;
                }

                r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length,
                                              ctx->write, ctx->sector);
                if (r < 0)
                        break;

                ctx->sector++;
        }

        return r;
}
/*
 * Generate a new unfragmented bio with the given size
 * This should never violate the device limitations
 * May return a smaller bio when running out of pages
 */
static struct bio *
crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
                   struct bio *base_bio, unsigned int *bio_vec_idx)
{
        struct bio *bio;
        unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
        unsigned int i;

        /*
         * Use __GFP_NOMEMALLOC to tell the VM to act less aggressively and
         * to fail earlier. This is not necessary but increases throughput.
         * FIXME: Is this really intelligent?
         */
        if (base_bio)
                bio = bio_clone(base_bio, GFP_NOIO|__GFP_NOMEMALLOC);
        else
                bio = bio_alloc(GFP_NOIO|__GFP_NOMEMALLOC, nr_iovecs);
        if (!bio)
                return NULL;

        /* if the last bio was not complete, continue where that one ended */
        bio->bi_idx = *bio_vec_idx;
        bio->bi_vcnt = *bio_vec_idx;
        bio->bi_size = 0;
        bio->bi_flags &= ~(1 << BIO_SEG_VALID);

        /* bio->bi_idx pages have already been allocated */
        size -= bio->bi_idx * PAGE_SIZE;

        for(i = bio->bi_idx; i < nr_iovecs; i++) {
                struct bio_vec *bv = bio_iovec_idx(bio, i);

                bv->bv_page = mempool_alloc(cc->page_pool, gfp_mask);
                if (!bv->bv_page)
                        break;

                /*
                 * if additional pages cannot be allocated without waiting,
                 * return a partially allocated bio, the caller will then try
                 * to allocate additional bios while submitting this partial bio
                 */
                if ((i - bio->bi_idx) == (MIN_BIO_PAGES - 1))
                        gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;

                bv->bv_offset = 0;
                if (size > PAGE_SIZE)
                        bv->bv_len = PAGE_SIZE;
                else
                        bv->bv_len = size;

                bio->bi_size += bv->bv_len;
                bio->bi_vcnt++;
                size -= bv->bv_len;
        }

        if (!bio->bi_size) {
                bio_put(bio);
                return NULL;
        }

        /*
         * Remember the last bio_vec allocated to be able
         * to correctly continue after the splitting.
         */
        *bio_vec_idx = bio->bi_vcnt;

        return bio;
}
static void crypt_free_buffer_pages(struct crypt_config *cc,
                                    struct bio *bio, unsigned int bytes)
{
        unsigned int i, start, end;
        struct bio_vec *bv;

        /*
         * This is ugly, but Jens Axboe thinks that using bi_idx in the
         * endio function is too dangerous at the moment, so I calculate the
         * correct position using bi_vcnt and bi_size.
         * The bv_offset and bv_len fields might already be modified but we
         * know that we always allocated whole pages.
         * A fix to the bi_idx issue in the kernel is in the works, so
         * we will hopefully be able to revert to the cleaner solution soon.
         */
        i = bio->bi_vcnt - 1;
        bv = bio_iovec_idx(bio, i);
        end = (i << PAGE_SHIFT) + (bv->bv_offset + bv->bv_len) - bio->bi_size;
        start = end - bytes;

        start >>= PAGE_SHIFT;
        if (!bio->bi_size)
                end = bio->bi_vcnt;
        else
                end >>= PAGE_SHIFT;

        for(i = start; i < end; i++) {
                bv = bio_iovec_idx(bio, i);
                BUG_ON(!bv->bv_page);
                mempool_free(bv->bv_page, cc->page_pool);
                bv->bv_page = NULL;
        }
}

/*
 * One of the bios was finished. Check for completion of
 * the whole request and correctly clean up the buffer.
 */
static void dec_pending(struct crypt_io *io, int error)
{
        struct crypt_config *cc = (struct crypt_config *) io->target->private;

        if (error < 0)
                io->error = error;

        if (!atomic_dec_and_test(&io->pending))
                return;

        if (io->first_clone)
                bio_put(io->first_clone);

        bio_endio(io->bio, io->bio->bi_size, io->error);

        mempool_free(io, cc->io_pool);
}
/*
 * kcryptd:
 *
 * Needed because it would be very unwise to do decryption in an
 * interrupt context, so bios returning from read requests get
 * queued here.
 */
static struct workqueue_struct *_kcryptd_workqueue;

static void kcryptd_do_work(void *data)
{
        struct crypt_io *io = (struct crypt_io *) data;
        struct crypt_config *cc = (struct crypt_config *) io->target->private;
        struct convert_context ctx;
        int r;

        crypt_convert_init(cc, &ctx, io->bio, io->bio,
                           io->bio->bi_sector - io->target->begin, 0);
        r = crypt_convert(cc, &ctx);

        dec_pending(io, r);
}

static void kcryptd_queue_io(struct crypt_io *io)
{
        INIT_WORK(&io->work, kcryptd_do_work, io);
        queue_work(_kcryptd_workqueue, &io->work);
}
/*
 * Decode key from its hex representation
 */
static int crypt_decode_key(u8 *key, char *hex, unsigned int size)
{
        char buffer[3];
        char *endp;
        unsigned int i;

        buffer[2] = '\0';

        for(i = 0; i < size; i++) {
                buffer[0] = *hex++;
                buffer[1] = *hex++;

                key[i] = (u8)simple_strtoul(buffer, &endp, 16);

                if (endp != &buffer[2])
                        return -EINVAL;
        }

        if (*hex != '\0')
                return -EINVAL;

        return 0;
}

/*
 * Encode key into its hex representation
 */
static void crypt_encode_key(char *hex, u8 *key, unsigned int size)
{
        unsigned int i;

        for(i = 0; i < size; i++) {
                sprintf(hex, "%02x", *key);
                hex += 2;
                key++;
        }
}
/*
 * Construct an encryption mapping:
 * <cipher> <key> <iv_offset> <dev_path> <start>
 */
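/*
 * For example (hypothetical values), a dmsetup table line using this
 * target could look like:
 *
 *   0 409600 crypt aes-cbc-essiv:sha256 <key as hex digits> 0 /dev/sdb1 0
 *
 * i.e. map 409600 sectors onto /dev/sdb1 starting at sector 0, encrypting
 * with AES in CBC mode and ESSIV(sha256) IVs, with an iv_offset of 0.
 */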
static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
        struct crypt_config *cc;
        struct crypto_tfm *tfm;
        char *tmp;
        char *cipher;
        char *chainmode;
        char *ivmode;
        char *ivopts;
        unsigned int crypto_flags;
        unsigned int key_size;
        unsigned long long tmpll;

        if (argc != 5) {
                ti->error = "Not enough arguments";
                return -EINVAL;
        }

        tmp = argv[0];
        cipher = strsep(&tmp, "-");
        chainmode = strsep(&tmp, "-");
        ivopts = strsep(&tmp, "-");
        ivmode = strsep(&ivopts, ":");

        if (tmp)
                DMWARN("Unexpected additional cipher options");

        key_size = strlen(argv[1]) >> 1;

        cc = kmalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
        if (cc == NULL) {
                ti->error =
                        "Cannot allocate transparent encryption context";
                return -ENOMEM;
        }

        cc->key_size = key_size;
        if ((!key_size && strcmp(argv[1], "-") != 0) ||
            (key_size && crypt_decode_key(cc->key, argv[1], key_size) < 0)) {
                ti->error = "Error decoding key";
                goto bad1;
        }

        /* Compatibility mode for old dm-crypt cipher strings */
        if (!chainmode || (strcmp(chainmode, "plain") == 0 && !ivmode)) {
                chainmode = "cbc";
                ivmode = "plain";
        }

        /* Choose crypto_flags according to chainmode */
        if (strcmp(chainmode, "cbc") == 0)
                crypto_flags = CRYPTO_TFM_MODE_CBC;
        else if (strcmp(chainmode, "ecb") == 0)
                crypto_flags = CRYPTO_TFM_MODE_ECB;
        else {
                ti->error = "Unknown chaining mode";
                goto bad1;
        }

        if (crypto_flags != CRYPTO_TFM_MODE_ECB && !ivmode) {
                ti->error = "This chaining mode requires an IV mechanism";
                goto bad1;
        }

        tfm = crypto_alloc_tfm(cipher, crypto_flags | CRYPTO_TFM_REQ_MAY_SLEEP);
        if (!tfm) {
                ti->error = "Error allocating crypto tfm";
                goto bad1;
        }
        if (crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER) {
                ti->error = "Expected cipher algorithm";
                goto bad2;
        }

        cc->tfm = tfm;

        /*
         * Choose ivmode. Valid modes: "plain", "essiv:<esshash>".
         * See comments at iv code
         */

        if (ivmode == NULL)
                cc->iv_gen_ops = NULL;
        else if (strcmp(ivmode, "plain") == 0)
                cc->iv_gen_ops = &crypt_iv_plain_ops;
        else if (strcmp(ivmode, "essiv") == 0)
                cc->iv_gen_ops = &crypt_iv_essiv_ops;
        else {
                ti->error = "Invalid IV mode";
                goto bad2;
        }

        if (cc->iv_gen_ops && cc->iv_gen_ops->ctr &&
            cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0)
                goto bad2;

        if (tfm->crt_cipher.cit_decrypt_iv && tfm->crt_cipher.cit_encrypt_iv)
                /* at least a 64 bit sector number should fit in our buffer */
                cc->iv_size = max(crypto_tfm_alg_ivsize(tfm),
                                  (unsigned int)(sizeof(u64) / sizeof(u8)));
        else {
                cc->iv_size = 0;
                if (cc->iv_gen_ops) {
                        DMWARN("Selected cipher does not support IVs");
                        if (cc->iv_gen_ops->dtr)
                                cc->iv_gen_ops->dtr(cc);
                        cc->iv_gen_ops = NULL;
                }
        }

        cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
        if (!cc->io_pool) {
                ti->error = "Cannot allocate crypt io mempool";
                goto bad3;
        }

        cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
        if (!cc->page_pool) {
                ti->error = "Cannot allocate page mempool";
                goto bad4;
        }

        if (tfm->crt_cipher.cit_setkey(tfm, cc->key, key_size) < 0) {
                ti->error = "Error setting key";
                goto bad5;
        }

        if (sscanf(argv[2], "%llu", &tmpll) != 1) {
                ti->error = "Invalid iv_offset sector";
                goto bad5;
        }
        cc->iv_offset = tmpll;

        if (sscanf(argv[4], "%llu", &tmpll) != 1) {
                ti->error = "Invalid device sector";
                goto bad5;
        }
        cc->start = tmpll;

        if (dm_get_device(ti, argv[3], cc->start, ti->len,
                          dm_table_get_mode(ti->table), &cc->dev)) {
                ti->error = "Device lookup failed";
                goto bad5;
        }

        if (ivmode && cc->iv_gen_ops) {
                if (ivopts)
                        *(ivopts - 1) = ':';
                cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL);
                if (!cc->iv_mode) {
                        ti->error = "Error kmallocing iv_mode string";
                        goto bad5;
                }
                strcpy(cc->iv_mode, ivmode);
        } else
                cc->iv_mode = NULL;

        ti->private = cc;
        return 0;

bad5:
        mempool_destroy(cc->page_pool);
bad4:
        mempool_destroy(cc->io_pool);
bad3:
        if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
                cc->iv_gen_ops->dtr(cc);
bad2:
        crypto_free_tfm(tfm);
bad1:
        /* Must zero key material before freeing */
        memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
        kfree(cc);
        return -EINVAL;
}
static void crypt_dtr(struct dm_target *ti)
{
        struct crypt_config *cc = (struct crypt_config *) ti->private;

        mempool_destroy(cc->page_pool);
        mempool_destroy(cc->io_pool);

        kfree(cc->iv_mode);
        if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
                cc->iv_gen_ops->dtr(cc);
        crypto_free_tfm(cc->tfm);
        dm_put_device(ti, cc->dev);

        /* Must zero key material before freeing */
        memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
        kfree(cc);
}

static int crypt_endio(struct bio *bio, unsigned int done, int error)
{
        struct crypt_io *io = (struct crypt_io *) bio->bi_private;
        struct crypt_config *cc = (struct crypt_config *) io->target->private;

        if (bio_data_dir(bio) == WRITE) {
                /*
                 * free the processed pages, even if
                 * it's only a partially completed write
                 */
                crypt_free_buffer_pages(cc, bio, done);
        }

        if (bio->bi_size)
                return 1;

        if (!bio_flagged(bio, BIO_UPTODATE) && !error)
                error = -EIO;

        bio_put(bio);

        /*
         * successful reads are decrypted by the worker thread
         */
        if (bio_data_dir(io->bio) == READ && !error) {
                kcryptd_queue_io(io);
                return 0;
        }

        dec_pending(io, error);
        return error;
}
static inline struct bio *
crypt_clone(struct crypt_config *cc, struct crypt_io *io, struct bio *bio,
            sector_t sector, unsigned int *bvec_idx,
            struct convert_context *ctx)
{
        struct bio *clone;

        if (bio_data_dir(bio) == WRITE) {
                clone = crypt_alloc_buffer(cc, bio->bi_size,
                                           io->first_clone, bvec_idx);
                if (clone) {
                        ctx->bio_out = clone;
                        if (crypt_convert(cc, ctx) < 0) {
                                crypt_free_buffer_pages(cc, clone,
                                                        clone->bi_size);
                                bio_put(clone);
                                return NULL;
                        }
                }
        } else {
                /*
                 * The block layer might modify the bvec array, so always
                 * copy the required bvecs because we need the original
                 * one in order to decrypt the whole bio data *afterwards*.
                 */
                clone = bio_alloc(GFP_NOIO, bio_segments(bio));
                if (clone) {
                        clone->bi_idx = 0;
                        clone->bi_vcnt = bio_segments(bio);
                        clone->bi_size = bio->bi_size;
                        memcpy(clone->bi_io_vec, bio_iovec(bio),
                               sizeof(struct bio_vec) * clone->bi_vcnt);
                }
        }

        if (!clone)
                return NULL;

        clone->bi_private = io;
        clone->bi_end_io = crypt_endio;
        clone->bi_bdev = cc->dev->bdev;
        clone->bi_sector = cc->start + sector;
        clone->bi_rw = bio->bi_rw;

        return clone;
}
static int crypt_map(struct dm_target *ti, struct bio *bio,
                     union map_info *map_context)
{
        struct crypt_config *cc = (struct crypt_config *) ti->private;
        struct crypt_io *io = mempool_alloc(cc->io_pool, GFP_NOIO);
        struct convert_context ctx;
        struct bio *clone;
        unsigned int remaining = bio->bi_size;
        sector_t sector = bio->bi_sector - ti->begin;
        unsigned int bvec_idx = 0;

        io->target = ti;
        io->bio = bio;
        io->first_clone = NULL;
        io->error = 0;
        atomic_set(&io->pending, 1); /* hold a reference */

        if (bio_data_dir(bio) == WRITE)
                crypt_convert_init(cc, &ctx, NULL, bio, sector, 1);

        /*
         * The allocated buffers can be smaller than the whole bio,
         * so repeat the whole process until all the data can be handled.
         */
        while (remaining) {
                clone = crypt_clone(cc, io, bio, sector, &bvec_idx, &ctx);
                if (!clone)
                        goto cleanup;

                if (!io->first_clone) {
                        /*
                         * hold a reference to the first clone, because it
                         * holds the bio_vec array and that can't be freed
                         * before all other clones are released
                         */
                        bio_get(clone);
                        io->first_clone = clone;
                }
                atomic_inc(&io->pending);

                remaining -= clone->bi_size;
                sector += bio_sectors(clone);

                generic_make_request(clone);

                /* out of memory -> run queues */
                if (remaining)
                        blk_congestion_wait(bio_data_dir(clone), HZ/100);
        }

        /* drop reference, clones could have returned before we reach this */
        dec_pending(io, 0);
        return 0;

cleanup:
        if (io->first_clone) {
                dec_pending(io, -ENOMEM);
                return 0;
        }

        /* if no bio has been dispatched yet, we can directly return the error */
        mempool_free(io, cc->io_pool);
        return -ENOMEM;
}
static int crypt_status(struct dm_target *ti, status_type_t type,
                        char *result, unsigned int maxlen)
{
        struct crypt_config *cc = (struct crypt_config *) ti->private;
        const char *cipher;
        const char *chainmode = NULL;
        unsigned int sz = 0;

        switch (type) {
        case STATUSTYPE_INFO:
                result[0] = '\0';
                break;

        case STATUSTYPE_TABLE:
                cipher = crypto_tfm_alg_name(cc->tfm);

                switch(cc->tfm->crt_cipher.cit_mode) {
                case CRYPTO_TFM_MODE_CBC:
                        chainmode = "cbc";
                        break;
                case CRYPTO_TFM_MODE_ECB:
                        chainmode = "ecb";
                        break;
                default:
                        BUG();
                }

                if (cc->iv_mode)
                        DMEMIT("%s-%s-%s ", cipher, chainmode, cc->iv_mode);
                else
                        DMEMIT("%s-%s ", cipher, chainmode);

                if (cc->key_size > 0) {
                        if ((maxlen - sz) < ((cc->key_size << 1) + 1))
                                return -ENOMEM;

                        crypt_encode_key(result + sz, cc->key, cc->key_size);
                        sz += cc->key_size << 1;
                } else {
                        if (sz >= maxlen)
                                return -ENOMEM;
                        result[sz++] = '-';
                }

                DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
                       cc->dev->name, (unsigned long long)cc->start);
                break;
        }
        return 0;
}
static struct target_type crypt_target = {
        .name    = "crypt",
        .version = {1, 1, 0},
        .module  = THIS_MODULE,
        .ctr     = crypt_ctr,
        .dtr     = crypt_dtr,
        .map     = crypt_map,
        .status  = crypt_status,
};

static int __init dm_crypt_init(void)
{
        int r;

        _crypt_io_pool = kmem_cache_create("dm-crypt_io",
                                           sizeof(struct crypt_io),
                                           0, 0, NULL, NULL);
        if (!_crypt_io_pool)
                return -ENOMEM;

        _kcryptd_workqueue = create_workqueue("kcryptd");
        if (!_kcryptd_workqueue) {
                r = -ENOMEM;
                DMERR("couldn't create kcryptd");
                goto bad1;
        }

        r = dm_register_target(&crypt_target);
        if (r < 0) {
                DMERR("register failed %d", r);
                goto bad2;
        }

        return 0;

bad2:
        destroy_workqueue(_kcryptd_workqueue);
bad1:
        kmem_cache_destroy(_crypt_io_pool);
        return r;
}

static void __exit dm_crypt_exit(void)
{
        int r = dm_unregister_target(&crypt_target);

        if (r < 0)
                DMERR("unregister failed %d", r);

        destroy_workqueue(_kcryptd_workqueue);
        kmem_cache_destroy(_crypt_io_pool);
}

module_init(dm_crypt_init);
module_exit(dm_crypt_exit);

MODULE_AUTHOR("Christophe Saout <christophe@saout.de>");
MODULE_DESCRIPTION(DM_NAME " target for transparent encryption / decryption");
MODULE_LICENSE("GPL");