ia64/linux-2.6.18-xen.hg

annotate block/elevator.c @ 912:dd42cdb0ab89

[IA64] Build blktap2 driver by default in ia64 builds.

add CONFIG_XEN_BLKDEV_TAP2=y to buildconfigs/linux-defconfig_xen_ia64.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Isaku Yamahata <yamahata@valinux.co.jp>
date Mon Jun 29 12:09:16 2009 +0900 (2009-06-29)
parents 7128fe32720e
children
rev   line source
ian@0 1 /*
ian@0 2 * Block device elevator/IO-scheduler.
ian@0 3 *
ian@0 4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
ian@0 5 *
ian@0 6 * 30042000 Jens Axboe <axboe@suse.de> :
ian@0 7 *
ian@0 8 * Split the elevator a bit so that it is possible to choose a different
ian@0 9 * one or even write a new "plug in". There are three pieces:
ian@0 10 * - elevator_fn, inserts a new request in the queue list
ian@0 11 * - elevator_merge_fn, decides whether a new buffer can be merged with
ian@0 12 * an existing request
ian@0 13 * - elevator_dequeue_fn, called when a request is taken off the active list
ian@0 14 *
ian@0 15 * 20082000 Dave Jones <davej@suse.de> :
ian@0 16 * Removed tests for max-bomb-segments, which was breaking elvtune
ian@0 17 * when run without -bN
ian@0 18 *
ian@0 19 * Jens:
ian@0 20 * - Rework again to work with bio instead of buffer_heads
ian@0 21 * - lose bi_dev comparisons, partition handling is right now
ian@0 22 * - completely modularize elevator setup and teardown
ian@0 23 *
ian@0 24 */
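The three hooks listed above correspond to the elevator_ops entry points this file calls (elevator_add_req_fn, elevator_dispatch_fn, elevator_queue_empty_fn, and so on). Purely as a hedged illustration -- not part of this file, and simplified relative to the real noop scheduler -- a minimal FIFO elevator registered through elv_register() might look roughly like the sketch below, assuming the 2.6.18-era signatures used elsewhere in this file; the "fifo_*" names and the module are invented for the example.

	#include <linux/blkdev.h>
	#include <linux/elevator.h>
	#include <linux/module.h>
	#include <linux/slab.h>
	#include <linux/init.h>

	struct fifo_data {
		struct list_head queue;		/* requests not yet dispatched */
	};

	/* elevator_init_fn: the returned pointer becomes elevator_data
	 * via elevator_attach(); NULL is treated as -ENOMEM by elevator_init() */
	static void *fifo_init_queue(request_queue_t *q, elevator_t *e)
	{
		struct fifo_data *fd = kmalloc(sizeof(*fd), GFP_KERNEL);

		if (fd)
			INIT_LIST_HEAD(&fd->queue);
		return fd;
	}

	static void fifo_exit_queue(elevator_t *e)
	{
		kfree(e->elevator_data);
	}

	/* elevator_add_req_fn: called from elv_insert() for ELEVATOR_INSERT_SORT */
	static void fifo_add_request(request_queue_t *q, struct request *rq)
	{
		struct fifo_data *fd = q->elevator->elevator_data;

		list_add_tail(&rq->queuelist, &fd->queue);
	}

	/* elevator_dispatch_fn: move one request to the dispatch queue and
	 * return non-zero if anything was moved (see elv_drain_elevator()) */
	static int fifo_dispatch(request_queue_t *q, int force)
	{
		struct fifo_data *fd = q->elevator->elevator_data;
		struct request *rq;

		if (list_empty(&fd->queue))
			return 0;
		rq = list_entry_rq(fd->queue.next);
		list_del_init(&rq->queuelist);
		elv_dispatch_sort(q, rq);	/* exported by this file */
		return 1;
	}

	static int fifo_queue_empty(request_queue_t *q)
	{
		struct fifo_data *fd = q->elevator->elevator_data;

		return list_empty(&fd->queue);
	}

	static struct elevator_type elevator_fifo = {
		.ops = {
			.elevator_dispatch_fn	 = fifo_dispatch,
			.elevator_add_req_fn	 = fifo_add_request,
			.elevator_queue_empty_fn = fifo_queue_empty,
			.elevator_init_fn	 = fifo_init_queue,
			.elevator_exit_fn	 = fifo_exit_queue,
		},
		.elevator_name	= "fifo",
		.elevator_owner	= THIS_MODULE,
	};

	static int __init fifo_init(void)
	{
		return elv_register(&elevator_fifo);
	}

	static void __exit fifo_exit(void)
	{
		elv_unregister(&elevator_fifo);
	}

	module_init(fifo_init);
	module_exit(fifo_exit);
	MODULE_LICENSE("GPL");
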
ian@0 25 #include <linux/kernel.h>
ian@0 26 #include <linux/fs.h>
ian@0 27 #include <linux/blkdev.h>
ian@0 28 #include <linux/elevator.h>
ian@0 29 #include <linux/bio.h>
ian@0 30 #include <linux/module.h>
ian@0 31 #include <linux/slab.h>
ian@0 32 #include <linux/init.h>
ian@0 33 #include <linux/compiler.h>
ian@0 34 #include <linux/delay.h>
ian@0 35 #include <linux/blktrace_api.h>
ian@0 36
ian@0 37 #include <asm/uaccess.h>
ian@0 38
ian@0 39 static DEFINE_SPINLOCK(elv_list_lock);
ian@0 40 static LIST_HEAD(elv_list);
ian@0 41
ian@0 42 /*
ian@0 43 * can we safely merge with this request?
ian@0 44 */
ian@0 45 inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
ian@0 46 {
ian@0 47 if (!rq_mergeable(rq))
ian@0 48 return 0;
ian@0 49
ian@0 50 /*
ian@0 51 * different data direction or already started, don't merge
ian@0 52 */
ian@0 53 if (bio_data_dir(bio) != rq_data_dir(rq))
ian@0 54 return 0;
ian@0 55
ian@0 56 /*
ian@0 57 * same device and no special stuff set, merge is ok
ian@0 58 */
ian@0 59 if (rq->rq_disk == bio->bi_bdev->bd_disk &&
ian@0 60 !rq->waiting && !rq->special)
ian@0 61 return 1;
ian@0 62
ian@0 63 return 0;
ian@0 64 }
ian@0 65 EXPORT_SYMBOL(elv_rq_merge_ok);
ian@0 66
ian@0 67 static inline int elv_try_merge(struct request *__rq, struct bio *bio)
ian@0 68 {
ian@0 69 int ret = ELEVATOR_NO_MERGE;
ian@0 70
ian@0 71 /*
ian@0 72 * we can merge and sequence is ok, check if it's possible
ian@0 73 */
ian@0 74 if (elv_rq_merge_ok(__rq, bio)) {
ian@0 75 if (__rq->sector + __rq->nr_sectors == bio->bi_sector)
ian@0 76 ret = ELEVATOR_BACK_MERGE;
ian@0 77 else if (__rq->sector - bio_sectors(bio) == bio->bi_sector)
ian@0 78 ret = ELEVATOR_FRONT_MERGE;
ian@0 79 }
ian@0 80
ian@0 81 return ret;
ian@0 82 }
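With hypothetical sector numbers, the two adjacency checks above work out as follows:

	/*
	 * Worked example (made-up numbers): a request covering sectors 64..71,
	 * i.e. __rq->sector == 64 and __rq->nr_sectors == 8.
	 *
	 *   bio with bi_sector == 72:
	 *     64 + 8 == 72                 ->  ELEVATOR_BACK_MERGE
	 *   4-sector bio with bi_sector == 60:
	 *     64 - bio_sectors(bio) == 60  ->  ELEVATOR_FRONT_MERGE
	 *
	 * Any other position yields ELEVATOR_NO_MERGE, even when
	 * elv_rq_merge_ok() succeeded.
	 */
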
ian@0 83
ian@0 84 static struct elevator_type *elevator_find(const char *name)
ian@0 85 {
ian@0 86 struct elevator_type *e = NULL;
ian@0 87 struct list_head *entry;
ian@0 88
ian@0 89 list_for_each(entry, &elv_list) {
ian@0 90 struct elevator_type *__e;
ian@0 91
ian@0 92 __e = list_entry(entry, struct elevator_type, list);
ian@0 93
ian@0 94 if (!strcmp(__e->elevator_name, name)) {
ian@0 95 e = __e;
ian@0 96 break;
ian@0 97 }
ian@0 98 }
ian@0 99
ian@0 100 return e;
ian@0 101 }
ian@0 102
ian@0 103 static void elevator_put(struct elevator_type *e)
ian@0 104 {
ian@0 105 module_put(e->elevator_owner);
ian@0 106 }
ian@0 107
ian@0 108 static struct elevator_type *elevator_get(const char *name)
ian@0 109 {
ian@0 110 struct elevator_type *e;
ian@0 111
ian@0 112 spin_lock_irq(&elv_list_lock);
ian@0 113
ian@0 114 e = elevator_find(name);
ian@0 115 if (e && !try_module_get(e->elevator_owner))
ian@0 116 e = NULL;
ian@0 117
ian@0 118 spin_unlock_irq(&elv_list_lock);
ian@0 119
ian@0 120 return e;
ian@0 121 }
ian@0 122
ian@0 123 static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq)
ian@0 124 {
ian@0 125 return eq->ops->elevator_init_fn(q, eq);
ian@0 126 }
ian@0 127
ian@0 128 static void elevator_attach(request_queue_t *q, struct elevator_queue *eq,
ian@0 129 void *data)
ian@0 130 {
ian@0 131 q->elevator = eq;
ian@0 132 eq->elevator_data = data;
ian@0 133 }
ian@0 134
ian@0 135 static char chosen_elevator[16];
ian@0 136
ian@0 137 static int __init elevator_setup(char *str)
ian@0 138 {
ian@0 139 /*
ian@0 140 * Be backwards-compatible with previous kernels, so users
ian@0 141 * won't get the wrong elevator.
ian@0 142 */
ian@0 143 if (!strcmp(str, "as"))
ian@0 144 strcpy(chosen_elevator, "anticipatory");
ian@0 145 else
ian@0 146 strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
ian@0 147 return 1;
ian@0 148 }
ian@0 149
ian@0 150 __setup("elevator=", elevator_setup);
ian@0 151
ian@0 152 static struct kobj_type elv_ktype;
ian@0 153
ian@0 154 static elevator_t *elevator_alloc(struct elevator_type *e)
ian@0 155 {
ian@0 156 elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
ian@0 157 if (eq) {
ian@0 158 memset(eq, 0, sizeof(*eq));
ian@0 159 eq->ops = &e->ops;
ian@0 160 eq->elevator_type = e;
ian@0 161 kobject_init(&eq->kobj);
ian@0 162 snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
ian@0 163 eq->kobj.ktype = &elv_ktype;
ian@0 164 mutex_init(&eq->sysfs_lock);
ian@0 165 } else {
ian@0 166 elevator_put(e);
ian@0 167 }
ian@0 168 return eq;
ian@0 169 }
ian@0 170
ian@0 171 static void elevator_release(struct kobject *kobj)
ian@0 172 {
ian@0 173 elevator_t *e = container_of(kobj, elevator_t, kobj);
ian@0 174 elevator_put(e->elevator_type);
ian@0 175 kfree(e);
ian@0 176 }
ian@0 177
ian@0 178 int elevator_init(request_queue_t *q, char *name)
ian@0 179 {
ian@0 180 struct elevator_type *e = NULL;
ian@0 181 struct elevator_queue *eq;
ian@0 182 int ret = 0;
ian@0 183 void *data;
ian@0 184
ian@0 185 INIT_LIST_HEAD(&q->queue_head);
ian@0 186 q->last_merge = NULL;
ian@0 187 q->end_sector = 0;
ian@0 188 q->boundary_rq = NULL;
ian@0 189
ian@0 190 if (name && !(e = elevator_get(name)))
ian@0 191 return -EINVAL;
ian@0 192
ian@0 193 if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
ian@0 194 printk("I/O scheduler %s not found\n", chosen_elevator);
ian@0 195
ian@0 196 if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
ian@0 197 printk("Default I/O scheduler not found, using no-op\n");
ian@0 198 e = elevator_get("noop");
ian@0 199 }
ian@0 200
ian@0 201 eq = elevator_alloc(e);
ian@0 202 if (!eq)
ian@0 203 return -ENOMEM;
ian@0 204
ian@0 205 data = elevator_init_queue(q, eq);
ian@0 206 if (!data) {
ian@0 207 kobject_put(&eq->kobj);
ian@0 208 return -ENOMEM;
ian@0 209 }
ian@0 210
ian@0 211 elevator_attach(q, eq, data);
ian@0 212 return ret;
ian@0 213 }
ian@0 214
ian@0 215 void elevator_exit(elevator_t *e)
ian@0 216 {
ian@0 217 mutex_lock(&e->sysfs_lock);
ian@0 218 if (e->ops->elevator_exit_fn)
ian@0 219 e->ops->elevator_exit_fn(e);
ian@0 220 e->ops = NULL;
ian@0 221 mutex_unlock(&e->sysfs_lock);
ian@0 222
ian@0 223 kobject_put(&e->kobj);
ian@0 224 }
ian@0 225
ian@0 226 /*
ian@0 227 * Insert rq into dispatch queue of q. Queue lock must be held on
ian@0 228 * entry. If sort != 0, rq is sort-inserted; otherwise, rq will be
ian@0 229 * appended to the dispatch queue. To be used by specific elevators.
ian@0 230 */
ian@0 231 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
ian@0 232 {
ian@0 233 sector_t boundary;
ian@0 234 struct list_head *entry;
ian@0 235
ian@0 236 if (q->last_merge == rq)
ian@0 237 q->last_merge = NULL;
ian@0 238 q->nr_sorted--;
ian@0 239
ian@0 240 boundary = q->end_sector;
ian@0 241
ian@0 242 list_for_each_prev(entry, &q->queue_head) {
ian@0 243 struct request *pos = list_entry_rq(entry);
ian@0 244
ian@0 245 if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
ian@0 246 break;
ian@0 247 if (rq->sector >= boundary) {
ian@0 248 if (pos->sector < boundary)
ian@0 249 continue;
ian@0 250 } else {
ian@0 251 if (pos->sector >= boundary)
ian@0 252 break;
ian@0 253 }
ian@0 254 if (rq->sector >= pos->sector)
ian@0 255 break;
ian@0 256 }
ian@0 257
ian@0 258 list_add(&rq->queuelist, entry);
ian@0 259 }
ian@0 260
ian@0 261 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
ian@0 262 {
ian@0 263 elevator_t *e = q->elevator;
ian@0 264 int ret;
ian@0 265
ian@0 266 if (q->last_merge) {
ian@0 267 ret = elv_try_merge(q->last_merge, bio);
ian@0 268 if (ret != ELEVATOR_NO_MERGE) {
ian@0 269 *req = q->last_merge;
ian@0 270 return ret;
ian@0 271 }
ian@0 272 }
ian@0 273
ian@0 274 if (e->ops->elevator_merge_fn)
ian@0 275 return e->ops->elevator_merge_fn(q, req, bio);
ian@0 276
ian@0 277 return ELEVATOR_NO_MERGE;
ian@0 278 }
ian@0 279
ian@0 280 void elv_merged_request(request_queue_t *q, struct request *rq)
ian@0 281 {
ian@0 282 elevator_t *e = q->elevator;
ian@0 283
ian@0 284 if (e->ops->elevator_merged_fn)
ian@0 285 e->ops->elevator_merged_fn(q, rq);
ian@0 286
ian@0 287 q->last_merge = rq;
ian@0 288 }
ian@0 289
ian@0 290 void elv_merge_requests(request_queue_t *q, struct request *rq,
ian@0 291 struct request *next)
ian@0 292 {
ian@0 293 elevator_t *e = q->elevator;
ian@0 294
ian@0 295 if (e->ops->elevator_merge_req_fn)
ian@0 296 e->ops->elevator_merge_req_fn(q, rq, next);
ian@0 297 q->nr_sorted--;
ian@0 298
ian@0 299 q->last_merge = rq;
ian@0 300 }
ian@0 301
ian@0 302 void elv_requeue_request(request_queue_t *q, struct request *rq)
ian@0 303 {
ian@0 304 elevator_t *e = q->elevator;
ian@0 305
ian@0 306 /*
ian@0 307 * it already went through dequeue, we need to decrement the
ian@0 308 * in_flight count again
ian@0 309 */
ian@0 310 if (blk_account_rq(rq)) {
ian@0 311 q->in_flight--;
ian@0 312 if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
ian@0 313 e->ops->elevator_deactivate_req_fn(q, rq);
ian@0 314 }
ian@0 315
ian@0 316 rq->flags &= ~REQ_STARTED;
ian@0 317
ian@0 318 elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
ian@0 319 }
ian@0 320
ian@0 321 static void elv_drain_elevator(request_queue_t *q)
ian@0 322 {
ian@0 323 static int printed;
ian@0 324 while (q->elevator->ops->elevator_dispatch_fn(q, 1))
ian@0 325 ;
ian@0 326 if (q->nr_sorted == 0)
ian@0 327 return;
ian@0 328 if (printed++ < 10) {
ian@0 329 printk(KERN_ERR "%s: forced dispatching is broken "
ian@0 330 "(nr_sorted=%u), please report this\n",
ian@0 331 q->elevator->elevator_type->elevator_name, q->nr_sorted);
ian@0 332 }
ian@0 333 }
ian@0 334
ian@0 335 void elv_insert(request_queue_t *q, struct request *rq, int where)
ian@0 336 {
ian@0 337 struct list_head *pos;
ian@0 338 unsigned ordseq;
ian@0 339 int unplug_it = 1;
ian@0 340
ian@0 341 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
ian@0 342
ian@0 343 rq->q = q;
ian@0 344
ian@0 345 switch (where) {
ian@0 346 case ELEVATOR_INSERT_FRONT:
ian@0 347 rq->flags |= REQ_SOFTBARRIER;
ian@0 348
ian@0 349 list_add(&rq->queuelist, &q->queue_head);
ian@0 350 break;
ian@0 351
ian@0 352 case ELEVATOR_INSERT_BACK:
ian@0 353 rq->flags |= REQ_SOFTBARRIER;
ian@0 354 elv_drain_elevator(q);
ian@0 355 list_add_tail(&rq->queuelist, &q->queue_head);
ian@0 356 /*
ian@0 357 * We kick the queue here for the following reasons.
ian@0 358 * - The elevator might have returned NULL previously
ian@0 359 * to delay requests and returned them now. As the
ian@0 360 * queue wasn't empty before this request, ll_rw_blk
ian@0 361 * won't run the queue on return, resulting in hang.
ian@0 362 * - Usually, back inserted requests won't be merged
ian@0 363 * with anything. There's no point in delaying queue
ian@0 364 * processing.
ian@0 365 */
ian@0 366 blk_remove_plug(q);
ian@0 367 q->request_fn(q);
ian@0 368 break;
ian@0 369
ian@0 370 case ELEVATOR_INSERT_SORT:
ian@0 371 BUG_ON(!blk_fs_request(rq));
ian@0 372 rq->flags |= REQ_SORTED;
ian@0 373 q->nr_sorted++;
ian@0 374 if (q->last_merge == NULL && rq_mergeable(rq))
ian@0 375 q->last_merge = rq;
ian@0 376 /*
ian@0 377 * Some ioscheds (cfq) run q->request_fn directly, so
ian@0 378 * rq cannot be accessed after calling
ian@0 379 * elevator_add_req_fn.
ian@0 380 */
ian@0 381 q->elevator->ops->elevator_add_req_fn(q, rq);
ian@0 382 break;
ian@0 383
ian@0 384 case ELEVATOR_INSERT_REQUEUE:
ian@0 385 /*
ian@0 386 * If ordered flush isn't in progress, we do front
ian@0 387 * insertion; otherwise, requests should be requeued
ian@0 388 * in ordseq order.
ian@0 389 */
ian@0 390 rq->flags |= REQ_SOFTBARRIER;
ian@0 391
ian@0 392 if (q->ordseq == 0) {
ian@0 393 list_add(&rq->queuelist, &q->queue_head);
ian@0 394 break;
ian@0 395 }
ian@0 396
ian@0 397 ordseq = blk_ordered_req_seq(rq);
ian@0 398
ian@0 399 list_for_each(pos, &q->queue_head) {
ian@0 400 struct request *pos_rq = list_entry_rq(pos);
ian@0 401 if (ordseq <= blk_ordered_req_seq(pos_rq))
ian@0 402 break;
ian@0 403 }
ian@0 404
ian@0 405 list_add_tail(&rq->queuelist, pos);
ian@0 406 /*
ian@0 407 * most requeues happen because of a busy condition, don't
ian@0 408 * force unplug of the queue for that case.
ian@0 409 */
ian@0 410 unplug_it = 0;
ian@0 411 break;
ian@0 412
ian@0 413 default:
ian@0 414 printk(KERN_ERR "%s: bad insertion point %d\n",
ian@0 415 __FUNCTION__, where);
ian@0 416 BUG();
ian@0 417 }
ian@0 418
ian@0 419 if (unplug_it && blk_queue_plugged(q)) {
ian@0 420 int nrq = q->rq.count[READ] + q->rq.count[WRITE]
ian@0 421 - q->in_flight;
ian@0 422
ian@0 423 if (nrq >= q->unplug_thresh)
ian@0 424 __generic_unplug_device(q);
ian@0 425 }
ian@0 426 }
ian@0 427
ian@0 428 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
ian@0 429 int plug)
ian@0 430 {
ian@0 431 if (q->ordcolor)
ian@0 432 rq->flags |= REQ_ORDERED_COLOR;
ian@0 433
ian@0 434 if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
ian@0 435 /*
ian@0 436 * toggle ordered color
ian@0 437 */
ian@0 438 if (blk_barrier_rq(rq))
ian@0 439 q->ordcolor ^= 1;
ian@0 440
ian@0 441 /*
ian@0 442 * barriers implicitly indicate back insertion
ian@0 443 */
ian@0 444 if (where == ELEVATOR_INSERT_SORT)
ian@0 445 where = ELEVATOR_INSERT_BACK;
ian@0 446
ian@0 447 /*
ian@0 448 * this request is scheduling boundary, update
ian@0 449 * end_sector
ian@0 450 */
ian@0 451 if (blk_fs_request(rq)) {
ian@0 452 q->end_sector = rq_end_sector(rq);
ian@0 453 q->boundary_rq = rq;
ian@0 454 }
ian@0 455 } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
ian@0 456 where = ELEVATOR_INSERT_BACK;
ian@0 457
ian@0 458 if (plug)
ian@0 459 blk_plug_device(q);
ian@0 460
ian@0 461 elv_insert(q, rq, where);
ian@0 462 }
ian@0 463
ian@0 464 void elv_add_request(request_queue_t *q, struct request *rq, int where,
ian@0 465 int plug)
ian@0 466 {
ian@0 467 unsigned long flags;
ian@0 468
ian@0 469 spin_lock_irqsave(q->queue_lock, flags);
ian@0 470 __elv_add_request(q, rq, where, plug);
ian@0 471 spin_unlock_irqrestore(q->queue_lock, flags);
ian@0 472 }
ian@0 473
ian@0 474 static inline struct request *__elv_next_request(request_queue_t *q)
ian@0 475 {
ian@0 476 struct request *rq;
ian@0 477
ian@0 478 while (1) {
ian@0 479 while (!list_empty(&q->queue_head)) {
ian@0 480 rq = list_entry_rq(q->queue_head.next);
ian@0 481 if (blk_do_ordered(q, &rq))
ian@0 482 return rq;
ian@0 483 }
ian@0 484
ian@0 485 if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
ian@0 486 return NULL;
ian@0 487 }
ian@0 488 }
ian@0 489
ian@0 490 struct request *elv_next_request(request_queue_t *q)
ian@0 491 {
ian@0 492 struct request *rq;
ian@0 493 int ret;
ian@0 494
ian@0 495 while ((rq = __elv_next_request(q)) != NULL) {
keir@417 496 /*
keir@417 497 * Kill the empty barrier place holder, the driver must
keir@417 498 * not ever see it.
keir@417 499 */
keir@417 500 if (blk_empty_barrier(rq)) {
keir@417 501 blkdev_dequeue_request(rq);
keir@417 502 end_that_request_chunk(rq, 1, 0);
keir@417 503 end_that_request_last(rq, 1);
keir@417 504 continue;
keir@417 505 }
ian@0 506 if (!(rq->flags & REQ_STARTED)) {
ian@0 507 elevator_t *e = q->elevator;
ian@0 508
ian@0 509 /*
ian@0 510 * This is the first time the device driver
ian@0 511 * sees this request (possibly after
ian@0 512 * requeueing). Notify IO scheduler.
ian@0 513 */
ian@0 514 if (blk_sorted_rq(rq) &&
ian@0 515 e->ops->elevator_activate_req_fn)
ian@0 516 e->ops->elevator_activate_req_fn(q, rq);
ian@0 517
ian@0 518 /*
ian@0 519 * just mark as started even if we don't start
ian@0 520 * it, a request that has been delayed should
ian@0 521 * not be passed by new incoming requests
ian@0 522 */
ian@0 523 rq->flags |= REQ_STARTED;
ian@0 524 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
ian@0 525 }
ian@0 526
ian@0 527 if (!q->boundary_rq || q->boundary_rq == rq) {
ian@0 528 q->end_sector = rq_end_sector(rq);
ian@0 529 q->boundary_rq = NULL;
ian@0 530 }
ian@0 531
ian@0 532 if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
ian@0 533 break;
ian@0 534
ian@0 535 ret = q->prep_rq_fn(q, rq);
ian@0 536 if (ret == BLKPREP_OK) {
ian@0 537 break;
ian@0 538 } else if (ret == BLKPREP_DEFER) {
ian@0 539 /*
ian@0 540 * the request may have been (partially) prepped.
ian@0 541 * we need to keep this request in the front to
ian@0 542 * avoid resource deadlock. REQ_STARTED will
ian@0 543 * prevent other fs requests from passing this one.
ian@0 544 */
ian@0 545 rq = NULL;
ian@0 546 break;
ian@0 547 } else if (ret == BLKPREP_KILL) {
ian@0 548 int nr_bytes = rq->hard_nr_sectors << 9;
ian@0 549
ian@0 550 if (!nr_bytes)
ian@0 551 nr_bytes = rq->data_len;
ian@0 552
ian@0 553 blkdev_dequeue_request(rq);
ian@0 554 rq->flags |= REQ_QUIET;
ian@0 555 end_that_request_chunk(rq, 0, nr_bytes);
ian@0 556 end_that_request_last(rq, 0);
ian@0 557 } else {
ian@0 558 printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
ian@0 559 ret);
ian@0 560 break;
ian@0 561 }
ian@0 562 }
ian@0 563
ian@0 564 return rq;
ian@0 565 }
ian@0 566
ian@0 567 void elv_dequeue_request(request_queue_t *q, struct request *rq)
ian@0 568 {
ian@0 569 BUG_ON(list_empty(&rq->queuelist));
ian@0 570
ian@0 571 list_del_init(&rq->queuelist);
ian@0 572
ian@0 573 /*
ian@0 574 * the time frame between a request being removed from the lists
ian@0 575 * and when it is freed is accounted as io that is in progress at
ian@0 576 * the driver side.
ian@0 577 */
ian@0 578 if (blk_account_rq(rq))
ian@0 579 q->in_flight++;
ian@0 580 }
ian@0 581
ian@0 582 int elv_queue_empty(request_queue_t *q)
ian@0 583 {
ian@0 584 elevator_t *e = q->elevator;
ian@0 585
ian@0 586 if (!list_empty(&q->queue_head))
ian@0 587 return 0;
ian@0 588
ian@0 589 if (e->ops->elevator_queue_empty_fn)
ian@0 590 return e->ops->elevator_queue_empty_fn(q);
ian@0 591
ian@0 592 return 1;
ian@0 593 }
ian@0 594
ian@0 595 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
ian@0 596 {
ian@0 597 elevator_t *e = q->elevator;
ian@0 598
ian@0 599 if (e->ops->elevator_latter_req_fn)
ian@0 600 return e->ops->elevator_latter_req_fn(q, rq);
ian@0 601 return NULL;
ian@0 602 }
ian@0 603
ian@0 604 struct request *elv_former_request(request_queue_t *q, struct request *rq)
ian@0 605 {
ian@0 606 elevator_t *e = q->elevator;
ian@0 607
ian@0 608 if (e->ops->elevator_former_req_fn)
ian@0 609 return e->ops->elevator_former_req_fn(q, rq);
ian@0 610 return NULL;
ian@0 611 }
ian@0 612
ian@0 613 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
ian@0 614 gfp_t gfp_mask)
ian@0 615 {
ian@0 616 elevator_t *e = q->elevator;
ian@0 617
ian@0 618 if (e->ops->elevator_set_req_fn)
ian@0 619 return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
ian@0 620
ian@0 621 rq->elevator_private = NULL;
ian@0 622 return 0;
ian@0 623 }
ian@0 624
ian@0 625 void elv_put_request(request_queue_t *q, struct request *rq)
ian@0 626 {
ian@0 627 elevator_t *e = q->elevator;
ian@0 628
ian@0 629 if (e->ops->elevator_put_req_fn)
ian@0 630 e->ops->elevator_put_req_fn(q, rq);
ian@0 631 }
ian@0 632
ian@0 633 int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
ian@0 634 {
ian@0 635 elevator_t *e = q->elevator;
ian@0 636
ian@0 637 if (e->ops->elevator_may_queue_fn)
ian@0 638 return e->ops->elevator_may_queue_fn(q, rw, bio);
ian@0 639
ian@0 640 return ELV_MQUEUE_MAY;
ian@0 641 }
ian@0 642
ian@0 643 void elv_completed_request(request_queue_t *q, struct request *rq)
ian@0 644 {
ian@0 645 elevator_t *e = q->elevator;
ian@0 646
ian@0 647 /*
ian@0 648 * request is released from the driver, io must be done
ian@0 649 */
ian@0 650 if (blk_account_rq(rq)) {
ian@0 651 q->in_flight--;
ian@0 652 if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
ian@0 653 e->ops->elevator_completed_req_fn(q, rq);
ian@0 654 }
ian@0 655
ian@0 656 /*
ian@0 657 * Check if the queue is waiting for fs requests to be
ian@0 658 * drained for flush sequence.
ian@0 659 */
ian@0 660 if (unlikely(q->ordseq)) {
ian@0 661 struct request *first_rq = list_entry_rq(q->queue_head.next);
ian@0 662 if (q->in_flight == 0 &&
ian@0 663 blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
ian@0 664 blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
ian@0 665 blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
ian@0 666 q->request_fn(q);
ian@0 667 }
ian@0 668 }
ian@0 669 }
ian@0 670
ian@0 671 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
ian@0 672
ian@0 673 static ssize_t
ian@0 674 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
ian@0 675 {
ian@0 676 elevator_t *e = container_of(kobj, elevator_t, kobj);
ian@0 677 struct elv_fs_entry *entry = to_elv(attr);
ian@0 678 ssize_t error;
ian@0 679
ian@0 680 if (!entry->show)
ian@0 681 return -EIO;
ian@0 682
ian@0 683 mutex_lock(&e->sysfs_lock);
ian@0 684 error = e->ops ? entry->show(e, page) : -ENOENT;
ian@0 685 mutex_unlock(&e->sysfs_lock);
ian@0 686 return error;
ian@0 687 }
ian@0 688
ian@0 689 static ssize_t
ian@0 690 elv_attr_store(struct kobject *kobj, struct attribute *attr,
ian@0 691 const char *page, size_t length)
ian@0 692 {
ian@0 693 elevator_t *e = container_of(kobj, elevator_t, kobj);
ian@0 694 struct elv_fs_entry *entry = to_elv(attr);
ian@0 695 ssize_t error;
ian@0 696
ian@0 697 if (!entry->store)
ian@0 698 return -EIO;
ian@0 699
ian@0 700 mutex_lock(&e->sysfs_lock);
ian@0 701 error = e->ops ? entry->store(e, page, length) : -ENOENT;
ian@0 702 mutex_unlock(&e->sysfs_lock);
ian@0 703 return error;
ian@0 704 }
ian@0 705
ian@0 706 static struct sysfs_ops elv_sysfs_ops = {
ian@0 707 .show = elv_attr_show,
ian@0 708 .store = elv_attr_store,
ian@0 709 };
ian@0 710
ian@0 711 static struct kobj_type elv_ktype = {
ian@0 712 .sysfs_ops = &elv_sysfs_ops,
ian@0 713 .release = elevator_release,
ian@0 714 };
ian@0 715
ian@0 716 int elv_register_queue(struct request_queue *q)
ian@0 717 {
ian@0 718 elevator_t *e = q->elevator;
ian@0 719 int error;
ian@0 720
ian@0 721 e->kobj.parent = &q->kobj;
ian@0 722
ian@0 723 error = kobject_add(&e->kobj);
ian@0 724 if (!error) {
ian@0 725 struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
ian@0 726 if (attr) {
ian@0 727 while (attr->attr.name) {
ian@0 728 if (sysfs_create_file(&e->kobj, &attr->attr))
ian@0 729 break;
ian@0 730 attr++;
ian@0 731 }
ian@0 732 }
ian@0 733 kobject_uevent(&e->kobj, KOBJ_ADD);
ian@0 734 }
ian@0 735 return error;
ian@0 736 }
ian@0 737
ian@0 738 static void __elv_unregister_queue(elevator_t *e)
ian@0 739 {
ian@0 740 kobject_uevent(&e->kobj, KOBJ_REMOVE);
ian@0 741 kobject_del(&e->kobj);
ian@0 742 }
ian@0 743
ian@0 744 void elv_unregister_queue(struct request_queue *q)
ian@0 745 {
ian@0 746 if (q)
ian@0 747 __elv_unregister_queue(q->elevator);
ian@0 748 }
ian@0 749
ian@0 750 int elv_register(struct elevator_type *e)
ian@0 751 {
ian@0 752 spin_lock_irq(&elv_list_lock);
ian@0 753 BUG_ON(elevator_find(e->elevator_name));
ian@0 754 list_add_tail(&e->list, &elv_list);
ian@0 755 spin_unlock_irq(&elv_list_lock);
ian@0 756
ian@0 757 printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
ian@0 758 if (!strcmp(e->elevator_name, chosen_elevator) ||
ian@0 759 (!*chosen_elevator &&
ian@0 760 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
ian@0 761 printk(" (default)");
ian@0 762 printk("\n");
ian@0 763 return 0;
ian@0 764 }
ian@0 765 EXPORT_SYMBOL_GPL(elv_register);
ian@0 766
ian@0 767 void elv_unregister(struct elevator_type *e)
ian@0 768 {
ian@0 769 struct task_struct *g, *p;
ian@0 770
ian@0 771 /*
ian@0 772 * Iterate every thread in the process to remove the io contexts.
ian@0 773 */
ian@0 774 if (e->ops.trim) {
ian@0 775 read_lock(&tasklist_lock);
ian@0 776 do_each_thread(g, p) {
ian@0 777 task_lock(p);
ian@0 778 if (p->io_context)
ian@0 779 e->ops.trim(p->io_context);
ian@0 780 task_unlock(p);
ian@0 781 } while_each_thread(g, p);
ian@0 782 read_unlock(&tasklist_lock);
ian@0 783 }
ian@0 784
ian@0 785 spin_lock_irq(&elv_list_lock);
ian@0 786 list_del_init(&e->list);
ian@0 787 spin_unlock_irq(&elv_list_lock);
ian@0 788 }
ian@0 789 EXPORT_SYMBOL_GPL(elv_unregister);
ian@0 790
ian@0 791 /*
ian@0 792 * switch to new_e io scheduler. be careful not to introduce deadlocks -
ian@0 793 * we don't free the old io scheduler, before we have allocated what we
ian@0 794 * need for the new one. this way we have a chance of going back to the old
ian@0 795 * one, if the new one fails init for some reason.
ian@0 796 */
ian@0 797 static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
ian@0 798 {
ian@0 799 elevator_t *old_elevator, *e;
ian@0 800 void *data;
ian@0 801
ian@0 802 /*
ian@0 803 * Allocate new elevator
ian@0 804 */
ian@0 805 e = elevator_alloc(new_e);
ian@0 806 if (!e)
ian@0 807 return 0;
ian@0 808
ian@0 809 data = elevator_init_queue(q, e);
ian@0 810 if (!data) {
ian@0 811 kobject_put(&e->kobj);
ian@0 812 return 0;
ian@0 813 }
ian@0 814
ian@0 815 /*
ian@0 816 * Turn on BYPASS and drain all requests w/ elevator private data
ian@0 817 */
ian@0 818 spin_lock_irq(q->queue_lock);
ian@0 819
ian@0 820 set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
ian@0 821
ian@0 822 elv_drain_elevator(q);
ian@0 823
ian@0 824 while (q->rq.elvpriv) {
ian@0 825 blk_remove_plug(q);
ian@0 826 q->request_fn(q);
ian@0 827 spin_unlock_irq(q->queue_lock);
ian@0 828 msleep(10);
ian@0 829 spin_lock_irq(q->queue_lock);
ian@0 830 elv_drain_elevator(q);
ian@0 831 }
ian@0 832
ian@0 833 /*
ian@0 834 * Remember old elevator.
ian@0 835 */
ian@0 836 old_elevator = q->elevator;
ian@0 837
ian@0 838 /*
ian@0 839 * attach and start new elevator
ian@0 840 */
ian@0 841 elevator_attach(q, e, data);
ian@0 842
ian@0 843 spin_unlock_irq(q->queue_lock);
ian@0 844
ian@0 845 __elv_unregister_queue(old_elevator);
ian@0 846
ian@0 847 if (elv_register_queue(q))
ian@0 848 goto fail_register;
ian@0 849
ian@0 850 /*
ian@0 851 * finally exit old elevator and turn off BYPASS.
ian@0 852 */
ian@0 853 elevator_exit(old_elevator);
ian@0 854 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
ian@0 855 return 1;
ian@0 856
ian@0 857 fail_register:
ian@0 858 /*
ian@0 859 * switch failed, exit the new io scheduler and reattach the old
ian@0 860 * one again (along with re-adding the sysfs dir)
ian@0 861 */
ian@0 862 elevator_exit(e);
ian@0 863 q->elevator = old_elevator;
ian@0 864 elv_register_queue(q);
ian@0 865 clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
ian@0 866 return 0;
ian@0 867 }
ian@0 868
ian@0 869 ssize_t elv_iosched_store(request_queue_t *q, const char *name, size_t count)
ian@0 870 {
ian@0 871 char elevator_name[ELV_NAME_MAX];
ian@0 872 size_t len;
ian@0 873 struct elevator_type *e;
ian@0 874
ian@0 875 elevator_name[sizeof(elevator_name) - 1] = '\0';
ian@0 876 strncpy(elevator_name, name, sizeof(elevator_name) - 1);
ian@0 877 len = strlen(elevator_name);
ian@0 878
ian@0 879 if (len && elevator_name[len - 1] == '\n')
ian@0 880 elevator_name[len - 1] = '\0';
ian@0 881
ian@0 882 e = elevator_get(elevator_name);
ian@0 883 if (!e) {
ian@0 884 printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
ian@0 885 return -EINVAL;
ian@0 886 }
ian@0 887
ian@0 888 if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
ian@0 889 elevator_put(e);
ian@0 890 return count;
ian@0 891 }
ian@0 892
ian@0 893 if (!elevator_switch(q, e))
ian@0 894 printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
ian@0 895 return count;
ian@0 896 }
ian@0 897
ian@0 898 ssize_t elv_iosched_show(request_queue_t *q, char *name)
ian@0 899 {
ian@0 900 elevator_t *e = q->elevator;
ian@0 901 struct elevator_type *elv = e->elevator_type;
ian@0 902 struct list_head *entry;
ian@0 903 int len = 0;
ian@0 904
ian@240 905 spin_lock_irq(&elv_list_lock);
ian@0 906 list_for_each(entry, &elv_list) {
ian@0 907 struct elevator_type *__e;
ian@0 908
ian@0 909 __e = list_entry(entry, struct elevator_type, list);
ian@0 910 if (!strcmp(elv->elevator_name, __e->elevator_name))
ian@0 911 len += sprintf(name+len, "[%s] ", elv->elevator_name);
ian@0 912 else
ian@0 913 len += sprintf(name+len, "%s ", __e->elevator_name);
ian@0 914 }
ian@240 915 spin_unlock_irq(&elv_list_lock);
ian@0 916
ian@0 917 len += sprintf(len+name, "\n");
ian@0 918 return len;
ian@0 919 }
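For orientation, a sketch of how these two functions are usually reached (the device name sdX is hypothetical):

	/*
	 * Reading /sys/block/sdX/queue/scheduler lands in elv_iosched_show()
	 * and returns something like "noop anticipatory deadline [cfq]\n",
	 * with the active elevator bracketed.  Writing "deadline\n" to the
	 * same attribute reaches elv_iosched_store(), which strips the
	 * trailing newline, looks the type up with elevator_get() and
	 * switches via elevator_switch().
	 */
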
ian@0 920
ian@0 921 EXPORT_SYMBOL(elv_dispatch_sort);
ian@0 922 EXPORT_SYMBOL(elv_add_request);
ian@0 923 EXPORT_SYMBOL(__elv_add_request);
ian@0 924 EXPORT_SYMBOL(elv_next_request);
ian@0 925 EXPORT_SYMBOL(elv_dequeue_request);
ian@0 926 EXPORT_SYMBOL(elv_queue_empty);
ian@0 927 EXPORT_SYMBOL(elevator_exit);
ian@0 928 EXPORT_SYMBOL(elevator_init);