ia64/xen-unstable

view linux-2.4.27-xen-sparse/drivers/block/ll_rw_blk.c @ 2552:6becc0cc3b8d

bitkeeper revision 1.1159.91.1 (41545490klPYsVC65RXTsROARJj8bw)

build 2.4 w/out blkdev frontend (e.g. for dom0)
author smh22@tempest.cl.cam.ac.uk
date Fri Sep 24 17:08:32 2004 +0000 (2004-09-24)
parents 869c20f2977b
children b3e98ee24986
line source
1 /*
2 * linux/drivers/block/ll_rw_blk.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
6 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
7 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
8 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000
9 */
11 /*
12 * This handles all read/write requests to block devices
13 */
14 #include <linux/sched.h>
15 #include <linux/kernel.h>
16 #include <linux/kernel_stat.h>
17 #include <linux/errno.h>
18 #include <linux/string.h>
19 #include <linux/config.h>
20 #include <linux/locks.h>
21 #include <linux/mm.h>
22 #include <linux/swap.h>
23 #include <linux/init.h>
24 #include <linux/smp_lock.h>
25 #include <linux/completion.h>
26 #include <linux/bootmem.h>
28 #include <asm/system.h>
29 #include <asm/io.h>
30 #include <linux/blk.h>
31 #include <linux/highmem.h>
32 #include <linux/slab.h>
33 #include <linux/module.h>
35 /*
36 * MAC Floppy IWM hooks
37 */
39 #ifdef CONFIG_MAC_FLOPPY_IWM
40 extern int mac_floppy_init(void);
41 #endif
43 /*
44 * For the allocated request tables
45 */
46 static kmem_cache_t *request_cachep;
48 /*
49 * The "disk" task queue is used to start the actual requests
50 * after a plug
51 */
52 DECLARE_TASK_QUEUE(tq_disk);
54 /*
55 * Protect the request list against multiple users..
56 *
57 * With this spinlock the Linux block IO subsystem is 100% SMP threaded
58 * from the IRQ event side, and almost 100% SMP threaded from the syscall
59 * side (we still have to protect against block device array operations, and
60 * the do_request() side is casually still unsafe. The kernel lock protects
61 * this part currently.).
62 *
63 * there is a fair chance that things will work just OK if these functions
64 * are called with no global kernel lock held ...
65 */
66 spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
68 /* This specifies how many sectors to read ahead on the disk. */
70 int read_ahead[MAX_BLKDEV];
72 /* blk_dev_struct is:
73 * *request_fn
74 * *current_request
75 */
76 struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
78 /*
79 * blk_size contains the size of all block-devices in units of 1024 byte
80 * sectors:
81 *
82 * blk_size[MAJOR][MINOR]
83 *
84 * if (!blk_size[MAJOR]) then no minor size checking is done.
85 */
86 int * blk_size[MAX_BLKDEV];
88 /*
89 * blksize_size contains the size of all block-devices:
90 *
91 * blksize_size[MAJOR][MINOR]
92 *
93 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
94 */
95 int * blksize_size[MAX_BLKDEV];
97 /*
98 * hardsect_size contains the size of the hardware sector of a device.
99 *
100 * hardsect_size[MAJOR][MINOR]
101 *
102 * if (!hardsect_size[MAJOR])
103 * then 512 bytes is assumed.
104 * else
105 * sector_size is hardsect_size[MAJOR][MINOR]
106 * This is currently set by some scsi devices and read by the msdos fs driver.
107 * Other uses may appear later.
108 */
109 int * hardsect_size[MAX_BLKDEV];
111 /*
112 * The following tunes the read-ahead algorithm in mm/filemap.c
113 */
114 int * max_readahead[MAX_BLKDEV];
116 /*
117 * Max number of sectors per request
118 */
119 int * max_sectors[MAX_BLKDEV];
121 unsigned long blk_max_low_pfn, blk_max_pfn;
122 int blk_nohighio = 0;
124 int block_dump = 0;
126 static struct timer_list writeback_timer;
128 static inline int get_max_sectors(kdev_t dev)
129 {
130 if (!max_sectors[MAJOR(dev)])
131 return MAX_SECTORS;
132 return max_sectors[MAJOR(dev)][MINOR(dev)];
133 }
135 inline request_queue_t *blk_get_queue(kdev_t dev)
136 {
137 struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
139 if (bdev->queue)
140 return bdev->queue(dev);
141 else
142 return &blk_dev[MAJOR(dev)].request_queue;
143 }
145 static int __blk_cleanup_queue(struct request_list *list)
146 {
147 struct list_head *head = &list->free;
148 struct request *rq;
149 int i = 0;
151 while (!list_empty(head)) {
152 rq = list_entry(head->next, struct request, queue);
153 list_del(&rq->queue);
154 kmem_cache_free(request_cachep, rq);
155 i++;
156 };
158 if (i != list->count)
159 printk("request list leak!\n");
161 list->count = 0;
162 return i;
163 }
165 /**
166 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
167 * @q: the request queue to be released
168 *
169 * Description:
170 * blk_cleanup_queue is the pair to blk_init_queue(). It should
171 * be called when a request queue is being released; typically
172 * when a block device is being de-registered. Currently, its
173 * primary task is to free all the &struct request structures that
174 * were allocated to the queue.
175 * Caveat:
176 * Hopefully the low level driver will have finished any
177 * outstanding requests first...
178 **/
179 void blk_cleanup_queue(request_queue_t * q)
180 {
181 int count = q->nr_requests;
183 count -= __blk_cleanup_queue(&q->rq);
185 if (count)
186 printk("blk_cleanup_queue: leaked requests (%d)\n", count);
187 if (atomic_read(&q->nr_sectors))
188 printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
190 memset(q, 0, sizeof(*q));
191 }
193 /**
194 * blk_queue_headactive - indicate whether head of request queue may be active
195 * @q: The queue which this applies to.
196 * @active: A flag indicating whether the head of the queue is active.
197 *
198 * Description:
199 * The driver for a block device may choose to leave the currently active
200 * request on the request queue, removing it only when it has completed.
201 * The queue handling routines assume this by default for safety reasons
202 * and will not involve the head of the request queue in any merging or
203 * reordering of requests when the queue is unplugged (and thus may be
204 * working on this particular request).
205 *
206 * If a driver removes requests from the queue before processing them, then
207 * it may indicate that it does so, thereby allowing the head of the queue
208 * to be involved in merging and reordering. This is done by calling
209 * blk_queue_headactive() with an @active flag of %0.
210 *
211 * If a driver processes several requests at once, it must remove them (or
212 * at least all but one of them) from the request queue.
213 *
214 * When a queue is plugged the head will be assumed to be inactive.
215 **/
217 void blk_queue_headactive(request_queue_t * q, int active)
218 {
219 q->head_active = active;
220 }
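/*
 * Editor's illustration -- not part of the original file. A minimal sketch of
 * a driver that dequeues requests before servicing them and therefore tells
 * the block layer that the queue head is not active. The names my_request_fn
 * and my_init_queue are hypothetical.
 */
static void my_request_fn(request_queue_t *q)
{
	struct request *req;

	while (!list_empty(&q->queue_head)) {
		req = blkdev_entry_next_request(&q->queue_head);
		blkdev_dequeue_request(req);	/* remove before processing */
		/* ... hand req to the hardware here ... */
	}
}

static void my_init_queue(request_queue_t *q)
{
	blk_init_queue(q, my_request_fn);
	blk_queue_headactive(q, 0);	/* head may now be merged/reordered */
}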
222 /**
223 * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
224 * @q: The queue which this applies to.
225 * @active: A flag indicating whether you want sector throttling on
226 *
227 * Description:
228 * The sector throttling code allows us to put a limit on the number of
229 * sectors pending io to the disk at a given time. Passing a nonzero @active
230 * indicates you will call blk_started_sectors and blk_finished_sectors in
231 * addition to calling blk_started_io and blk_finished_io in order to
232 * keep track of the number of sectors in flight.
233 **/
235 void blk_queue_throttle_sectors(request_queue_t * q, int active)
236 {
237 q->can_throttle = active;
238 }
240 /**
241 * blk_queue_make_request - define an alternate make_request function for a device
242 * @q: the request queue for the device to be affected
243 * @mfn: the alternate make_request function
244 *
245 * Description:
246 * The normal way for &struct buffer_heads to be passed to a device
247 * driver is for them to be collected into requests on a request
248 * queue, and then to allow the device driver to select requests
249 * off that queue when it is ready. This works well for many block
250 * devices. However some block devices (typically virtual devices
251 * such as md or lvm) do not benefit from the processing on the
252 * request queue, and are served best by having the requests passed
253 * directly to them. This can be achieved by providing a function
254 * to blk_queue_make_request().
255 *
256 * Caveat:
257 * The driver that does this *must* be able to deal appropriately
258 * with buffers in "highmemory", either by calling bh_kmap() to get
259 * a kernel mapping, or by calling create_bounce() to create a
260 * buffer in normal memory.
261 **/
263 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
264 {
265 q->make_request_fn = mfn;
266 }
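/*
 * Editor's illustration -- not part of the original file. A hypothetical
 * remapping driver (in the md/lvm style described above) that skips the
 * request queue altogether: it redirects the buffer_head to an underlying
 * device and returns non-zero so that generic_make_request() resubmits it.
 * my_backing_dev and my_start_sector are assumptions, not real symbols.
 */
static kdev_t my_backing_dev;			/* underlying real device */
static unsigned long my_start_sector;		/* sector offset on that device */

static int my_remap_make_request(request_queue_t *q, int rw,
				 struct buffer_head *bh)
{
	bh->b_rdev = my_backing_dev;
	bh->b_rsector += my_start_sector;
	return 1;	/* non-zero: resolve again against the new b_rdev */
}

static void my_virtual_dev_setup(request_queue_t *q)
{
	blk_queue_make_request(q, my_remap_make_request);
}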
268 /**
269 * blk_queue_bounce_limit - set bounce buffer limit for queue
270 * @q: the request queue for the device
271 * @dma_addr: bus address limit
272 *
273 * Description:
274 * Different hardware can have different requirements as to what pages
275 * it can do I/O directly to. A low level driver can call
276 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
277 * buffers for doing I/O to pages residing above @dma_addr. By default
278 * the block layer sets this to the highest numbered "low" memory page.
279 **/
280 void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
281 {
282 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
283 unsigned long mb = dma_addr >> 20;
284 static request_queue_t *old_q;
286 /*
287 * keep this for debugging for now...
288 */
289 if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
290 old_q = q;
291 printk("blk: queue %p, ", q);
292 if (dma_addr == BLK_BOUNCE_ANY)
293 printk("no I/O memory limit\n");
294 else
295 printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
296 (long long) dma_addr);
297 }
299 q->bounce_pfn = bounce_pfn;
300 }
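/*
 * Editor's illustration -- not part of the original file. A hypothetical
 * controller that can only DMA below 16MB could ask for bounce buffers
 * above that boundary; a controller with no addressing restriction can pass
 * BLK_BOUNCE_ANY instead to disable bouncing.
 */
static void my_isa_style_dma_limit(request_queue_t *q)
{
	blk_queue_bounce_limit(q, 0x00ffffffULL);	/* bounce pages above 16MB */
}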
303 /*
304 * can we merge the two segments, or do we need to start a new one?
305 */
306 inline int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
307 {
308 /*
309 * if bh and nxt are contiguous and don't cross a 4g boundary, it's ok
310 */
311 if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
312 return 1;
314 return 0;
315 }
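/*
 * Editor's worked example -- not part of the original file. Two 1024-byte
 * buffers whose data sits at physical addresses 0x10000 and 0x10400 satisfy
 * BH_CONTIG (0x10000 + 1024 == 0x10400) and sit in the same 4GB window, so
 * blk_seg_merge_ok() lets them share one scatter-gather segment. A buffer at
 * 0x20000 is not contiguous, so ll_new_segment() below would have to start a
 * new segment (subject to max_segments).
 */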
317 static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
318 {
319 if (req->nr_segments < max_segments) {
320 req->nr_segments++;
321 return 1;
322 }
323 return 0;
324 }
326 static int ll_back_merge_fn(request_queue_t *q, struct request *req,
327 struct buffer_head *bh, int max_segments)
328 {
329 if (blk_seg_merge_ok(req->bhtail, bh))
330 return 1;
332 return ll_new_segment(q, req, max_segments);
333 }
335 static int ll_front_merge_fn(request_queue_t *q, struct request *req,
336 struct buffer_head *bh, int max_segments)
337 {
338 if (blk_seg_merge_ok(bh, req->bh))
339 return 1;
341 return ll_new_segment(q, req, max_segments);
342 }
344 static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
345 struct request *next, int max_segments)
346 {
347 int total_segments = req->nr_segments + next->nr_segments;
349 if (blk_seg_merge_ok(req->bhtail, next->bh))
350 total_segments--;
352 if (total_segments > max_segments)
353 return 0;
355 req->nr_segments = total_segments;
356 return 1;
357 }
359 /*
360 * "plug" the device if there are no outstanding requests: this will
361 * force the transfer to start only after we have put all the requests
362 * on the list.
363 *
364 * This is called with interrupts off and no requests on the queue.
365 * (and with the request spinlock acquired)
366 */
367 static void generic_plug_device(request_queue_t *q, kdev_t dev)
368 {
369 /*
370 * no need to replug device
371 */
372 if (!list_empty(&q->queue_head) || q->plugged)
373 return;
375 q->plugged = 1;
376 queue_task(&q->plug_tq, &tq_disk);
377 }
379 /*
380 * remove the plug and let it rip..
381 */
382 static inline void __generic_unplug_device(request_queue_t *q)
383 {
384 if (q->plugged) {
385 q->plugged = 0;
386 if (!list_empty(&q->queue_head))
387 q->request_fn(q);
388 }
389 }
391 void generic_unplug_device(void *data)
392 {
393 request_queue_t *q = (request_queue_t *) data;
394 unsigned long flags;
396 spin_lock_irqsave(&io_request_lock, flags);
397 __generic_unplug_device(q);
398 spin_unlock_irqrestore(&io_request_lock, flags);
399 }
401 /** blk_grow_request_list
402 * @q: The &request_queue_t
403 * @nr_requests: how many requests are desired
404 *
405 * More free requests are added to the queue's free lists, bringing
406 * the total number of requests to @nr_requests.
407 *
408 * The requests are added equally to the request queue's read
409 * and write freelists.
410 *
411 * This function can sleep.
412 *
413 * Returns the (new) number of requests which the queue has available.
414 */
415 int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
416 {
417 unsigned long flags;
418 /* Several broken drivers assume that this function doesn't sleep,
419 * this causes system hangs during boot.
420 * As a temporary fix, make the function non-blocking.
421 */
422 spin_lock_irqsave(&io_request_lock, flags);
423 while (q->nr_requests < nr_requests) {
424 struct request *rq;
426 rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
427 if (rq == NULL)
428 break;
429 memset(rq, 0, sizeof(*rq));
430 rq->rq_status = RQ_INACTIVE;
431 list_add(&rq->queue, &q->rq.free);
432 q->rq.count++;
434 q->nr_requests++;
435 }
437 /*
438 * Wakeup waiters after both one quarter of the
439 * max-in-flight queue and one quarter of the requests
440 * are available again.
441 */
443 q->batch_requests = q->nr_requests / 4;
444 if (q->batch_requests > 32)
445 q->batch_requests = 32;
446 q->batch_sectors = max_queue_sectors / 4;
448 q->max_queue_sectors = max_queue_sectors;
450 BUG_ON(!q->batch_sectors);
451 atomic_set(&q->nr_sectors, 0);
453 spin_unlock_irqrestore(&io_request_lock, flags);
454 return q->nr_requests;
455 }
457 static void blk_init_free_list(request_queue_t *q)
458 {
459 struct sysinfo si;
460 int megs; /* Total memory, in megabytes */
461 int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
463 INIT_LIST_HEAD(&q->rq.free);
464 q->rq.count = 0;
465 q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
466 q->nr_requests = 0;
468 si_meminfo(&si);
469 megs = si.totalram >> (20 - PAGE_SHIFT);
470 nr_requests = MAX_NR_REQUESTS;
471 if (megs < 30) {
472 nr_requests /= 2;
473 max_queue_sectors /= 2;
474 }
475 /* notice early if anybody screwed the defaults */
476 BUG_ON(!nr_requests);
477 BUG_ON(!max_queue_sectors);
479 blk_grow_request_list(q, nr_requests, max_queue_sectors);
481 init_waitqueue_head(&q->wait_for_requests);
483 spin_lock_init(&q->queue_lock);
484 }
486 static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
488 /**
489 * blk_init_queue - prepare a request queue for use with a block device
490 * @q: The &request_queue_t to be initialised
491 * @rfn: The function to be called to process requests that have been
492 * placed on the queue.
493 *
494 * Description:
495 * If a block device wishes to use the standard request handling procedures,
496 * which sorts requests and coalesces adjacent requests, then it must
497 * call blk_init_queue(). The function @rfn will be called when there
498 * are requests on the queue that need to be processed. If the device
499 * supports plugging, then @rfn may not be called immediately when requests
500 * are available on the queue, but may be called at some time later instead.
501 * Plugged queues are generally unplugged when a buffer belonging to one
502 * of the requests on the queue is needed, or due to memory pressure.
503 *
504 * @rfn is not required, or even expected, to remove all requests off the
505 * queue, but only as many as it can handle at a time. If it does leave
506 * requests on the queue, it is responsible for arranging that the requests
507 * get dealt with eventually.
508 *
509 * A global spin lock $io_request_lock must be held while manipulating the
510 * requests on the request queue.
511 *
512 * The request on the head of the queue is by default assumed to be
513 * potentially active, and it is not considered for re-ordering or merging
514 * whenever the given queue is unplugged. This behaviour can be changed with
515 * blk_queue_headactive().
516 *
517 * Note:
518 * blk_init_queue() must be paired with a blk_cleanup_queue() call
519 * when the block device is deactivated (such as at module unload).
520 **/
521 void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
522 {
523 INIT_LIST_HEAD(&q->queue_head);
524 elevator_init(&q->elevator, ELEVATOR_LINUS);
525 blk_init_free_list(q);
526 q->request_fn = rfn;
527 q->back_merge_fn = ll_back_merge_fn;
528 q->front_merge_fn = ll_front_merge_fn;
529 q->merge_requests_fn = ll_merge_requests_fn;
530 q->make_request_fn = __make_request;
531 q->plug_tq.sync = 0;
532 q->plug_tq.routine = &generic_unplug_device;
533 q->plug_tq.data = q;
534 q->plugged = 0;
535 q->can_throttle = 0;
537 /*
538 * These booleans describe the queue properties. We set the
539 * default (and most common) values here. Other drivers can
540 * use the appropriate functions to alter the queue properties
541 * as appropriate.
542 */
543 q->plug_device_fn = generic_plug_device;
544 q->head_active = 1;
546 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
547 }
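/*
 * Editor's illustration -- not part of the original file. The usual
 * blk_init_queue()/blk_cleanup_queue() pairing in a hypothetical driver;
 * MY_MAJOR, mydisk_request_fn and my_fops are assumptions, not real symbols.
 */
#define MY_MAJOR	240			/* example local/experimental major */
static struct block_device_operations my_fops;	/* open/release etc. omitted */
static void mydisk_request_fn(request_queue_t *q);

static int __init mydisk_init(void)
{
	if (register_blkdev(MY_MAJOR, "mydisk", &my_fops) < 0)
		return -EIO;
	blk_init_queue(BLK_DEFAULT_QUEUE(MY_MAJOR), mydisk_request_fn);
	return 0;
}

static void __exit mydisk_exit(void)
{
	blk_cleanup_queue(BLK_DEFAULT_QUEUE(MY_MAJOR));
	unregister_blkdev(MY_MAJOR, "mydisk");
}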
549 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
550 /*
551 * Get a free request. io_request_lock must be held and interrupts
552 * disabled on the way in. Returns NULL if there are no free requests.
553 */
554 static struct request *get_request(request_queue_t *q, int rw)
555 {
556 struct request *rq = NULL;
557 struct request_list *rl = &q->rq;
559 if (blk_oversized_queue(q)) {
560 int rlim = q->nr_requests >> 5;
562 if (rlim < 4)
563 rlim = 4;
565 /*
566 * if its a write, or we have more than a handful of reads
567 * pending, bail out
568 */
569 if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim))
570 return NULL;
571 if (blk_oversized_queue_reads(q))
572 return NULL;
573 }
575 if (!list_empty(&rl->free)) {
576 rq = blkdev_free_rq(&rl->free);
577 list_del(&rq->queue);
578 rl->count--;
579 rl->pending[rw]++;
580 rq->rq_status = RQ_ACTIVE;
581 rq->cmd = rw;
582 rq->special = NULL;
583 rq->q = q;
584 }
586 return rq;
587 }
589 /*
590 * Here's the request allocation design, low latency version:
591 *
592 * 1: Blocking on request exhaustion is a key part of I/O throttling.
593 *
594 * 2: We want to be `fair' to all requesters. We must avoid starvation, and
595 * attempt to ensure that all requesters sleep for a similar duration. Hence
596 * no stealing requests when there are other processes waiting.
597 *
598 * There used to be more here, attempting to allow a process to send in a
599 * number of requests once it has woken up. But, there's no way to
600 * tell if a process has just been woken up, or if it is a new process
601 * coming in to steal requests from the waiters. So, we give up and force
602 * everyone to wait fairly.
603 *
604 * So here's what we do:
605 *
606 * a) A READA requester fails if free_requests < batch_requests
607 *
608 * We don't want READA requests to prevent sleepers from ever
609 * waking. Note that READA is used extremely rarely - a few
610 * filesystems use it for directory readahead.
611 *
612 * When a process wants a new request:
613 *
614 * b) If free_requests == 0, the requester sleeps in FIFO manner, and
615 * the queue full condition is set. The full condition is not
616 * cleared until there are no longer any waiters. Once the full
617 * condition is set, all new io must wait, hopefully for a very
618 * short period of time.
619 *
620 * When a request is released:
621 *
622 * c) If free_requests < batch_requests, do nothing.
623 *
624 * d) If free_requests >= batch_requests, wake up a single waiter.
625 *
626 * As each waiter gets a request, he wakes another waiter. We do this
627 * to prevent a race where an unplug might get run before a request makes
628 * its way onto the queue. The result is a cascade of wakeups, so delaying
629 * the initial wakeup until we've got batch_requests available helps avoid
630 * wakeups where there aren't any requests available yet.
631 */
633 static struct request *__get_request_wait(request_queue_t *q, int rw)
634 {
635 register struct request *rq;
636 DECLARE_WAITQUEUE(wait, current);
638 add_wait_queue_exclusive(&q->wait_for_requests, &wait);
640 do {
641 set_current_state(TASK_UNINTERRUPTIBLE);
642 spin_lock_irq(&io_request_lock);
643 if (blk_oversized_queue(q) || q->rq.count == 0) {
644 __generic_unplug_device(q);
645 spin_unlock_irq(&io_request_lock);
646 schedule();
647 spin_lock_irq(&io_request_lock);
648 }
649 rq = get_request(q, rw);
650 spin_unlock_irq(&io_request_lock);
651 } while (rq == NULL);
652 remove_wait_queue(&q->wait_for_requests, &wait);
653 current->state = TASK_RUNNING;
655 return rq;
656 }
658 static void get_request_wait_wakeup(request_queue_t *q, int rw)
659 {
660 /*
661 * avoid losing an unplug if a second __get_request_wait did the
662 * generic_unplug_device while our __get_request_wait was running
663 * w/o the queue_lock held and w/ our request out of the queue.
664 */
665 if (waitqueue_active(&q->wait_for_requests))
666 wake_up(&q->wait_for_requests);
667 }
669 /* RO fail safe mechanism */
671 static long ro_bits[MAX_BLKDEV][8];
673 int is_read_only(kdev_t dev)
674 {
675 int minor,major;
677 major = MAJOR(dev);
678 minor = MINOR(dev);
679 if (major < 0 || major >= MAX_BLKDEV) return 0;
680 return ro_bits[major][minor >> 5] & (1 << (minor & 31));
681 }
683 void set_device_ro(kdev_t dev,int flag)
684 {
685 int minor,major;
687 major = MAJOR(dev);
688 minor = MINOR(dev);
689 if (major < 0 || major >= MAX_BLKDEV) return;
690 if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
691 else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
692 }
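/*
 * Editor's illustration -- not part of the original file. This is roughly
 * what the BLKROSET ioctl path does when user space marks a device
 * read-only; the MKDEV(3, 1) value (/dev/hda1) is just an example.
 */
static void my_readonly_example(void)
{
	set_device_ro(MKDEV(3, 1), 1);
	if (is_read_only(MKDEV(3, 1)))
		printk(KERN_INFO "hda1 is now read-only\n");
}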
694 inline void drive_stat_acct (kdev_t dev, int rw,
695 unsigned long nr_sectors, int new_io)
696 {
697 unsigned int major = MAJOR(dev);
698 unsigned int index;
700 index = disk_index(dev);
701 if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
702 return;
704 kstat.dk_drive[major][index] += new_io;
705 if (rw == READ) {
706 kstat.dk_drive_rio[major][index] += new_io;
707 kstat.dk_drive_rblk[major][index] += nr_sectors;
708 } else if (rw == WRITE) {
709 kstat.dk_drive_wio[major][index] += new_io;
710 kstat.dk_drive_wblk[major][index] += nr_sectors;
711 } else
712 printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
713 }
715 #ifdef CONFIG_BLK_STATS
716 /*
717 * Return up to two hd_structs on which to do IO accounting for a given
718 * request.
719 *
720 * On a partitioned device, we want to account both against the partition
721 * and against the whole disk.
722 */
723 static void locate_hd_struct(struct request *req,
724 struct hd_struct **hd1,
725 struct hd_struct **hd2)
726 {
727 struct gendisk *gd;
729 *hd1 = NULL;
730 *hd2 = NULL;
732 gd = get_gendisk(req->rq_dev);
733 if (gd && gd->part) {
734 /* Mask out the partition bits: account for the entire disk */
735 int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
736 int whole_minor = devnr << gd->minor_shift;
738 *hd1 = &gd->part[whole_minor];
739 if (whole_minor != MINOR(req->rq_dev))
740 *hd2= &gd->part[MINOR(req->rq_dev)];
741 }
742 }
744 /*
745 * Round off the performance stats on an hd_struct.
746 *
747 * The average IO queue length and utilisation statistics are maintained
748 * by observing the current state of the queue length and the amount of
749 * time it has been in this state for.
750 * Normally, that accounting is done on IO completion, but that can result
751 * in more than a second's worth of IO being accounted for within any one
752 * second, leading to >100% utilisation. To deal with that, we do a
753 * round-off before returning the results when reading /proc/partitions,
754 * accounting immediately for all queue usage up to the current jiffies and
755 * restarting the counters again.
756 */
757 void disk_round_stats(struct hd_struct *hd)
758 {
759 unsigned long now = jiffies;
761 hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
762 hd->last_queue_change = now;
764 if (hd->ios_in_flight)
765 hd->io_ticks += (now - hd->last_idle_time);
766 hd->last_idle_time = now;
767 }
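/*
 * Editor's worked example -- not part of the original file. If two requests
 * have been in flight for the 50 jiffies since last_queue_change,
 * disk_round_stats() adds 2 * 50 = 100 request-jiffies to hd->aveq and,
 * because the disk was busy, 50 jiffies to hd->io_ticks, then restarts both
 * baselines at the current jiffies value. Reading /proc/partitions rounds
 * the counters off first, so user-space tools can derive average queue depth
 * and utilisation from them without seeing >100% busy time.
 */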
769 static inline void down_ios(struct hd_struct *hd)
770 {
771 disk_round_stats(hd);
772 --hd->ios_in_flight;
773 }
775 static inline void up_ios(struct hd_struct *hd)
776 {
777 disk_round_stats(hd);
778 ++hd->ios_in_flight;
779 }
781 static void account_io_start(struct hd_struct *hd, struct request *req,
782 int merge, int sectors)
783 {
784 switch (req->cmd) {
785 case READ:
786 if (merge)
787 hd->rd_merges++;
788 hd->rd_sectors += sectors;
789 break;
790 case WRITE:
791 if (merge)
792 hd->wr_merges++;
793 hd->wr_sectors += sectors;
794 break;
795 }
796 if (!merge)
797 up_ios(hd);
798 }
800 static void account_io_end(struct hd_struct *hd, struct request *req)
801 {
802 unsigned long duration = jiffies - req->start_time;
803 switch (req->cmd) {
804 case READ:
805 hd->rd_ticks += duration;
806 hd->rd_ios++;
807 break;
808 case WRITE:
809 hd->wr_ticks += duration;
810 hd->wr_ios++;
811 break;
812 }
813 down_ios(hd);
814 }
816 void req_new_io(struct request *req, int merge, int sectors)
817 {
818 struct hd_struct *hd1, *hd2;
820 locate_hd_struct(req, &hd1, &hd2);
821 if (hd1)
822 account_io_start(hd1, req, merge, sectors);
823 if (hd2)
824 account_io_start(hd2, req, merge, sectors);
825 }
827 void req_merged_io(struct request *req)
828 {
829 struct hd_struct *hd1, *hd2;
831 locate_hd_struct(req, &hd1, &hd2);
832 if (hd1)
833 down_ios(hd1);
834 if (hd2)
835 down_ios(hd2);
836 }
838 void req_finished_io(struct request *req)
839 {
840 struct hd_struct *hd1, *hd2;
842 locate_hd_struct(req, &hd1, &hd2);
843 if (hd1)
844 account_io_end(hd1, req);
845 if (hd2)
846 account_io_end(hd2, req);
847 }
848 EXPORT_SYMBOL(req_finished_io);
849 #endif /* CONFIG_BLK_STATS */
851 /*
852 * add-request adds a request to the linked list.
853 * io_request_lock is held and interrupts disabled, as we muck with the
854 * request queue list.
855 *
856 * By this point, req->cmd is always either READ/WRITE, never READA,
857 * which is important for drive_stat_acct() above.
858 */
859 static inline void add_request(request_queue_t * q, struct request * req,
860 struct list_head *insert_here)
861 {
862 drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
864 if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
865 spin_unlock_irq(&io_request_lock);
866 BUG();
867 }
869 /*
870 * elevator indicated where it wants this request to be
871 * inserted at elevator_merge time
872 */
873 list_add(&req->queue, insert_here);
874 }
876 /*
877 * Must be called with io_request_lock held and interrupts disabled
878 */
879 void blkdev_release_request(struct request *req)
880 {
881 request_queue_t *q = req->q;
883 req->rq_status = RQ_INACTIVE;
884 req->q = NULL;
886 /*
887 * Request may not have originated from ll_rw_blk. if not,
888 * assume it has free buffers and check waiters
889 */
890 if (q) {
891 struct request_list *rl = &q->rq;
892 int oversized_batch = 0;
894 if (q->can_throttle)
895 oversized_batch = blk_oversized_queue_batch(q);
896 rl->count++;
897 /*
898 * paranoia check
899 */
900 if (req->cmd == READ || req->cmd == WRITE)
901 rl->pending[req->cmd]--;
902 if (rl->pending[READ] > q->nr_requests)
903 printk("blk: reads: %u\n", rl->pending[READ]);
904 if (rl->pending[WRITE] > q->nr_requests)
905 printk("blk: writes: %u\n", rl->pending[WRITE]);
906 if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests)
907 printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests);
908 list_add(&req->queue, &rl->free);
909 if (rl->count >= q->batch_requests && !oversized_batch) {
910 smp_mb();
911 if (waitqueue_active(&q->wait_for_requests))
912 wake_up(&q->wait_for_requests);
913 }
914 }
915 }
917 /*
918 * Has to be called with the request spinlock acquired
919 */
920 static void attempt_merge(request_queue_t * q,
921 struct request *req,
922 int max_sectors,
923 int max_segments)
924 {
925 struct request *next;
927 next = blkdev_next_request(req);
928 if (req->sector + req->nr_sectors != next->sector)
929 return;
930 if (req->cmd != next->cmd
931 || req->rq_dev != next->rq_dev
932 || req->nr_sectors + next->nr_sectors > max_sectors
933 || next->waiting)
934 return;
935 /*
936 * If we are not allowed to merge these requests, then
937 * return. If we are allowed to merge, then the count
938 * will have been updated to the appropriate number,
939 * and we shouldn't do it here too.
940 */
941 if (!q->merge_requests_fn(q, req, next, max_segments))
942 return;
944 q->elevator.elevator_merge_req_fn(req, next);
946 /* At this point we have either done a back merge
947 * or front merge. We need the smaller start_time of
948 * the merged requests to be the current request
949 * for accounting purposes.
950 */
951 if (time_after(req->start_time, next->start_time))
952 req->start_time = next->start_time;
954 req->bhtail->b_reqnext = next->bh;
955 req->bhtail = next->bhtail;
956 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
957 list_del(&next->queue);
959 /* One last thing: we have removed a request, so we now have one
960 less expected IO to complete for accounting purposes. */
961 req_merged_io(req);
963 blkdev_release_request(next);
964 }
966 static inline void attempt_back_merge(request_queue_t * q,
967 struct request *req,
968 int max_sectors,
969 int max_segments)
970 {
971 if (&req->queue == q->queue_head.prev)
972 return;
973 attempt_merge(q, req, max_sectors, max_segments);
974 }
976 static inline void attempt_front_merge(request_queue_t * q,
977 struct list_head * head,
978 struct request *req,
979 int max_sectors,
980 int max_segments)
981 {
982 struct list_head * prev;
984 prev = req->queue.prev;
985 if (head == prev)
986 return;
987 attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
988 }
990 static int __make_request(request_queue_t * q, int rw,
991 struct buffer_head * bh)
992 {
993 unsigned int sector, count, sync;
994 int max_segments = MAX_SEGMENTS;
995 struct request * req, *freereq = NULL;
996 int rw_ahead, max_sectors, el_ret;
997 struct list_head *head, *insert_here;
998 int latency;
999 elevator_t *elevator = &q->elevator;
1000 int should_wake = 0;
1002 count = bh->b_size >> 9;
1003 sector = bh->b_rsector;
1004 sync = test_and_clear_bit(BH_Sync, &bh->b_state);
1006 rw_ahead = 0; /* normal case; gets changed below for READA */
1007 switch (rw) {
1008 case READA:
1009 #if 0 /* bread() misinterprets failed READA attempts as IO errors on SMP */
1010 rw_ahead = 1;
1011 #endif
1012 rw = READ; /* drop into READ */
1013 case READ:
1014 case WRITE:
1015 latency = elevator_request_latency(elevator, rw);
1016 break;
1017 default:
1018 BUG();
1019 goto end_io;
1020 }
1022 /* We'd better have a real physical mapping!
1023 Check this bit only if the buffer was dirty and just locked
1024 down by us so at this point flushpage will block and
1025 won't clear the mapped bit under us. */
1026 if (!buffer_mapped(bh))
1027 BUG();
1029 /*
1030 * Temporary solution - in 2.5 this will be done by the lowlevel
1031 * driver. Create a bounce buffer if the buffer data points into
1032 * high memory - keep the original buffer otherwise.
1033 */
1034 bh = blk_queue_bounce(q, rw, bh);
1036 /* look for a free request. */
1037 /*
1038 * Try to coalesce the new request with old requests
1039 */
1040 max_sectors = get_max_sectors(bh->b_rdev);
1042 req = NULL;
1043 head = &q->queue_head;
1044 /*
1045 * Now we acquire the request spinlock, we have to be mega careful
1046 * not to schedule or do something nonatomic
1047 */
1048 spin_lock_irq(&io_request_lock);
1050 again:
1051 insert_here = head->prev;
1053 if (list_empty(head)) {
1054 q->plug_device_fn(q, bh->b_rdev); /* is atomic */
1055 goto get_rq;
1056 } else if (q->head_active && !q->plugged)
1057 head = head->next;
1059 el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
1060 switch (el_ret) {
1062 case ELEVATOR_BACK_MERGE:
1063 if (!q->back_merge_fn(q, req, bh, max_segments)) {
1064 insert_here = &req->queue;
1065 break;
1066 }
1067 req->bhtail->b_reqnext = bh;
1068 req->bhtail = bh;
1069 req->nr_sectors = req->hard_nr_sectors += count;
1070 blk_started_io(count);
1071 blk_started_sectors(req, count);
1072 drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1073 req_new_io(req, 1, count);
1074 attempt_back_merge(q, req, max_sectors, max_segments);
1075 goto out;
1077 case ELEVATOR_FRONT_MERGE:
1078 if (!q->front_merge_fn(q, req, bh, max_segments)) {
1079 insert_here = req->queue.prev;
1080 break;
1081 }
1082 bh->b_reqnext = req->bh;
1083 req->bh = bh;
1084 /*
1085 * may not be valid, but queues not having bounce
1086 * enabled for highmem pages must not look at
1087 * ->buffer anyway
1088 */
1089 req->buffer = bh->b_data;
1090 req->current_nr_sectors = req->hard_cur_sectors = count;
1091 req->sector = req->hard_sector = sector;
1092 req->nr_sectors = req->hard_nr_sectors += count;
1093 blk_started_io(count);
1094 blk_started_sectors(req, count);
1095 drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1096 req_new_io(req, 1, count);
1097 attempt_front_merge(q, head, req, max_sectors, max_segments);
1098 goto out;
1100 /*
1101 * elevator says don't/can't merge. get new request
1102 */
1103 case ELEVATOR_NO_MERGE:
1104 /*
1105 * use elevator hints as to where to insert the
1106 * request. if no hints, just add it to the back
1107 * of the queue
1108 */
1109 if (req)
1110 insert_here = &req->queue;
1111 break;
1113 default:
1114 printk("elevator returned crap (%d)\n", el_ret);
1115 BUG();
1116 }
1118 get_rq:
1119 if (freereq) {
1120 req = freereq;
1121 freereq = NULL;
1122 } else {
1123 /*
1124 * See description above __get_request_wait()
1125 */
1126 if (rw_ahead) {
1127 if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
1128 spin_unlock_irq(&io_request_lock);
1129 goto end_io;
1130 }
1131 req = get_request(q, rw);
1132 if (req == NULL)
1133 BUG();
1134 } else {
1135 req = get_request(q, rw);
1136 if (req == NULL) {
1137 spin_unlock_irq(&io_request_lock);
1138 freereq = __get_request_wait(q, rw);
1139 head = &q->queue_head;
1140 spin_lock_irq(&io_request_lock);
1141 should_wake = 1;
1142 goto again;
1143 }
1144 }
1145 }
1147 /* fill up the request-info, and add it to the queue */
1148 req->elevator_sequence = latency;
1149 req->cmd = rw;
1150 req->errors = 0;
1151 req->hard_sector = req->sector = sector;
1152 req->hard_nr_sectors = req->nr_sectors = count;
1153 req->current_nr_sectors = req->hard_cur_sectors = count;
1154 req->nr_segments = 1; /* Always 1 for a new request. */
1155 req->nr_hw_segments = 1; /* Always 1 for a new request. */
1156 req->buffer = bh->b_data;
1157 req->waiting = NULL;
1158 req->bh = bh;
1159 req->bhtail = bh;
1160 req->rq_dev = bh->b_rdev;
1161 req->start_time = jiffies;
1162 req_new_io(req, 0, count);
1163 blk_started_io(count);
1164 blk_started_sectors(req, count);
1165 add_request(q, req, insert_here);
1166 out:
1167 if (freereq)
1168 blkdev_release_request(freereq);
1169 if (should_wake)
1170 get_request_wait_wakeup(q, rw);
1171 if (sync)
1172 __generic_unplug_device(q);
1173 spin_unlock_irq(&io_request_lock);
1174 return 0;
1175 end_io:
1176 bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1177 return 0;
1178 }
1180 /**
1181 * generic_make_request: hand a buffer head to its device driver for I/O
1182 * @rw: READ, WRITE, or READA - what sort of I/O is desired.
1183 * @bh: The buffer head describing the location in memory and on the device.
1185 * generic_make_request() is used to make I/O requests of block
1186 * devices. It is passed a &struct buffer_head and a &rw value. The
1187 * %READ and %WRITE options are (hopefully) obvious in meaning. The
1188 * %READA value means that a read is required, but that the driver is
1189 * free to fail the request if, for example, it cannot get needed
1190 * resources immediately.
1192 * generic_make_request() does not return any status. The
1193 * success/failure status of the request, along with notification of
1194 * completion, is delivered asynchronously through the bh->b_end_io
1195 * function described (one day) elsewhere.
1197 * The caller of generic_make_request must make sure that b_page,
1198 * b_addr, b_size are set to describe the memory buffer, that b_rdev
1199 * and b_rsector are set to describe the device address, and the
1200 * b_end_io and optionally b_private are set to describe how
1201 * completion notification should be signaled. BH_Mapped should also
1202 * be set (to confirm that b_dev and b_blocknr are valid).
1204 * generic_make_request and the drivers it calls may use b_reqnext,
1205 * and may change b_rdev and b_rsector. So the values of these fields
1206 * should NOT be depended on after the call to generic_make_request.
1207 * Because of this, the caller should record the device address
1208 * information in b_dev and b_blocknr.
1210 * Apart from those fields mentioned above, no other fields, and in
1211 * particular, no other flags, are changed by generic_make_request or
1212 * any lower level drivers.
1213 * */
1214 void generic_make_request (int rw, struct buffer_head * bh)
1215 {
1216 int major = MAJOR(bh->b_rdev);
1217 int minorsize = 0;
1218 request_queue_t *q;
1220 if (!bh->b_end_io)
1221 BUG();
1223 /* Test device size, when known. */
1224 if (blk_size[major])
1225 minorsize = blk_size[major][MINOR(bh->b_rdev)];
1226 if (minorsize) {
1227 unsigned long maxsector = (minorsize << 1) + 1;
1228 unsigned long sector = bh->b_rsector;
1229 unsigned int count = bh->b_size >> 9;
1231 if (maxsector < count || maxsector - count < sector) {
1232 /* Yecch */
1233 bh->b_state &= ~(1 << BH_Dirty);
1235 /* This may well happen - the kernel calls bread()
1236 without checking the size of the device, e.g.,
1237 when mounting a device. */
1238 printk(KERN_INFO
1239 "attempt to access beyond end of device\n");
1240 printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
1241 kdevname(bh->b_rdev), rw,
1242 (sector + count)>>1, minorsize);
1244 bh->b_end_io(bh, 0);
1245 return;
1246 }
1247 }
1249 /*
1250 * Resolve the mapping until finished. (drivers are
1251 * still free to implement/resolve their own stacking
1252 * by explicitly returning 0)
1253 */
1254 /* NOTE: we don't repeat the blk_size check for each new device.
1255 * Stacking drivers are expected to know what they are doing.
1256 */
1257 do {
1258 q = blk_get_queue(bh->b_rdev);
1259 if (!q) {
1260 printk(KERN_ERR
1261 "generic_make_request: Trying to access "
1262 "nonexistent block-device %s (%ld)\n",
1263 kdevname(bh->b_rdev), bh->b_rsector);
1264 buffer_IO_error(bh);
1265 break;
1266 }
1267 } while (q->make_request_fn(q, rw, bh));
1268 }
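/*
 * Editor's illustration -- not part of the original file. The minimum a
 * caller of generic_make_request() has to set up, per the comment above;
 * b_data, b_size and b_page are assumed to describe the memory buffer
 * already, and my_end_io/my_submit are hypothetical names.
 */
static void my_end_io(struct buffer_head *bh, int uptodate)
{
	mark_buffer_uptodate(bh, uptodate);
	unlock_buffer(bh);
}

static void my_submit(struct buffer_head *bh, kdev_t dev, unsigned long sector)
{
	lock_buffer(bh);
	bh->b_rdev = dev;		/* device address ... */
	bh->b_rsector = sector;		/* ... and 512-byte sector on it */
	bh->b_end_io = my_end_io;	/* asynchronous completion callback */
	set_bit(BH_Mapped, &bh->b_state);
	generic_make_request(READ, bh);
}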
1271 /**
1272 * submit_bh: submit a buffer_head to the block device layer for I/O
1273 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1274 * @bh: The &struct buffer_head which describes the I/O
1276 * submit_bh() is very similar in purpose to generic_make_request(), and
1277 * uses that function to do most of the work.
1279 * The extra functionality provided by submit_bh is to determine
1280 * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
1281 * This is appropriate for IO requests that come from the buffer
1282 * cache and page cache which (currently) always use aligned blocks.
1283 */
1284 void submit_bh(int rw, struct buffer_head * bh)
1285 {
1286 int count = bh->b_size >> 9;
1288 if (!test_bit(BH_Lock, &bh->b_state))
1289 BUG();
1291 set_bit(BH_Req, &bh->b_state);
1292 set_bit(BH_Launder, &bh->b_state);
1294 /*
1295 * First step, 'identity mapping' - RAID or LVM might
1296 * further remap this.
1297 */
1298 bh->b_rdev = bh->b_dev;
1299 bh->b_rsector = bh->b_blocknr * count;
1301 get_bh(bh);
1302 generic_make_request(rw, bh);
1304 /* fix race condition with wait_on_buffer() */
1305 smp_mb(); /* spin_unlock may have inclusive semantics */
1306 if (waitqueue_active(&bh->b_wait))
1307 wake_up(&bh->b_wait);
1309 if (block_dump)
1310 printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev));
1312 put_bh(bh);
1313 switch (rw) {
1314 case WRITE:
1315 kstat.pgpgout += count;
1316 break;
1317 default:
1318 kstat.pgpgin += count;
1319 break;
1320 }
1321 }
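/*
 * Editor's illustration -- not part of the original file. A typical
 * synchronous read through submit_bh(), much like what bread() ends up
 * doing; bh->b_dev, b_blocknr and b_size are assumed to be set already.
 */
static int my_read_block_sync(struct buffer_head *bh)
{
	lock_buffer(bh);
	bh->b_end_io = end_buffer_io_sync;	/* standard completion handler */
	submit_bh(READ, bh);
	wait_on_buffer(bh);
	return buffer_uptodate(bh) ? 0 : -EIO;
}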
1323 /**
1324 * ll_rw_block: low-level access to block devices
1325 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
1326 * @nr: number of &struct buffer_heads in the array
1327 * @bhs: array of pointers to &struct buffer_head
1329 * ll_rw_block() takes an array of pointers to &struct buffer_heads,
1330 * and requests an I/O operation on them, either a %READ or a %WRITE.
1331 * The third %READA option is described in the documentation for
1332 * generic_make_request() which ll_rw_block() calls.
1334 * This function provides extra functionality that is not in
1335 * generic_make_request() that is relevant to buffers in the buffer
1336 * cache or page cache. In particular it drops any buffer that it
1337 * cannot get a lock on (with the BH_Lock state bit), any buffer that
1338 * appears to be clean when doing a write request, and any buffer that
1339 * appears to be up-to-date when doing a read request. Further it marks
1340 * as clean buffers that are processed for writing (the buffer cache
1341 * won't assume that they are actually clean until the buffer gets
1342 * unlocked).
1344 * ll_rw_block sets b_end_io to a simple completion handler that marks
1345 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
1346 * any waiters. A client that needs a more interesting completion
1347 * routine should call submit_bh() (or generic_make_request())
1348 * directly.
1350 * Caveat:
1351 * All of the buffers must be for the same device, and must also be
1352 * of the current approved size for the device. */
1354 void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
1355 {
1356 unsigned int major;
1357 int correct_size;
1358 int i;
1360 if (!nr)
1361 return;
1363 major = MAJOR(bhs[0]->b_dev);
1365 /* Determine correct block size for this device. */
1366 correct_size = get_hardsect_size(bhs[0]->b_dev);
1368 /* Verify requested block sizes. */
1369 for (i = 0; i < nr; i++) {
1370 struct buffer_head *bh = bhs[i];
1371 if (bh->b_size % correct_size) {
1372 printk(KERN_NOTICE "ll_rw_block: device %s: "
1373 "only %d-char blocks implemented (%u)\n",
1374 kdevname(bhs[0]->b_dev),
1375 correct_size, bh->b_size);
1376 goto sorry;
1377 }
1378 }
1380 if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
1381 printk(KERN_NOTICE "Can't write to read-only device %s\n",
1382 kdevname(bhs[0]->b_dev));
1383 goto sorry;
1384 }
1386 for (i = 0; i < nr; i++) {
1387 struct buffer_head *bh = bhs[i];
1389 lock_buffer(bh);
1391 /* We have the buffer lock */
1392 atomic_inc(&bh->b_count);
1393 bh->b_end_io = end_buffer_io_sync;
1395 switch(rw) {
1396 case WRITE:
1397 if (!atomic_set_buffer_clean(bh))
1398 /* Hmmph! Nothing to write */
1399 goto end_io;
1400 __mark_buffer_clean(bh);
1401 break;
1403 case READA:
1404 case READ:
1405 if (buffer_uptodate(bh))
1406 /* Hmmph! Already have it */
1407 goto end_io;
1408 break;
1409 default:
1410 BUG();
1411 end_io:
1412 bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1413 continue;
1414 }
1416 submit_bh(rw, bh);
1417 }
1418 return;
1420 sorry:
1421 /* Make sure we don't get infinite dirty retries.. */
1422 for (i = 0; i < nr; i++)
1423 mark_buffer_clean(bhs[i]);
1424 }
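/*
 * Editor's illustration -- not part of the original file. The classic
 * bread()-style use of ll_rw_block(): read one buffer and wait for it.
 * my_bread_example is a hypothetical name.
 */
static struct buffer_head *my_bread_example(kdev_t dev, int block, int size)
{
	struct buffer_head *bh = getblk(dev, block, size);

	if (buffer_uptodate(bh))
		return bh;
	ll_rw_block(READ, 1, &bh);
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return bh;
	brelse(bh);
	return NULL;
}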
1426 #ifdef CONFIG_STRAM_SWAP
1427 extern int stram_device_init (void);
1428 #endif
1430 static void blk_writeback_timer(unsigned long data)
1431 {
1432 wakeup_bdflush();
1433 wakeup_kupdate();
1434 }
1436 /**
1437 * end_that_request_first - end I/O on one buffer.
1438 * @req: the request being processed
1439 * @uptodate: 0 for I/O error
1440 * @name: the name printed for an I/O error
1442 * Description:
1443 * Ends I/O on the first buffer attached to @req, and sets it up
1444 * for the next buffer_head (if any) in the cluster.
1446 * Return:
1447 * 0 - we are done with this request, call end_that_request_last()
1448 * 1 - still buffers pending for this request
1450 * Caveat:
1451 * Drivers implementing their own end_request handling must call
1452 * blk_finished_io() appropriately.
1453 **/
1455 int end_that_request_first (struct request *req, int uptodate, char *name)
1456 {
1457 struct buffer_head * bh;
1458 int nsect;
1460 req->errors = 0;
1461 if (!uptodate)
1462 printk("end_request: I/O error, dev %s (%s), sector %lu\n",
1463 kdevname(req->rq_dev), name, req->sector);
1465 if ((bh = req->bh) != NULL) {
1466 nsect = bh->b_size >> 9;
1467 blk_finished_io(nsect);
1468 blk_finished_sectors(req, nsect);
1469 req->bh = bh->b_reqnext;
1470 bh->b_reqnext = NULL;
1471 bh->b_end_io(bh, uptodate);
1472 if ((bh = req->bh) != NULL) {
1473 req->hard_sector += nsect;
1474 req->hard_nr_sectors -= nsect;
1475 req->sector = req->hard_sector;
1476 req->nr_sectors = req->hard_nr_sectors;
1478 req->current_nr_sectors = bh->b_size >> 9;
1479 req->hard_cur_sectors = req->current_nr_sectors;
1480 if (req->nr_sectors < req->current_nr_sectors) {
1481 req->nr_sectors = req->current_nr_sectors;
1482 printk("end_request: buffer-list destroyed\n");
1483 }
1484 req->buffer = bh->b_data;
1485 return 1;
1486 }
1487 }
1488 return 0;
1489 }
1491 extern int laptop_mode;
1493 void end_that_request_last(struct request *req)
1494 {
1495 struct completion *waiting = req->waiting;
1497 /*
1498 * schedule the writeout of pending dirty data when the disk is idle
1499 */
1500 if (laptop_mode && req->cmd == READ)
1501 mod_timer(&writeback_timer, jiffies + 5 * HZ);
1503 req_finished_io(req);
1504 blkdev_release_request(req);
1505 if (waiting)
1506 complete(waiting);
1507 }
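/*
 * Editor's illustration -- not part of the original file. How a driver's
 * completion path typically uses the two helpers above (this is close to
 * what the end_request() macro in <linux/blk.h> expands to). The caller is
 * assumed to hold io_request_lock; "mydisk" is a hypothetical name.
 */
static void my_complete_request(struct request *req, int uptodate)
{
	while (end_that_request_first(req, uptodate, "mydisk"))
		;	/* complete each buffer_head in the cluster */
	blkdev_dequeue_request(req);	/* drop it from the request queue */
	end_that_request_last(req);	/* release req, wake any waiter */
}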
1509 int __init blk_dev_init(void)
1510 {
1511 struct blk_dev_struct *dev;
1513 request_cachep = kmem_cache_create("blkdev_requests",
1514 sizeof(struct request),
1515 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
1517 if (!request_cachep)
1518 panic("Can't create request pool slab cache\n");
1520 for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
1521 dev->queue = NULL;
1523 memset(ro_bits,0,sizeof(ro_bits));
1524 memset(max_readahead, 0, sizeof(max_readahead));
1525 memset(max_sectors, 0, sizeof(max_sectors));
1527 blk_max_low_pfn = max_low_pfn - 1;
1528 blk_max_pfn = max_pfn - 1;
1530 init_timer(&writeback_timer);
1531 writeback_timer.function = blk_writeback_timer;
1533 #ifdef CONFIG_AMIGA_Z2RAM
1534 z2_init();
1535 #endif
1536 #ifdef CONFIG_STRAM_SWAP
1537 stram_device_init();
1538 #endif
1539 #ifdef CONFIG_ISP16_CDI
1540 isp16_init();
1541 #endif
1542 #ifdef CONFIG_BLK_DEV_PS2
1543 ps2esdi_init();
1544 #endif
1545 #ifdef CONFIG_BLK_DEV_XD
1546 xd_init();
1547 #endif
1548 #ifdef CONFIG_BLK_DEV_MFM
1549 mfm_init();
1550 #endif
1551 #ifdef CONFIG_PARIDE
1552 { extern void paride_init(void); paride_init(); };
1553 #endif
1554 #ifdef CONFIG_MAC_FLOPPY
1555 swim3_init();
1556 #endif
1557 #ifdef CONFIG_BLK_DEV_SWIM_IOP
1558 swimiop_init();
1559 #endif
1560 #ifdef CONFIG_AMIGA_FLOPPY
1561 amiga_floppy_init();
1562 #endif
1563 #ifdef CONFIG_ATARI_FLOPPY
1564 atari_floppy_init();
1565 #endif
1566 #ifdef CONFIG_BLK_DEV_FD
1567 floppy_init();
1568 #else
1569 #if defined(__i386__) && !defined(CONFIG_XEN) /* Do we even need this? */
1570 outb_p(0xc, 0x3f2);
1571 #endif
1572 #endif
1573 #ifdef CONFIG_CDU31A
1574 cdu31a_init();
1575 #endif
1576 #ifdef CONFIG_ATARI_ACSI
1577 acsi_init();
1578 #endif
1579 #ifdef CONFIG_MCD
1580 mcd_init();
1581 #endif
1582 #ifdef CONFIG_MCDX
1583 mcdx_init();
1584 #endif
1585 #ifdef CONFIG_SBPCD
1586 sbpcd_init();
1587 #endif
1588 #ifdef CONFIG_AZTCD
1589 aztcd_init();
1590 #endif
1591 #ifdef CONFIG_CDU535
1592 sony535_init();
1593 #endif
1594 #ifdef CONFIG_GSCD
1595 gscd_init();
1596 #endif
1597 #ifdef CONFIG_CM206
1598 cm206_init();
1599 #endif
1600 #ifdef CONFIG_OPTCD
1601 optcd_init();
1602 #endif
1603 #ifdef CONFIG_SJCD
1604 sjcd_init();
1605 #endif
1606 #ifdef CONFIG_APBLOCK
1607 ap_init();
1608 #endif
1609 #ifdef CONFIG_DDV
1610 ddv_init();
1611 #endif
1612 #ifdef CONFIG_MDISK
1613 mdisk_init();
1614 #endif
1615 #ifdef CONFIG_DASD
1616 dasd_init();
1617 #endif
1618 #if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
1619 tapeblock_init();
1620 #endif
1621 #ifdef CONFIG_BLK_DEV_XPRAM
1622 xpram_init();
1623 #endif
1625 #ifdef CONFIG_SUN_JSFLASH
1626 jsfd_init();
1627 #endif
1629 #if defined(CONFIG_XEN_BLKDEV_FRONTEND)
1630 xlblk_init();
1631 #endif
1633 return 0;
1634 };
1636 EXPORT_SYMBOL(io_request_lock);
1637 EXPORT_SYMBOL(end_that_request_first);
1638 EXPORT_SYMBOL(end_that_request_last);
1639 EXPORT_SYMBOL(blk_grow_request_list);
1640 EXPORT_SYMBOL(blk_init_queue);
1641 EXPORT_SYMBOL(blk_get_queue);
1642 EXPORT_SYMBOL(blk_cleanup_queue);
1643 EXPORT_SYMBOL(blk_queue_headactive);
1644 EXPORT_SYMBOL(blk_queue_throttle_sectors);
1645 EXPORT_SYMBOL(blk_queue_make_request);
1646 EXPORT_SYMBOL(generic_make_request);
1647 EXPORT_SYMBOL(blkdev_release_request);
1648 EXPORT_SYMBOL(generic_unplug_device);
1649 EXPORT_SYMBOL(blk_queue_bounce_limit);
1650 EXPORT_SYMBOL(blk_max_low_pfn);
1651 EXPORT_SYMBOL(blk_max_pfn);
1652 EXPORT_SYMBOL(blk_seg_merge_ok);
1653 EXPORT_SYMBOL(blk_nohighio);