linux-2.4.29-xen-sparse/drivers/block/ll_rw_blk.c @ 3887:4385894c52ae (xen-unstable)

bitkeeper revision 1.1230.2.4 (421a95cepOZORm0EbZfqBeZ6PZ8MwA)

Merge freefall.cl.cam.ac.uk:/auto/groups/xeno/users/cl349/BK/xen-unstable.bk
into freefall.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk

author  iap10@freefall.cl.cam.ac.uk
date    Tue Feb 22 02:15:42 2005 +0000 (2005-02-22)
parents 0a4b76b6b5a0
1 /*
2 * linux/drivers/block/ll_rw_blk.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
6 * Elevator latency, (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
7 * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
8 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> - July 2000
9 */
11 /*
12 * This handles all read/write requests to block devices
13 */
14 #include <linux/sched.h>
15 #include <linux/kernel.h>
16 #include <linux/kernel_stat.h>
17 #include <linux/errno.h>
18 #include <linux/string.h>
19 #include <linux/config.h>
20 #include <linux/locks.h>
21 #include <linux/mm.h>
22 #include <linux/swap.h>
23 #include <linux/init.h>
24 #include <linux/smp_lock.h>
25 #include <linux/completion.h>
26 #include <linux/bootmem.h>
28 #include <asm/system.h>
29 #include <asm/io.h>
30 #include <linux/blk.h>
31 #include <linux/highmem.h>
32 #include <linux/slab.h>
33 #include <linux/module.h>
35 /*
36 * MAC Floppy IWM hooks
37 */
39 #ifdef CONFIG_MAC_FLOPPY_IWM
40 extern int mac_floppy_init(void);
41 #endif
43 /*
44 * For the allocated request tables
45 */
46 static kmem_cache_t *request_cachep;
48 /*
49 * The "disk" task queue is used to start the actual requests
50 * after a plug
51 */
52 DECLARE_TASK_QUEUE(tq_disk);
54 /*
55 * Protect the request list against multiple users..
56 *
57 * With this spinlock the Linux block IO subsystem is 100% SMP threaded
58 * from the IRQ event side, and almost 100% SMP threaded from the syscall
59 * side (we still have to protect against block device array operations, and
60 * the do_request() side is still not fully SMP-safe. The kernel lock protects
61 * this part currently.).
62 *
63 * there is a fair chance that things will work just OK if these functions
64 * are called with no global kernel lock held ...
65 */
66 spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
68 /* This specifies how many sectors to read ahead on the disk. */
70 int read_ahead[MAX_BLKDEV];
72 /* blk_dev_struct is:
73 * *request_fn
74 * *current_request
75 */
76 struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
78 /*
79 * blk_size contains the size of all block-devices in units of 1024 byte
80 * sectors:
81 *
82 * blk_size[MAJOR][MINOR]
83 *
84 * if (!blk_size[MAJOR]) then no minor size checking is done.
85 */
86 int * blk_size[MAX_BLKDEV];
88 /*
89 * blksize_size contains the size of all block-devices:
90 *
91 * blksize_size[MAJOR][MINOR]
92 *
93 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
94 */
95 int * blksize_size[MAX_BLKDEV];
97 /*
98 * hardsect_size contains the size of the hardware sector of a device.
99 *
100 * hardsect_size[MAJOR][MINOR]
101 *
102 * if (!hardsect_size[MAJOR])
103 * then 512 bytes is assumed.
104 * else
105 * sector_size is hardsect_size[MAJOR][MINOR]
106 * This is currently set by some scsi devices and read by the msdos fs driver.
107 * Other uses may appear later.
108 */
109 int * hardsect_size[MAX_BLKDEV];
111 /*
112 * The following tunes the read-ahead algorithm in mm/filemap.c
113 */
114 int * max_readahead[MAX_BLKDEV];
116 /*
117 * Max number of sectors per request
118 */
119 int * max_sectors[MAX_BLKDEV];
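/*
 * Illustrative sketch of how a driver typically publishes these per-major
 * arrays.  Everything below is hypothetical (the example_* names and the
 * major number are assumptions for the example only).
 */
#if 0
#define EXAMPLE_MAJOR	240	/* hypothetical, locally-administered major */
#define EXAMPLE_UNITS	4	/* hypothetical number of minors */

static int example_sizes_kb[EXAMPLE_UNITS];	/* device sizes, 1024-byte units */
static int example_blksizes[EXAMPLE_UNITS];	/* software block size */
static int example_hardsects[EXAMPLE_UNITS];	/* hardware sector size */

static void example_register_sizes(void)
{
	int i;

	for (i = 0; i < EXAMPLE_UNITS; i++) {
		example_sizes_kb[i] = 16 * 1024;	/* 16MB per unit */
		example_blksizes[i] = 1024;		/* the 1024-byte default */
		example_hardsects[i] = 512;		/* the 512-byte default */
	}
	blk_size[EXAMPLE_MAJOR] = example_sizes_kb;
	blksize_size[EXAMPLE_MAJOR] = example_blksizes;
	hardsect_size[EXAMPLE_MAJOR] = example_hardsects;
}
#endif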
121 unsigned long blk_max_low_pfn, blk_max_pfn;
122 int blk_nohighio = 0;
124 int block_dump = 0;
126 static struct timer_list writeback_timer;
128 static inline int get_max_sectors(kdev_t dev)
129 {
130 if (!max_sectors[MAJOR(dev)])
131 return MAX_SECTORS;
132 return max_sectors[MAJOR(dev)][MINOR(dev)];
133 }
135 static inline request_queue_t *__blk_get_queue(kdev_t dev)
136 {
137 struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
139 if (bdev->queue)
140 return bdev->queue(dev);
141 else
142 return &blk_dev[MAJOR(dev)].request_queue;
143 }
145 request_queue_t *blk_get_queue(kdev_t dev)
146 {
147 return __blk_get_queue(dev);
148 }
150 static int __blk_cleanup_queue(struct request_list *list)
151 {
152 struct list_head *head = &list->free;
153 struct request *rq;
154 int i = 0;
156 while (!list_empty(head)) {
157 rq = list_entry(head->next, struct request, queue);
158 list_del(&rq->queue);
159 kmem_cache_free(request_cachep, rq);
160 i++;
161 };
163 if (i != list->count)
164 printk("request list leak!\n");
166 list->count = 0;
167 return i;
168 }
170 /**
171 * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
172 * @q: the request queue to be released
173 *
174 * Description:
175 * blk_cleanup_queue is the pair to blk_init_queue(). It should
176 * be called when a request queue is being released; typically
177 * when a block device is being de-registered. Currently, its
178 * primary task is to free all the &struct request structures that
179 * were allocated to the queue.
180 * Caveat:
181 * Hopefully the low level driver will have finished any
182 * outstanding requests first...
183 **/
184 void blk_cleanup_queue(request_queue_t * q)
185 {
186 int count = q->nr_requests;
188 count -= __blk_cleanup_queue(&q->rq);
190 if (count)
191 printk("blk_cleanup_queue: leaked requests (%d)\n", count);
192 if (atomic_read(&q->nr_sectors))
193 printk("blk_cleanup_queue: leaked sectors (%d)\n", atomic_read(&q->nr_sectors));
195 memset(q, 0, sizeof(*q));
196 }
198 /**
199 * blk_queue_headactive - indicate whether head of request queue may be active
200 * @q: The queue which this applies to.
201 * @active: A flag indicating whether the head of the queue is active.
202 *
203 * Description:
204 * The driver for a block device may choose to leave the currently active
205 * request on the request queue, removing it only when it has completed.
206 * The queue handling routines assume this by default for safety reasons
207 * and will not involve the head of the request queue in any merging or
208 * reordering of requests when the queue is unplugged (and thus may be
209 * working on this particular request).
210 *
211 * If a driver removes requests from the queue before processing them, then
212 * it may indicate that it does so, thereby allowing the head of the queue
213 * to be involved in merging and reordering. This is done by calling
214 * blk_queue_headactive() with an @active flag of %0.
215 *
216 * If a driver processes several requests at once, it must remove them (or
217 * at least all but one of them) from the request queue.
218 *
219 * When a queue is plugged the head will be assumed to be inactive.
220 **/
222 void blk_queue_headactive(request_queue_t * q, int active)
223 {
224 q->head_active = active;
225 }
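/*
 * Illustrative sketch (hypothetical driver): a driver that dequeues each
 * request before handing it to the hardware can tell the block layer so,
 * allowing the head of the queue to take part in merging.
 */
#if 0
static void example_request_fn(request_queue_t *q)
{
	while (!list_empty(&q->queue_head)) {
		struct request *req = blkdev_entry_next_request(&q->queue_head);

		blkdev_dequeue_request(req);	/* off the queue before we work on it */
		/* ...hand req to the hardware here... */
	}
}

static void example_setup(request_queue_t *q)
{
	blk_init_queue(q, example_request_fn);
	blk_queue_headactive(q, 0);	/* the head is never the active request */
}
#endif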
227 /**
228 * blk_queue_throttle_sectors - indicates you will call sector throttling funcs
229 * @q: The queue which this applies to.
230 * @active: A flag indicating whether you want sector throttling on
231 *
232 * Description:
233 * The sector throttling code allows us to put a limit on the number of
234 * sectors pending io to the disk at a given time, sending @active nonzero
235 * indicates you will call blk_started_sectors and blk_finished_sectors in
236 * addition to calling blk_started_io and blk_finished_io in order to
237 * keep track of the number of sectors in flight.
238 **/
240 void blk_queue_throttle_sectors(request_queue_t * q, int active)
241 {
242 q->can_throttle = active;
243 }
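/*
 * Illustrative sketch (hypothetical driver): once throttling is enabled, the
 * sector accounting calls must be made in pairs alongside blk_started_io()/
 * blk_finished_io(), as end_that_request_first() below does.
 */
#if 0
static void example_enable_throttling(request_queue_t *q)
{
	blk_queue_throttle_sectors(q, 1);
}

static void example_complete_buffer(struct request *req, struct buffer_head *bh,
				    int uptodate)
{
	int nsect = bh->b_size >> 9;

	blk_finished_io(nsect);
	blk_finished_sectors(req, nsect);	/* required once throttling is on */
	bh->b_end_io(bh, uptodate);
}
#endif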
245 /**
246 * blk_queue_make_request - define an alternate make_request function for a device
247 * @q: the request queue for the device to be affected
248 * @mfn: the alternate make_request function
249 *
250 * Description:
251 * The normal way for &struct buffer_heads to be passed to a device
252 * driver is for them to be collected into requests on a request
253 * queue, and then to allow the device driver to select requests
254 * off that queue when it is ready. This works well for many block
255 * devices. However some block devices (typically virtual devices
256 * such as md or lvm) do not benefit from the processing on the
257 * request queue, and are served best by having the requests passed
258 * directly to them. This can be achieved by providing a function
259 * to blk_queue_make_request().
260 *
261 * Caveat:
262 * The driver that does this *must* be able to deal appropriately
263 * with buffers in "highmemory", either by calling bh_kmap() to get
264 * a kernel mapping, or by calling create_bounce() to create a
265 * buffer in normal memory.
266 **/
268 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
269 {
270 q->make_request_fn = mfn;
271 }
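/*
 * Illustrative sketch (hypothetical stacking driver): a virtual device can
 * bypass request queueing entirely by remapping the buffer_head and returning
 * non-zero so that generic_make_request() resubmits it to the new target.
 * example_backing_dev and example_offset are assumptions for the example.
 */
#if 0
static kdev_t example_backing_dev;
static unsigned long example_offset;

static int example_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
{
	bh->b_rdev = example_backing_dev;	/* remap to the real device... */
	bh->b_rsector += example_offset;	/* ...at the right offset */
	return 1;				/* let generic_make_request() recurse */
}

static void example_setup_queue(request_queue_t *q)
{
	blk_queue_make_request(q, example_make_request);
}
#endif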
273 /**
274 * blk_queue_bounce_limit - set bounce buffer limit for queue
275 * @q: the request queue for the device
276 * @dma_addr: bus address limit
277 *
278 * Description:
279 * Different hardware can have different requirements as to what pages
280 * it can do I/O directly to. A low level driver can call
281 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
282 * buffers for doing I/O to pages residing above @dma_addr. By default
283 * the block layer sets this to the highest numbered "low" memory page.
284 **/
285 void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
286 {
287 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
288 unsigned long mb = dma_addr >> 20;
289 static request_queue_t *old_q;
291 /*
292 * keep this for debugging for now...
293 */
294 if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) {
295 old_q = q;
296 printk("blk: queue %p, ", q);
297 if (dma_addr == BLK_BOUNCE_ANY)
298 printk("no I/O memory limit\n");
299 else
300 printk("I/O limit %luMb (mask 0x%Lx)\n", mb,
301 (long long) dma_addr);
302 }
304 q->bounce_pfn = bounce_pfn;
305 }
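/*
 * Illustrative sketches (hypothetical drivers) of the two common cases.
 */
#if 0
static void example_isa_setup(request_queue_t *q)
{
	/* hardware that can only DMA below 16MB: bounce anything above that */
	blk_queue_bounce_limit(q, 0x00ffffffULL);
}

static void example_64bit_setup(request_queue_t *q)
{
	/* hardware that can address all of memory: never bounce */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
}
#endif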
308 /*
309 * can we merge the two segments, or do we need to start a new one?
310 */
311 static inline int __blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
312 {
313 /*
314 * if bh and nxt are contiguous and don't cross a 4g boundary, it's ok
315 */
316 if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt))
317 return 1;
319 return 0;
320 }
322 int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt)
323 {
324 return __blk_seg_merge_ok(bh, nxt);
325 }
327 static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
328 {
329 if (req->nr_segments < max_segments) {
330 req->nr_segments++;
331 return 1;
332 }
333 return 0;
334 }
336 static int ll_back_merge_fn(request_queue_t *q, struct request *req,
337 struct buffer_head *bh, int max_segments)
338 {
339 if (__blk_seg_merge_ok(req->bhtail, bh))
340 return 1;
342 return ll_new_segment(q, req, max_segments);
343 }
345 static int ll_front_merge_fn(request_queue_t *q, struct request *req,
346 struct buffer_head *bh, int max_segments)
347 {
348 if (__blk_seg_merge_ok(bh, req->bh))
349 return 1;
351 return ll_new_segment(q, req, max_segments);
352 }
354 static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
355 struct request *next, int max_segments)
356 {
357 int total_segments = req->nr_segments + next->nr_segments;
359 if (__blk_seg_merge_ok(req->bhtail, next->bh))
360 total_segments--;
362 if (total_segments > max_segments)
363 return 0;
365 req->nr_segments = total_segments;
366 return 1;
367 }
369 /*
370 * "plug" the device if there are no outstanding requests: this will
371 * force the transfer to start only after we have put all the requests
372 * on the list.
373 *
374 * This is called with interrupts off and no requests on the queue.
375 * (and with the request spinlock acquired)
376 */
377 static void generic_plug_device(request_queue_t *q, kdev_t dev)
378 {
379 /*
380 * no need to replug device
381 */
382 if (!list_empty(&q->queue_head) || q->plugged)
383 return;
385 q->plugged = 1;
386 queue_task(&q->plug_tq, &tq_disk);
387 }
389 /*
390 * remove the plug and let it rip..
391 */
392 static inline void __generic_unplug_device(request_queue_t *q)
393 {
394 if (q->plugged) {
395 q->plugged = 0;
396 if (!list_empty(&q->queue_head))
397 q->request_fn(q);
398 }
399 }
401 void generic_unplug_device(void *data)
402 {
403 request_queue_t *q = (request_queue_t *) data;
404 unsigned long flags;
406 spin_lock_irqsave(&io_request_lock, flags);
407 __generic_unplug_device(q);
408 spin_unlock_irqrestore(&io_request_lock, flags);
409 }
411 /** blk_grow_request_list
412 * @q: The &request_queue_t
413 * @nr_requests: how many requests are desired
414 *
415 * More free requests are added to the queue's free lists, bringing
416 * the total number of requests to @nr_requests.
417 *
418 * The requests are added equally to the request queue's read
419 * and write freelists.
420 *
421 * This function can sleep.
422 *
423 * Returns the (new) number of requests which the queue has available.
424 */
425 int blk_grow_request_list(request_queue_t *q, int nr_requests, int max_queue_sectors)
426 {
427 unsigned long flags;
428 /* Several broken drivers assume that this function doesn't sleep,
429 * this causes system hangs during boot.
430 * As a temporary fix, make the function non-blocking.
431 */
432 spin_lock_irqsave(&io_request_lock, flags);
433 while (q->nr_requests < nr_requests) {
434 struct request *rq;
436 rq = kmem_cache_alloc(request_cachep, SLAB_ATOMIC);
437 if (rq == NULL)
438 break;
439 memset(rq, 0, sizeof(*rq));
440 rq->rq_status = RQ_INACTIVE;
441 list_add(&rq->queue, &q->rq.free);
442 q->rq.count++;
444 q->nr_requests++;
445 }
447 /*
448 * Wakeup waiters after both one quarter of the
449 * max-in-flight queue and one quarter of the requests
450 * are available again.
451 */
453 q->batch_requests = q->nr_requests / 4;
454 if (q->batch_requests > 32)
455 q->batch_requests = 32;
456 q->batch_sectors = max_queue_sectors / 4;
458 q->max_queue_sectors = max_queue_sectors;
460 BUG_ON(!q->batch_sectors);
461 atomic_set(&q->nr_sectors, 0);
463 spin_unlock_irqrestore(&io_request_lock, flags);
464 return q->nr_requests;
465 }
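/*
 * Illustrative sketch (hypothetical driver): deepening a queue's request
 * pool.  The numbers are assumptions; the return value tells us how many
 * requests the queue really ended up with.
 */
#if 0
static void example_deepen_queue(request_queue_t *q)
{
	int got = blk_grow_request_list(q, 512, 4 * MAX_QUEUE_SECTORS);

	if (got < 512)
		printk(KERN_INFO "example: only got %d requests\n", got);
}
#endif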
467 static void blk_init_free_list(request_queue_t *q)
468 {
469 struct sysinfo si;
470 int megs; /* Total memory, in megabytes */
471 int nr_requests, max_queue_sectors = MAX_QUEUE_SECTORS;
473 INIT_LIST_HEAD(&q->rq.free);
474 q->rq.count = 0;
475 q->rq.pending[READ] = q->rq.pending[WRITE] = 0;
476 q->nr_requests = 0;
478 si_meminfo(&si);
479 megs = si.totalram >> (20 - PAGE_SHIFT);
480 nr_requests = MAX_NR_REQUESTS;
481 if (megs < 30) {
482 nr_requests /= 2;
483 max_queue_sectors /= 2;
484 }
485 /* notice early if anybody screwed the defaults */
486 BUG_ON(!nr_requests);
487 BUG_ON(!max_queue_sectors);
489 blk_grow_request_list(q, nr_requests, max_queue_sectors);
491 init_waitqueue_head(&q->wait_for_requests);
493 spin_lock_init(&q->queue_lock);
494 }
496 static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
498 /**
499 * blk_init_queue - prepare a request queue for use with a block device
500 * @q: The &request_queue_t to be initialised
501 * @rfn: The function to be called to process requests that have been
502 * placed on the queue.
503 *
504 * Description:
505 * If a block device wishes to use the standard request handling procedures,
506 * which sorts requests and coalesces adjacent requests, then it must
507 * call blk_init_queue(). The function @rfn will be called when there
508 * are requests on the queue that need to be processed. If the device
509 * supports plugging, then @rfn may not be called immediately when requests
510 * are available on the queue, but may be called at some time later instead.
511 * Plugged queues are generally unplugged when a buffer belonging to one
512 * of the requests on the queue is needed, or due to memory pressure.
513 *
514 * @rfn is not required, or even expected, to remove all requests off the
515 * queue, but only as many as it can handle at a time. If it does leave
516 * requests on the queue, it is responsible for arranging that the requests
517 * get dealt with eventually.
518 *
519 * A global spin lock $io_request_lock must be held while manipulating the
520 * requests on the request queue.
521 *
522 * The request on the head of the queue is by default assumed to be
523 * potentially active, and it is not considered for re-ordering or merging
524 * whenever the given queue is unplugged. This behaviour can be changed with
525 * blk_queue_headactive().
526 *
527 * Note:
528 * blk_init_queue() must be paired with a blk_cleanup_queue() call
529 * when the block device is deactivated (such as at module unload).
530 **/
531 void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
532 {
533 INIT_LIST_HEAD(&q->queue_head);
534 elevator_init(&q->elevator, ELEVATOR_LINUS);
535 blk_init_free_list(q);
536 q->request_fn = rfn;
537 q->back_merge_fn = ll_back_merge_fn;
538 q->front_merge_fn = ll_front_merge_fn;
539 q->merge_requests_fn = ll_merge_requests_fn;
540 q->make_request_fn = __make_request;
541 q->plug_tq.sync = 0;
542 q->plug_tq.routine = &generic_unplug_device;
543 q->plug_tq.data = q;
544 q->plugged = 0;
545 q->can_throttle = 0;
547 /*
548 * These booleans describe the queue properties. We set the
549 * default (and most common) values here. Other drivers can
550 * use the appropriate functions to alter the queue properties
551 * as appropriate.
552 */
553 q->plug_device_fn = generic_plug_device;
554 q->head_active = 1;
556 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
557 }
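/*
 * Illustrative sketch (hypothetical driver): the minimal init/cleanup pairing
 * described above, with a request_fn that completes requests synchronously
 * (a ramdisk-like device).  All example_* names are assumptions.
 */
#if 0
static request_queue_t example_queue;

static void example_request_fn(request_queue_t *q)
{
	/* called with io_request_lock held and interrupts disabled */
	while (!list_empty(&q->queue_head)) {
		struct request *req = blkdev_entry_next_request(&q->queue_head);

		/* ...transfer the data for req here... */
		while (end_that_request_first(req, 1, "example"))
			;
		blkdev_dequeue_request(req);
		end_that_request_last(req);
	}
}

static int __init example_init(void)
{
	blk_init_queue(&example_queue, example_request_fn);
	return 0;
}

static void __exit example_exit(void)
{
	blk_cleanup_queue(&example_queue);
}
#endif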
559 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queue);
560 /*
561 * Get a free request. io_request_lock must be held and interrupts
562 * disabled on the way in. Returns NULL if there are no free requests.
563 */
564 static struct request *get_request(request_queue_t *q, int rw)
565 {
566 struct request *rq = NULL;
567 struct request_list *rl = &q->rq;
569 if (blk_oversized_queue(q)) {
570 int rlim = q->nr_requests >> 5;
572 if (rlim < 4)
573 rlim = 4;
575 /*
576 * if its a write, or we have more than a handful of reads
577 * pending, bail out
578 */
579 if ((rw == WRITE) || (rw == READ && rl->pending[READ] > rlim))
580 return NULL;
581 if (blk_oversized_queue_reads(q))
582 return NULL;
583 }
585 if (!list_empty(&rl->free)) {
586 rq = blkdev_free_rq(&rl->free);
587 list_del(&rq->queue);
588 rl->count--;
589 rl->pending[rw]++;
590 rq->rq_status = RQ_ACTIVE;
591 rq->cmd = rw;
592 rq->special = NULL;
593 rq->q = q;
594 }
596 return rq;
597 }
599 /*
600 * Here's the request allocation design, low latency version:
601 *
602 * 1: Blocking on request exhaustion is a key part of I/O throttling.
603 *
604 * 2: We want to be `fair' to all requesters. We must avoid starvation, and
605 * attempt to ensure that all requesters sleep for a similar duration. Hence
606 * no stealing requests when there are other processes waiting.
607 *
608 * There used to be more here, attempting to allow a process to send in a
609 * number of requests once it has woken up. But, there's no way to
610 * tell if a process has just been woken up, or if it is a new process
611 * coming in to steal requests from the waiters. So, we give up and force
612 * everyone to wait fairly.
613 *
614 * So here's what we do:
615 *
616 * a) A READA requester fails if free_requests < batch_requests
617 *
618 * We don't want READA requests to prevent sleepers from ever
619 * waking. Note that READA is used extremely rarely - a few
620 * filesystems use it for directory readahead.
621 *
622 * When a process wants a new request:
623 *
624 * b) If free_requests == 0, the requester sleeps in FIFO manner, and
625 * the queue full condition is set. The full condition is not
626 * cleared until there are no longer any waiters. Once the full
627 * condition is set, all new io must wait, hopefully for a very
628 * short period of time.
629 *
630 * When a request is released:
631 *
632 * c) If free_requests < batch_requests, do nothing.
633 *
634 * d) If free_requests >= batch_requests, wake up a single waiter.
635 *
636 * As each waiter gets a request, he wakes another waiter. We do this
637 * to prevent a race where an unplug might get run before a request makes
638 * its way onto the queue. The result is a cascade of wakeups, so delaying
639 * the initial wakeup until we've got batch_requests available helps avoid
640 * wakeups where there aren't any requests available yet.
641 */
643 static struct request *__get_request_wait(request_queue_t *q, int rw)
644 {
645 register struct request *rq;
646 DECLARE_WAITQUEUE(wait, current);
648 add_wait_queue_exclusive(&q->wait_for_requests, &wait);
650 do {
651 set_current_state(TASK_UNINTERRUPTIBLE);
652 spin_lock_irq(&io_request_lock);
653 if (blk_oversized_queue(q) || q->rq.count == 0) {
654 __generic_unplug_device(q);
655 spin_unlock_irq(&io_request_lock);
656 schedule();
657 spin_lock_irq(&io_request_lock);
658 }
659 rq = get_request(q, rw);
660 spin_unlock_irq(&io_request_lock);
661 } while (rq == NULL);
662 remove_wait_queue(&q->wait_for_requests, &wait);
663 current->state = TASK_RUNNING;
665 return rq;
666 }
668 static void get_request_wait_wakeup(request_queue_t *q, int rw)
669 {
670 /*
671 * avoid losing an unplug if a second __get_request_wait did the
672 * generic_unplug_device while our __get_request_wait was running
673 * w/o the queue_lock held and w/ our request out of the queue.
674 */
675 if (waitqueue_active(&q->wait_for_requests))
676 wake_up(&q->wait_for_requests);
677 }
679 /* RO fail safe mechanism */
681 static long ro_bits[MAX_BLKDEV][8];
683 int is_read_only(kdev_t dev)
684 {
685 int minor,major;
687 major = MAJOR(dev);
688 minor = MINOR(dev);
689 if (major < 0 || major >= MAX_BLKDEV) return 0;
690 return ro_bits[major][minor >> 5] & (1 << (minor & 31));
691 }
693 void set_device_ro(kdev_t dev,int flag)
694 {
695 int minor,major;
697 major = MAJOR(dev);
698 minor = MINOR(dev);
699 if (major < 0 || major >= MAX_BLKDEV) return;
700 if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
701 else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
702 }
704 inline void drive_stat_acct (kdev_t dev, int rw,
705 unsigned long nr_sectors, int new_io)
706 {
707 unsigned int major = MAJOR(dev);
708 unsigned int index;
710 index = disk_index(dev);
711 if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
712 return;
714 kstat.dk_drive[major][index] += new_io;
715 if (rw == READ) {
716 kstat.dk_drive_rio[major][index] += new_io;
717 kstat.dk_drive_rblk[major][index] += nr_sectors;
718 } else if (rw == WRITE) {
719 kstat.dk_drive_wio[major][index] += new_io;
720 kstat.dk_drive_wblk[major][index] += nr_sectors;
721 } else
722 printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
723 }
725 #ifdef CONFIG_BLK_STATS
726 /*
727 * Return up to two hd_structs on which to do IO accounting for a given
728 * request.
729 *
730 * On a partitioned device, we want to account both against the partition
731 * and against the whole disk.
732 */
733 static void locate_hd_struct(struct request *req,
734 struct hd_struct **hd1,
735 struct hd_struct **hd2)
736 {
737 struct gendisk *gd;
739 *hd1 = NULL;
740 *hd2 = NULL;
742 gd = get_gendisk(req->rq_dev);
743 if (gd && gd->part) {
744 /* Mask out the partition bits: account for the entire disk */
745 int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
746 int whole_minor = devnr << gd->minor_shift;
748 *hd1 = &gd->part[whole_minor];
749 if (whole_minor != MINOR(req->rq_dev))
750 *hd2= &gd->part[MINOR(req->rq_dev)];
751 }
752 }
754 /*
755 * Round off the performance stats on an hd_struct.
756 *
757 * The average IO queue length and utilisation statistics are maintained
758 * by observing the current state of the queue length and the amount of
759 * time it has been in this state for.
760 * Normally, that accounting is done on IO completion, but that can result
761 * in more than a second's worth of IO being accounted for within any one
762 * second, leading to >100% utilisation. To deal with that, we do a
763 * round-off before returning the results when reading /proc/partitions,
764 * accounting immediately for all queue usage up to the current jiffies and
765 * restarting the counters again.
766 */
767 void disk_round_stats(struct hd_struct *hd)
768 {
769 unsigned long now = jiffies;
771 hd->aveq += (hd->ios_in_flight * (jiffies - hd->last_queue_change));
772 hd->last_queue_change = now;
774 if (hd->ios_in_flight)
775 hd->io_ticks += (now - hd->last_idle_time);
776 hd->last_idle_time = now;
777 }
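/*
 * Worked example (hypothetical numbers): if 3 IOs have been in flight for the
 * 10 jiffies since last_queue_change, a call to disk_round_stats() adds
 * 3 * 10 = 30 to hd->aveq, adds 10 to hd->io_ticks (the disk was busy for the
 * whole interval), and restarts both intervals at the current jiffies value,
 * so a /proc/partitions read never accounts more than 100% utilisation.
 */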
779 static inline void down_ios(struct hd_struct *hd)
780 {
781 disk_round_stats(hd);
782 --hd->ios_in_flight;
783 }
785 static inline void up_ios(struct hd_struct *hd)
786 {
787 disk_round_stats(hd);
788 ++hd->ios_in_flight;
789 }
791 static void account_io_start(struct hd_struct *hd, struct request *req,
792 int merge, int sectors)
793 {
794 switch (req->cmd) {
795 case READ:
796 if (merge)
797 hd->rd_merges++;
798 hd->rd_sectors += sectors;
799 break;
800 case WRITE:
801 if (merge)
802 hd->wr_merges++;
803 hd->wr_sectors += sectors;
804 break;
805 }
806 if (!merge)
807 up_ios(hd);
808 }
810 static void account_io_end(struct hd_struct *hd, struct request *req)
811 {
812 unsigned long duration = jiffies - req->start_time;
813 switch (req->cmd) {
814 case READ:
815 hd->rd_ticks += duration;
816 hd->rd_ios++;
817 break;
818 case WRITE:
819 hd->wr_ticks += duration;
820 hd->wr_ios++;
821 break;
822 }
823 down_ios(hd);
824 }
826 void req_new_io(struct request *req, int merge, int sectors)
827 {
828 struct hd_struct *hd1, *hd2;
830 locate_hd_struct(req, &hd1, &hd2);
831 if (hd1)
832 account_io_start(hd1, req, merge, sectors);
833 if (hd2)
834 account_io_start(hd2, req, merge, sectors);
835 }
837 void req_merged_io(struct request *req)
838 {
839 struct hd_struct *hd1, *hd2;
841 locate_hd_struct(req, &hd1, &hd2);
842 if (hd1)
843 down_ios(hd1);
844 if (hd2)
845 down_ios(hd2);
846 }
848 void req_finished_io(struct request *req)
849 {
850 struct hd_struct *hd1, *hd2;
852 locate_hd_struct(req, &hd1, &hd2);
853 if (hd1)
854 account_io_end(hd1, req);
855 if (hd2)
856 account_io_end(hd2, req);
857 }
858 EXPORT_SYMBOL(req_finished_io);
859 #endif /* CONFIG_BLK_STATS */
861 /*
862 * add-request adds a request to the linked list.
863 * io_request_lock is held and interrupts disabled, as we muck with the
864 * request queue list.
865 *
866 * By this point, req->cmd is always either READ/WRITE, never READA,
867 * which is important for drive_stat_acct() above.
868 */
869 static inline void add_request(request_queue_t * q, struct request * req,
870 struct list_head *insert_here)
871 {
872 drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
874 if (!q->plugged && q->head_active && insert_here == &q->queue_head) {
875 spin_unlock_irq(&io_request_lock);
876 BUG();
877 }
879 /*
880 * elevator indicated where it wants this request to be
881 * inserted at elevator_merge time
882 */
883 list_add(&req->queue, insert_here);
884 }
886 /*
887 * Must be called with io_request_lock held and interrupts disabled
888 */
889 void blkdev_release_request(struct request *req)
890 {
891 request_queue_t *q = req->q;
893 req->rq_status = RQ_INACTIVE;
894 req->q = NULL;
896 /*
897 * Request may not have originated from ll_rw_blk. if not,
898 * assume it has free buffers and check waiters
899 */
900 if (q) {
901 struct request_list *rl = &q->rq;
902 int oversized_batch = 0;
904 if (q->can_throttle)
905 oversized_batch = blk_oversized_queue_batch(q);
906 rl->count++;
907 /*
908 * paranoia check
909 */
910 if (req->cmd == READ || req->cmd == WRITE)
911 rl->pending[req->cmd]--;
912 if (rl->pending[READ] > q->nr_requests)
913 printk("blk: reads: %u\n", rl->pending[READ]);
914 if (rl->pending[WRITE] > q->nr_requests)
915 printk("blk: writes: %u\n", rl->pending[WRITE]);
916 if (rl->pending[READ] + rl->pending[WRITE] > q->nr_requests)
917 printk("blk: r/w: %u + %u > %u\n", rl->pending[READ], rl->pending[WRITE], q->nr_requests);
918 list_add(&req->queue, &rl->free);
919 if (rl->count >= q->batch_requests && !oversized_batch) {
920 smp_mb();
921 if (waitqueue_active(&q->wait_for_requests))
922 wake_up(&q->wait_for_requests);
923 }
924 }
925 }
927 /*
928 * Has to be called with the request spinlock acquired
929 */
930 static void attempt_merge(request_queue_t * q,
931 struct request *req,
932 int max_sectors,
933 int max_segments)
934 {
935 struct request *next;
937 next = blkdev_next_request(req);
938 if (req->sector + req->nr_sectors != next->sector)
939 return;
940 if (req->cmd != next->cmd
941 || req->rq_dev != next->rq_dev
942 || req->nr_sectors + next->nr_sectors > max_sectors
943 || next->waiting)
944 return;
945 /*
946 * If we are not allowed to merge these requests, then
947 * return. If we are allowed to merge, then the count
948 * will have been updated to the appropriate number,
949 * and we shouldn't do it here too.
950 */
951 if (!q->merge_requests_fn(q, req, next, max_segments))
952 return;
954 q->elevator.elevator_merge_req_fn(req, next);
956 /* At this point we have either done a back merge
957 * or front merge. We need the smaller start_time of
958 * the merged requests to be the current request
959 * for accounting purposes.
960 */
961 if (time_after(req->start_time, next->start_time))
962 req->start_time = next->start_time;
964 req->bhtail->b_reqnext = next->bh;
965 req->bhtail = next->bhtail;
966 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
967 list_del(&next->queue);
969 /* One last thing: we have removed a request, so we now have one
970 less expected IO to complete for accounting purposes. */
971 req_merged_io(req);
973 blkdev_release_request(next);
974 }
976 static inline void attempt_back_merge(request_queue_t * q,
977 struct request *req,
978 int max_sectors,
979 int max_segments)
980 {
981 if (&req->queue == q->queue_head.prev)
982 return;
983 attempt_merge(q, req, max_sectors, max_segments);
984 }
986 static inline void attempt_front_merge(request_queue_t * q,
987 struct list_head * head,
988 struct request *req,
989 int max_sectors,
990 int max_segments)
991 {
992 struct list_head * prev;
994 prev = req->queue.prev;
995 if (head == prev)
996 return;
997 attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
998 }
1000 static int __make_request(request_queue_t * q, int rw,
1001 struct buffer_head * bh)
1002 {
1003 unsigned int sector, count, sync;
1004 int max_segments = MAX_SEGMENTS;
1005 struct request * req, *freereq = NULL;
1006 int rw_ahead, max_sectors, el_ret;
1007 struct list_head *head, *insert_here;
1008 int latency;
1009 elevator_t *elevator = &q->elevator;
1010 int should_wake = 0;
1012 count = bh->b_size >> 9;
1013 sector = bh->b_rsector;
1014 sync = test_and_clear_bit(BH_Sync, &bh->b_state);
1016 rw_ahead = 0; /* normal case; gets changed below for READA */
1017 switch (rw) {
1018 case READA:
1019 #if 0 /* bread() misinterprets failed READA attempts as IO errors on SMP */
1020 rw_ahead = 1;
1021 #endif
1022 rw = READ; /* drop into READ */
1023 case READ:
1024 case WRITE:
1025 latency = elevator_request_latency(elevator, rw);
1026 break;
1027 default:
1028 BUG();
1029 goto end_io;
1030 }
1032 /* We'd better have a real physical mapping!
1033 Check this bit only if the buffer was dirty and just locked
1034 down by us so at this point flushpage will block and
1035 won't clear the mapped bit under us. */
1036 if (!buffer_mapped(bh))
1037 BUG();
1039 /*
1040 * Temporary solution - in 2.5 this will be done by the lowlevel
1041 * driver. Create a bounce buffer if the buffer data points into
1042 * high memory - keep the original buffer otherwise.
1043 */
1044 bh = blk_queue_bounce(q, rw, bh);
1046 /* look for a free request. */
1047 /*
1048 * Try to coalesce the new request with old requests
1049 */
1050 max_sectors = get_max_sectors(bh->b_rdev);
1052 req = NULL;
1053 head = &q->queue_head;
1054 /*
1055 * Now we acquire the request spinlock, we have to be mega careful
1056 * not to schedule or do something nonatomic
1057 */
1058 spin_lock_irq(&io_request_lock);
1060 again:
1061 insert_here = head->prev;
1063 if (list_empty(head)) {
1064 q->plug_device_fn(q, bh->b_rdev); /* is atomic */
1065 goto get_rq;
1066 } else if (q->head_active && !q->plugged)
1067 head = head->next;
1069 el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors);
1070 switch (el_ret) {
1072 case ELEVATOR_BACK_MERGE:
1073 if (!q->back_merge_fn(q, req, bh, max_segments)) {
1074 insert_here = &req->queue;
1075 break;
1076 }
1077 req->bhtail->b_reqnext = bh;
1078 req->bhtail = bh;
1079 req->nr_sectors = req->hard_nr_sectors += count;
1080 blk_started_io(count);
1081 blk_started_sectors(req, count);
1082 drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1083 req_new_io(req, 1, count);
1084 attempt_back_merge(q, req, max_sectors, max_segments);
1085 goto out;
1087 case ELEVATOR_FRONT_MERGE:
1088 if (!q->front_merge_fn(q, req, bh, max_segments)) {
1089 insert_here = req->queue.prev;
1090 break;
1091 }
1092 bh->b_reqnext = req->bh;
1093 req->bh = bh;
1094 /*
1095 * may not be valid, but queues not having bounce
1096 * enabled for highmem pages must not look at
1097 * ->buffer anyway
1098 */
1099 req->buffer = bh->b_data;
1100 req->current_nr_sectors = req->hard_cur_sectors = count;
1101 req->sector = req->hard_sector = sector;
1102 req->nr_sectors = req->hard_nr_sectors += count;
1103 blk_started_io(count);
1104 blk_started_sectors(req, count);
1105 drive_stat_acct(req->rq_dev, req->cmd, count, 0);
1106 req_new_io(req, 1, count);
1107 attempt_front_merge(q, head, req, max_sectors, max_segments);
1108 goto out;
1110 /*
1111 * elevator says don't/can't merge. get new request
1112 */
1113 case ELEVATOR_NO_MERGE:
1114 /*
1115 * use elevator hints as to where to insert the
1116 * request. if no hints, just add it to the back
1117 * of the queue
1118 */
1119 if (req)
1120 insert_here = &req->queue;
1121 break;
1123 default:
1124 printk("elevator returned crap (%d)\n", el_ret);
1125 BUG();
1126 }
1128 get_rq:
1129 if (freereq) {
1130 req = freereq;
1131 freereq = NULL;
1132 } else {
1133 /*
1134 * See description above __get_request_wait()
1135 */
1136 if (rw_ahead) {
1137 if (q->rq.count < q->batch_requests || blk_oversized_queue_batch(q)) {
1138 spin_unlock_irq(&io_request_lock);
1139 goto end_io;
1140 }
1141 req = get_request(q, rw);
1142 if (req == NULL)
1143 BUG();
1144 } else {
1145 req = get_request(q, rw);
1146 if (req == NULL) {
1147 spin_unlock_irq(&io_request_lock);
1148 freereq = __get_request_wait(q, rw);
1149 head = &q->queue_head;
1150 spin_lock_irq(&io_request_lock);
1151 should_wake = 1;
1152 goto again;
1153 }
1154 }
1155 }
1157 /* fill up the request-info, and add it to the queue */
1158 req->elevator_sequence = latency;
1159 req->cmd = rw;
1160 req->errors = 0;
1161 req->hard_sector = req->sector = sector;
1162 req->hard_nr_sectors = req->nr_sectors = count;
1163 req->current_nr_sectors = req->hard_cur_sectors = count;
1164 req->nr_segments = 1; /* Always 1 for a new request. */
1165 req->nr_hw_segments = 1; /* Always 1 for a new request. */
1166 req->buffer = bh->b_data;
1167 req->waiting = NULL;
1168 req->bh = bh;
1169 req->bhtail = bh;
1170 req->rq_dev = bh->b_rdev;
1171 req->start_time = jiffies;
1172 req_new_io(req, 0, count);
1173 blk_started_io(count);
1174 blk_started_sectors(req, count);
1175 add_request(q, req, insert_here);
1176 out:
1177 if (freereq)
1178 blkdev_release_request(freereq);
1179 if (should_wake)
1180 get_request_wait_wakeup(q, rw);
1181 if (sync)
1182 __generic_unplug_device(q);
1183 spin_unlock_irq(&io_request_lock);
1184 return 0;
1185 end_io:
1186 bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1187 return 0;
1188 }
1190 /**
1191 * generic_make_request: hand a buffer head to its device driver for I/O
1192 * @rw: READ, WRITE, or READA - what sort of I/O is desired.
1193 * @bh: The buffer head describing the location in memory and on the device.
1195 * generic_make_request() is used to make I/O requests of block
1196 * devices. It is passed a &struct buffer_head and a &rw value. The
1197 * %READ and %WRITE options are (hopefully) obvious in meaning. The
1198 * %READA value means that a read is required, but that the driver is
1199 * free to fail the request if, for example, it cannot get needed
1200 * resources immediately.
1202 * generic_make_request() does not return any status. The
1203 * success/failure status of the request, along with notification of
1204 * completion, is delivered asynchronously through the bh->b_end_io
1205 * function described (one day) elsewhere.
1207 * The caller of generic_make_request must make sure that b_page,
1208 * b_addr, b_size are set to describe the memory buffer, that b_rdev
1209 * and b_rsector are set to describe the device address, and the
1210 * b_end_io and optionally b_private are set to describe how
1211 * completion notification should be signaled. BH_Mapped should also
1212 * be set (to confirm that b_dev and b_blocknr are valid).
1214 * generic_make_request and the drivers it calls may use b_reqnext,
1215 * and may change b_rdev and b_rsector. So the values of these fields
1216 * should NOT be depended on after the call to generic_make_request.
1217 * Because of this, the caller should record the device address
1218 * information in b_dev and b_blocknr.
1220 * Apart from those fields mentioned above, no other fields, and in
1221 * particular, no other flags, are changed by generic_make_request or
1222 * any lower level drivers.
1223 * */
1224 void generic_make_request (int rw, struct buffer_head * bh)
1225 {
1226 int major = MAJOR(bh->b_rdev);
1227 int minorsize = 0;
1228 request_queue_t *q;
1230 if (!bh->b_end_io)
1231 BUG();
1233 /* Test device size, when known. */
1234 if (blk_size[major])
1235 minorsize = blk_size[major][MINOR(bh->b_rdev)];
1236 if (minorsize) {
1237 unsigned long maxsector = (minorsize << 1) + 1;
1238 unsigned long sector = bh->b_rsector;
1239 unsigned int count = bh->b_size >> 9;
1241 if (maxsector < count || maxsector - count < sector) {
1242 /* Yecch */
1243 bh->b_state &= ~(1 << BH_Dirty);
1245 /* This may well happen - the kernel calls bread()
1246 without checking the size of the device, e.g.,
1247 when mounting a device. */
1248 printk(KERN_INFO
1249 "attempt to access beyond end of device\n");
1250 printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n",
1251 kdevname(bh->b_rdev), rw,
1252 (sector + count)>>1, minorsize);
1254 bh->b_end_io(bh, 0);
1255 return;
1256 }
1257 }
1259 /*
1260 * Resolve the mapping until finished. (drivers are
1261 * still free to implement/resolve their own stacking
1262 * by explicitly returning 0)
1263 */
1264 /* NOTE: we don't repeat the blk_size check for each new device.
1265 * Stacking drivers are expected to know what they are doing.
1266 */
1267 do {
1268 q = __blk_get_queue(bh->b_rdev);
1269 if (!q) {
1270 printk(KERN_ERR
1271 "generic_make_request: Trying to access "
1272 "nonexistent block-device %s (%ld)\n",
1273 kdevname(bh->b_rdev), bh->b_rsector);
1274 buffer_IO_error(bh);
1275 break;
1276 }
1277 } while (q->make_request_fn(q, rw, bh));
1278 }
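/*
 * Illustrative sketch: submitting raw I/O with a privately built buffer_head.
 * The helper below is hypothetical; it assumes a lowmem page and a caller-
 * supplied completion routine, and records the device address in b_dev and
 * b_blocknr as the comment above asks.
 */
#if 0
static void example_read_sector(kdev_t dev, unsigned long sector,
				struct page *page, bh_end_io_t *done)
{
	struct buffer_head *bh = kmalloc(sizeof(*bh), GFP_KERNEL);

	if (bh == NULL)
		return;
	memset(bh, 0, sizeof(*bh));
	init_waitqueue_head(&bh->b_wait);
	bh->b_page = page;
	bh->b_data = page_address(page);	/* lowmem assumed, no bounce needed */
	bh->b_size = 512;
	bh->b_rdev = bh->b_dev = dev;		/* b_rdev may be remapped below us */
	bh->b_rsector = bh->b_blocknr = sector;	/* 512-byte blocks: blocknr == sector */
	bh->b_end_io = done;			/* completion is delivered asynchronously */
	bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
	atomic_set(&bh->b_count, 1);

	generic_make_request(READ, bh);
}
#endif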
1281 /**
1282 * submit_bh: submit a buffer_head to the block device later for I/O
1283 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
1284 * @bh: The &struct buffer_head which describes the I/O
1286 * submit_bh() is very similar in purpose to generic_make_request(), and
1287 * uses that function to do most of the work.
1289 * The extra functionality provided by submit_bh is to determine
1290 * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
1291 * This is appropriate for IO requests that come from the buffer
1292 * cache and page cache which (currently) always use aligned blocks.
1293 */
1294 void submit_bh(int rw, struct buffer_head * bh)
1295 {
1296 int count = bh->b_size >> 9;
1298 if (!test_bit(BH_Lock, &bh->b_state))
1299 BUG();
1301 set_bit(BH_Req, &bh->b_state);
1302 set_bit(BH_Launder, &bh->b_state);
1304 /*
1305 * First step, 'identity mapping' - RAID or LVM might
1306 * further remap this.
1307 */
1308 bh->b_rdev = bh->b_dev;
1309 bh->b_rsector = bh->b_blocknr * count;
1311 get_bh(bh);
1312 generic_make_request(rw, bh);
1314 /* fix race condition with wait_on_buffer() */
1315 smp_mb(); /* spin_unlock may have inclusive semantics */
1316 if (waitqueue_active(&bh->b_wait))
1317 wake_up(&bh->b_wait);
1319 if (block_dump)
1320 printk(KERN_DEBUG "%s: %s block %lu/%u on %s\n", current->comm, rw == WRITE ? "WRITE" : "READ", bh->b_rsector, count, kdevname(bh->b_rdev));
1322 put_bh(bh);
1323 switch (rw) {
1324 case WRITE:
1325 kstat.pgpgout += count;
1326 break;
1327 default:
1328 kstat.pgpgin += count;
1329 break;
1330 }
1331 }
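/*
 * Illustrative sketch (hypothetical helper): a synchronous metadata read via
 * submit_bh(), essentially what bread() does with the buffer cache.
 */
#if 0
static struct buffer_head *example_read_block(kdev_t dev, int block, int size)
{
	struct buffer_head *bh = getblk(dev, block, size);

	if (!buffer_uptodate(bh)) {
		lock_buffer(bh);		/* submit_bh() insists on BH_Lock */
		if (buffer_uptodate(bh)) {
			unlock_buffer(bh);	/* someone else read it meanwhile */
			return bh;
		}
		bh->b_end_io = end_buffer_io_sync;
		submit_bh(READ, bh);
		wait_on_buffer(bh);
	}
	return bh;				/* caller brelse()s the buffer */
}
#endif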
1333 /**
1334 * ll_rw_block: low-level access to block devices
1335 * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
1336 * @nr: number of &struct buffer_heads in the array
1337 * @bhs: array of pointers to &struct buffer_head
1339 * ll_rw_block() takes an array of pointers to &struct buffer_heads,
1340 * and requests an I/O operation on them, either a %READ or a %WRITE.
1341 * The third %READA option is described in the documentation for
1342 * generic_make_request() which ll_rw_block() calls.
1344 * This function provides extra functionality that is not in
1345 * generic_make_request() that is relevant to buffers in the buffer
1346 * cache or page cache. In particular it drops any buffer that it
1347 * cannot get a lock on (with the BH_Lock state bit), any buffer that
1348 * appears to be clean when doing a write request, and any buffer that
1349 * appears to be up-to-date when doing a read request. Further it marks
1350 * as clean buffers that are processed for writing (the buffer cache
1351 * won't assume that they are actually clean until the buffer gets
1352 * unlocked).
1354 * ll_rw_block sets b_end_io to a simple completion handler that marks
1355 * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
1356 * any waiters. A client that needs a more interesting completion
1357 * routine should call submit_bh() (or generic_make_request())
1358 * directly.
1360 * Caveat:
1361 * All of the buffers must be for the same device, and must also be
1362 * of the current approved size for the device. */
1364 void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
1365 {
1366 unsigned int major;
1367 int correct_size;
1368 int i;
1370 if (!nr)
1371 return;
1373 major = MAJOR(bhs[0]->b_dev);
1375 /* Determine correct block size for this device. */
1376 correct_size = get_hardsect_size(bhs[0]->b_dev);
1378 /* Verify requested block sizes. */
1379 for (i = 0; i < nr; i++) {
1380 struct buffer_head *bh = bhs[i];
1381 if (bh->b_size % correct_size) {
1382 printk(KERN_NOTICE "ll_rw_block: device %s: "
1383 "only %d-char blocks implemented (%u)\n",
1384 kdevname(bhs[0]->b_dev),
1385 correct_size, bh->b_size);
1386 goto sorry;
1387 }
1388 }
1390 if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
1391 printk(KERN_NOTICE "Can't write to read-only device %s\n",
1392 kdevname(bhs[0]->b_dev));
1393 goto sorry;
1394 }
1396 for (i = 0; i < nr; i++) {
1397 struct buffer_head *bh = bhs[i];
1399 lock_buffer(bh);
1401 /* We have the buffer lock */
1402 atomic_inc(&bh->b_count);
1403 bh->b_end_io = end_buffer_io_sync;
1405 switch(rw) {
1406 case WRITE:
1407 if (!atomic_set_buffer_clean(bh))
1408 /* Hmmph! Nothing to write */
1409 goto end_io;
1410 __mark_buffer_clean(bh);
1411 break;
1413 case READA:
1414 case READ:
1415 if (buffer_uptodate(bh))
1416 /* Hmmph! Already have it */
1417 goto end_io;
1418 break;
1419 default:
1420 BUG();
1421 end_io:
1422 bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1423 continue;
1424 }
1426 submit_bh(rw, bh);
1427 }
1428 return;
1430 sorry:
1431 /* Make sure we don't get infinite dirty retries.. */
1432 for (i = 0; i < nr; i++)
1433 mark_buffer_clean(bhs[i]);
1434 }
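/*
 * Illustrative sketch (hypothetical helper): writing out a small batch of
 * buffers with ll_rw_block() and waiting for them.
 */
#if 0
static void example_flush_buffers(struct buffer_head *bhs[], int nr)
{
	int i;

	/* clean or locked buffers are skipped by ll_rw_block() itself */
	ll_rw_block(WRITE, nr, bhs);

	for (i = 0; i < nr; i++)
		wait_on_buffer(bhs[i]);	/* completions go through end_buffer_io_sync */
}
#endif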
1436 #ifdef CONFIG_STRAM_SWAP
1437 extern int stram_device_init (void);
1438 #endif
1440 static void blk_writeback_timer(unsigned long data)
1441 {
1442 wakeup_bdflush();
1443 wakeup_kupdate();
1444 }
1446 /**
1447 * end_that_request_first - end I/O on one buffer.
1448 * @req: the request being processed
1449 * @uptodate: 0 for I/O error
1450 * @name: the name printed for an I/O error
1452 * Description:
1453 * Ends I/O on the first buffer attached to @req, and sets it up
1454 * for the next buffer_head (if any) in the cluster.
1456 * Return:
1457 * 0 - we are done with this request, call end_that_request_last()
1458 * 1 - still buffers pending for this request
1460 * Caveat:
1461 * Drivers implementing their own end_request handling must call
1462 * blk_finished_io() appropriately.
1463 **/
1465 int end_that_request_first (struct request *req, int uptodate, char *name)
1466 {
1467 struct buffer_head * bh;
1468 int nsect;
1470 req->errors = 0;
1471 if (!uptodate)
1472 printk("end_request: I/O error, dev %s (%s), sector %lu\n",
1473 kdevname(req->rq_dev), name, req->sector);
1475 if ((bh = req->bh) != NULL) {
1476 nsect = bh->b_size >> 9;
1477 blk_finished_io(nsect);
1478 blk_finished_sectors(req, nsect);
1479 req->bh = bh->b_reqnext;
1480 bh->b_reqnext = NULL;
1481 bh->b_end_io(bh, uptodate);
1482 if ((bh = req->bh) != NULL) {
1483 req->hard_sector += nsect;
1484 req->hard_nr_sectors -= nsect;
1485 req->sector = req->hard_sector;
1486 req->nr_sectors = req->hard_nr_sectors;
1488 req->current_nr_sectors = bh->b_size >> 9;
1489 req->hard_cur_sectors = req->current_nr_sectors;
1490 if (req->nr_sectors < req->current_nr_sectors) {
1491 req->nr_sectors = req->current_nr_sectors;
1492 printk("end_request: buffer-list destroyed\n");
1494 req->buffer = bh->b_data;
1495 return 1;
1498 return 0;
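/*
 * Illustrative sketch (hypothetical driver): completing I/O from an interrupt
 * handler with the two helpers above, assuming the default head-active
 * convention so the request being serviced is still at the head of the queue.
 */
#if 0
static void example_irq_complete(request_queue_t *q, int uptodate)
{
	struct request *req;
	unsigned long flags;

	spin_lock_irqsave(&io_request_lock, flags);
	req = blkdev_entry_next_request(&q->queue_head);
	if (!end_that_request_first(req, uptodate, "example")) {
		/* no buffers left: take it off the queue and retire it */
		blkdev_dequeue_request(req);
		end_that_request_last(req);
	}
	spin_unlock_irqrestore(&io_request_lock, flags);
}
#endif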
1501 extern int laptop_mode;
1503 void end_that_request_last(struct request *req)
1504 {
1505 struct completion *waiting = req->waiting;
1507 /*
1508 * schedule the writeout of pending dirty data when the disk is idle
1509 */
1510 if (laptop_mode && req->cmd == READ)
1511 mod_timer(&writeback_timer, jiffies + 5 * HZ);
1513 req_finished_io(req);
1514 blkdev_release_request(req);
1515 if (waiting)
1516 complete(waiting);
1517 }
1519 int __init blk_dev_init(void)
1520 {
1521 struct blk_dev_struct *dev;
1523 request_cachep = kmem_cache_create("blkdev_requests",
1524 sizeof(struct request),
1525 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
1527 if (!request_cachep)
1528 panic("Can't create request pool slab cache\n");
1530 for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
1531 dev->queue = NULL;
1533 memset(ro_bits,0,sizeof(ro_bits));
1534 memset(max_readahead, 0, sizeof(max_readahead));
1535 memset(max_sectors, 0, sizeof(max_sectors));
1537 blk_max_low_pfn = max_low_pfn - 1;
1538 blk_max_pfn = max_pfn - 1;
1540 init_timer(&writeback_timer);
1541 writeback_timer.function = blk_writeback_timer;
1543 #ifdef CONFIG_AMIGA_Z2RAM
1544 z2_init();
1545 #endif
1546 #ifdef CONFIG_STRAM_SWAP
1547 stram_device_init();
1548 #endif
1549 #ifdef CONFIG_ISP16_CDI
1550 isp16_init();
1551 #endif
1552 #ifdef CONFIG_BLK_DEV_PS2
1553 ps2esdi_init();
1554 #endif
1555 #ifdef CONFIG_BLK_DEV_XD
1556 xd_init();
1557 #endif
1558 #ifdef CONFIG_BLK_DEV_MFM
1559 mfm_init();
1560 #endif
1561 #ifdef CONFIG_PARIDE
1562 { extern void paride_init(void); paride_init(); };
1563 #endif
1564 #ifdef CONFIG_MAC_FLOPPY
1565 swim3_init();
1566 #endif
1567 #ifdef CONFIG_BLK_DEV_SWIM_IOP
1568 swimiop_init();
1569 #endif
1570 #ifdef CONFIG_AMIGA_FLOPPY
1571 amiga_floppy_init();
1572 #endif
1573 #ifdef CONFIG_ATARI_FLOPPY
1574 atari_floppy_init();
1575 #endif
1576 #ifdef CONFIG_BLK_DEV_FD
1577 floppy_init();
1578 #else
1579 #if defined(__i386__) && !defined(CONFIG_XEN) /* Do we even need this? */
1580 outb_p(0xc, 0x3f2);
1581 #endif
1582 #endif
1583 #ifdef CONFIG_CDU31A
1584 cdu31a_init();
1585 #endif
1586 #ifdef CONFIG_ATARI_ACSI
1587 acsi_init();
1588 #endif
1589 #ifdef CONFIG_MCD
1590 mcd_init();
1591 #endif
1592 #ifdef CONFIG_MCDX
1593 mcdx_init();
1594 #endif
1595 #ifdef CONFIG_SBPCD
1596 sbpcd_init();
1597 #endif
1598 #ifdef CONFIG_AZTCD
1599 aztcd_init();
1600 #endif
1601 #ifdef CONFIG_CDU535
1602 sony535_init();
1603 #endif
1604 #ifdef CONFIG_GSCD
1605 gscd_init();
1606 #endif
1607 #ifdef CONFIG_CM206
1608 cm206_init();
1609 #endif
1610 #ifdef CONFIG_OPTCD
1611 optcd_init();
1612 #endif
1613 #ifdef CONFIG_SJCD
1614 sjcd_init();
1615 #endif
1616 #ifdef CONFIG_APBLOCK
1617 ap_init();
1618 #endif
1619 #ifdef CONFIG_DDV
1620 ddv_init();
1621 #endif
1622 #ifdef CONFIG_MDISK
1623 mdisk_init();
1624 #endif
1625 #ifdef CONFIG_DASD
1626 dasd_init();
1627 #endif
1628 #if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK)
1629 tapeblock_init();
1630 #endif
1631 #ifdef CONFIG_BLK_DEV_XPRAM
1632 xpram_init();
1633 #endif
1635 #ifdef CONFIG_SUN_JSFLASH
1636 jsfd_init();
1637 #endif
1639 #if defined(CONFIG_XEN_BLKDEV_FRONTEND)
1640 xlblk_init();
1641 #endif
1643 return 0;
1644 };
1646 EXPORT_SYMBOL(io_request_lock);
1647 EXPORT_SYMBOL(end_that_request_first);
1648 EXPORT_SYMBOL(end_that_request_last);
1649 EXPORT_SYMBOL(blk_grow_request_list);
1650 EXPORT_SYMBOL(blk_init_queue);
1651 EXPORT_SYMBOL(blk_get_queue);
1652 EXPORT_SYMBOL(blk_cleanup_queue);
1653 EXPORT_SYMBOL(blk_queue_headactive);
1654 EXPORT_SYMBOL(blk_queue_throttle_sectors);
1655 EXPORT_SYMBOL(blk_queue_make_request);
1656 EXPORT_SYMBOL(generic_make_request);
1657 EXPORT_SYMBOL(blkdev_release_request);
1658 EXPORT_SYMBOL(generic_unplug_device);
1659 EXPORT_SYMBOL(blk_queue_bounce_limit);
1660 EXPORT_SYMBOL(blk_max_low_pfn);
1661 EXPORT_SYMBOL(blk_max_pfn);
1662 EXPORT_SYMBOL(blk_seg_merge_ok);
1663 EXPORT_SYMBOL(blk_nohighio);