ia64/xen-unstable

changeset 221:a9945f054eb9

bitkeeper revision 1.79 (3e5537e5Vnw-9Lpy24SyRJVm4xxZ3w)

xl_block.c, xen_block.c, process.c:
Fixes to Xenolinux blkdev handling. Still rather broken though :-(
newdom:
Change mode to -rwxrwxr-x
author kaf24@labyrinth.cl.cam.ac.uk
date Thu Feb 20 20:17:41 2003 +0000 (2003-02-20)
parents 7f19fcd72411
children 7d86ebeca986
files tools/domain_builder/newdom xen-2.4.16/arch/i386/process.c xen-2.4.16/drivers/block/xen_block.c xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c
line diff
     2.1 --- a/xen-2.4.16/arch/i386/process.c	Thu Feb 20 16:02:49 2003 +0000
     2.2 +++ b/xen-2.4.16/arch/i386/process.c	Thu Feb 20 20:17:41 2003 +0000
     2.3 @@ -19,6 +19,7 @@
     2.4  #include <xeno/smp.h>
     2.5  #include <asm/ptrace.h>
     2.6  #include <xeno/delay.h>
     2.7 +#include <xeno/interrupt.h>
     2.8  #include <asm/mc146818rtc.h>
     2.9  
    2.10  #include <asm/system.h>
    2.11 @@ -52,7 +53,7 @@ static void default_idle(void)
    2.12  {
    2.13      if (!hlt_counter) {
    2.14          __cli();
    2.15 -        if (!current->hyp_events)
    2.16 +        if (!current->hyp_events && !softirq_pending(smp_processor_id()))
    2.17              safe_halt();
    2.18          else
    2.19              __sti();
    2.20 @@ -67,6 +68,8 @@ static void default_idle(void)
    2.21   */
    2.22  void cpu_idle (void)
    2.23  {
    2.24 +    int cpu = smp_processor_id();
    2.25 +
    2.26      ASSERT(current->domain == IDLE_DOMAIN_ID);
    2.27  
    2.28      current->has_cpu = 1;
    2.29 @@ -82,9 +85,10 @@ void cpu_idle (void)
    2.30  
    2.31      for ( ; ; )
    2.32      {
    2.33 -        while (!current->hyp_events)
    2.34 +        while (!current->hyp_events && !softirq_pending(cpu))
    2.35              default_idle();
    2.36          do_hyp_events();
    2.37 +        do_softirq();
    2.38      }
    2.39  }
    2.40  
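The process.c hunks close a lost-wakeup race in the idle loop: the test for
pending work must be (re)done with interrupts disabled, and safe_halt() must
re-enable interrupts and halt atomically (sti;hlt), otherwise an event or
softirq raised between the test and the hlt is slept through. A minimal
standalone sketch of the pattern, using illustrative stub names rather than
the real Xen primitives:

    #include <stdbool.h>

    static volatile bool work_pending;      /* set from interrupt context   */

    static void local_irq_disable(void) { /* stands in for __cli()        */ }
    static void local_irq_enable(void)  { /* stands in for __sti()        */ }
    static void halt_until_interrupt(void)
    {
        /* stands in for safe_halt(): enable IRQs and halt in one step    */
    }
    static void service_work(void) { work_pending = false; }

    static void idle_loop(void)
    {
        for ( ; ; )
        {
            local_irq_disable();
            if ( !work_pending )         /* hyp_events and softirq check   */
                halt_until_interrupt();  /* no window between test and hlt */
            else
                local_irq_enable();
            service_work();              /* do_hyp_events(); do_softirq()  */
        }
    }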
     3.1 --- a/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 16:02:49 2003 +0000
     3.2 +++ b/xen-2.4.16/drivers/block/xen_block.c	Thu Feb 20 20:17:41 2003 +0000
     3.3 @@ -251,14 +251,20 @@ static void dispatch_probe_block_io(stru
     3.4      extern void ide_probe_devices(xen_disk_info_t *xdi);
     3.5      blk_ring_t *blk_ring = p->blk_ring_base;
     3.6      xen_disk_info_t *xdi;
     3.7 +    unsigned long flags, cpu_mask;
     3.8      
     3.9      xdi = phys_to_virt((unsigned long)blk_ring->req_ring[index].buffer);
    3.10      
    3.11      ide_probe_devices(xdi);
    3.12  
    3.13 +    spin_lock_irqsave(&p->blk_ring_lock, flags);
    3.14      blk_ring->resp_ring[blk_ring->resp_prod].id = blk_ring->req_ring[index].id;
    3.15      blk_ring->resp_ring[blk_ring->resp_prod].status = 0;
    3.16      blk_ring->resp_prod = BLK_RESP_RING_INC(blk_ring->resp_prod);
    3.17 +    spin_unlock_irqrestore(&p->blk_ring_lock, flags);
    3.18 +
    3.19 +    cpu_mask = mark_guest_event(p, _EVENT_BLK_RESP);
    3.20 +    guest_event_notify(cpu_mask); 
    3.21  }
    3.22  
    3.23  static void dispatch_rw_block_io(struct task_struct *p, int index)
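The xen_block.c change brings the probe path in line with the data path: the
response-ring slot is filled and resp_prod advanced under blk_ring_lock, so
concurrent dispatchers cannot claim the same slot, and the guest is then
kicked with a block-response event. The producer-side pattern, sketched with
stand-ins for the lock and notification primitives:

    #define RING_SIZE 64                        /* assumed power of two    */
    #define RING_INC(i) (((i) + 1) & (RING_SIZE - 1))

    struct resp_entry { void *id; int status; };

    struct ring_sketch {
        struct resp_entry resp_ring[RING_SIZE];
        unsigned int      resp_prod;            /* advanced by producer    */
        unsigned int      resp_cons;            /* advanced by consumer    */
    };

    static void lock_take(void)    { /* spin_lock_irqsave() stand-in      */ }
    static void lock_drop(void)    { /* spin_unlock_irqrestore() stand-in */ }
    static void notify_guest(void) { /* mark_guest_event() + notify       */ }

    static void push_response(struct ring_sketch *r, void *id, int status)
    {
        lock_take();
        r->resp_ring[r->resp_prod].id     = id;  /* fill the slot first    */
        r->resp_ring[r->resp_prod].status = status;
        r->resp_prod = RING_INC(r->resp_prod);   /* then publish it        */
        lock_drop();
        notify_guest();                          /* kick the consumer last */
    }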
     4.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c	Thu Feb 20 16:02:49 2003 +0000
     4.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/block/xl_block.c	Thu Feb 20 20:17:41 2003 +0000
     4.3 @@ -55,6 +55,13 @@ xlblk_device_t xlblk_device;
     4.4  #define XLBLK_DEBUG       0
     4.5  #define XLBLK_DEBUG_IOCTL 0
     4.6  
     4.7 +/* Our per-request identifier is a discriminated union, using LSB. */
     4.8 +#define BH_TO_ID(_bh)   ((void *)(_bh))
     4.9 +#define REQ_TO_ID(_req) ((void *)((unsigned long)(_req) | 1))
    4.10 +#define ID_IS_REQ(_id)  ((int)(_id) & 1)
    4.11 +#define ID_TO_BH(_id)   ((struct buffer_head *)(_id))
    4.12 +#define ID_TO_REQ(_id)  ((struct request *)((unsigned long)(_id) & ~1))
    4.13 +
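    /*
     * (Sketch, not part of the patch.) The tag fits in bit 0 because
     * kmalloc'd struct request and struct buffer_head pointers are
     * word-aligned, so the LSB of a genuine pointer is always clear.
     * A consumer demultiplexes with hypothetical helpers like so:
     *
     *     void *id = REQ_TO_ID(req);            // tagged: LSB set
     *     if ( ID_IS_REQ(id) )
     *         complete_request(ID_TO_REQ(id));  // recovers req exactly
     *     else
     *         complete_bh(ID_TO_BH(id));        // untagged buffer_head
     */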
    4.14  static blk_ring_t *blk_ring;
    4.15  
    4.16  /* 
    4.17 @@ -208,8 +215,7 @@ void hypervisor_request(void *         i
    4.18      void *buffer_pa, *buffer_ma; 
    4.19      kdev_t phys_device = (kdev_t) 0;
    4.20      unsigned long sector_number = 0;
    4.21 -    struct gendisk *gd; 
    4.22 -    
    4.23 +    struct gendisk *gd;     
    4.24  
    4.25      buffer_pa = (void *)virt_to_phys(buffer); 
    4.26      buffer_ma = (void *)phys_to_machine((unsigned long)buffer_pa); 
    4.27 @@ -222,7 +228,6 @@ void hypervisor_request(void *         i
    4.28  
    4.29  	/*
    4.30 	 * map logical major device to the physical device number 
    4.31 -	 *
    4.32  	 *           XLBLK_MAJOR -> IDE0_MAJOR  (123 -> 3)
    4.33  	 */
    4.34  	if (MAJOR(device) == XLBLK_MAJOR) 
    4.35 @@ -234,14 +239,11 @@ void hypervisor_request(void *         i
    4.36  	}
    4.37  
    4.38  	/*
    4.39 -	 * compute real buffer location on disk
    4.40 -	 * (from ll_rw_block.c::submit_bh)
    4.41 +	 * compute real buffer location on disk (from ll_rw_block.c::submit_bh)
    4.42  	 */
    4.43 +	sector_number = block_number;
    4.44  
    4.45 -
    4.46 -	sector_number = block_number /* * block_size >> 9 */;
    4.47 -
    4.48 -	if((gd = (struct gendisk *)xen_disk_info.disks[0].gendisk) != NULL)
    4.49 +	if ( (gd = (struct gendisk *)xen_disk_info.disks[0].gendisk) != NULL )
    4.50  	    sector_number += gd->part[MINOR(device)&IDE_PARTN_MASK].start_sect;
    4.51      }
    4.52  
    4.53 @@ -265,8 +267,8 @@ void hypervisor_request(void *         i
    4.54  
    4.55      blk_ring->req_prod = BLK_REQ_RING_INC(blk_ring->req_prod);
    4.56  
    4.57 -    switch(mode) { 
    4.58 -
    4.59 +    switch ( mode )
    4.60 +    { 
    4.61      case XEN_BLOCK_SYNC:  
    4.62  	/* trap into hypervisor */
    4.63  	HYPERVISOR_block_io_op();
    4.64 @@ -299,49 +301,53 @@ void hypervisor_request(void *         i
    4.65  static void do_xlblk_request (request_queue_t *rq)
    4.66  {
    4.67      struct request *req;
    4.68 +    struct buffer_head *bh;
    4.69 +    unsigned long offset;
    4.70 +    unsigned long length;
    4.71 +    int rw, nsect;
    4.72      
    4.73 -    if (XLBLK_DEBUG)
    4.74 +    if ( XLBLK_DEBUG )
    4.75  	printk (KERN_ALERT "xlblk.c::do_xlblk_request for '%s'\n", 
    4.76  		DEVICE_NAME); 
    4.77 -    
    4.78 -    while (!QUEUE_EMPTY)
    4.79 +
    4.80 +    /*
    4.81 +     * XXXXXXX KAF: This is really inefficient!!!!
    4.82 +     * 
    4.83 +     * What we really want is a scatter/gather interface, where each 
    4.84 +     * request maps onto one scatter/gather descriptor.
    4.85 +     * 
    4.86 +     * We then don't need to worry about buffer_heads getting serviced out
    4.87 +     * of order (because we get one response when an entire request is done).
    4.88 +     * 
    4.89 +     * We should look at SCSI code to see how to queue multiple requests
    4.90 +     * at once. Quite likely we'll have to take charge of the requests and
    4.91 +     * peel them off of the request_queue.
    4.92 +     * 
    4.93 +     * This is all for another day :-)
    4.94 +     * 
    4.95 +     * Just bear in mind that we'd like the following to be a loop!
    4.96 +     */
    4.97 +    /* while*/ if ( !QUEUE_EMPTY )
    4.98      {
    4.99 -	struct buffer_head *bh;
   4.100 -	unsigned long offset;
   4.101 -	unsigned long length;
   4.102 -	int rw;
   4.103 -	
   4.104 -	if(rq->plugged) 
   4.105 -	    return ; 
   4.106 -	
   4.107  	req = CURRENT;
   4.108 -	
   4.109 +	if ( rq->plugged || (req == NULL) ) return; 
   4.110 +		
   4.111  	if (XLBLK_DEBUG) 
   4.112  	    printk (KERN_ALERT
   4.113 -		    "do_xlblk_request %p: cmd %i, sec %lx, (%li) bh:%p\n",
   4.114 +		    "do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
   4.115  		    req, req->cmd, req->sector,
   4.116 -		    req->current_nr_sectors, req->bh);
   4.117 +		    req->current_nr_sectors, req->nr_sectors, req->bh);
   4.118  	
   4.119 -	/* is there space in the tx ring for this request?
   4.120 -	 * if the ring is full, then leave the request in the queue
   4.121 -	 *
   4.122 -	 * THIS IS A BIT BOGUS SINCE XEN COULD BE UPDATING REQ_CONS
   4.123 -	 * AT THE SAME TIME
   4.124 -	 */
   4.125 -        if (BLK_RESP_RING_INC(blk_ring->req_prod) == blk_ring->req_cons)
   4.126 -        {
   4.127 -            printk (KERN_ALERT "OOPS, TX LOOKS FULL  cons: %d  prod: %d\n",
   4.128 -                    blk_ring->req_cons, blk_ring->req_prod);
   4.129 -            BUG(); 
   4.130 -            break;
   4.131 -        }
   4.132  	
   4.133  	req->errors = 0;
   4.134 -	blkdev_dequeue_request(req);
   4.135 -	
   4.136 -	bh = req->bh;
   4.137 -	
   4.138 -	while (bh)
   4.139 +
   4.140 +        bh = req->bh;
   4.141 +        /*
   4.142 +         * XXX KAF: I get read errors if I turn the following into a loop.
   4.143 +         * Why doesn't it work? I should even be able to handle out-of-order
   4.144 +         * responses... :-(
   4.145 +         */
   4.146 +        /* while */ if ( bh != NULL )
   4.147  	{
   4.148  	    offset = bh->b_rsector << 9;
   4.149  	    length = bh->b_size;
   4.150 @@ -354,18 +360,37 @@ static void do_xlblk_request (request_qu
   4.151  		BUG();
   4.152  	    }
   4.153  
   4.154 -	    hypervisor_request (req, rw == READ ? 
   4.155 -				XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
   4.156 -				bh->b_data, bh->b_rsector, bh->b_size, 
   4.157 -				bh->b_dev, XEN_BLOCK_SYNC);
   4.158 -	    bh = bh->b_reqnext;
   4.159 -	}
   4.160 +            if ( bh->b_reqnext != NULL )
   4.161 +            {
   4.162 +                hypervisor_request(
   4.163 +                    BH_TO_ID(bh),
   4.164 +                    rw == READ ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
   4.165 +                    bh->b_data, bh->b_rsector, bh->b_size, 
   4.166 +                    bh->b_dev, XEN_BLOCK_SYNC);
   4.167  
   4.168 -	blkdev_dequeue_request(req);
   4.169 -
   4.170 +                /* From ll_rw_blk.c:end_that_request_first(). */
   4.171 +                nsect = bh->b_size >> 9;
   4.172 +                req->bh = bh->b_reqnext;
   4.173 +                bh->b_reqnext = NULL;
   4.174 +                bh = req->bh;
   4.175 +                req->hard_sector += nsect;
   4.176 +                req->hard_nr_sectors -= nsect;
   4.177 +                req->sector = req->hard_sector;
   4.178 +                req->nr_sectors = req->hard_nr_sectors;
   4.179 +                req->current_nr_sectors = bh->b_size >> 9;
   4.180 +                req->buffer = bh->b_data;
   4.181 +            }
   4.182 +            else
   4.183 +            {
   4.184 +                hypervisor_request(
   4.185 +                    REQ_TO_ID(req),
   4.186 +                    rw == READ ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
   4.187 +                    bh->b_data, bh->b_rsector, bh->b_size, 
   4.188 +                    bh->b_dev, XEN_BLOCK_SYNC);
   4.189 +                bh = NULL;
   4.190 +            }
   4.191 +        }
   4.192      }
   4.193 -
   4.194 -    return;
   4.195  }
   4.196  
   4.197  
   4.198 @@ -382,26 +407,43 @@ static void xlblk_response_int(int irq, 
   4.199  {
   4.200      struct request *req;
   4.201      int loop;
   4.202 -    u_long flags; 
   4.203 +    unsigned long flags; 
   4.204 +    struct buffer_head *bh;
   4.205      
   4.206 -    for (loop = blk_ring->resp_cons;
   4.207 -	 loop != blk_ring->resp_prod;
   4.208 -	 loop = BLK_RESP_RING_INC(loop)) {
   4.209 +    spin_lock_irqsave(&io_request_lock, flags);	    
   4.210  
   4.211 +    for ( loop = blk_ring->resp_cons;
   4.212 +	  loop != blk_ring->resp_prod;
   4.213 +	  loop = BLK_RESP_RING_INC(loop) )
   4.214 +    {
   4.215  	blk_ring_resp_entry_t *bret = &blk_ring->resp_ring[loop];
   4.216 -	
   4.217 -	req = (struct request *)bret->id;
   4.218 -        if ( req == NULL ) continue; /* probes have NULL id */
   4.219 +
   4.220 +        if ( bret->id == NULL ) continue; /* probes have NULL id */
   4.221  
   4.222 -	spin_lock_irqsave(&io_request_lock, flags);
   4.223 -	    
   4.224 -	if (!end_that_request_first(req, 1, "XenBlk"))
   4.225 -	    end_that_request_last(req);
   4.226 -	spin_unlock_irqrestore(&io_request_lock, flags);
   4.227 -	
   4.228 +	if ( ID_IS_REQ(bret->id) )
   4.229 +        {
   4.230 +            req = ID_TO_REQ(bret->id);
   4.231 +            if ( end_that_request_first(req, 1, "XenBlk") ) BUG();
   4.232 +            blkdev_dequeue_request(req);
   4.233 +            end_that_request_last(req);
   4.234 +	}
   4.235 +        else
   4.236 +        {
   4.237 +            bh = ID_TO_BH(bret->id);
   4.238 +            bh->b_end_io(bh, 1);
   4.239 +        }
   4.240      }
   4.241      
   4.242      blk_ring->resp_cons = loop;
   4.243 +
   4.244 +    /*
   4.245 +     * KAF: I believe this is safe. It also appears to be necessary if
   4.246 +     * we left any data outstanding when we last exited do_xlblk_request.
   4.247 +     * Otherwise we just hang...
   4.248 +     */
   4.249 +    do_xlblk_request(BLK_DEFAULT_QUEUE(MAJOR_NR));
   4.250 +    
   4.251 +    spin_unlock_irqrestore(&io_request_lock, flags);
   4.252  }
   4.253  
   4.254  
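On the consumer side, xlblk_response_int() drains resp_cons up to resp_prod,
skips the NULL IDs left by probes, and demultiplexes each completion on the
LSB tag defined earlier in the file. The loop, condensed (ring_sketch and
RING_INC are from the producer sketch above; the finish_* helpers are
hypothetical):

    static void drain_responses(struct ring_sketch *r)
    {
        unsigned int i;

        for ( i = r->resp_cons; i != r->resp_prod; i = RING_INC(i) )
        {
            void *id = r->resp_ring[i].id;
            if ( id == NULL ) continue;             /* probe response     */
            if ( ID_IS_REQ(id) )
                finish_whole_request(ID_TO_REQ(id));
            else
                finish_buffer_head(ID_TO_BH(id));
        }
        r->resp_cons = i;                           /* acknowledge drain  */
    }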
   4.255 @@ -427,11 +469,13 @@ int __init xlblk_init(void)
   4.256  
   4.257      hypervisor_request(NULL, XEN_BLOCK_PROBE, (char *) &xen_disk_info,
   4.258  		       0, 0, (kdev_t) 0, XEN_BLOCK_SYNC);
   4.259 -    for (loop = 0; loop < xen_disk_info.count; loop++) 
   4.260 +    while ( blk_ring->resp_prod != 1 ) barrier();
   4.261 +    for ( loop = 0; loop < xen_disk_info.count; loop++ )
   4.262 +    { 
   4.263  	printk (KERN_ALERT "  %2d: type: %d, capacity: %ld\n",
   4.264  		loop, xen_disk_info.disks[loop].type, 
   4.265  		xen_disk_info.disks[loop].capacity);
   4.266 -
   4.267 +    }
   4.268      
   4.269      SET_MODULE_OWNER(&xenolinux_block_fops);
   4.270      result = register_blkdev(xlblk_major, "block", &xenolinux_block_fops);
   4.271 @@ -457,12 +501,16 @@ int __init xlblk_init(void)
   4.272      max_sectors[xlblk_major]   = xlblk_max_sectors;
   4.273  
   4.274      blk_init_queue(BLK_DEFAULT_QUEUE(xlblk_major), do_xlblk_request);
   4.275 -    /* 
   4.276 -    ** XXX SMH: we don't leave req on queue => are happy for elevator
   4.277 -    ** to reorder things including it. (main reason for this decision
   4.278 -    ** is that it works while 'standard' case doesn't. Ho hum). 
   4.279 -    */
   4.280 +
   4.281 +#if 0 /* KAF: We now do the default thing and leave requests on the queue. */
   4.282 +    /*
   4.283 +     * XXX KAF (again): see big XXX comment above. As per SCSI code, we'll
   4.284 +     * probably add this in so that we can peel off multiple outstanding
   4.285 +     * requests from the request queue, giving us easy access to the 
   4.286 +     * real head that still has work to be sent down to Xen.
   4.287 +     */
   4.288      blk_queue_headactive(BLK_DEFAULT_QUEUE(xlblk_major), 0);
   4.289 +#endif
   4.290  
   4.291      xlblk_ide_register_disk(0, xen_disk_info.disks[0].capacity);
   4.292
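A note on the new probe synchronization in xlblk_init(): the probe request is
issued synchronously, then the init path spins until the first response lands
in the ring. resp_prod is advanced by Xen, and barrier() is a compiler barrier
forcing each iteration to re-read it. The idiom, sketched:

    /* Illustrative stand-in: poll a producer index advanced by Xen.      */
    #define compiler_barrier() __asm__ __volatile__ ( "" ::: "memory" )

    static void wait_for_first_response(volatile unsigned int *resp_prod)
    {
        while ( *resp_prod != 1 )
            compiler_barrier();     /* re-load *resp_prod each time round */
    }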