ia64/xen-unstable
linux-2.6-xen-sparse/drivers/xen/blktap/blktap_datapath.c @ 6552:a9873d384da4

Merge.
author    adsharma@los-vmm.sc.intel.com
date      Thu Aug 25 12:24:48 2005 -0700 (2005-08-25)
parents   112d44270733 fa0754a9f64f
children  dfaf788ab18c

/******************************************************************************
 * blktap_datapath.c
 *
 * XenLinux virtual block-device tap.
 * Block request routing data path.
 *
 * Copyright (c) 2004, Andrew Warfield
 * -- see full header in blktap.c
 */

#include "blktap.h"
#include <asm-xen/evtchn.h>

/*-----[ The data paths ]-------------------------------------------------*/

/* Connection to a single backend domain. */
blkif_front_ring_t blktap_be_ring;

/*-----[ Tracking active requests ]---------------------------------------*/

/* this must be the same as MAX_PENDING_REQS in blkback.c */
#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U)

active_req_t     active_reqs[MAX_ACTIVE_REQS];
ACTIVE_RING_IDX  active_req_ring[MAX_ACTIVE_REQS];
spinlock_t       active_req_lock = SPIN_LOCK_UNLOCKED;
ACTIVE_RING_IDX  active_prod, active_cons;
#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
#define ACTIVE_IDX(_ar)     (_ar - active_reqs)
#define NR_ACTIVE_REQS      (MAX_ACTIVE_REQS - active_prod + active_cons)
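
/*
 * The bookkeeping above forms a circular free list: active_req_ring[]
 * holds indices into active_reqs[].  active_reqs_init() marks every slot
 * free (active_cons = 0, active_prod = MAX_ACTIVE_REQS), get_active_req()
 * consumes a free index at active_cons, and free_active_req() returns one
 * at active_prod, so NR_ACTIVE_REQS counts the requests currently in
 * flight through the tap.
 */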

inline active_req_t *get_active_req(void)
{
    ACTIVE_RING_IDX idx;
    active_req_t *ar;
    unsigned long flags;

    ASSERT(active_cons != active_prod);

    spin_lock_irqsave(&active_req_lock, flags);
    idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)];
    ar = &active_reqs[idx];
    spin_unlock_irqrestore(&active_req_lock, flags);

    return ar;
}

inline void free_active_req(active_req_t *ar)
{
    unsigned long flags;

    spin_lock_irqsave(&active_req_lock, flags);
    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
    spin_unlock_irqrestore(&active_req_lock, flags);
}

active_req_t *lookup_active_req(ACTIVE_RING_IDX idx)
{
    return &active_reqs[idx];
}

void active_reqs_init(void)
{
    ACTIVE_RING_IDX i;

    active_cons = 0;
    active_prod = MAX_ACTIVE_REQS;
    memset(active_reqs, 0, sizeof(active_reqs));
    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
        active_req_ring[i] = i;
}

/* Requests passing through the tap to the backend hijack the id field
 * in the request message.  In it we put the AR index _AND_ the fe domid.
 * The domid is used by the backend to map the pages properly.
 */

static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
{
    return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) );
}
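
/*
 * A quick sketch of the encoding: the frontend domid occupies the upper
 * 16 bits and the active-request index the lower 16.  The index half is
 * recovered elsewhere in this file with ID_TO_IDX(); a matching domid
 * accessor (e.g. ID_TO_DOM(id) == (id >> 16)) is assumed to live in
 * blktap.h alongside it.
 *
 *     unsigned long id = MAKE_ID(5, 3);      -- yields 0x00050003
 *     ACTIVE_RING_IDX idx = ID_TO_IDX(id);   -- 3, an index into active_reqs[]
 */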

/*-----[ Ring helpers ]---------------------------------------------------*/

static void maybe_trigger_blktap_schedule(void);

inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp)
{
    blkif_response_t *resp_d;
    active_req_t *ar;

    ar = &active_reqs[ID_TO_IDX(rsp->id)];
    rsp->id = ar->id;

    resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
                               blkif->blk_ring.rsp_prod_pvt);
    memcpy(resp_d, rsp, sizeof(blkif_response_t));
    wmb();
    blkif->blk_ring.rsp_prod_pvt++;

    blkif_put(ar->blkif);
    free_active_req(ar);

    return 0;
}

inline int write_req_to_be_ring(blkif_request_t *req)
{
    blkif_request_t *req_d;

    if ( blktap_be_state != BLKIF_STATE_CONNECTED ) {
        WPRINTK("Tap trying to access an unconnected backend!\n");
        return 0;
    }

    req_d = RING_GET_REQUEST(&blktap_be_ring,
                             blktap_be_ring.req_prod_pvt);
    memcpy(req_d, req, sizeof(blkif_request_t));
    wmb();
    blktap_be_ring.req_prod_pvt++;

    return 0;
}

void kick_fe_domain(blkif_t *blkif)
{
    RING_PUSH_RESPONSES(&blkif->blk_ring);
    notify_via_evtchn(blkif->evtchn);
    DPRINTK("notified FE(dom %u)\n", blkif->domid);

    /* We just freed up a batch of request slots... */
    maybe_trigger_blktap_schedule();
}

void kick_be_domain(void)
{
    if ( blktap_be_state != BLKIF_STATE_CONNECTED )
        return;

    wmb(); /* Ensure that the backend can see the requests. */
    RING_PUSH_REQUESTS(&blktap_be_ring);
    notify_via_evtchn(blktap_be_evtchn);
    DPRINTK("notified BE\n");
}
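
/*
 * Both kick helpers follow the same pattern: RING_PUSH_* publishes the
 * privately accumulated ring entries to the shared ring, and only then is
 * the peer's event channel notified, so the peer takes one interrupt per
 * batch rather than one per entry.
 */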

/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/

/*-----[ Scheduler list maint -from blkback ]--- */

static struct list_head blkio_schedule_list;
static spinlock_t blkio_schedule_list_lock;

static int __on_blkdev_list(blkif_t *blkif)
{
    return blkif->blkdev_list.next != NULL;
}

static void remove_from_blkdev_list(blkif_t *blkif)
{
    unsigned long flags;
    if ( !__on_blkdev_list(blkif) ) return;
    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
    if ( __on_blkdev_list(blkif) )
    {
        list_del(&blkif->blkdev_list);
        blkif->blkdev_list.next = NULL;
        blkif_put(blkif);
    }
    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}

static void add_to_blkdev_list_tail(blkif_t *blkif)
{
    unsigned long flags;
    if ( __on_blkdev_list(blkif) ) return;
    spin_lock_irqsave(&blkio_schedule_list_lock, flags);
    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
    {
        list_add_tail(&blkif->blkdev_list, &blkio_schedule_list);
        blkif_get(blkif);
    }
    spin_unlock_irqrestore(&blkio_schedule_list_lock, flags);
}

/*-----[ Scheduler functions - from blkback ]--- */

static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait);

static int do_block_io_op(blkif_t *blkif, int max_to_do);

static int blkio_schedule(void *arg)
{
    DECLARE_WAITQUEUE(wq, current);

    blkif_t          *blkif;
    struct list_head *ent;

    daemonize("xentapd");

    for ( ; ; )
    {
        /* Wait for work to do. */
        add_wait_queue(&blkio_schedule_wait, &wq);
        set_current_state(TASK_INTERRUPTIBLE);
        if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) ||
             list_empty(&blkio_schedule_list) )
            schedule();
        __set_current_state(TASK_RUNNING);
        remove_wait_queue(&blkio_schedule_wait, &wq);

        /* Queue up a batch of requests. */
        while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) &&
                !list_empty(&blkio_schedule_list) )
        {
            ent = blkio_schedule_list.next;
            blkif = list_entry(ent, blkif_t, blkdev_list);
            blkif_get(blkif);
            remove_from_blkdev_list(blkif);
            if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) )
                add_to_blkdev_list_tail(blkif);
            blkif_put(blkif);
        }
    }
}

static void maybe_trigger_blktap_schedule(void)
{
    /*
     * Needed so that two processes, who together make the following predicate
     * true, don't both read stale values and evaluate the predicate
     * incorrectly. Incredibly unlikely to stall the scheduler on x86, but...
     */
    smp_mb();

    if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS/2)) &&
         !list_empty(&blkio_schedule_list) )
        wake_up(&blkio_schedule_wait);
}

void blkif_deschedule(blkif_t *blkif)
{
    remove_from_blkdev_list(blkif);
}

void __init blkdev_schedule_init(void)
{
    spin_lock_init(&blkio_schedule_list_lock);
    INIT_LIST_HEAD(&blkio_schedule_list);

    if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 )
        BUG();
}
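
/*
 * The scheduler above runs as the "xentapd" kernel thread: it sleeps on
 * blkio_schedule_wait until an interface is queued and an active-request
 * slot is free, then drains up to BATCH_PER_DOMAIN requests per interface
 * via do_block_io_op(), re-queuing any interface that still has work left.
 */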

/*-----[ Interrupt entry from a frontend ]------ */

irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
{
    blkif_t *blkif = dev_id;

    add_to_blkdev_list_tail(blkif);
    maybe_trigger_blktap_schedule();
    return IRQ_HANDLED;
}

/*-----[ Other Frontend Ring functions ]-------- */

/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/
static int do_block_io_op(blkif_t *blkif, int max_to_do)
{
    /* we have pending messages from the real frontend. */

    blkif_request_t *req_s;
    RING_IDX i, rp;
    unsigned long flags;
    active_req_t *ar;
    int more_to_do = 0;
    int notify_be = 0, notify_user = 0;

    /* lock both rings */
    spin_lock_irqsave(&blkif_io_lock, flags);

    rp = blkif->blk_ring.sring->req_prod;
    rmb();

    for ( i = blkif->blk_ring.req_cons;
          (i != rp) &&
              !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
          i++ )
    {
        if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS))
        {
            more_to_do = 1;
            break;
        }

        req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
        /* This is a new request:
         * Assign an active request record, and remap the id.
         */
        ar = get_active_req();
        ar->id = req_s->id;
        ar->nr_pages = req_s->nr_segments;
        blkif_get(blkif);
        ar->blkif = blkif;
        req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
        /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */

        /* FE -> BE interposition point is here. */

        /* ------------------------------------------------------------- */
        /* BLKIF_OP_PROBE_HACK:                                           */
        /*   Signal to the backend that we are a tap domain.              */

        if (req_s->operation == BLKIF_OP_PROBE) {
            DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n");
            req_s->frame_and_sects[1] = BLKTAP_COOKIE;
        }

        /* ------------------------------------------------------------- */

        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {

            /* Copy the request message to the UFERing.                    */
            /* In MODE_INTERCEPT_FE, map attached pages into the app vma.  */
            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma.*/

            DPRINTK("req->UFERing\n");
            blktap_write_fe_ring(req_s);
            notify_user = 1;
        }

        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {

            /* (BE is included to prevent noise from the FE when it's off.) */
            /* Copy the request message to the BERing.                      */

            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n",
                    (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
                    (unsigned)blktap_be_ring.req_prod_pvt &
                    (RING_SIZE(&blktap_be_ring)-1));

            write_req_to_be_ring(req_s);
            notify_be = 1;
        }
    }

    blkif->blk_ring.req_cons = i;

    /* unlock rings */
    spin_unlock_irqrestore(&blkif_io_lock, flags);

    if (notify_user)
        blktap_kick_user();
    if (notify_be)
        kick_be_domain();

    return more_to_do;
}
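
/*
 * Summary of the request path above: in BLKTAP_MODE_INTERCEPT_FE or
 * BLKTAP_MODE_COPY_FE the request is exposed to the user-space tap via
 * blktap_write_fe_ring() and blktap_kick_user(); unless an intercept mode
 * is active it is also forwarded (with its id remapped by MAKE_ID) to the
 * backend ring, and kick_be_domain() pushes the batch.
 */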

/*-----[ Data to/from Backend (server) VM ]------------------------------*/

irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
                           struct pt_regs *ptregs)
{
    blkif_response_t *resp_s;
    blkif_t *blkif;
    RING_IDX rp, i;
    unsigned long flags;

    DPRINTK("PT got BE interrupt.\n");

    /* lock both rings */
    spin_lock_irqsave(&blkif_io_lock, flags);

    rp = blktap_be_ring.sring->rsp_prod;
    rmb();

    for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
    {
        resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);

        /* BE -> FE interposition point is here. */

        blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif;

        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {

            /* Copy the response message to the UBERing.                   */
            /* In MODE_INTERCEPT_BE, map attached pages into the app vma.  */
            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma.*/

            DPRINTK("rsp->UBERing\n");
            blktap_write_be_ring(resp_s);
            blktap_kick_user();

        }

        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {

            /* (FE is included to prevent random interference from the BE.) */
            /* Copy the response message to the FERing.                     */

            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n",
                    (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
                    (unsigned)blkif->blk_ring.rsp_prod_pvt &
                    (RING_SIZE(&blkif->blk_ring)-1));

            write_resp_to_fe_ring(blkif, resp_s);
            kick_fe_domain(blkif);

        }
    }

    blktap_be_ring.rsp_cons = i;

    spin_unlock_irqrestore(&blkif_io_lock, flags);

    return IRQ_HANDLED;
}
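
/*
 * Mirror image of the request path: responses coming back from the
 * backend are copied to the user-space tap in BLKTAP_MODE_INTERCEPT_BE or
 * BLKTAP_MODE_COPY_BE, and unless an intercept mode is active they are
 * relayed to the originating frontend with the original request id
 * restored from the active_reqs[] entry.
 */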

/* Debug : print the current ring indices. */

void print_be_ring_idxs(void)
{
    if (blktap_be_ring.sring != NULL) {
        WPRINTK("BE Ring:\n--------\n");
        WPRINTK("BE: rsp_cons: %2d, req_prod_pvt: %2d "
                "| req_prod: %2d, rsp_prod: %2d\n",
                blktap_be_ring.rsp_cons,
                blktap_be_ring.req_prod_pvt,
                blktap_be_ring.sring->req_prod,
                blktap_be_ring.sring->rsp_prod);
    }
}