xenbits.xensource.com Git - people/dstodden/blktap.git/commitdiff
CP-1734: bug fixes (for failover from mirror mode on ENOSPC)
author Andrei Lifchits <andrei.lifchits@citrix.com>
Tue, 17 Aug 2010 11:38:32 +0000 (12:38 +0100)
committer Andrei Lifchits <andrei.lifchits@citrix.com>
Tue, 17 Aug 2010 11:38:32 +0000 (12:38 +0100)
drivers/tapdisk-vbd.c
drivers/tapdisk-vbd.h
drivers/tapdisk.h

index 7c9e3e812560b4cd4f98361b657a0b44f74e06b1..e20fbde8e356c06843815561bd43437a83e2c890 100644 (file)
@@ -199,6 +199,12 @@ tapdisk_vbd_close_vdi(td_vbd_t *vbd)
                DPRINTF("Secondary image closed\n");
        }
 
+       if (vbd->retired) {
+               td_close(vbd->retired);
+               tapdisk_image_free(vbd->retired);
+               DPRINTF("Retired mirror image closed\n");
+       }
+
        INIT_LIST_HEAD(&vbd->images);
        td_flag_set(vbd->state, TD_VBD_CLOSED);
 }
@@ -375,6 +381,7 @@ fail:
 
 done:
        vbd->secondary = second;
+       leaf->flags |= TD_IGNORE_ENOSPC;
        if (td_flag_test(vbd->flags, TD_OPEN_STANDBY)) {
                DPRINTF("In standby mode\n");
                vbd->secondary_mode = TD_VBD_SECONDARY_STANDBY;
@@ -1748,17 +1755,23 @@ tapdisk_vbd_complete_td_request(td_request_t treq, int res)
 
        tapdisk_vbd_mark_progress(vbd);
 
-       if (abs(res) == ENOSPC && image != vbd->secondary &&
-                       vbd->secondary_mode != TD_VBD_SECONDARY_DISABLED) {
-               if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR)
+       if (abs(res) == ENOSPC && td_flag_test(image->flags,
+                               TD_IGNORE_ENOSPC)) {
+               res = 0;
+               leaf = tapdisk_vbd_first_image(vbd);
+               if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR) {
                        DPRINTF("ENOSPC: disabling mirroring\n");
-               else if (vbd->secondary_mode == TD_VBD_SECONDARY_STANDBY)
+                       list_del_init(&leaf->next);
+                       vbd->retired = leaf;
+               } else if (vbd->secondary_mode == TD_VBD_SECONDARY_STANDBY) {
                        DPRINTF("ENOSPC: failing over to secondary image\n");
-               leaf = tapdisk_vbd_first_image(vbd);
-               list_add(&vbd->secondary->next, leaf->next.prev);
-               vbd->secondary = NULL;
-               vbd->secondary_mode = TD_VBD_SECONDARY_DISABLED;
-               signal_enospc(vbd);
+                       list_add(&vbd->secondary->next, leaf->next.prev);
+               }
+               if (vbd->secondary_mode != TD_VBD_SECONDARY_DISABLED) {
+                       vbd->secondary = NULL;
+                       vbd->secondary_mode = TD_VBD_SECONDARY_DISABLED;
+                       signal_enospc(vbd);
+               }
        }
 
        DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08llx "
@@ -1783,9 +1796,15 @@ tapdisk_vbd_submit_request(td_vbd_t *vbd, blkif_request_t *req,
        switch (req->operation) {
        case BLKIF_OP_WRITE:
                treq.op = TD_OP_WRITE;
-               td_queue_write(treq.image, treq);
+               /* it's important to queue the mirror request before queuing 
+                * the main one. If the main image runs into ENOSPC, the 
+                * mirroring could be disabled before td_queue_write returns, 
+                * so if the mirror request was queued after (which would then 
+                * not happen), we'd lose that write and cause the process to 
+                * hang with unacknowledged writes */
                if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR)
                        queue_mirror_req(vbd, treq);
+               td_queue_write(treq.image, treq);
                break;
 
        case BLKIF_OP_READ:
@@ -1862,11 +1881,6 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
                    "buf %p op %d\n", image->name, id, i, treq.sec, treq.secs,
                    treq.buf, (int)req->operation);
 
-               if (i == req->nr_segments - 1) {
-                       tapdisk_vbd_submit_request(vbd, req, treq);
-                       treq_started = 0;
-               }
-
                vreq->secs_pending += nsects;
                vbd->secs_pending  += nsects;
                if (vbd->secondary_mode == TD_VBD_SECONDARY_MIRROR &&
@@ -1875,6 +1889,11 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
                        vbd->secs_pending  += nsects;
                }
 
+               if (i == req->nr_segments - 1) {
+                       tapdisk_vbd_submit_request(vbd, req, treq);
+                       treq_started = 0;
+               }
+
                sector_nr += nsects;
        }
 
index 1109d078034a3036d4f6832c471e51dba5cdb5c1..935b2414ce0656f860a326b216551c4412e16a7d 100644 (file)
@@ -107,6 +107,13 @@ struct td_vbd_handle {
        td_image_t                 *secondary;
        uint8_t                     secondary_mode;
 
+       /* when we encounter ENOSPC on the primary leaf image in mirror mode, 
+        * we need to remove it from the VBD chain so that writes start going 
+        * to the secondary leaf. However, we cannot free the image at that 
+        * time since it might still have in-flight treqs referencing it.  
+        * Therefore, we move it into 'retired' until shutdown. */
+       td_image_t                 *retired;
+
        struct list_head            new_requests;
        struct list_head            pending_requests;
        struct list_head            failed_requests;
index 743f30d8860d92e11465c9148fc0987710753d0f..494b7dcb7faf9ab0b8e4a00ee301e2c44591c9d2 100644 (file)
@@ -91,6 +91,7 @@
 #define TD_OPEN_REUSE_PARENT         0x00200
 #define TD_OPEN_SECONDARY            0x00400
 #define TD_OPEN_STANDBY              0x00800
+#define TD_IGNORE_ENOSPC             0x01000
 
 #define TD_CREATE_SPARSE             0x00001
 #define TD_CREATE_MULTITYPE          0x00002