let static_vdis = "/opt/xensource/bin/static-vdis"
(** Generate the static configuration and attach the VDI now *)
-let permanent_vdi_attach ~__context ~vdi ~reason =
- info "permanent_vdi_attach: vdi = %s; sr = %s"
- (Ref.string_of vdi) (Ref.string_of (Db.VDI.get_SR ~__context ~self:vdi));
- Helpers.call_script static_vdis
- [ "add"; Db.VDI.get_uuid ~__context ~self:vdi; reason ];
- (* VDI will be attached on next boot; attach it now too *)
- String.rtrim (Helpers.call_script static_vdis
- [ "attach"; Db.VDI.get_uuid ~__context ~self:vdi ])
+let permanent_vdi_attach ~__context ~vdi ~reason =
+ info "permanent_vdi_attach: vdi = %s; sr = %s"
+ (Ref.string_of vdi) (Ref.string_of (Db.VDI.get_SR ~__context ~self:vdi));
+ ignore (Helpers.call_script static_vdis [ "add"; Db.VDI.get_uuid ~__context ~self:vdi; reason ]);
+ (* VDI will be attached on next boot; attach it now too *)
+ String.rtrim (Helpers.call_script static_vdis
+ [ "attach"; Db.VDI.get_uuid ~__context ~self:vdi ])
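+
+(* Illustrative usage (sketch, not part of this change): the value returned
+   above is the trimmed stdout of "static-vdis attach", i.e. the path of the
+   newly attached device, so a caller might write:
+     let path = permanent_vdi_attach ~__context ~vdi ~reason:"HA statefile" in
+     (* ... use [path] as a block device ... *)
+*)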
- (** Detach the VDI (by reference) now and destroy the static configuration *)
-let permanent_vdi_detach ~__context ~vdi =
- info "permanent_vdi_detach: vdi = %s; sr = %s"
- (Ref.string_of vdi) (Ref.string_of (Db.VDI.get_SR ~__context ~self:vdi));
- Sm.call_sm_vdi_functions ~__context ~vdi
- (fun srconf srtype sr -> Sm.vdi_detach srconf srtype sr vdi);
- ignore(Helpers.call_script static_vdis
- [ "del"; Db.VDI.get_uuid ~__context ~self:vdi ])
+(** Detach the VDI (by reference) now and destroy the static configuration *)
+let permanent_vdi_detach ~__context ~vdi =
+ info "permanent_vdi_detach: vdi = %s; sr = %s"
+ (Ref.string_of vdi) (Ref.string_of (Db.VDI.get_SR ~__context ~self:vdi));
+ Sm.call_sm_vdi_functions ~__context ~vdi
+ (fun srconf srtype sr -> Sm.vdi_detach srconf srtype sr vdi);
+ ignore(Helpers.call_script static_vdis
+ [ "del"; Db.VDI.get_uuid ~__context ~self:vdi ])
(** Detach the VDI (by uuid) now and destroy the static configuration *)
-let permanent_vdi_detach_by_uuid ~__context ~uuid =
- info "permanent_vdi_detach: vdi-uuid = %s" uuid;
- begin
- try
- (* This might fail because the VDI has been destroyed *)
- let vdi = Db.VDI.get_by_uuid ~__context ~uuid in
- Sm.call_sm_vdi_functions ~__context ~vdi
- (fun srconf srtype sr -> Sm.vdi_detach srconf srtype sr vdi)
- with e ->
- warn "Ignoring exception calling SM vdi_detach for VDI uuid %s: %s (possibly VDI has been deleted while we were offline" uuid (ExnHelper.string_of_exn e)
- end;
- ignore(Helpers.call_script static_vdis [ "del"; uuid ])
+let permanent_vdi_detach_by_uuid ~__context ~uuid =
+ info "permanent_vdi_detach: vdi-uuid = %s" uuid;
+ begin
+ try
+ (* This might fail because the VDI has been destroyed *)
+ let vdi = Db.VDI.get_by_uuid ~__context ~uuid in
+ Sm.call_sm_vdi_functions ~__context ~vdi
+ (fun srconf srtype sr -> Sm.vdi_detach srconf srtype sr vdi)
+ with e ->
+ warn "Ignoring exception calling SM vdi_detach for VDI uuid %s: %s (possibly the VDI has been deleted while we were offline)" uuid (ExnHelper.string_of_exn e)
+ end;
+ ignore(Helpers.call_script static_vdis [ "del"; uuid ])
+
+(** Added for CA-48539. Deactivates a VDI. You should probably follow
+ this call with one of the previous vdi_detach functions. *)
+let permanent_vdi_deactivate_by_uuid ~__context ~uuid =
+ info "permanent_vdi_deactivate: vdi-uuid = %s" uuid ;
+ try
+ let vdi = Db.VDI.get_by_uuid ~__context ~uuid in
+ Sm.call_sm_vdi_functions ~__context ~vdi
+ (fun srconf srtype sr -> Sm.vdi_deactivate srconf srtype sr vdi)
+ with e ->
+ warn "Ignoring exception calling SM vdi_deactivate for VDI uuid %s: %s (possibly the VDI has been deleted while we were offline)"
+ uuid
+ (ExnHelper.string_of_exn e)
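+
+(* Sketch of the intended pairing (illustrative): deactivate first, then
+   detach, matching the CA-48539 ordering described in ha_release_resources:
+     permanent_vdi_deactivate_by_uuid ~__context ~uuid ;
+     permanent_vdi_detach_by_uuid ~__context ~uuid *)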
(** Detaches and removes records for VDIs which have been deleted *)
-let gc () =
- Server_helpers.exec_with_new_task "GCing on-boot VDIs" (fun __context ->
- List.iter
- (fun vdi ->
- let exists = try ignore(Db.VDI.get_by_uuid ~__context ~uuid:vdi.uuid); true with _ -> false in
- if not(exists) then begin
- warn "static-vdi %s cannot be found in database; removing on-boot configuration" vdi.uuid;
- (* NB we can't call the SM functions since the record has gone *)
- ignore(Helpers.call_script static_vdis [ "del"; vdi.uuid ])
- end
- ) (list ()))
+let gc () =
+ Server_helpers.exec_with_new_task "GCing on-boot VDIs" (fun __context ->
+ List.iter
+ (fun vdi ->
+ let exists = try ignore(Db.VDI.get_by_uuid ~__context ~uuid:vdi.uuid); true with _ -> false in
+ if not(exists) then begin
+ warn "static-vdi %s cannot be found in database; removing on-boot configuration" vdi.uuid;
+ (* NB we can't call the SM functions since the record has gone *)
+ ignore(Helpers.call_script static_vdis [ "del"; vdi.uuid ])
+ end
+ ) (list ()))
(** If we just rebooted and failed to attach our static VDIs then this can be called to reattempt the attach:
- this is necessary for HA to start. *)
-let reattempt_on_boot_attach () =
- let script = "/etc/init.d/attach-static-vdis" in
- try
- ignore(Helpers.call_script script [ "start" ])
- with e ->
- warn "Attempt to reattach static VDIs via '%s start' failed: %s" script (ExnHelper.string_of_exn e)
+ this is necessary for HA to start. *)
+let reattempt_on_boot_attach () =
+ let script = "/etc/init.d/attach-static-vdis" in
+ try
+ ignore(Helpers.call_script script [ "start" ])
+ with e ->
+ warn "Attempt to reattach static VDIs via '%s start' failed: %s" script (ExnHelper.string_of_exn e)
let path = Filename.concat main_dir x in
let uuid = Unixext.string_of_file (Filename.concat path "vdi-uuid") in
let reason = Unixext.string_of_file (Filename.concat path "reason") in
- let bool_of_string x = String.lowercase x = "true" in
let delete_next_boot =
try ignore(Unix.stat (Filename.concat path "delete-next-boot")); true
with _ -> false in
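+	(* On-disk layout assumed by the parsing above: each entry under main_dir
+	   is a directory containing the files "vdi-uuid" and "reason", plus an
+	   optional marker file "delete-next-boot" whose presence (judging by the
+	   name) schedules removal of the entry at the next boot. *)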
let call_script ?log_successful_output script args =
try
Mutex.execute ha_script_m (fun () -> Helpers.call_script ?log_successful_output script args)
- with Forkhelpers.Spawn_internal_error(stderr, stdout, Unix.WEXITED n) as e ->
+ with Forkhelpers.Spawn_internal_error(stderr, stdout, Unix.WEXITED n) ->
let code = Xha_errno.of_int n in
warn "%s %s returned %s (%s)" script (String.concat " " args)
(Xha_errno.to_string code) (Xha_errno.to_description_string code);
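+	(* Xha_errno (used above) maps the script's raw exit status to a symbolic
+	   error code plus a human-readable description, so failures are logged
+	   meaningfully rather than as bare integers. *)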
String_unmarshall_helper.map (fun x -> x) (fun x -> x) v
(** Without using the Pool's database, returns the IP address of a particular host
- named by UUID. *)
+ named by UUID. *)
let address_of_host_uuid uuid =
let table = get_uuid_to_ip_mapping () in
if not(List.mem_assoc uuid table) then begin
end else List.assoc uuid table
(** Without using the Pool's database, returns the UUID of a particular host named by
- heartbeat IP address. This is only necesary because the liveset info doesn't include
- the host IP address *)
+ heartbeat IP address. This is only necessary because the liveset info doesn't include
+ the host IP address *)
let uuid_of_host_address address =
let table = List.map (fun (k, v) -> v, k) (get_uuid_to_ip_mapping ()) in
if not(List.mem_assoc address table) then begin
end else List.assoc address table
(** Called in two circumstances:
- 1. When I started up I thought I was the master but my proposal was rejected by the
- heartbeat component.
- 2. I was happily running as someone's slave but they left the liveset.
+ 1. When I started up I thought I was the master but my proposal was rejected by the
+ heartbeat component.
+ 2. I was happily running as someone's slave but they left the liveset.
*)
let on_master_failure () =
(* The plan is: keep asking if I should be the master. If I'm rejected then query the
- live set and see if someone else has been marked as master, if so become a slave of them. *)
+ live set and see if someone else has been marked as master, if so become a slave of them. *)
let become_master () =
info "This node will become the master";
let process_liveset_on_master liveset =
let pool = Helpers.get_pool ~__context in
let to_tolerate = Int64.to_int (Db.Pool.get_ha_host_failures_to_tolerate ~__context ~self:pool) in
- let planned_for = Int64.to_int (Db.Pool.get_ha_plan_exists_for ~__context ~self:pool) in
(* First consider whether VM failover actions need to happen.
Convert the liveset into a list of Host references used by the VM failover code *)
(** Called when xapi restarts: server may be in emergency mode at this point. We need
- to inspect the local configuration and if HA is supposed to be armed we need to
- set everything up.
- Note that
- the master shouldn't be able to activate HA while we are offline since that would cause
- us to come up with a broken configuration (the enable-HA stage has the critical task of
- synchronising the HA configuration on all the hosts). So really we only want to notice
- if the Pool has had HA disabled while we were offline. *)
+ to inspect the local configuration and if HA is supposed to be armed we need to
+ set everything up.
+ Note that
+ the master shouldn't be able to activate HA while we are offline since that would cause
+ us to come up with a broken configuration (the enable-HA stage has the critical task of
+ synchronising the HA configuration on all the hosts). So really we only want to notice
+ if the Pool has had HA disabled while we were offline. *)
let on_server_restart () =
let armed = bool_of_string (Localdb.get Constants.ha_armed) in
let (_ : string) = call_script ha_start_daemon [] in
finished := true;
with
- | Xha_error Xha_errno.Mtc_exit_daemon_is_present as e ->
+ | Xha_error Xha_errno.Mtc_exit_daemon_is_present ->
warn "ha_start_daemon failed with MTC_EXIT_DAEMON_IS_PRESENT: continuing with startup";
finished := true;
| Xha_error Xha_errno.Mtc_exit_invalid_pool_state as e ->
end
(** Called in the master xapi startup when the database is ready. We set all hosts (including this one) to
- disabled then signal the monitor thread to look. It can then wait for slaves to turn up
- before trying to restart VMs. *)
+ disabled then signal the monitor thread to look. It can then wait for slaves to turn up
+ before trying to restart VMs. *)
let on_database_engine_ready () =
info "Setting all hosts to dead and disabled. Hosts must re-enable themselves explicitly";
Server_helpers.exec_with_new_task "Setting all hosts to dead and disabled"
(* Internal API calls to configure individual hosts *)
(** Internal API call to prevent this node making an unsafe failover decision.
- This call is idempotent. *)
+ This call is idempotent. *)
let ha_disable_failover_decisions __context localhost =
debug "Disabling failover decisions";
(* FIST *)
Localdb.put Constants.ha_disable_failover_decisions "true"
(** Internal API call to disarm localhost.
- If the daemon is missing then we return success. Either fencing was previously disabled and the
- daemon has shutdown OR the daemon has died and this node will fence shortly...
+ If the daemon is missing then we return success. Either fencing was previously disabled and the
+ daemon has shutdown OR the daemon has died and this node will fence shortly...
*)
let ha_disarm_fencing __context localhost =
try
let (_ : string) = call_script ha_set_excluded [] in ()
(** Internal API call to stop the HA daemon.
- This call is idempotent. *)
+ This call is idempotent. *)
let ha_stop_daemon __context localhost =
Monitor.stop ();
let (_ : string) = call_script ha_stop_daemon [] in ()
(* Might not be able to access the database to detach statefiles; however this isn't critical *)
()
-(** Internal API call to release any HA resources after the system has been shutdown.
- This call is idempotent. *)
+(** Internal API call to release any HA resources after the system has
+ been shutdown. This call is idempotent. Modified for CA-48539 to
+ call vdi.deactivate before vdi.detach. *)
let ha_release_resources __context localhost =
Monitor.stop ();
- (* Detach any statefile VDIs *)
- let pool = Helpers.get_pool ~__context in
- List.iter
- (fun vdi ->
- let uuid = Db.VDI.get_uuid ~__context ~self:vdi in
- Helpers.log_exn_continue
- (Printf.sprintf "detaching statefile VDI uuid: %s" uuid)
- (fun () -> Static_vdis.permanent_vdi_detach ~__context ~vdi) ()
- ) (List.map Ref.of_string (Db.Pool.get_ha_statefiles ~__context ~self:pool));
-
- (* Detach any metadata VDIs *)
- Xha_metadata_vdi.detach_existing ~__context;
+
+ (* Why aren't we calling Xha_statefile.detach_existing_statefiles?
+ Does Db.Pool.get_ha_statefiles return a different set of
+ statefiles than Xha_statefile.list_existing_statefiles? *)
+
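+  (* Each VDI below is handled via Helpers.log_exn_continue: a failure is
+     logged and skipped, so one inaccessible statefile cannot prevent the
+     remaining resources from being released. *)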
+ (* Deactivate and detach all statefile VDIs in the entire pool *)
+ let statefile_vdis = Db.Pool.get_ha_statefiles ~__context ~self:(Helpers.get_pool ~__context)
+ and deactivate_and_detach_vdi vdi_str =
+ let uuid = Db.VDI.get_uuid ~__context ~self:(Ref.of_string vdi_str) in
+ Helpers.log_exn_continue
+ (Printf.sprintf "detaching statefile VDI uuid: %s" uuid)
+ (fun () ->
+ Static_vdis.permanent_vdi_deactivate_by_uuid ~__context ~uuid ;
+ Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid) ()
+ in List.iter deactivate_and_detach_vdi statefile_vdis ;
+
+ (* Deactivate and detach any metadata VDIs *)
+ Helpers.log_exn_continue
+ "deactivating and detaching metadata VDIs"
+ (fun () -> Xha_metadata_vdi.deactivate_and_detach_existing ~__context) ();
(* At this point a restart won't enable the HA subsystem *)
Localdb.put Constants.ha_armed "false"
(** Internal API call which blocks until this node's xHA daemon spots the invalid statefile
- and exits cleanly. If the daemon survives but the statefile access is lost then this function
- will return an exception and the no-statefile shutdown can be attempted.
+ and exits cleanly. If the daemon survives but the statefile access is lost then this function
+ will return an exception and the no-statefile shutdown can be attempted.
*)
let ha_wait_for_shutdown_via_statefile __context localhost =
try
let write_config_file ~__context statevdi_paths generation =
let local_heart_beat_interface = Xapi_inventory.lookup Xapi_inventory._management_interface in
(* Need to find the name of the physical interface, so xHA can monitor the bonding status (if appropriate).
- Note that this interface isn't used for sending packets so VLANs don't matter: the physical NIC or bond device is all we need. *)
+ Note that this interface isn't used for sending packets so VLANs don't matter: the physical NIC or bond device is all we need. *)
let localhost = Helpers.get_localhost ~__context in
let mgmt_pifs = List.filter (fun self -> Db.PIF.get_management ~__context ~self) (Db.Host.get_PIFs ~__context ~self:localhost) in
if mgmt_pifs = [] then failwith (Printf.sprintf "Cannot enable HA on host %s: there is no management interface for heartbeating" (Db.Host.get_hostname ~__context ~self:localhost));
Db.Host.set_ha_statefiles ~__context ~self:localhost ~value:(List.map Ref.string_of statevdis);
(* The master has already attached the statefile VDIs and written the
- configuration file. *)
+ configuration file. *)
if not(Pool_role.is_master ()) then begin
let statefiles = attach_statefiles ~__context statevdis in
write_config_file ~__context statefiles generation;
info "Local flag ha_armed <- true";
(* If this host is the current master then it must assert its authority as master;
- otherwise another host's heartbeat thread might conclude that the master has gone
- and propose itself. This would lead the xHA notion of master to immediately diverge
- from the XenAPI notion. *)
+ otherwise another host's heartbeat thread might conclude that the master has gone
+ and propose itself. This would lead the xHA notion of master to immediately diverge
+ from the XenAPI notion. *)
if Pool_role.is_master () then begin
if not (propose_master ())
then failwith "failed to propose the current master as master";
redo_log_ha_disabled_during_runtime __context;
(* Steps from 8.6 Disabling HA
- If the master has access to the state file (how do we determine this)?
- * ha_set_pool_state(invalid)
- If the master hasn't access to the state file but all hosts are available via heartbeat
- * set the flag "can not be master and no VM failover decision on next boot"
- * ha_disarm_fencing()
- * ha_stop_daemon()
- Otherwise we'll be fenced *)
+ If the master has access to the state file (how do we determine this?)
+ * ha_set_pool_state(invalid)
+ If the master hasn't access to the state file but all hosts are available via heartbeat
+ * set the flag "can not be master and no VM failover decision on next boot"
+ * ha_disarm_fencing()
+ * ha_stop_daemon()
+ Otherwise we'll be fenced *)
let hosts = Db.Host.get_all ~__context in
(List.map (fun (pif,pifr) -> Ref.string_of pif) unplugged_ununpluggable_pifs)));
(* Check also that any PIFs with IP information set are currently attached - it's a non-fatal
- error if they are, but we'll warn with a message *)
+ error if they are, but we'll warn with a message *)
let pifs_with_ip_config = List.filter (fun (_,pifr) -> pifr.API.pIF_ip_configuration_mode <> `None) pifs in
let not_bond_slaves = List.filter (fun (_,pifr) -> not (Db.is_valid_ref pifr.API.pIF_bond_slave_of)) pifs_with_ip_config in
let without_disallow_unplug = List.filter (fun (_,pifr) -> not (pifr.API.pIF_disallow_unplug || pifr.API.pIF_management)) not_bond_slaves in
if not alive then raise (Api_errors.Server_error(Api_errors.host_offline, [ Ref.string_of host ]))
) (Db.Host.get_all ~__context);
- let set_difference a b = List.filter (fun x -> not(List.mem x b)) a in
(* Steps from 8.7 Enabling HA in Marathon spec:
* 1. Bring up state file VDI(s)
let vdis = list_existing() in
List.iter (fun x -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid:x.Static_vdis.uuid) vdis
+(** Added for CA-48539 *)
+let deactivate_and_detach_existing ~__context =
+ let vdi_uuids = List.map (fun vdi -> vdi.Static_vdis.uuid) (list_existing ()) in
+ List.iter (fun vdi_uuid -> Static_vdis.permanent_vdi_deactivate_by_uuid ~__context ~uuid:vdi_uuid) vdi_uuids ;
+ List.iter (fun vdi_uuid -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid:vdi_uuid) vdi_uuids
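+(* Note the two-pass structure: every VDI is deactivated before any is
+   detached. A single-pass equivalent (illustrative only) would interleave
+   the operations per VDI instead:
+     List.iter
+       (fun uuid ->
+         Static_vdis.permanent_vdi_deactivate_by_uuid ~__context ~uuid ;
+         Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid)
+       vdi_uuids *)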
+
open Pervasiveext
(** Attempt to flush the database to the metadata VDI *)
open Listext
open Stringext
-(** Return the minimum size of an HA statefile, as of
+(** Return the minimum size of an HA statefile, as of
XenServer HA state-file description vsn 1.3 *)
-let minimum_size number_of_hosts =
- let ( ** ) = Int64.mul
- and ( ++ ) = Int64.add in
+let minimum_size number_of_hosts =
+ let ( ** ) = Int64.mul
+ and ( ++ ) = Int64.add in
- let global_section_size = 4096L
- and host_section_size = 4096L in
- global_section_size ++ (Int64.of_int number_of_hosts) ** host_section_size
+ let global_section_size = 4096L
+ and host_section_size = 4096L in
+ global_section_size ++ (Int64.of_int number_of_hosts) ** host_section_size
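+
+(* Worked example: a 16-host pool needs at least
+   4096 + 16 * 4096 = 69632 bytes (68 KiB): one 4 KiB global section plus a
+   4 KiB section per host. *)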
let set_difference a b = List.filter (fun x -> not(List.mem x b)) a
-let assert_sr_can_host_statefile ~__context ~sr =
- (* Check that each host has a PBD to this SR *)
- let pbds = Db.SR.get_PBDs ~__context ~self:sr in
- let connected_hosts = List.setify (List.map (fun self -> Db.PBD.get_host ~__context ~self) pbds) in
- let all_hosts = Db.Host.get_all ~__context in
- if List.length connected_hosts < (List.length all_hosts) then begin
- error "Cannot place statefile in SR %s: some hosts lack a PBD: [ %s ]"
- (Ref.string_of sr)
- (String.concat "; " (List.map Ref.string_of (set_difference all_hosts connected_hosts)));
- raise (Api_errors.Server_error(Api_errors.sr_no_pbds, [ Ref.string_of sr ]))
- end;
- (* Check that each PBD is plugged in *)
- List.iter (fun self ->
- if not(Db.PBD.get_currently_attached ~__context ~self) then begin
- error "Cannot place statefile in SR %s: PBD %s is not plugged"
- (Ref.string_of sr) (Ref.string_of self);
- (* Same exception is used in this case (see Helpers.assert_pbd_is_plugged) *)
- raise (Api_errors.Server_error(Api_errors.sr_no_pbds, [ Ref.string_of sr ]))
- end) pbds;
- (* Check the exported capabilities of the SR's SM plugin *)
- let srtype = Db.SR.get_type ~__context ~self:sr in
- if not (List.mem Smint.Vdi_generate_config (Sm.capabilities_of_driver srtype))
- then raise (Api_errors.Server_error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]))
+let assert_sr_can_host_statefile ~__context ~sr =
+ (* Check that each host has a PBD to this SR *)
+ let pbds = Db.SR.get_PBDs ~__context ~self:sr in
+ let connected_hosts = List.setify (List.map (fun self -> Db.PBD.get_host ~__context ~self) pbds) in
+ let all_hosts = Db.Host.get_all ~__context in
+ if List.length connected_hosts < (List.length all_hosts) then begin
+ error "Cannot place statefile in SR %s: some hosts lack a PBD: [ %s ]"
+ (Ref.string_of sr)
+ (String.concat "; " (List.map Ref.string_of (set_difference all_hosts connected_hosts)));
+ raise (Api_errors.Server_error(Api_errors.sr_no_pbds, [ Ref.string_of sr ]))
+ end;
+ (* Check that each PBD is plugged in *)
+ List.iter (fun self ->
+ if not(Db.PBD.get_currently_attached ~__context ~self) then begin
+ error "Cannot place statefile in SR %s: PBD %s is not plugged"
+ (Ref.string_of sr) (Ref.string_of self);
+ (* Same exception is used in this case (see Helpers.assert_pbd_is_plugged) *)
+ raise (Api_errors.Server_error(Api_errors.sr_no_pbds, [ Ref.string_of sr ]))
+ end) pbds;
+ (* Check the exported capabilities of the SR's SM plugin *)
+ let srtype = Db.SR.get_type ~__context ~self:sr in
+ if not (List.mem Smint.Vdi_generate_config (Sm.capabilities_of_driver srtype))
+ then raise (Api_errors.Server_error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]))
-let list_srs_which_can_host_statefile ~__context =
- List.filter (fun sr -> try assert_sr_can_host_statefile ~__context ~sr; true
- with _ -> false) (Db.SR.get_all ~__context)
+let list_srs_which_can_host_statefile ~__context =
+ List.filter (fun sr -> try assert_sr_can_host_statefile ~__context ~sr; true
+ with _ -> false) (Db.SR.get_all ~__context)
-let create ~__context ~sr =
- assert_sr_can_host_statefile ~__context ~sr;
- let size = minimum_size (List.length (Db.Host.get_all ~__context)) in
- Helpers.call_api_functions ~__context
- (fun rpc session_id ->
- Client.VDI.create ~rpc ~session_id
- ~name_label:"Statefile for HA"
- ~name_description:"Used for storage heartbeating"
- ~sR:sr ~virtual_size:size ~_type:`ha_statefile
- ~sharable:true ~read_only:false ~other_config:[] ~xenstore_data:[] ~sm_config:statefile_sm_config ~tags:[]
- )
+let create ~__context ~sr =
+ assert_sr_can_host_statefile ~__context ~sr;
+ let size = minimum_size (List.length (Db.Host.get_all ~__context)) in
+ Helpers.call_api_functions ~__context
+ (fun rpc session_id ->
+ Client.VDI.create ~rpc ~session_id
+ ~name_label:"Statefile for HA"
+ ~name_description:"Used for storage heartbeating"
+ ~sR:sr ~virtual_size:size ~_type:`ha_statefile
+ ~sharable:true ~read_only:false ~other_config:[] ~xenstore_data:[] ~sm_config:statefile_sm_config ~tags:[]
+ )
(** Return a reference to a valid statefile VDI in the given SR.
This function prefers to reuse existing VDIs to avoid confusing the heartbeat component:
it expects to see a poisoned VDI but not necessarily a stale or corrupted one. Consider that
when using LVM-based SRs the VDI could be deleted on the master but the slaves would still
have access to stale data. *)
-let find_or_create ~__context ~sr =
- assert_sr_can_host_statefile ~__context ~sr;
- let size = minimum_size (List.length (Db.Host.get_all ~__context)) in
- match
- List.filter
- (fun self -> true
- && (Db.VDI.get_type ~__context ~self = `ha_statefile)
- && (Db.VDI.get_virtual_size ~__context ~self >= size))
- (Db.SR.get_VDIs ~__context ~self:sr) with
- | x :: _ ->
- info "re-using existing statefile: %s" (Db.VDI.get_uuid ~__context ~self:x);
- x
- | [] ->
- info "no suitable existing statefile found; creating a fresh one";
- create ~__context ~sr
+let find_or_create ~__context ~sr =
+ assert_sr_can_host_statefile ~__context ~sr;
+ let size = minimum_size (List.length (Db.Host.get_all ~__context)) in
+ match
+ List.filter
+ (fun self -> true
+ && (Db.VDI.get_type ~__context ~self = `ha_statefile)
+ && (Db.VDI.get_virtual_size ~__context ~self >= size))
+ (Db.SR.get_VDIs ~__context ~self:sr) with
+ | x :: _ ->
+ info "re-using existing statefile: %s" (Db.VDI.get_uuid ~__context ~self:x);
+ x
+ | [] ->
+ info "no suitable existing statefile found; creating a fresh one";
+ create ~__context ~sr
-let list_existing_statefiles () =
- List.filter (fun x -> x.Static_vdis.reason = reason) (Static_vdis.list ())
+let list_existing_statefiles () =
+ List.filter (fun x -> x.Static_vdis.reason = reason) (Static_vdis.list ())
(** Detach all statefiles attached with reason 'HA statefile', to clear stale state *)
-let detach_existing_statefiles ~__context =
- let statefiles = List.filter (fun x -> x.Static_vdis.reason = reason) (Static_vdis.list ()) in
- List.iter (fun x -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid:x.Static_vdis.uuid) statefiles
-
+let detach_existing_statefiles ~__context =
+ let statefile_uuids = List.map (fun vdi -> vdi.Static_vdis.uuid) (list_existing_statefiles ()) in
+ List.iter (fun uuid -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid) statefile_uuids
+
+(** Added for CA-48539. Deactivate and detach all statefiles attached
+ with reason 'HA statefile', to clear stale state *)
+let deactivate_and_detach_existing_statefiles ~__context =
+ let statefile_uuids = List.map (fun vdi -> vdi.Static_vdis.uuid) (list_existing_statefiles ()) in
+ List.iter (fun uuid -> Static_vdis.permanent_vdi_deactivate_by_uuid ~__context ~uuid) statefile_uuids ;
+ List.iter (fun uuid -> Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid) statefile_uuids
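+
+(* Illustrative call site (sketch): a shutdown path such as
+   ha_release_resources could call this helper instead of iterating over
+   Db.Pool.get_ha_statefiles itself, as the open question in that function
+   suggests:
+     Xha_statefile.deactivate_and_detach_existing_statefiles ~__context *)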