ia64/xen-unstable

view tools/python/xen/xend/XendDomain.py @ 7348:067b9aacb6c2

Fix deadlock in XendDomainInfo when a domain is cleaned up. We are renaming
the domain, to make it clear that it is a zombie, but this renaming cannot
check the uniqueness of the new name, because this causes a deadlock with
XendDomain. Instead, we allow the name to be non-unique for the case of
zombie domains.

Change the locking in waitForShutdown and state_set to be robust in the face of
exceptions.

Rename the STATE_VM_ constants to STATE_DOM_.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@ewan
date Wed Oct 12 10:11:35 2005 +0100 (2005-10-12)
parents 74d56b7ff46c
children 7b9547485703
line source
1 #============================================================================
2 # This library is free software; you can redistribute it and/or
3 # modify it under the terms of version 2.1 of the GNU Lesser General Public
4 # License as published by the Free Software Foundation.
5 #
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
10 #
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, write to the Free Software
13 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14 #============================================================================
15 # Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
16 # Copyright (C) 2005 Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
17 # Copyright (C) 2005 XenSource Ltd
18 #============================================================================
20 """Handler for domain operations.
21 Nothing here is persistent (across reboots).
22 Needs to be persistent for one uptime.
23 """
24 import os
25 import logging
26 import threading
28 import xen.lowlevel.xc
30 import XendDomainInfo
32 from xen.xend import XendRoot
33 from xen.xend import XendCheckpoint
34 from xen.xend.XendError import XendError
35 from xen.xend.XendLogging import log
36 from xen.xend.server import relocate
# Handle on the low-level Xen control interface; the domain operations in
# this module issue their hypervisor calls through it.
xc = xen.lowlevel.xc.new()
# Xend root object; used in this module for add_component() and
# get_xend_relocation_port().
xroot = XendRoot.instance()
# Only the XendDomain class is exported by "from ... import *".
__all__ = [ "XendDomain" ]
# Domain id of the privileged domain (dom0).
PRIV_DOMAIN = 0
47 class XendDomain:
48 """Index of all domains. Singleton.
49 """
51 ## public:
53 def __init__(self):
54 # Hack alert. Python does not support mutual imports, but XendDomainInfo
55 # needs access to the XendDomain instance to look up domains. Attempting
56 # to import XendDomain from XendDomainInfo causes unbounded recursion.
57 # So we stuff the XendDomain instance (self) into xroot's components.
58 xroot.add_component("xen.xend.XendDomain", self)
59 self.domains = {}
60 self.domains_lock = threading.RLock()
61 self.watchReleaseDomain()
63 self.domains_lock.acquire()
64 try:
65 self.refresh(True)
66 self.dom0_setup()
67 finally:
68 self.domains_lock.release()
71 def list(self):
72 """Get list of domain objects.
74 @return: domain objects
75 """
76 self.domains_lock.acquire()
77 try:
78 self.refresh()
79 return self.domains.values()
80 finally:
81 self.domains_lock.release()
84 def list_sorted(self):
85 """Get list of domain objects, sorted by name.
87 @return: domain objects
88 """
89 doms = self.list()
90 doms.sort(lambda x, y: cmp(x.getName(), y.getName()))
91 return doms
93 def list_names(self):
94 """Get list of domain names.
96 @return: domain names
97 """
98 doms = self.list_sorted()
99 return map(lambda x: x.getName(), doms)
102 ## private:
104 def onReleaseDomain(self):
105 self.domains_lock.acquire()
106 try:
107 self.refresh()
108 finally:
109 self.domains_lock.release()
112 def watchReleaseDomain(self):
113 from xen.xend.xenstore.xswatch import xswatch
114 self.releaseDomain = xswatch("@releaseDomain", self.onReleaseDomain)
117 def xen_domains(self):
118 """Get table of domains indexed by id from xc. Expects to be
119 protected by the domains_lock.
120 """
121 domlist = xc.domain_getinfo()
122 doms = {}
123 for d in domlist:
124 domid = d['dom']
125 doms[domid] = d
126 return doms
129 def dom0_setup(self):
130 """Expects to be protected by the domains_lock."""
131 dom0 = self.domains[PRIV_DOMAIN]
132 dom0.dom0_enforce_vcpus()
135 def _add_domain(self, info):
136 """Add the given domain entry to this instance's internal cache.
137 Expects to be protected by the domains_lock.
138 """
139 self.domains[info.getDomid()] = info
142 def _delete_domain(self, domid):
143 """Remove the given domain from this instance's internal cache.
144 Expects to be protected by the domains_lock.
145 """
146 info = self.domains.get(domid)
147 if info:
148 del self.domains[domid]
149 info.cleanupDomain()
152 def refresh(self, initialising = False):
153 """Refresh domain list from Xen. Expects to be protected by the
154 domains_lock.
156 @param initialising True if this is the first refresh after starting
157 Xend. This does not change this method's behaviour, except for
158 logging.
159 """
160 doms = self.xen_domains()
161 for d in self.domains.values():
162 info = doms.get(d.getDomid())
163 if info:
164 d.update(info)
165 else:
166 self._delete_domain(d.getDomid())
167 for d in doms:
168 if d not in self.domains:
169 if doms[d]['dying']:
170 log.log(initialising and logging.ERROR or logging.DEBUG,
171 'Cannot recreate information for dying domain %d.'
172 ' Xend will ignore this domain from now on.',
173 doms[d]['dom'])
174 else:
175 try:
176 dominfo = XendDomainInfo.recreate(doms[d])
177 self._add_domain(dominfo)
178 except:
179 if d == PRIV_DOMAIN:
180 log.exception(
181 "Failed to recreate information for domain "
182 "%d. Doing nothing except crossing my "
183 "fingers.", d)
184 else:
185 log.exception(
186 "Failed to recreate information for domain "
187 "%d. Destroying it in the hope of "
188 "recovery.", d)
189 try:
190 xc.domain_destroy(dom = d)
191 except:
192 log.exception('Destruction of %d failed.', d)
195 ## public:
197 def domain_create(self, config):
198 """Create a domain from a configuration.
200 @param config: configuration
201 @return: domain
202 """
203 self.domains_lock.acquire()
204 try:
205 dominfo = XendDomainInfo.create(config)
206 self._add_domain(dominfo)
207 return dominfo
208 finally:
209 self.domains_lock.release()
212 def domain_configure(self, config):
213 """Configure an existing domain.
215 @param vmconfig: vm configuration
216 """
217 # !!!
218 raise XendError("Unsupported")
220 def domain_restore(self, src):
221 """Restore a domain from file.
223 @param src: source file
224 """
226 try:
227 fd = os.open(src, os.O_RDONLY)
228 try:
229 return self.domain_restore_fd(fd)
230 finally:
231 os.close(fd)
232 except OSError, ex:
233 raise XendError("can't read guest state file %s: %s" %
234 (src, ex[1]))
236 def domain_restore_fd(self, fd):
237 """Restore a domain from the given file descriptor."""
239 try:
240 return XendCheckpoint.restore(self, fd)
241 except:
242 # I don't really want to log this exception here, but the error
243 # handling in the relocation-socket handling code (relocate.py) is
244 # poor, so we need to log this for debugging.
245 log.exception("Restore failed")
246 raise
249 def restore_(self, config):
250 """Create a domain as part of the restore process. This is called
251 only from {@link XendCheckpoint}.
253 A restore request comes into XendDomain through {@link
254 #domain_restore} or {@link #domain_restore_fd}. That request is
255 forwarded immediately to XendCheckpoint which, when it is ready, will
256 call this method. It is necessary to come through here rather than go
257 directly to {@link XendDomainInfo.restore} because we need to
258 serialise the domain creation process, but cannot lock
259 domain_restore_fd as a whole, otherwise we will deadlock waiting for
260 the old domain to die.
261 """
262 self.domains_lock.acquire()
263 try:
264 dominfo = XendDomainInfo.restore(config)
265 self._add_domain(dominfo)
266 return dominfo
267 finally:
268 self.domains_lock.release()
271 def domain_lookup(self, domid):
272 self.domains_lock.acquire()
273 try:
274 self.refresh()
275 return self.domains.get(domid)
276 finally:
277 self.domains_lock.release()
280 def domain_lookup_nr(self, domid):
281 self.domains_lock.acquire()
282 try:
283 return self.domains.get(domid)
284 finally:
285 self.domains_lock.release()
288 def domain_lookup_by_name_or_id(self, name):
289 self.domains_lock.acquire()
290 try:
291 self.refresh()
292 return self.domain_lookup_by_name_or_id_nr(name)
293 finally:
294 self.domains_lock.release()
297 def domain_lookup_by_name_or_id_nr(self, name):
298 self.domains_lock.acquire()
299 try:
300 dominfo = self.domain_lookup_by_name_nr(name)
302 if dominfo:
303 return dominfo
304 else:
305 try:
306 return self.domains.get(int(name))
307 except ValueError:
308 return None
309 finally:
310 self.domains_lock.release()
313 def domain_lookup_by_name_nr(self, name):
314 self.domains_lock.acquire()
315 try:
316 matching = filter(lambda d: d.getName() == name,
317 self.domains.values())
318 n = len(matching)
319 if n == 1:
320 return matching[0]
321 elif n > 1 and not d.isTerminated():
322 log.error('Name uniqueness has been violated for name %s! '
323 'Recovering by renaming:', name)
324 for d in matching:
325 d.renameUniquely()
327 return None
328 finally:
329 self.domains_lock.release()
332 def privilegedDomain(self):
333 self.domains_lock.acquire()
334 try:
335 return self.domains[PRIV_DOMAIN]
336 finally:
337 self.domains_lock.release()
340 def domain_unpause(self, domid):
341 """Unpause domain execution."""
342 try:
343 dominfo = self.domain_lookup(domid)
344 log.info("Domain %s (%d) unpaused.", dominfo.getName(),
345 dominfo.getDomid())
346 return xc.domain_unpause(dom=dominfo.getDomid())
347 except Exception, ex:
348 raise XendError(str(ex))
351 def domain_pause(self, domid):
352 """Pause domain execution."""
353 try:
354 dominfo = self.domain_lookup(domid)
355 log.info("Domain %s (%d) paused.", dominfo.getName(),
356 dominfo.getDomid())
357 return xc.domain_pause(dom=dominfo.getDomid())
358 except Exception, ex:
359 raise XendError(str(ex))
362 def domain_destroy(self, domid):
363 """Terminate domain immediately."""
365 if domid == PRIV_DOMAIN:
366 raise XendError("Cannot destroy privileged domain %i" % domid)
368 dominfo = self.domain_lookup(domid)
369 if dominfo:
370 val = dominfo.destroy()
371 else:
372 try:
373 val = xc.domain_destroy(dom=domid)
374 except Exception, ex:
375 raise XendError(str(ex))
376 return val
378 def domain_migrate(self, domid, dst, live=False, resource=0):
379 """Start domain migration."""
381 dominfo = self.domain_lookup(domid)
383 port = xroot.get_xend_relocation_port()
384 sock = relocate.setupRelocation(dst, port)
386 XendCheckpoint.save(sock.fileno(), dominfo, live)
389 def domain_save(self, domid, dst):
390 """Start saving a domain to file.
392 @param dst: destination file
393 """
395 try:
396 dominfo = self.domain_lookup(domid)
398 fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
399 try:
400 # For now we don't support 'live checkpoint'
401 return XendCheckpoint.save(fd, dominfo, False)
402 finally:
403 os.close(fd)
404 except OSError, ex:
405 raise XendError("can't write guest state file %s: %s" %
406 (dst, ex[1]))
408 def domain_pincpu(self, domid, vcpu, cpumap):
409 """Set which cpus vcpu can use
411 @param cpumap: bitmap of usable cpus
412 """
413 dominfo = self.domain_lookup(domid)
414 try:
415 return xc.domain_pincpu(dominfo.getDomid(), vcpu, cpumap)
416 except Exception, ex:
417 raise XendError(str(ex))
419 def domain_cpu_bvt_set(self, domid, mcuadv, warpback, warpvalue, warpl,
420 warpu):
421 """Set BVT (Borrowed Virtual Time) scheduler parameters for a domain.
422 """
423 dominfo = self.domain_lookup(domid)
424 try:
425 return xc.bvtsched_domain_set(dom=dominfo.getDomid(),
426 mcuadv=mcuadv,
427 warpback=warpback,
428 warpvalue=warpvalue,
429 warpl=warpl, warpu=warpu)
430 except Exception, ex:
431 raise XendError(str(ex))
433 def domain_cpu_bvt_get(self, domid):
434 """Get BVT (Borrowed Virtual Time) scheduler parameters for a domain.
435 """
436 dominfo = self.domain_lookup(domid)
437 try:
438 return xc.bvtsched_domain_get(dominfo.getDomid())
439 except Exception, ex:
440 raise XendError(str(ex))
443 def domain_cpu_sedf_set(self, domid, period, slice_, latency, extratime,
444 weight):
445 """Set Simple EDF scheduler parameters for a domain.
446 """
447 dominfo = self.domain_lookup(domid)
448 try:
449 return xc.sedf_domain_set(dominfo.getDomid(), period, slice_,
450 latency, extratime, weight)
451 except Exception, ex:
452 raise XendError(str(ex))
454 def domain_cpu_sedf_get(self, domid):
455 """Get Simple EDF scheduler parameters for a domain.
456 """
457 dominfo = self.domain_lookup(domid)
458 try:
459 return xc.sedf_domain_get(dominfo.getDomid())
460 except Exception, ex:
461 raise XendError(str(ex))
464 def domain_vif_limit_set(self, domid, vif, credit, period):
465 """Limit the vif's transmission rate
466 """
467 dominfo = self.domain_lookup(domid)
468 dev = dominfo.getDevice('vif', vif)
469 if not dev:
470 raise XendError("invalid vif")
471 return dev.setCreditLimit(credit, period)
473 def domain_shadow_control(self, domid, op):
474 """Shadow page control."""
475 dominfo = self.domain_lookup(domid)
476 try:
477 return xc.shadow_control(dominfo.getDomid(), op)
478 except Exception, ex:
479 raise XendError(str(ex))
481 def domain_maxmem_set(self, domid, mem):
482 """Set the memory limit for a domain.
484 @param mem: memory limit (in MiB)
485 @return: 0 on success, -1 on error
486 """
487 dominfo = self.domain_lookup(domid)
488 maxmem = int(mem) * 1024
489 try:
490 return xc.domain_setmaxmem(dominfo.getDomid(),
491 maxmem_kb = maxmem)
492 except Exception, ex:
493 raise XendError(str(ex))
def instance():
    """Singleton constructor. Use this instead of the class constructor.

    @return: the module-wide XendDomain instance, created on the first call
    """
    global inst
    try:
        inst
    except NameError:
        # First call: the module-level name has not been bound yet.  Only
        # that condition is handled, so unrelated errors are not swallowed.
        inst = XendDomain()
    return inst