ia64/xen-unstable

view tools/misc/xend @ 16323:468a30d74bd6

xenstored: Recover from corrupt tdb on reboot

Xen cannot work when xenstored's tdb is corrupt. When that happens
somehow (and we've seen it happen), even reboot doesn't recover from
it. It could: there is no state in tdb that needs to be persisted
across reboots.

This patch arranges for tdb to be removed before xenstored is started,
provided xenstored is not already running. This is safe, because:

* xenstored cannot be restarted. If it dies, Xen's screwed until
reboot.

* /usr/sbin/xend always starts xenstored anyway.

* xenstored locks its pid-file (see write_pidfile() in
tools/xenstore/xenstored_core.c), and refuses to start when it
can't.

* My patch makes /usr/sbin/xend remove tdb iff it can lock the
pid-file. In other words, it removes tdb only when xenstored is not
running, and locks it out until it is done.

Bonus fix: it also removes stale copies of the tdb xenstored tends
to leave behind when it exits uncleanly.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
author Keir Fraser <keir@xensource.com>
date Tue Nov 06 09:40:44 2007 +0000 (2007-11-06)
parents afb41f6bc30a
children 0ebac8576495
line source
1 #!/usr/bin/env python
2 # -*- mode: python; -*-
3 #============================================================================
4 # Copyright (C) 2004 Mike Wray <mike.wray@hp.com>
5 # Copyright (C) 2005-2006 XenSource Inc
6 #============================================================================
8 """Xen management daemon.
9 Provides console server and HTTP management api.
11 Run:
12 xend start
14 Restart:
15 xend restart
17 The daemon is stopped with:
18 xend stop
20 The daemon should reconnect to device control interfaces
21 and recover its state when restarted.
23 On Solaris, the daemons are SMF managed, and you should not attempt
24 to start xend by hand.
25 """
26 import fcntl
27 import glob
28 import os
29 import os.path
30 import sys
31 import socket
32 import signal
33 import time
34 import commands
# Run the 'xen-python-path' helper located in the same directory as this
# script. Its output is appended to sys.path, which has to happen before the
# xen package import below.
_path_helper = os.path.join(os.path.dirname(sys.argv[0]), 'xen-python-path')
result = commands.getstatusoutput(_path_helper)
if result[0] == 0:
    sys.path.append(result[1])
else:
    # The helper failed: relay whatever it printed and give up.
    sys.stderr.write(result[1] + "\n")
    sys.exit(1)

from xen.xend.server import SrvDaemon
class CheckError(ValueError):
    """Raised when a start-up precondition check fails."""
def hline():
    """Write a horizontal rule of 70 asterisks to standard error."""
    rule = "*" * 70
    sys.stderr.write(rule + "\n")
def msg(message):
    """Write *message* to standard error, prefixed with three asterisks.

    The prefix and the message are separated by a single space and the line
    is terminated with a newline.
    """
    prefix = "*" * 3
    sys.stderr.write("%s %s\n" % (prefix, message))
def check_logging():
    """Verify that the Python logging module can be imported.

    Logging is standard from Python 2.3 on. When the import fails, an
    explanatory banner is written via hline()/msg() and CheckError is raised.
    """
    try:
        import logging
    except ImportError:
        banner = (
            "Python logging is not installed.",
            "Use 'make install-logging' at the xen root to install.",
            "",
            "Alternatively download and install from",
            "http://www.red-dove.com/python_logging.html",
        )
        hline()
        for line in banner:
            msg(line)
        hline()
        raise CheckError("logging is not installed")
def check_user():
    """Verify that the effective user id is 0 (root).

    Otherwise a banner is written via hline()/msg() and CheckError is raised.
    """
    if os.geteuid() == 0:
        return
    hline()
    msg("Xend must be run as root.")
    hline()
    raise CheckError("invalid user")
def start_xenstored():
    """Remove stale xenstored database files, then launch xenstored.

    Cleanup is best effort. If /var/run/xenstore.pid can be opened and
    locked without blocking, every file matching "<rootdir>/tdb*" is deleted
    and the pid file itself is unlinked. <rootdir> is $XENSTORED_ROOTDIR, or
    /var/lib/xenstored when that variable is unset or empty. If the pid file
    cannot be opened or locked, nothing is removed.

    Afterwards xenstored is always started through the shell. When the
    XENSTORED_TRACE environment variable is set to a non-empty value, tracing
    to /var/log/xen/xenstored-trace.log is requested with -T.
    """
    pidfname = "/var/run/xenstore.pid"
    try:
        f = open(pidfname, "a")
        try:
            # Non-blocking exclusive lock: raises immediately if another
            # process holds it (presumably a running xenstored), in which
            # case the database must be left alone.
            fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
            rootdir = os.getenv("XENSTORED_ROOTDIR") or "/var/lib/xenstored"
            for stale in glob.glob(rootdir + "/tdb*"):
                try:
                    os.unlink(stale)
                except OSError:
                    # Keep going: one undeletable file must not stop the
                    # removal of the others.
                    pass
            os.unlink(pidfname)
        finally:
            # Closing the file also drops the lock taken above.
            f.close()
    except (IOError, OSError):
        # Only I/O failures are ignored here; anything else (including
        # KeyboardInterrupt) is allowed to propagate.
        pass

    cmd = "xenstored --pid-file " + pidfname
    if os.getenv("XENSTORED_TRACE"):
        cmd += " -T /var/log/xen/xenstored-trace.log"
    # Exit status and output are discarded, as before.
    # NOTE(review): a non-zero status is presumably normal when xenstored is
    # already running -- confirm before reporting it as an error.
    commands.getstatusoutput(cmd)
def _fork_exec(program):
    """Fork, and in the child replace the process image with *program*.

    *program* is looked up on PATH and run with no arguments. The parent
    returns right after the fork without waiting for the child. If the exec
    fails, the child reports the error on standard error and terminates
    immediately with status 127, so it never unwinds back into the caller's
    code.
    """
    if os.fork() != 0:
        return
    # Child process from here on: it either becomes `program` or exits.
    try:
        try:
            os.execvp(program, [program])
        except OSError:
            sys.stderr.write("xend: cannot execute %s: %s\n"
                             % (program, sys.exc_info()[1]))
            sys.stderr.flush()
    finally:
        # os._exit skips interpreter cleanup, which belongs to the parent.
        os._exit(127)


def start_consoled():
    """Launch 'xenconsoled' in a forked child process."""
    _fork_exec('xenconsoled')


def start_blktapctrl():
    """Launch 'blktapctrl' in a forked child process."""
    _fork_exec('blktapctrl')
def main():
    """Run the environment checks, then dispatch on the first argument.

    Recognised commands are start, trace_start, stop, reload, restart and
    status. The value of the corresponding SrvDaemon call is returned. With
    no argument a usage line is printed and None is returned; an unknown
    argument prints a message and returns 1. A failed environment check
    exits the process with status 1.
    """
    try:
        check_logging()
        check_user()
    except CheckError:
        # The failing check has already printed its explanation.
        sys.exit(1)

    def start_helpers():
        # Helper daemons, always launched in this order.
        start_xenstored()
        start_consoled()
        start_blktapctrl()

    daemon = SrvDaemon.instance()
    args = sys.argv[1:]
    if not args:
        # NOTE(review): this path returns None, unlike the unknown-option
        # path below which returns 1 -- confirm that is intended.
        sys.stdout.write('usage: %s {start|stop|reload|restart}\n'
                         % sys.argv[0])
        return None

    command = args[0]
    if command == 'start':
        # NOTE(review): only 'start' skips the helper daemons on SunOS;
        # 'trace_start' and 'restart' launch them unconditionally.
        if os.uname()[0] != "SunOS":
            start_helpers()
        return daemon.start()
    if command == 'trace_start':
        start_helpers()
        return daemon.start(trace=1)
    if command == 'stop':
        return daemon.stop()
    if command == 'reload':
        return daemon.reloadConfig()
    if command == 'restart':
        start_helpers()
        # start() is only attempted when stop() returns a false value.
        return daemon.stop() or daemon.start()
    if command == 'status':
        return daemon.status()

    sys.stdout.write('not an option: %s\n' % (command,))
    return 1
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(main())