ia64/xen-unstable
changeset 10414:fbc0e953732e
merge with xen-unstable.hg
author   | awilliam@xenbuild.aw
date     | Thu Jun 15 10:23:57 2006 -0600 (2006-06-15)
parents  | 7f67c15e2c91 73c73fb8875c
children | 08378b83ea1e
files    | tools/security/python/xensec_tools/acm_getdecision
line diff
1.1 --- a/extras/mini-os/events.c Thu Jun 15 10:02:53 2006 -0600 1.2 +++ b/extras/mini-os/events.c Thu Jun 15 10:23:57 2006 -0600 1.3 @@ -35,24 +35,29 @@ int do_event(u32 port, struct pt_regs *r 1.4 ev_action_t *action; 1.5 if (port >= NR_EVS) { 1.6 printk("Port number too large: %d\n", port); 1.7 - return 0; 1.8 + goto out; 1.9 } 1.10 1.11 action = &ev_actions[port]; 1.12 action->count++; 1.13 1.14 if (!action->handler) 1.15 + { 1.16 + printk("Spurious event on port %d\n", port); 1.17 goto out; 1.18 + } 1.19 1.20 if (action->status & EVS_DISABLED) 1.21 + { 1.22 + printk("Event on port %d disabled\n", port); 1.23 goto out; 1.24 + } 1.25 1.26 /* call the handler */ 1.27 action->handler(port, regs); 1.28 - 1.29 - clear_evtchn(port); 1.30 1.31 out: 1.32 + clear_evtchn(port); 1.33 return 1; 1.34 1.35 } 1.36 @@ -135,6 +140,7 @@ void init_events(void) 1.37 { 1.38 ev_actions[i].status = EVS_DISABLED; 1.39 ev_actions[i].handler = default_handler; 1.40 + mask_evtchn(i); 1.41 } 1.42 } 1.43
2.1 --- a/extras/mini-os/include/xenbus.h Thu Jun 15 10:02:53 2006 -0600 2.2 +++ b/extras/mini-os/include/xenbus.h Thu Jun 15 10:23:57 2006 -0600 2.3 @@ -1,6 +1,34 @@ 2.4 #ifndef XENBUS_H__ 2.5 #define XENBUS_H__ 2.6 2.7 +/* Initialize the XenBus system. */ 2.8 void init_xenbus(void); 2.9 2.10 +/* Read the value associated with a path. Returns a malloc'd error 2.11 + string on failure and sets *value to NULL. On success, *value is 2.12 + set to a malloc'd copy of the value. */ 2.13 +char *xenbus_read(const char *path, char **value); 2.14 + 2.15 +/* Associates a value with a path. Returns a malloc'd error string on 2.16 + failure. */ 2.17 +char *xenbus_write(const char *path, const char *value); 2.18 + 2.19 +/* Removes the value associated with a path. Returns a malloc'd error 2.20 + string on failure. */ 2.21 +char *xenbus_rm(const char *path); 2.22 + 2.23 +/* List the contents of a directory. Returns a malloc'd error string 2.24 + on failure and sets *contents to NULL. On success, *contents is 2.25 + set to a malloc'd array of pointers to malloc'd strings. The array 2.26 + is NULL terminated. May block. */ 2.27 +char *xenbus_ls(const char *prefix, char ***contents); 2.28 + 2.29 +/* Reads permissions associated with a path. Returns a malloc'd error 2.30 + string on failure and sets *value to NULL. On success, *value is 2.31 + set to a malloc'd copy of the value. */ 2.32 +char *xenbus_get_perms(const char *path, char **value); 2.33 + 2.34 +/* Sets the permissions associated with a path. Returns a malloc'd 2.35 + error string on failure. */ 2.36 +char *xenbus_set_perms(const char *path, domid_t dom, char perm); 2.37 #endif /* XENBUS_H__ */
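The new xenbus.h above settles on one calling convention for the whole API: each call returns NULL on success (handing any result back through an out-parameter) or a malloc'd error string that the caller must free. A minimal caller-side sketch of that contract, using a hypothetical demo_read() stand-in rather than the real mini-os functions:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Helper in the same style as errmsg() in xenbus.c below: return a
 * malloc'd copy of a string. */
static char *dup_string(const char *s)
{
    size_t len = strlen(s) + 1;
    return memcpy(malloc(len), s, len);
}

/* Hypothetical stand-in for xenbus_read(): a NULL return means success
 * and *value holds a malloc'd result; otherwise the return value is a
 * malloc'd error string and *value is set to NULL. */
static char *demo_read(const char *path, char **value)
{
    if (strcmp(path, "device/vif/0/mac") != 0) {
        *value = NULL;
        return dup_string("ENOENT");
    }
    *value = dup_string("00:16:3e:00:00:01");
    return NULL;
}

int main(void)
{
    char *value;
    char *msg = demo_read("device/vif/0/mac", &value);

    if (msg) {                     /* failure: free the error string */
        fprintf(stderr, "error: %s\n", msg);
        free(msg);
        return 1;
    }
    printf("value: %s\n", value);  /* success: free the returned value */
    free(value);
    return 0;
}
```

The do_read_test()/do_write_test()/do_rm_test() helpers added to xenbus.c later in this changeset follow exactly this check-then-free pattern.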
3.1 --- a/extras/mini-os/kernel.c Thu Jun 15 10:02:53 2006 -0600 3.2 +++ b/extras/mini-os/kernel.c Thu Jun 15 10:23:57 2006 -0600 3.3 @@ -82,17 +82,6 @@ static shared_info_t *map_shared_info(un 3.4 } 3.5 3.6 3.7 -void test_xenbus(void); 3.8 - 3.9 -/* Do initialisation from a thread once the scheduler's available */ 3.10 -static void init_xs(void *ign) 3.11 -{ 3.12 - init_xenbus(); 3.13 - 3.14 - test_xenbus(); 3.15 -} 3.16 - 3.17 - 3.18 u8 xen_features[XENFEAT_NR_SUBMAPS * 32]; 3.19 3.20 void setup_xen_features(void) 3.21 @@ -111,10 +100,18 @@ void setup_xen_features(void) 3.22 } 3.23 } 3.24 3.25 +void test_xenbus(void); 3.26 + 3.27 +void xenbus_tester(void *p) 3.28 +{ 3.29 + test_xenbus(); 3.30 +} 3.31 + 3.32 /* This should be overridden by the application we are linked against. */ 3.33 __attribute__((weak)) int app_main(start_info_t *si) 3.34 { 3.35 printk("Dummy main: start_info=%p\n", si); 3.36 + create_thread("xenbus_tester", xenbus_tester, si); 3.37 return 0; 3.38 } 3.39 3.40 @@ -183,8 +180,8 @@ void start_kernel(start_info_t *si) 3.41 /* Init scheduler. */ 3.42 init_sched(); 3.43 3.44 - /* Init XenBus from a separate thread */ 3.45 - create_thread("init_xs", init_xs, NULL); 3.46 + /* Init XenBus */ 3.47 + init_xenbus(); 3.48 3.49 /* Call (possibly overridden) app_main() */ 3.50 app_main(&start_info);
4.1 --- a/extras/mini-os/xenbus/xenbus.c Thu Jun 15 10:02:53 2006 -0600 4.2 +++ b/extras/mini-os/xenbus/xenbus.c Thu Jun 15 10:23:57 2006 -0600 4.3 @@ -3,11 +3,12 @@ 4.4 * (C) 2006 - Cambridge University 4.5 **************************************************************************** 4.6 * 4.7 - * File: mm.c 4.8 + * File: xenbus.c 4.9 * Author: Steven Smith (sos22@cam.ac.uk) 4.10 * Changes: Grzegorz Milos (gm281@cam.ac.uk) 4.11 + * Changes: John D. Ramsdell 4.12 * 4.13 - * Date: Mar 2006, chages Aug 2005 4.14 + * Date: Jun 2006, chages Aug 2005 4.15 * 4.16 * Environment: Xen Minimal OS 4.17 * Description: Minimal implementation of xenbus 4.18 @@ -167,6 +168,7 @@ static int allocate_xenbus_id(void) 4.19 void init_xenbus(void) 4.20 { 4.21 int err; 4.22 + printk("Initialising xenbus\n"); 4.23 DEBUG("init_xenbus called.\n"); 4.24 xenstore_buf = mfn_to_virt(start_info.store_mfn); 4.25 create_thread("xenstore", xenbus_thread_func, NULL); 4.26 @@ -262,15 +264,15 @@ static void xb_write(int type, int req_i 4.27 /* Send a mesasge to xenbus, in the same fashion as xb_write, and 4.28 block waiting for a reply. The reply is malloced and should be 4.29 freed by the caller. */ 4.30 -static void *xenbus_msg_reply(int type, 4.31 +static struct xsd_sockmsg * 4.32 +xenbus_msg_reply(int type, 4.33 int trans, 4.34 struct write_req *io, 4.35 int nr_reqs) 4.36 { 4.37 int id; 4.38 DEFINE_WAIT(w); 4.39 - void *rep; 4.40 - struct xsd_sockmsg *repmsg; 4.41 + struct xsd_sockmsg *rep; 4.42 4.43 id = allocate_xenbus_id(); 4.44 add_waiter(w, req_info[id].waitq); 4.45 @@ -281,13 +283,27 @@ static void *xenbus_msg_reply(int type, 4.46 wake(current); 4.47 4.48 rep = req_info[id].reply; 4.49 - repmsg = rep; 4.50 - BUG_ON(repmsg->req_id != id); 4.51 + BUG_ON(rep->req_id != id); 4.52 release_xenbus_id(id); 4.53 - 4.54 return rep; 4.55 } 4.56 4.57 +static char *errmsg(struct xsd_sockmsg *rep) 4.58 +{ 4.59 + if (!rep) { 4.60 + char msg[] = "No reply"; 4.61 + size_t len = strlen(msg) + 1; 4.62 + return memcpy(malloc(len), msg, len); 4.63 + } 4.64 + if (rep->type != XS_ERROR) 4.65 + return NULL; 4.66 + char *res = malloc(rep->len + 1); 4.67 + memcpy(res, rep + 1, rep->len); 4.68 + res[rep->len] = 0; 4.69 + free(rep); 4.70 + return res; 4.71 +} 4.72 + 4.73 /* Send a debug message to xenbus. Can block. */ 4.74 static void xenbus_debug_msg(const char *msg) 4.75 { 4.76 @@ -296,27 +312,29 @@ static void xenbus_debug_msg(const char 4.77 { "print", sizeof("print") }, 4.78 { msg, len }, 4.79 { "", 1 }}; 4.80 - void *reply; 4.81 - struct xsd_sockmsg *repmsg; 4.82 + struct xsd_sockmsg *reply; 4.83 4.84 - reply = xenbus_msg_reply(XS_DEBUG, 0, req, 3); 4.85 - repmsg = reply; 4.86 + reply = xenbus_msg_reply(XS_DEBUG, 0, req, ARRAY_SIZE(req)); 4.87 DEBUG("Got a reply, type %d, id %d, len %d.\n", 4.88 - repmsg->type, repmsg->req_id, repmsg->len); 4.89 + reply->type, reply->req_id, reply->len); 4.90 } 4.91 4.92 /* List the contents of a directory. Returns a malloc()ed array of 4.93 pointers to malloc()ed strings. The array is NULL terminated. May 4.94 block. 
*/ 4.95 -static char **xenbus_ls(const char *pre) 4.96 +char *xenbus_ls(const char *pre, char ***contents) 4.97 { 4.98 - void *reply; 4.99 - struct xsd_sockmsg *repmsg; 4.100 + struct xsd_sockmsg *reply, *repmsg; 4.101 struct write_req req[] = { { pre, strlen(pre)+1 } }; 4.102 int nr_elems, x, i; 4.103 char **res; 4.104 4.105 - repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, 1); 4.106 + repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, ARRAY_SIZE(req)); 4.107 + char *msg = errmsg(repmsg); 4.108 + if (msg) { 4.109 + *contents = NULL; 4.110 + return msg; 4.111 + } 4.112 reply = repmsg + 1; 4.113 for (x = nr_elems = 0; x < repmsg->len; x++) 4.114 nr_elems += (((char *)reply)[x] == 0); 4.115 @@ -329,20 +347,91 @@ static char **xenbus_ls(const char *pre) 4.116 } 4.117 res[i] = NULL; 4.118 free(repmsg); 4.119 - return res; 4.120 + *contents = res; 4.121 + return NULL; 4.122 } 4.123 4.124 -static char *xenbus_read(const char *path) 4.125 +char *xenbus_read(const char *path, char **value) 4.126 { 4.127 - struct write_req req[] = { {path, strlen(path) + 1}}; 4.128 + struct write_req req[] = { {path, strlen(path) + 1} }; 4.129 struct xsd_sockmsg *rep; 4.130 char *res; 4.131 - rep = xenbus_msg_reply(XS_READ, 0, req, 1); 4.132 + rep = xenbus_msg_reply(XS_READ, 0, req, ARRAY_SIZE(req)); 4.133 + char *msg = errmsg(rep); 4.134 + if (msg) { 4.135 + *value = NULL; 4.136 + return msg; 4.137 + } 4.138 res = malloc(rep->len + 1); 4.139 memcpy(res, rep + 1, rep->len); 4.140 res[rep->len] = 0; 4.141 free(rep); 4.142 - return res; 4.143 + *value = res; 4.144 + return NULL; 4.145 +} 4.146 + 4.147 +char *xenbus_write(const char *path, const char *value) 4.148 +{ 4.149 + struct write_req req[] = { 4.150 + {path, strlen(path) + 1}, 4.151 + {value, strlen(value) + 1}, 4.152 + }; 4.153 + struct xsd_sockmsg *rep; 4.154 + rep = xenbus_msg_reply(XS_WRITE, 0, req, ARRAY_SIZE(req)); 4.155 + char *msg = errmsg(rep); 4.156 + if (msg) 4.157 + return msg; 4.158 + free(rep); 4.159 + return NULL; 4.160 +} 4.161 + 4.162 +char *xenbus_rm(const char *path) 4.163 +{ 4.164 + struct write_req req[] = { {path, strlen(path) + 1} }; 4.165 + struct xsd_sockmsg *rep; 4.166 + rep = xenbus_msg_reply(XS_RM, 0, req, ARRAY_SIZE(req)); 4.167 + char *msg = errmsg(rep); 4.168 + if (msg) 4.169 + return msg; 4.170 + free(rep); 4.171 + return NULL; 4.172 +} 4.173 + 4.174 +char *xenbus_get_perms(const char *path, char **value) 4.175 +{ 4.176 + struct write_req req[] = { {path, strlen(path) + 1} }; 4.177 + struct xsd_sockmsg *rep; 4.178 + char *res; 4.179 + rep = xenbus_msg_reply(XS_GET_PERMS, 0, req, ARRAY_SIZE(req)); 4.180 + char *msg = errmsg(rep); 4.181 + if (msg) { 4.182 + *value = NULL; 4.183 + return msg; 4.184 + } 4.185 + res = malloc(rep->len + 1); 4.186 + memcpy(res, rep + 1, rep->len); 4.187 + res[rep->len] = 0; 4.188 + free(rep); 4.189 + *value = res; 4.190 + return NULL; 4.191 +} 4.192 + 4.193 +#define PERM_MAX_SIZE 32 4.194 +char *xenbus_set_perms(const char *path, domid_t dom, char perm) 4.195 +{ 4.196 + char value[PERM_MAX_SIZE]; 4.197 + snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom); 4.198 + struct write_req req[] = { 4.199 + {path, strlen(path) + 1}, 4.200 + {value, strlen(value) + 1}, 4.201 + }; 4.202 + struct xsd_sockmsg *rep; 4.203 + rep = xenbus_msg_reply(XS_SET_PERMS, 0, req, ARRAY_SIZE(req)); 4.204 + char *msg = errmsg(rep); 4.205 + if (msg) 4.206 + return msg; 4.207 + free(rep); 4.208 + return NULL; 4.209 } 4.210 4.211 static void do_ls_test(const char *pre) 4.212 @@ -351,7 +440,12 @@ static void do_ls_test(const char *pre) 
4.213 int x; 4.214 4.215 DEBUG("ls %s...\n", pre); 4.216 - dirs = xenbus_ls(pre); 4.217 + char *msg = xenbus_ls(pre, &dirs); 4.218 + if (msg) { 4.219 + DEBUG("Error in xenbus ls: %s\n", msg); 4.220 + free(msg); 4.221 + return; 4.222 + } 4.223 for (x = 0; dirs[x]; x++) 4.224 { 4.225 DEBUG("ls %s[%d] -> %s\n", pre, x, dirs[x]); 4.226 @@ -364,11 +458,40 @@ static void do_read_test(const char *pat 4.227 { 4.228 char *res; 4.229 DEBUG("Read %s...\n", path); 4.230 - res = xenbus_read(path); 4.231 + char *msg = xenbus_read(path, &res); 4.232 + if (msg) { 4.233 + DEBUG("Error in xenbus read: %s\n", msg); 4.234 + free(msg); 4.235 + return; 4.236 + } 4.237 DEBUG("Read %s -> %s.\n", path, res); 4.238 free(res); 4.239 } 4.240 4.241 +static void do_write_test(const char *path, const char *val) 4.242 +{ 4.243 + DEBUG("Write %s to %s...\n", val, path); 4.244 + char *msg = xenbus_write(path, val); 4.245 + if (msg) { 4.246 + DEBUG("Result %s\n", msg); 4.247 + free(msg); 4.248 + } else { 4.249 + DEBUG("Success.\n"); 4.250 + } 4.251 +} 4.252 + 4.253 +static void do_rm_test(const char *path) 4.254 +{ 4.255 + DEBUG("rm %s...\n", path); 4.256 + char *msg = xenbus_rm(path); 4.257 + if (msg) { 4.258 + DEBUG("Result %s\n", msg); 4.259 + free(msg); 4.260 + } else { 4.261 + DEBUG("Success.\n"); 4.262 + } 4.263 +} 4.264 + 4.265 /* Simple testing thing */ 4.266 void test_xenbus(void) 4.267 { 4.268 @@ -383,5 +506,22 @@ void test_xenbus(void) 4.269 DEBUG("Doing read test.\n"); 4.270 do_read_test("device/vif/0/mac"); 4.271 do_read_test("device/vif/0/backend"); 4.272 - printk("Xenbus initialised.\n"); 4.273 + 4.274 + DEBUG("Doing write test.\n"); 4.275 + do_write_test("device/vif/0/flibble", "flobble"); 4.276 + do_read_test("device/vif/0/flibble"); 4.277 + do_write_test("device/vif/0/flibble", "widget"); 4.278 + do_read_test("device/vif/0/flibble"); 4.279 + 4.280 + DEBUG("Doing rm test.\n"); 4.281 + do_rm_test("device/vif/0/flibble"); 4.282 + do_read_test("device/vif/0/flibble"); 4.283 + DEBUG("(Should have said ENOENT)\n"); 4.284 } 4.285 + 4.286 +/* 4.287 + * Local variables: 4.288 + * mode: C 4.289 + * c-basic-offset: 4 4.290 + * End: 4.291 + */
5.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Thu Jun 15 10:02:53 2006 -0600 5.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c Thu Jun 15 10:23:57 2006 -0600 5.3 @@ -133,6 +133,7 @@ void xen_tlb_flush(void) 5.4 op.cmd = MMUEXT_TLB_FLUSH_LOCAL; 5.5 BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); 5.6 } 5.7 +EXPORT_SYMBOL(xen_tlb_flush); 5.8 5.9 void xen_invlpg(unsigned long ptr) 5.10 { 5.11 @@ -141,6 +142,7 @@ void xen_invlpg(unsigned long ptr) 5.12 op.arg1.linear_addr = ptr & PAGE_MASK; 5.13 BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); 5.14 } 5.15 +EXPORT_SYMBOL(xen_invlpg); 5.16 5.17 #ifdef CONFIG_SMP 5.18 5.19 @@ -363,7 +365,8 @@ void xen_destroy_contiguous_region(unsig 5.20 }; 5.21 set_xen_guest_handle(reservation.extent_start, &frame); 5.22 5.23 - if (xen_feature(XENFEAT_auto_translated_physmap)) 5.24 + if (xen_feature(XENFEAT_auto_translated_physmap) || 5.25 + !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap)) 5.26 return; 5.27 5.28 scrub_pages(vstart, 1 << order);
6.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Thu Jun 15 10:02:53 2006 -0600 6.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Thu Jun 15 10:23:57 2006 -0600 6.3 @@ -763,7 +763,7 @@ void __init pgtable_cache_init(void) 6.4 #endif 6.5 0, 6.6 pgd_ctor, 6.7 - pgd_dtor); 6.8 + PTRS_PER_PMD == 1 ? pgd_dtor : NULL); 6.9 if (!pgd_cache) 6.10 panic("pgtable_cache_init(): Cannot create pgd cache"); 6.11 }
7.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Jun 15 10:02:53 2006 -0600 7.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Jun 15 10:23:57 2006 -0600 7.3 @@ -300,11 +300,6 @@ void pgd_ctor(void *pgd, kmem_cache_t *c 7.4 unsigned long flags; 7.5 7.6 if (PTRS_PER_PMD > 1) { 7.7 - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) { 7.8 - int rc = xen_create_contiguous_region( 7.9 - (unsigned long)pgd, 0, 32); 7.10 - BUG_ON(rc); 7.11 - } 7.12 if (HAVE_SHARED_KERNEL_PMD) 7.13 clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, 7.14 swapper_pg_dir + USER_PTRS_PER_PGD, 7.15 @@ -320,69 +315,105 @@ void pgd_ctor(void *pgd, kmem_cache_t *c 7.16 } 7.17 } 7.18 7.19 +/* never called when PTRS_PER_PMD > 1 */ 7.20 void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) 7.21 { 7.22 unsigned long flags; /* can be called from interrupt context */ 7.23 7.24 - if (PTRS_PER_PMD > 1) { 7.25 - if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) 7.26 - xen_destroy_contiguous_region((unsigned long)pgd, 0); 7.27 - } else { 7.28 - spin_lock_irqsave(&pgd_lock, flags); 7.29 - pgd_list_del(pgd); 7.30 - spin_unlock_irqrestore(&pgd_lock, flags); 7.31 + spin_lock_irqsave(&pgd_lock, flags); 7.32 + pgd_list_del(pgd); 7.33 + spin_unlock_irqrestore(&pgd_lock, flags); 7.34 7.35 - pgd_test_and_unpin(pgd); 7.36 - } 7.37 + pgd_test_and_unpin(pgd); 7.38 } 7.39 7.40 pgd_t *pgd_alloc(struct mm_struct *mm) 7.41 { 7.42 int i; 7.43 pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); 7.44 + pmd_t **pmd; 7.45 + unsigned long flags; 7.46 7.47 pgd_test_and_unpin(pgd); 7.48 7.49 if (PTRS_PER_PMD == 1 || !pgd) 7.50 return pgd; 7.51 7.52 - for (i = 0; i < USER_PTRS_PER_PGD; ++i) { 7.53 - pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); 7.54 - if (!pmd) 7.55 - goto out_oom; 7.56 - set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); 7.57 - } 7.58 - 7.59 - if (!HAVE_SHARED_KERNEL_PMD) { 7.60 - unsigned long flags; 7.61 - 7.62 - for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { 7.63 + if (HAVE_SHARED_KERNEL_PMD) { 7.64 + for (i = 0; i < USER_PTRS_PER_PGD; ++i) { 7.65 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); 7.66 if (!pmd) 7.67 goto out_oom; 7.68 set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); 7.69 } 7.70 + return pgd; 7.71 + } 7.72 7.73 - spin_lock_irqsave(&pgd_lock, flags); 7.74 - for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { 7.75 - unsigned long v = (unsigned long)i << PGDIR_SHIFT; 7.76 - pgd_t *kpgd = pgd_offset_k(v); 7.77 - pud_t *kpud = pud_offset(kpgd, v); 7.78 - pmd_t *kpmd = pmd_offset(kpud, v); 7.79 - pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); 7.80 - memcpy(pmd, kpmd, PAGE_SIZE); 7.81 - make_lowmem_page_readonly( 7.82 - pmd, XENFEAT_writable_page_tables); 7.83 + /* 7.84 + * We can race save/restore (if we sleep during a GFP_KERNEL memory 7.85 + * allocation). We therefore store virtual addresses of pmds as they 7.86 + * do not change across save/restore, and poke the machine addresses 7.87 + * into the pgdir under the pgd_lock. 7.88 + */ 7.89 + pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL); 7.90 + if (!pmd) { 7.91 + kmem_cache_free(pgd_cache, pgd); 7.92 + return NULL; 7.93 + } 7.94 + 7.95 + /* Allocate pmds, remember virtual addresses. */ 7.96 + for (i = 0; i < PTRS_PER_PGD; ++i) { 7.97 + pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL); 7.98 + if (!pmd[i]) 7.99 + goto out_oom; 7.100 + } 7.101 + 7.102 + spin_lock_irqsave(&pgd_lock, flags); 7.103 + 7.104 + /* Protect against save/restore: move below 4GB under pgd_lock. 
*/ 7.105 + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) { 7.106 + int rc = xen_create_contiguous_region( 7.107 + (unsigned long)pgd, 0, 32); 7.108 + if (rc) { 7.109 + spin_unlock_irqrestore(&pgd_lock, flags); 7.110 + goto out_oom; 7.111 } 7.112 - pgd_list_add(pgd); 7.113 - spin_unlock_irqrestore(&pgd_lock, flags); 7.114 + } 7.115 + 7.116 + /* Copy kernel pmd contents and write-protect the new pmds. */ 7.117 + for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { 7.118 + unsigned long v = (unsigned long)i << PGDIR_SHIFT; 7.119 + pgd_t *kpgd = pgd_offset_k(v); 7.120 + pud_t *kpud = pud_offset(kpgd, v); 7.121 + pmd_t *kpmd = pmd_offset(kpud, v); 7.122 + memcpy(pmd[i], kpmd, PAGE_SIZE); 7.123 + make_lowmem_page_readonly( 7.124 + pmd[i], XENFEAT_writable_page_tables); 7.125 } 7.126 7.127 + /* It is safe to poke machine addresses of pmds under the pmd_lock. */ 7.128 + for (i = 0; i < PTRS_PER_PGD; i++) 7.129 + set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i]))); 7.130 + 7.131 + /* Ensure this pgd gets picked up and pinned on save/restore. */ 7.132 + pgd_list_add(pgd); 7.133 + 7.134 + spin_unlock_irqrestore(&pgd_lock, flags); 7.135 + 7.136 + kfree(pmd); 7.137 + 7.138 return pgd; 7.139 7.140 out_oom: 7.141 - for (i--; i >= 0; i--) 7.142 - kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); 7.143 + if (HAVE_SHARED_KERNEL_PMD) { 7.144 + for (i--; i >= 0; i--) 7.145 + kmem_cache_free(pmd_cache, 7.146 + (void *)__va(pgd_val(pgd[i])-1)); 7.147 + } else { 7.148 + for (i--; i >= 0; i--) 7.149 + kmem_cache_free(pmd_cache, pmd[i]); 7.150 + kfree(pmd); 7.151 + } 7.152 kmem_cache_free(pgd_cache, pgd); 7.153 return NULL; 7.154 } 7.155 @@ -391,6 +422,14 @@ void pgd_free(pgd_t *pgd) 7.156 { 7.157 int i; 7.158 7.159 + /* 7.160 + * After this the pgd should not be pinned for the duration of this 7.161 + * function's execution. We should never sleep and thus never race: 7.162 + * 1. User pmds will not become write-protected under our feet due 7.163 + * to a concurrent mm_pin_all(). 7.164 + * 2. The machine addresses in PGD entries will not become invalid 7.165 + * due to a concurrent save/restore. 
7.166 + */ 7.167 pgd_test_and_unpin(pgd); 7.168 7.169 /* in the PAE case user pgd entries are overwritten before usage */ 7.170 @@ -399,11 +438,13 @@ void pgd_free(pgd_t *pgd) 7.171 pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); 7.172 kmem_cache_free(pmd_cache, pmd); 7.173 } 7.174 + 7.175 if (!HAVE_SHARED_KERNEL_PMD) { 7.176 unsigned long flags; 7.177 spin_lock_irqsave(&pgd_lock, flags); 7.178 pgd_list_del(pgd); 7.179 spin_unlock_irqrestore(&pgd_lock, flags); 7.180 + 7.181 for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) { 7.182 pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1); 7.183 make_lowmem_page_writable( 7.184 @@ -411,8 +452,13 @@ void pgd_free(pgd_t *pgd) 7.185 memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); 7.186 kmem_cache_free(pmd_cache, pmd); 7.187 } 7.188 + 7.189 + if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) 7.190 + xen_destroy_contiguous_region( 7.191 + (unsigned long)pgd, 0); 7.192 } 7.193 } 7.194 + 7.195 /* in the non-PAE case, free_pgtables() clears user pgd entries */ 7.196 kmem_cache_free(pgd_cache, pgd); 7.197 } 7.198 @@ -588,7 +634,7 @@ static void pgd_test_and_unpin(pgd_t *pg 7.199 void mm_pin(struct mm_struct *mm) 7.200 { 7.201 if (xen_feature(XENFEAT_writable_page_tables)) 7.202 - return; 7.203 + return; 7.204 spin_lock(&mm->page_table_lock); 7.205 __pgd_pin(mm->pgd); 7.206 spin_unlock(&mm->page_table_lock); 7.207 @@ -597,7 +643,7 @@ void mm_pin(struct mm_struct *mm) 7.208 void mm_unpin(struct mm_struct *mm) 7.209 { 7.210 if (xen_feature(XENFEAT_writable_page_tables)) 7.211 - return; 7.212 + return; 7.213 spin_lock(&mm->page_table_lock); 7.214 __pgd_unpin(mm->pgd); 7.215 spin_unlock(&mm->page_table_lock); 7.216 @@ -607,13 +653,19 @@ void mm_pin_all(void) 7.217 { 7.218 struct page *page; 7.219 if (xen_feature(XENFEAT_writable_page_tables)) 7.220 - return; 7.221 + return; 7.222 for (page = pgd_list; page; page = (struct page *)page->index) { 7.223 if (!test_bit(PG_pinned, &page->flags)) 7.224 __pgd_pin((pgd_t *)page_address(page)); 7.225 } 7.226 } 7.227 7.228 +void _arch_dup_mmap(struct mm_struct *mm) 7.229 +{ 7.230 + if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags)) 7.231 + mm_pin(mm); 7.232 +} 7.233 + 7.234 void _arch_exit_mmap(struct mm_struct *mm) 7.235 { 7.236 struct task_struct *tsk = current;
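The pgd_alloc() rework above is built around the rule spelled out in its new comment: everything that can sleep (the pmd allocations, the temporary pointer array) happens before pgd_lock is taken, and machine addresses are only poked into the pgd while the lock is held, so a concurrent save/restore never sees a half-built page directory. A rough user-space analogue of that allocate-outside, publish-under-the-lock shape (pthreads; the names are illustrative, not kernel or mini-os API):

```c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define NSLOTS 4

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static int *table[NSLOTS];           /* published state, guarded by table_lock */

/* Analogue of the reworked pgd_alloc(): perform every allocation that
 * may block before taking the lock, remember the results privately, and
 * hold the lock only while publishing them. */
static int publish_new_slots(void)
{
    int *slots[NSLOTS];
    int i;

    for (i = 0; i < NSLOTS; i++) {    /* may block; lock not held */
        slots[i] = malloc(sizeof(int));
        if (!slots[i])
            goto out_oom;
        *slots[i] = i;
    }

    pthread_mutex_lock(&table_lock);  /* short critical section: publish only */
    for (i = 0; i < NSLOTS; i++)
        table[i] = slots[i];
    pthread_mutex_unlock(&table_lock);
    return 0;

out_oom:
    while (i--)
        free(slots[i]);
    return -1;
}

int main(void)
{
    if (publish_new_slots() == 0)
        printf("published %d slots\n", NSLOTS);
    return 0;
}
```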
8.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Thu Jun 15 10:02:53 2006 -0600 8.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Thu Jun 15 10:23:57 2006 -0600 8.3 @@ -130,6 +130,12 @@ void mm_pin_all(void) 8.4 context.unpinned)); 8.5 } 8.6 8.7 +void _arch_dup_mmap(struct mm_struct *mm) 8.8 +{ 8.9 + if (!mm->context.pinned) 8.10 + mm_pin(mm); 8.11 +} 8.12 + 8.13 void _arch_exit_mmap(struct mm_struct *mm) 8.14 { 8.15 struct task_struct *tsk = current;
9.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Jun 15 10:02:53 2006 -0600 9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Jun 15 10:23:57 2006 -0600 9.3 @@ -342,9 +342,21 @@ static void connect(struct blkfront_info 9.4 static void blkfront_closing(struct xenbus_device *dev) 9.5 { 9.6 struct blkfront_info *info = dev->dev.driver_data; 9.7 + unsigned long flags; 9.8 9.9 DPRINTK("blkfront_closing: %s removed\n", dev->nodename); 9.10 9.11 + if (info->rq == NULL) 9.12 + return; 9.13 + 9.14 + spin_lock_irqsave(&blkif_io_lock, flags); 9.15 + /* No more blkif_request(). */ 9.16 + blk_stop_queue(info->rq); 9.17 + /* No more gnttab callback work. */ 9.18 + gnttab_cancel_free_callback(&info->callback); 9.19 + flush_scheduled_work(); 9.20 + spin_unlock_irqrestore(&blkif_io_lock, flags); 9.21 + 9.22 xlvbd_del(info); 9.23 9.24 xenbus_switch_state(dev, XenbusStateClosed); 9.25 @@ -407,7 +419,8 @@ static void blkif_restart_queue(void *ar 9.26 { 9.27 struct blkfront_info *info = (struct blkfront_info *)arg; 9.28 spin_lock_irq(&blkif_io_lock); 9.29 - kick_pending_request_queues(info); 9.30 + if (info->connected == BLKIF_STATE_CONNECTED) 9.31 + kick_pending_request_queues(info); 9.32 spin_unlock_irq(&blkif_io_lock); 9.33 } 9.34 9.35 @@ -695,6 +708,12 @@ static void blkif_free(struct blkfront_i 9.36 spin_lock_irq(&blkif_io_lock); 9.37 info->connected = suspend ? 9.38 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 9.39 + /* No more blkif_request(). */ 9.40 + if (info->rq) 9.41 + blk_stop_queue(info->rq); 9.42 + /* No more gnttab callback work. */ 9.43 + gnttab_cancel_free_callback(&info->callback); 9.44 + flush_scheduled_work(); 9.45 spin_unlock_irq(&blkif_io_lock); 9.46 9.47 /* Free resources associated with old device channel. */ 9.48 @@ -768,17 +787,17 @@ static void blkif_recover(struct blkfron 9.49 9.50 (void)xenbus_switch_state(info->xbdev, XenbusStateConnected); 9.51 9.52 + spin_lock_irq(&blkif_io_lock); 9.53 + 9.54 /* Now safe for us to use the shared ring */ 9.55 - spin_lock_irq(&blkif_io_lock); 9.56 info->connected = BLKIF_STATE_CONNECTED; 9.57 - spin_unlock_irq(&blkif_io_lock); 9.58 9.59 /* Send off requeued requests */ 9.60 flush_requests(info); 9.61 9.62 /* Kick any other new requests queued since we resumed */ 9.63 - spin_lock_irq(&blkif_io_lock); 9.64 kick_pending_request_queues(info); 9.65 + 9.66 spin_unlock_irq(&blkif_io_lock); 9.67 } 9.68
10.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Thu Jun 15 10:02:53 2006 -0600 10.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c Thu Jun 15 10:23:57 2006 -0600 10.3 @@ -334,6 +334,21 @@ out: 10.4 } 10.5 EXPORT_SYMBOL_GPL(gnttab_request_free_callback); 10.6 10.7 +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) 10.8 +{ 10.9 + struct gnttab_free_callback **pcb; 10.10 + unsigned long flags; 10.11 + 10.12 + spin_lock_irqsave(&gnttab_list_lock, flags); 10.13 + for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { 10.14 + if (*pcb == callback) { 10.15 + *pcb = callback->next; 10.16 + break; 10.17 + } 10.18 + } 10.19 + spin_unlock_irqrestore(&gnttab_list_lock, flags); 10.20 +} 10.21 + 10.22 #ifndef __ia64__ 10.23 static int map_pte_fn(pte_t *pte, struct page *pmd_page, 10.24 unsigned long addr, void *data)
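gnttab_cancel_free_callback() removes an entry from the singly linked free-callback list by walking it with a pointer-to-pointer, so unlinking the head needs no special case; the blkfront change above pairs it with flush_scheduled_work() to ensure no callback can fire after the device is torn down. A self-contained sketch of that unlink idiom, using plain C structures rather than the real gnttab types:

```c
#include <stdio.h>

struct callback {
    struct callback *next;
    const char *name;
};

static struct callback *callback_list;

/* Unlink 'victim' from the list, mirroring the **pcb walk used by
 * gnttab_cancel_free_callback() above: pcb can point either at
 * callback_list itself or at some entry's ->next field, so the head
 * is handled by the same code path as any other entry. */
static void cancel_callback(struct callback *victim)
{
    struct callback **pcb;

    for (pcb = &callback_list; *pcb; pcb = &(*pcb)->next) {
        if (*pcb == victim) {
            *pcb = victim->next;
            break;
        }
    }
}

int main(void)
{
    struct callback a = { NULL, "a" };
    struct callback b = { NULL, "b" };
    struct callback *cb;

    /* Build the list b -> a, then cancel b, the head entry. */
    callback_list = &a;
    b.next = callback_list;
    callback_list = &b;

    cancel_callback(&b);

    for (cb = callback_list; cb; cb = cb->next)
        printf("remaining: %s\n", cb->name);
    return 0;
}
```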
11.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c Thu Jun 15 10:02:53 2006 -0600 11.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c Thu Jun 15 10:23:57 2006 -0600 11.3 @@ -121,8 +121,15 @@ static int __init skbuff_init(void) 11.4 for (order = 0; order <= MAX_SKBUFF_ORDER; order++) { 11.5 size = PAGE_SIZE << order; 11.6 sprintf(name[order], "xen-skb-%lu", size); 11.7 - skbuff_order_cachep[order] = kmem_cache_create( 11.8 - name[order], size, size, 0, skbuff_ctor, skbuff_dtor); 11.9 + if (is_running_on_xen() && 11.10 + (xen_start_info->flags & SIF_PRIVILEGED)) 11.11 + skbuff_order_cachep[order] = kmem_cache_create( 11.12 + name[order], size, size, 0, 11.13 + skbuff_ctor, skbuff_dtor); 11.14 + else 11.15 + skbuff_order_cachep[order] = kmem_cache_create( 11.16 + name[order], size, size, 0, NULL, NULL); 11.17 + 11.18 } 11.19 11.20 skbuff_cachep = skbuff_order_cachep[0];
12.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Jun 15 10:02:53 2006 -0600 12.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Jun 15 10:23:57 2006 -0600 12.3 @@ -1072,68 +1072,39 @@ static void xennet_set_features(struct n 12.4 12.5 static void network_connect(struct net_device *dev) 12.6 { 12.7 - struct netfront_info *np; 12.8 + struct netfront_info *np = netdev_priv(dev); 12.9 int i, requeue_idx; 12.10 - struct netif_tx_request *tx; 12.11 struct sk_buff *skb; 12.12 12.13 xennet_set_features(dev); 12.14 12.15 - np = netdev_priv(dev); 12.16 spin_lock_irq(&np->tx_lock); 12.17 spin_lock(&np->rx_lock); 12.18 12.19 - /* Recovery procedure: */ 12.20 - 12.21 /* 12.22 - * Step 1: Rebuild the RX and TX ring contents. 12.23 - * NB. We could just free the queued TX packets now but we hope 12.24 - * that sending them out might do some good. We have to rebuild 12.25 - * the RX ring because some of our pages are currently flipped out 12.26 - * so we can't just free the RX skbs. 12.27 - * NB2. Freelist index entries are always going to be less than 12.28 + * Recovery procedure: 12.29 + * NB. Freelist index entries are always going to be less than 12.30 * PAGE_OFFSET, whereas pointers to skbs will always be equal or 12.31 - * greater than PAGE_OFFSET: we use this property to distinguish 12.32 - * them. 12.33 - */ 12.34 + * greater than PAGE_OFFSET: we use this property to distinguish 12.35 + * them. 12.36 + */ 12.37 12.38 - /* 12.39 - * Rebuild the TX buffer freelist and the TX ring itself. 12.40 - * NB. This reorders packets. We could keep more private state 12.41 - * to avoid this but maybe it doesn't matter so much given the 12.42 - * interface has been down. 12.43 - */ 12.44 + /* Step 1: Discard all pending TX packet fragments. */ 12.45 for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) { 12.46 if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET) 12.47 continue; 12.48 12.49 skb = np->tx_skbs[i]; 12.50 - 12.51 - tx = RING_GET_REQUEST(&np->tx, requeue_idx); 12.52 - requeue_idx++; 12.53 - 12.54 - tx->id = i; 12.55 - gnttab_grant_foreign_access_ref( 12.56 - np->grant_tx_ref[i], np->xbdev->otherend_id, 12.57 - virt_to_mfn(np->tx_skbs[i]->data), 12.58 - GNTMAP_readonly); 12.59 - tx->gref = np->grant_tx_ref[i]; 12.60 - tx->offset = (unsigned long)skb->data & ~PAGE_MASK; 12.61 - tx->size = skb->len; 12.62 - tx->flags = 0; 12.63 - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ 12.64 - tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; 12.65 - if (skb->proto_data_valid) /* remote but checksummed? */ 12.66 - tx->flags |= NETTXF_data_validated; 12.67 - 12.68 - np->stats.tx_bytes += skb->len; 12.69 - np->stats.tx_packets++; 12.70 + gnttab_end_foreign_access_ref( 12.71 + np->grant_tx_ref[i], GNTMAP_readonly); 12.72 + gnttab_release_grant_reference( 12.73 + &np->gref_tx_head, np->grant_tx_ref[i]); 12.74 + np->grant_tx_ref[i] = GRANT_INVALID_REF; 12.75 + add_id_to_freelist(np->tx_skbs, i); 12.76 + dev_kfree_skb_irq(skb); 12.77 } 12.78 12.79 - np->tx.req_prod_pvt = requeue_idx; 12.80 - RING_PUSH_REQUESTS(&np->tx); 12.81 - 12.82 - /* Rebuild the RX buffer freelist and the RX ring itself. */ 12.83 + /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. 
*/ 12.84 for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) { 12.85 if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET) 12.86 continue; 12.87 @@ -1150,7 +1121,7 @@ static void network_connect(struct net_d 12.88 RING_PUSH_REQUESTS(&np->rx); 12.89 12.90 /* 12.91 - * Step 2: All public and private state should now be sane. Get 12.92 + * Step 3: All public and private state should now be sane. Get 12.93 * ready to start sending and receiving packets and give the driver 12.94 * domain a kick because we've probably just requeued some 12.95 * packets. 12.96 @@ -1158,6 +1129,7 @@ static void network_connect(struct net_d 12.97 netif_carrier_on(dev); 12.98 notify_remote_via_irq(np->irq); 12.99 network_tx_buf_gc(dev); 12.100 + network_alloc_rx_buffers(dev); 12.101 12.102 spin_unlock(&np->rx_lock); 12.103 spin_unlock_irq(&np->tx_lock);
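The simplified network_connect() keeps the comment explaining how it tells live entries from free ones: freelist indices stored in tx_skbs[]/rx_skbs[] are always below PAGE_OFFSET, while real skb pointers are at or above it, so a single comparison per slot is enough. A tiny stand-alone illustration of that tagging trick (the threshold constant is made up and merely plays the role of PAGE_OFFSET):

```c
#include <stdio.h>

/* Illustrative threshold standing in for PAGE_OFFSET: anything below it
 * is treated as a freelist index, anything at or above it as a pointer. */
#define FAKE_PAGE_OFFSET 0xC0000000UL

static unsigned long slots[4] = {
    2,               /* free: next freelist index */
    0xC0001000UL,    /* in use: stands in for an skb pointer */
    3,               /* free */
    0xC0002000UL     /* in use */
};

int main(void)
{
    int i;

    for (i = 0; i < 4; i++)
        printf("slot %d: %s\n", i,
               slots[i] >= FAKE_PAGE_OFFSET ? "in use (pointer)"
                                            : "free (freelist index)");
    return 0;
}
```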
13.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h Thu Jun 15 10:02:53 2006 -0600 13.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h Thu Jun 15 10:23:57 2006 -0600 13.3 @@ -18,4 +18,8 @@ typedef struct { 13.4 extern void _arch_exit_mmap(struct mm_struct *mm); 13.5 #define arch_exit_mmap(_mm) _arch_exit_mmap(_mm) 13.6 13.7 +/* kernel/fork.c:dup_mmap hook */ 13.8 +extern void _arch_dup_mmap(struct mm_struct *mm); 13.9 +#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm)) 13.10 + 13.11 #endif
14.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Thu Jun 15 10:02:53 2006 -0600 14.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h Thu Jun 15 10:23:57 2006 -0600 14.3 @@ -51,8 +51,7 @@ static inline void switch_mm(struct mm_s 14.4 struct mmuext_op _op[2], *op = _op; 14.5 14.6 if (likely(prev != next)) { 14.7 - if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)) 14.8 - mm_pin(next); 14.9 + BUG_ON(!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)); 14.10 14.11 /* stop flush ipis for the previous mm */ 14.12 cpu_clear(cpu, prev->cpu_vm_mask); 14.13 @@ -99,7 +98,11 @@ static inline void switch_mm(struct mm_s 14.14 #define deactivate_mm(tsk, mm) \ 14.15 asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) 14.16 14.17 -#define activate_mm(prev, next) \ 14.18 - switch_mm((prev),(next),NULL) 14.19 +static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) 14.20 +{ 14.21 + if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags)) 14.22 + mm_pin(next); 14.23 + switch_mm(prev, next, NULL); 14.24 +} 14.25 14.26 #endif
15.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Thu Jun 15 10:02:53 2006 -0600 15.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h Thu Jun 15 10:23:57 2006 -0600 15.3 @@ -25,9 +25,9 @@ static char * __init machine_specific_me 15.4 if ( rc == -ENOSYS ) { 15.5 memmap.nr_entries = 1; 15.6 map[0].addr = 0ULL; 15.7 - map[0].size = xen_start_info->nr_pages << PAGE_SHIFT; 15.8 + map[0].size = PFN_PHYS(xen_start_info->nr_pages); 15.9 /* 8MB slack (to balance backend allocations). */ 15.10 - map[0].size += 8 << 20; 15.11 + map[0].size += 8ULL << 20; 15.12 map[0].type = E820_RAM; 15.13 rc = 0; 15.14 }
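This hunk widens both size computations feeding the 64-bit map[0].size field: PFN_PHYS() for the page-count term and an 8ULL literal for the slack. One plausible motivation (an assumption on my part, not stated in the changeset) is the usual C pitfall that a left shift of a 32-bit page count truncates before the assignment to a 64-bit field widens it; a tiny stand-alone demonstration with illustrative values:

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
    uint32_t nr_pages = 0x200000;   /* 2M pages of 4KB = 8GB of RAM */
    uint64_t truncated, widened;

    /* The shift is evaluated in 32-bit arithmetic, so the high bits are
     * already gone by the time the result reaches the 64-bit variable. */
    truncated = nr_pages << PAGE_SHIFT;

    /* Widening the operand first (the effect an 8ULL-style literal or a
     * 64-bit shift macro is after) preserves the full value. */
    widened = (uint64_t)nr_pages << PAGE_SHIFT;

    printf("truncated: 0x%llx\n", (unsigned long long)truncated);
    printf("widened:   0x%llx\n", (unsigned long long)widened);
    return 0;
}
```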
16.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h Thu Jun 15 10:02:53 2006 -0600 16.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h Thu Jun 15 10:23:57 2006 -0600 16.3 @@ -28,6 +28,10 @@ extern spinlock_t mm_unpinned_lock; 16.4 /* mm/memory.c:exit_mmap hook */ 16.5 extern void _arch_exit_mmap(struct mm_struct *mm); 16.6 #define arch_exit_mmap(_mm) _arch_exit_mmap(_mm) 16.7 + 16.8 +/* kernel/fork.c:dup_mmap hook */ 16.9 +extern void _arch_dup_mmap(struct mm_struct *mm); 16.10 +#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm)) 16.11 #endif 16.12 16.13 #endif
17.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h Thu Jun 15 10:02:53 2006 -0600 17.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h Thu Jun 15 10:23:57 2006 -0600 17.3 @@ -73,8 +73,7 @@ static inline void switch_mm(struct mm_s 17.4 struct mmuext_op _op[3], *op = _op; 17.5 17.6 if (likely(prev != next)) { 17.7 - if (!next->context.pinned) 17.8 - mm_pin(next); 17.9 + BUG_ON(!next->context.pinned); 17.10 17.11 /* stop flush ipis for the previous mm */ 17.12 clear_bit(cpu, &prev->cpu_vm_mask); 17.13 @@ -127,8 +126,11 @@ static inline void switch_mm(struct mm_s 17.14 asm volatile("movl %0,%%fs"::"r"(0)); \ 17.15 } while(0) 17.16 17.17 -#define activate_mm(prev, next) do { \ 17.18 - switch_mm((prev),(next),NULL); \ 17.19 -} while (0) 17.20 +static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) 17.21 +{ 17.22 + if (!next->context.pinned) 17.23 + mm_pin(next); 17.24 + switch_mm(prev, next, NULL); 17.25 +} 17.26 17.27 #endif
18.1 --- a/linux-2.6-xen-sparse/include/xen/gnttab.h Thu Jun 15 10:02:53 2006 -0600 18.2 +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h Thu Jun 15 10:23:57 2006 -0600 18.3 @@ -100,6 +100,7 @@ void gnttab_release_grant_reference(gran 18.4 18.5 void gnttab_request_free_callback(struct gnttab_free_callback *callback, 18.6 void (*fn)(void *), void *arg, u16 count); 18.7 +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback); 18.8 18.9 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 18.10 unsigned long frame, int readonly);
19.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 19.2 +++ b/linux-2.6-xen-sparse/kernel/fork.c Thu Jun 15 10:23:57 2006 -0600 19.3 @@ -0,0 +1,1619 @@ 19.4 +/* 19.5 + * linux/kernel/fork.c 19.6 + * 19.7 + * Copyright (C) 1991, 1992 Linus Torvalds 19.8 + */ 19.9 + 19.10 +/* 19.11 + * 'fork.c' contains the help-routines for the 'fork' system call 19.12 + * (see also entry.S and others). 19.13 + * Fork is rather simple, once you get the hang of it, but the memory 19.14 + * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' 19.15 + */ 19.16 + 19.17 +#include <linux/config.h> 19.18 +#include <linux/slab.h> 19.19 +#include <linux/init.h> 19.20 +#include <linux/unistd.h> 19.21 +#include <linux/smp_lock.h> 19.22 +#include <linux/module.h> 19.23 +#include <linux/vmalloc.h> 19.24 +#include <linux/completion.h> 19.25 +#include <linux/namespace.h> 19.26 +#include <linux/personality.h> 19.27 +#include <linux/mempolicy.h> 19.28 +#include <linux/sem.h> 19.29 +#include <linux/file.h> 19.30 +#include <linux/key.h> 19.31 +#include <linux/binfmts.h> 19.32 +#include <linux/mman.h> 19.33 +#include <linux/fs.h> 19.34 +#include <linux/capability.h> 19.35 +#include <linux/cpu.h> 19.36 +#include <linux/cpuset.h> 19.37 +#include <linux/security.h> 19.38 +#include <linux/swap.h> 19.39 +#include <linux/syscalls.h> 19.40 +#include <linux/jiffies.h> 19.41 +#include <linux/futex.h> 19.42 +#include <linux/rcupdate.h> 19.43 +#include <linux/ptrace.h> 19.44 +#include <linux/mount.h> 19.45 +#include <linux/audit.h> 19.46 +#include <linux/profile.h> 19.47 +#include <linux/rmap.h> 19.48 +#include <linux/acct.h> 19.49 +#include <linux/cn_proc.h> 19.50 + 19.51 +#include <asm/pgtable.h> 19.52 +#include <asm/pgalloc.h> 19.53 +#include <asm/uaccess.h> 19.54 +#include <asm/mmu_context.h> 19.55 +#include <asm/cacheflush.h> 19.56 +#include <asm/tlbflush.h> 19.57 + 19.58 +/* 19.59 + * Protected counters by write_lock_irq(&tasklist_lock) 19.60 + */ 19.61 +unsigned long total_forks; /* Handle normal Linux uptimes. */ 19.62 +int nr_threads; /* The idle threads do not count.. 
*/ 19.63 + 19.64 +int max_threads; /* tunable limit on nr_threads */ 19.65 + 19.66 +DEFINE_PER_CPU(unsigned long, process_counts) = 0; 19.67 + 19.68 + __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 19.69 + 19.70 +EXPORT_SYMBOL(tasklist_lock); 19.71 + 19.72 +int nr_processes(void) 19.73 +{ 19.74 + int cpu; 19.75 + int total = 0; 19.76 + 19.77 + for_each_online_cpu(cpu) 19.78 + total += per_cpu(process_counts, cpu); 19.79 + 19.80 + return total; 19.81 +} 19.82 + 19.83 +#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 19.84 +# define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) 19.85 +# define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) 19.86 +static kmem_cache_t *task_struct_cachep; 19.87 +#endif 19.88 + 19.89 +/* SLAB cache for signal_struct structures (tsk->signal) */ 19.90 +kmem_cache_t *signal_cachep; 19.91 + 19.92 +/* SLAB cache for sighand_struct structures (tsk->sighand) */ 19.93 +kmem_cache_t *sighand_cachep; 19.94 + 19.95 +/* SLAB cache for files_struct structures (tsk->files) */ 19.96 +kmem_cache_t *files_cachep; 19.97 + 19.98 +/* SLAB cache for fs_struct structures (tsk->fs) */ 19.99 +kmem_cache_t *fs_cachep; 19.100 + 19.101 +/* SLAB cache for vm_area_struct structures */ 19.102 +kmem_cache_t *vm_area_cachep; 19.103 + 19.104 +/* SLAB cache for mm_struct structures (tsk->mm) */ 19.105 +static kmem_cache_t *mm_cachep; 19.106 + 19.107 +void free_task(struct task_struct *tsk) 19.108 +{ 19.109 + free_thread_info(tsk->thread_info); 19.110 + free_task_struct(tsk); 19.111 +} 19.112 +EXPORT_SYMBOL(free_task); 19.113 + 19.114 +void __put_task_struct_cb(struct rcu_head *rhp) 19.115 +{ 19.116 + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 19.117 + 19.118 + WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); 19.119 + WARN_ON(atomic_read(&tsk->usage)); 19.120 + WARN_ON(tsk == current); 19.121 + 19.122 + if (unlikely(tsk->audit_context)) 19.123 + audit_free(tsk); 19.124 + security_task_free(tsk); 19.125 + free_uid(tsk->user); 19.126 + put_group_info(tsk->group_info); 19.127 + 19.128 + if (!profile_handoff_task(tsk)) 19.129 + free_task(tsk); 19.130 +} 19.131 + 19.132 +void __init fork_init(unsigned long mempages) 19.133 +{ 19.134 +#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 19.135 +#ifndef ARCH_MIN_TASKALIGN 19.136 +#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES 19.137 +#endif 19.138 + /* create a slab on which task_structs can be allocated */ 19.139 + task_struct_cachep = 19.140 + kmem_cache_create("task_struct", sizeof(struct task_struct), 19.141 + ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL); 19.142 +#endif 19.143 + 19.144 + /* 19.145 + * The default maximum number of threads is set to a safe 19.146 + * value: the thread structures can take up at most half 19.147 + * of memory. 
19.148 + */ 19.149 + max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE); 19.150 + 19.151 + /* 19.152 + * we need to allow at least 20 threads to boot a system 19.153 + */ 19.154 + if(max_threads < 20) 19.155 + max_threads = 20; 19.156 + 19.157 + init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; 19.158 + init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; 19.159 + init_task.signal->rlim[RLIMIT_SIGPENDING] = 19.160 + init_task.signal->rlim[RLIMIT_NPROC]; 19.161 +} 19.162 + 19.163 +static struct task_struct *dup_task_struct(struct task_struct *orig) 19.164 +{ 19.165 + struct task_struct *tsk; 19.166 + struct thread_info *ti; 19.167 + 19.168 + prepare_to_copy(orig); 19.169 + 19.170 + tsk = alloc_task_struct(); 19.171 + if (!tsk) 19.172 + return NULL; 19.173 + 19.174 + ti = alloc_thread_info(tsk); 19.175 + if (!ti) { 19.176 + free_task_struct(tsk); 19.177 + return NULL; 19.178 + } 19.179 + 19.180 + *tsk = *orig; 19.181 + tsk->thread_info = ti; 19.182 + setup_thread_stack(tsk, orig); 19.183 + 19.184 + /* One for us, one for whoever does the "release_task()" (usually parent) */ 19.185 + atomic_set(&tsk->usage,2); 19.186 + atomic_set(&tsk->fs_excl, 0); 19.187 + return tsk; 19.188 +} 19.189 + 19.190 +#ifdef CONFIG_MMU 19.191 +static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) 19.192 +{ 19.193 + struct vm_area_struct *mpnt, *tmp, **pprev; 19.194 + struct rb_node **rb_link, *rb_parent; 19.195 + int retval; 19.196 + unsigned long charge; 19.197 + struct mempolicy *pol; 19.198 + 19.199 + down_write(&oldmm->mmap_sem); 19.200 + flush_cache_mm(oldmm); 19.201 + down_write(&mm->mmap_sem); 19.202 + 19.203 + mm->locked_vm = 0; 19.204 + mm->mmap = NULL; 19.205 + mm->mmap_cache = NULL; 19.206 + mm->free_area_cache = oldmm->mmap_base; 19.207 + mm->cached_hole_size = ~0UL; 19.208 + mm->map_count = 0; 19.209 + cpus_clear(mm->cpu_vm_mask); 19.210 + mm->mm_rb = RB_ROOT; 19.211 + rb_link = &mm->mm_rb.rb_node; 19.212 + rb_parent = NULL; 19.213 + pprev = &mm->mmap; 19.214 + 19.215 + for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 19.216 + struct file *file; 19.217 + 19.218 + if (mpnt->vm_flags & VM_DONTCOPY) { 19.219 + long pages = vma_pages(mpnt); 19.220 + mm->total_vm -= pages; 19.221 + vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file, 19.222 + -pages); 19.223 + continue; 19.224 + } 19.225 + charge = 0; 19.226 + if (mpnt->vm_flags & VM_ACCOUNT) { 19.227 + unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; 19.228 + if (security_vm_enough_memory(len)) 19.229 + goto fail_nomem; 19.230 + charge = len; 19.231 + } 19.232 + tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); 19.233 + if (!tmp) 19.234 + goto fail_nomem; 19.235 + *tmp = *mpnt; 19.236 + pol = mpol_copy(vma_policy(mpnt)); 19.237 + retval = PTR_ERR(pol); 19.238 + if (IS_ERR(pol)) 19.239 + goto fail_nomem_policy; 19.240 + vma_set_policy(tmp, pol); 19.241 + tmp->vm_flags &= ~VM_LOCKED; 19.242 + tmp->vm_mm = mm; 19.243 + tmp->vm_next = NULL; 19.244 + anon_vma_link(tmp); 19.245 + file = tmp->vm_file; 19.246 + if (file) { 19.247 + struct inode *inode = file->f_dentry->d_inode; 19.248 + get_file(file); 19.249 + if (tmp->vm_flags & VM_DENYWRITE) 19.250 + atomic_dec(&inode->i_writecount); 19.251 + 19.252 + /* insert tmp into the share list, just after mpnt */ 19.253 + spin_lock(&file->f_mapping->i_mmap_lock); 19.254 + tmp->vm_truncate_count = mpnt->vm_truncate_count; 19.255 + flush_dcache_mmap_lock(file->f_mapping); 19.256 + vma_prio_tree_add(tmp, mpnt); 19.257 + 
flush_dcache_mmap_unlock(file->f_mapping); 19.258 + spin_unlock(&file->f_mapping->i_mmap_lock); 19.259 + } 19.260 + 19.261 + /* 19.262 + * Link in the new vma and copy the page table entries. 19.263 + */ 19.264 + *pprev = tmp; 19.265 + pprev = &tmp->vm_next; 19.266 + 19.267 + __vma_link_rb(mm, tmp, rb_link, rb_parent); 19.268 + rb_link = &tmp->vm_rb.rb_right; 19.269 + rb_parent = &tmp->vm_rb; 19.270 + 19.271 + mm->map_count++; 19.272 + retval = copy_page_range(mm, oldmm, mpnt); 19.273 + 19.274 + if (tmp->vm_ops && tmp->vm_ops->open) 19.275 + tmp->vm_ops->open(tmp); 19.276 + 19.277 + if (retval) 19.278 + goto out; 19.279 + } 19.280 +#ifdef arch_dup_mmap 19.281 + arch_dup_mmap(mm, oldmm); 19.282 +#endif 19.283 + retval = 0; 19.284 +out: 19.285 + up_write(&mm->mmap_sem); 19.286 + flush_tlb_mm(oldmm); 19.287 + up_write(&oldmm->mmap_sem); 19.288 + return retval; 19.289 +fail_nomem_policy: 19.290 + kmem_cache_free(vm_area_cachep, tmp); 19.291 +fail_nomem: 19.292 + retval = -ENOMEM; 19.293 + vm_unacct_memory(charge); 19.294 + goto out; 19.295 +} 19.296 + 19.297 +static inline int mm_alloc_pgd(struct mm_struct * mm) 19.298 +{ 19.299 + mm->pgd = pgd_alloc(mm); 19.300 + if (unlikely(!mm->pgd)) 19.301 + return -ENOMEM; 19.302 + return 0; 19.303 +} 19.304 + 19.305 +static inline void mm_free_pgd(struct mm_struct * mm) 19.306 +{ 19.307 + pgd_free(mm->pgd); 19.308 +} 19.309 +#else 19.310 +#define dup_mmap(mm, oldmm) (0) 19.311 +#define mm_alloc_pgd(mm) (0) 19.312 +#define mm_free_pgd(mm) 19.313 +#endif /* CONFIG_MMU */ 19.314 + 19.315 + __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); 19.316 + 19.317 +#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) 19.318 +#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) 19.319 + 19.320 +#include <linux/init_task.h> 19.321 + 19.322 +static struct mm_struct * mm_init(struct mm_struct * mm) 19.323 +{ 19.324 + atomic_set(&mm->mm_users, 1); 19.325 + atomic_set(&mm->mm_count, 1); 19.326 + init_rwsem(&mm->mmap_sem); 19.327 + INIT_LIST_HEAD(&mm->mmlist); 19.328 + mm->core_waiters = 0; 19.329 + mm->nr_ptes = 0; 19.330 + set_mm_counter(mm, file_rss, 0); 19.331 + set_mm_counter(mm, anon_rss, 0); 19.332 + spin_lock_init(&mm->page_table_lock); 19.333 + rwlock_init(&mm->ioctx_list_lock); 19.334 + mm->ioctx_list = NULL; 19.335 + mm->free_area_cache = TASK_UNMAPPED_BASE; 19.336 + mm->cached_hole_size = ~0UL; 19.337 + 19.338 + if (likely(!mm_alloc_pgd(mm))) { 19.339 + mm->def_flags = 0; 19.340 + return mm; 19.341 + } 19.342 + free_mm(mm); 19.343 + return NULL; 19.344 +} 19.345 + 19.346 +/* 19.347 + * Allocate and initialize an mm_struct. 19.348 + */ 19.349 +struct mm_struct * mm_alloc(void) 19.350 +{ 19.351 + struct mm_struct * mm; 19.352 + 19.353 + mm = allocate_mm(); 19.354 + if (mm) { 19.355 + memset(mm, 0, sizeof(*mm)); 19.356 + mm = mm_init(mm); 19.357 + } 19.358 + return mm; 19.359 +} 19.360 + 19.361 +/* 19.362 + * Called when the last reference to the mm 19.363 + * is dropped: either by a lazy thread or by 19.364 + * mmput. Free the page directory and the mm. 19.365 + */ 19.366 +void fastcall __mmdrop(struct mm_struct *mm) 19.367 +{ 19.368 + BUG_ON(mm == &init_mm); 19.369 + mm_free_pgd(mm); 19.370 + destroy_context(mm); 19.371 + free_mm(mm); 19.372 +} 19.373 + 19.374 +/* 19.375 + * Decrement the use count and release all resources for an mm. 
19.376 + */ 19.377 +void mmput(struct mm_struct *mm) 19.378 +{ 19.379 + if (atomic_dec_and_test(&mm->mm_users)) { 19.380 + exit_aio(mm); 19.381 + exit_mmap(mm); 19.382 + if (!list_empty(&mm->mmlist)) { 19.383 + spin_lock(&mmlist_lock); 19.384 + list_del(&mm->mmlist); 19.385 + spin_unlock(&mmlist_lock); 19.386 + } 19.387 + put_swap_token(mm); 19.388 + mmdrop(mm); 19.389 + } 19.390 +} 19.391 +EXPORT_SYMBOL_GPL(mmput); 19.392 + 19.393 +/** 19.394 + * get_task_mm - acquire a reference to the task's mm 19.395 + * 19.396 + * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning 19.397 + * this kernel workthread has transiently adopted a user mm with use_mm, 19.398 + * to do its AIO) is not set and if so returns a reference to it, after 19.399 + * bumping up the use count. User must release the mm via mmput() 19.400 + * after use. Typically used by /proc and ptrace. 19.401 + */ 19.402 +struct mm_struct *get_task_mm(struct task_struct *task) 19.403 +{ 19.404 + struct mm_struct *mm; 19.405 + 19.406 + task_lock(task); 19.407 + mm = task->mm; 19.408 + if (mm) { 19.409 + if (task->flags & PF_BORROWED_MM) 19.410 + mm = NULL; 19.411 + else 19.412 + atomic_inc(&mm->mm_users); 19.413 + } 19.414 + task_unlock(task); 19.415 + return mm; 19.416 +} 19.417 +EXPORT_SYMBOL_GPL(get_task_mm); 19.418 + 19.419 +/* Please note the differences between mmput and mm_release. 19.420 + * mmput is called whenever we stop holding onto a mm_struct, 19.421 + * error success whatever. 19.422 + * 19.423 + * mm_release is called after a mm_struct has been removed 19.424 + * from the current process. 19.425 + * 19.426 + * This difference is important for error handling, when we 19.427 + * only half set up a mm_struct for a new process and need to restore 19.428 + * the old one. Because we mmput the new mm_struct before 19.429 + * restoring the old one. . . 19.430 + * Eric Biederman 10 January 1998 19.431 + */ 19.432 +void mm_release(struct task_struct *tsk, struct mm_struct *mm) 19.433 +{ 19.434 + struct completion *vfork_done = tsk->vfork_done; 19.435 + 19.436 + /* Get rid of any cached register state */ 19.437 + deactivate_mm(tsk, mm); 19.438 + 19.439 + /* notify parent sleeping on vfork() */ 19.440 + if (vfork_done) { 19.441 + tsk->vfork_done = NULL; 19.442 + complete(vfork_done); 19.443 + } 19.444 + if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) { 19.445 + u32 __user * tidptr = tsk->clear_child_tid; 19.446 + tsk->clear_child_tid = NULL; 19.447 + 19.448 + /* 19.449 + * We don't check the error code - if userspace has 19.450 + * not set up a proper pointer then tough luck. 19.451 + */ 19.452 + put_user(0, tidptr); 19.453 + sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0); 19.454 + } 19.455 +} 19.456 + 19.457 +/* 19.458 + * Allocate a new mm structure and copy contents from the 19.459 + * mm structure of the passed in task structure. 
19.460 + */ 19.461 +static struct mm_struct *dup_mm(struct task_struct *tsk) 19.462 +{ 19.463 + struct mm_struct *mm, *oldmm = current->mm; 19.464 + int err; 19.465 + 19.466 + if (!oldmm) 19.467 + return NULL; 19.468 + 19.469 + mm = allocate_mm(); 19.470 + if (!mm) 19.471 + goto fail_nomem; 19.472 + 19.473 + memcpy(mm, oldmm, sizeof(*mm)); 19.474 + 19.475 + if (!mm_init(mm)) 19.476 + goto fail_nomem; 19.477 + 19.478 + if (init_new_context(tsk, mm)) 19.479 + goto fail_nocontext; 19.480 + 19.481 + err = dup_mmap(mm, oldmm); 19.482 + if (err) 19.483 + goto free_pt; 19.484 + 19.485 + mm->hiwater_rss = get_mm_rss(mm); 19.486 + mm->hiwater_vm = mm->total_vm; 19.487 + 19.488 + return mm; 19.489 + 19.490 +free_pt: 19.491 + mmput(mm); 19.492 + 19.493 +fail_nomem: 19.494 + return NULL; 19.495 + 19.496 +fail_nocontext: 19.497 + /* 19.498 + * If init_new_context() failed, we cannot use mmput() to free the mm 19.499 + * because it calls destroy_context() 19.500 + */ 19.501 + mm_free_pgd(mm); 19.502 + free_mm(mm); 19.503 + return NULL; 19.504 +} 19.505 + 19.506 +static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 19.507 +{ 19.508 + struct mm_struct * mm, *oldmm; 19.509 + int retval; 19.510 + 19.511 + tsk->min_flt = tsk->maj_flt = 0; 19.512 + tsk->nvcsw = tsk->nivcsw = 0; 19.513 + 19.514 + tsk->mm = NULL; 19.515 + tsk->active_mm = NULL; 19.516 + 19.517 + /* 19.518 + * Are we cloning a kernel thread? 19.519 + * 19.520 + * We need to steal a active VM for that.. 19.521 + */ 19.522 + oldmm = current->mm; 19.523 + if (!oldmm) 19.524 + return 0; 19.525 + 19.526 + if (clone_flags & CLONE_VM) { 19.527 + atomic_inc(&oldmm->mm_users); 19.528 + mm = oldmm; 19.529 + goto good_mm; 19.530 + } 19.531 + 19.532 + retval = -ENOMEM; 19.533 + mm = dup_mm(tsk); 19.534 + if (!mm) 19.535 + goto fail_nomem; 19.536 + 19.537 +good_mm: 19.538 + tsk->mm = mm; 19.539 + tsk->active_mm = mm; 19.540 + return 0; 19.541 + 19.542 +fail_nomem: 19.543 + return retval; 19.544 +} 19.545 + 19.546 +static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 19.547 +{ 19.548 + struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); 19.549 + /* We don't need to lock fs - think why ;-) */ 19.550 + if (fs) { 19.551 + atomic_set(&fs->count, 1); 19.552 + rwlock_init(&fs->lock); 19.553 + fs->umask = old->umask; 19.554 + read_lock(&old->lock); 19.555 + fs->rootmnt = mntget(old->rootmnt); 19.556 + fs->root = dget(old->root); 19.557 + fs->pwdmnt = mntget(old->pwdmnt); 19.558 + fs->pwd = dget(old->pwd); 19.559 + if (old->altroot) { 19.560 + fs->altrootmnt = mntget(old->altrootmnt); 19.561 + fs->altroot = dget(old->altroot); 19.562 + } else { 19.563 + fs->altrootmnt = NULL; 19.564 + fs->altroot = NULL; 19.565 + } 19.566 + read_unlock(&old->lock); 19.567 + } 19.568 + return fs; 19.569 +} 19.570 + 19.571 +struct fs_struct *copy_fs_struct(struct fs_struct *old) 19.572 +{ 19.573 + return __copy_fs_struct(old); 19.574 +} 19.575 + 19.576 +EXPORT_SYMBOL_GPL(copy_fs_struct); 19.577 + 19.578 +static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) 19.579 +{ 19.580 + if (clone_flags & CLONE_FS) { 19.581 + atomic_inc(¤t->fs->count); 19.582 + return 0; 19.583 + } 19.584 + tsk->fs = __copy_fs_struct(current->fs); 19.585 + if (!tsk->fs) 19.586 + return -ENOMEM; 19.587 + return 0; 19.588 +} 19.589 + 19.590 +static int count_open_files(struct fdtable *fdt) 19.591 +{ 19.592 + int size = fdt->max_fdset; 19.593 + int i; 19.594 + 19.595 + /* Find the last open fd */ 19.596 + for (i = size/(8*sizeof(long)); i > 0; 
) { 19.597 + if (fdt->open_fds->fds_bits[--i]) 19.598 + break; 19.599 + } 19.600 + i = (i+1) * 8 * sizeof(long); 19.601 + return i; 19.602 +} 19.603 + 19.604 +static struct files_struct *alloc_files(void) 19.605 +{ 19.606 + struct files_struct *newf; 19.607 + struct fdtable *fdt; 19.608 + 19.609 + newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); 19.610 + if (!newf) 19.611 + goto out; 19.612 + 19.613 + atomic_set(&newf->count, 1); 19.614 + 19.615 + spin_lock_init(&newf->file_lock); 19.616 + fdt = &newf->fdtab; 19.617 + fdt->next_fd = 0; 19.618 + fdt->max_fds = NR_OPEN_DEFAULT; 19.619 + fdt->max_fdset = __FD_SETSIZE; 19.620 + fdt->close_on_exec = &newf->close_on_exec_init; 19.621 + fdt->open_fds = &newf->open_fds_init; 19.622 + fdt->fd = &newf->fd_array[0]; 19.623 + INIT_RCU_HEAD(&fdt->rcu); 19.624 + fdt->free_files = NULL; 19.625 + fdt->next = NULL; 19.626 + rcu_assign_pointer(newf->fdt, fdt); 19.627 +out: 19.628 + return newf; 19.629 +} 19.630 + 19.631 +/* 19.632 + * Allocate a new files structure and copy contents from the 19.633 + * passed in files structure. 19.634 + */ 19.635 +static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) 19.636 +{ 19.637 + struct files_struct *newf; 19.638 + struct file **old_fds, **new_fds; 19.639 + int open_files, size, i, expand; 19.640 + struct fdtable *old_fdt, *new_fdt; 19.641 + 19.642 + newf = alloc_files(); 19.643 + if (!newf) 19.644 + goto out; 19.645 + 19.646 + spin_lock(&oldf->file_lock); 19.647 + old_fdt = files_fdtable(oldf); 19.648 + new_fdt = files_fdtable(newf); 19.649 + size = old_fdt->max_fdset; 19.650 + open_files = count_open_files(old_fdt); 19.651 + expand = 0; 19.652 + 19.653 + /* 19.654 + * Check whether we need to allocate a larger fd array or fd set. 19.655 + * Note: we're not a clone task, so the open count won't change. 19.656 + */ 19.657 + if (open_files > new_fdt->max_fdset) { 19.658 + new_fdt->max_fdset = 0; 19.659 + expand = 1; 19.660 + } 19.661 + if (open_files > new_fdt->max_fds) { 19.662 + new_fdt->max_fds = 0; 19.663 + expand = 1; 19.664 + } 19.665 + 19.666 + /* if the old fdset gets grown now, we'll only copy up to "size" fds */ 19.667 + if (expand) { 19.668 + spin_unlock(&oldf->file_lock); 19.669 + spin_lock(&newf->file_lock); 19.670 + *errorp = expand_files(newf, open_files-1); 19.671 + spin_unlock(&newf->file_lock); 19.672 + if (*errorp < 0) 19.673 + goto out_release; 19.674 + new_fdt = files_fdtable(newf); 19.675 + /* 19.676 + * Reacquire the oldf lock and a pointer to its fd table 19.677 + * who knows it may have a new bigger fd table. We need 19.678 + * the latest pointer. 19.679 + */ 19.680 + spin_lock(&oldf->file_lock); 19.681 + old_fdt = files_fdtable(oldf); 19.682 + } 19.683 + 19.684 + old_fds = old_fdt->fd; 19.685 + new_fds = new_fdt->fd; 19.686 + 19.687 + memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8); 19.688 + memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8); 19.689 + 19.690 + for (i = open_files; i != 0; i--) { 19.691 + struct file *f = *old_fds++; 19.692 + if (f) { 19.693 + get_file(f); 19.694 + } else { 19.695 + /* 19.696 + * The fd may be claimed in the fd bitmap but not yet 19.697 + * instantiated in the files array if a sibling thread 19.698 + * is partway through open(). So make sure that this 19.699 + * fd is available to the new process. 
19.700 + */ 19.701 + FD_CLR(open_files - i, new_fdt->open_fds); 19.702 + } 19.703 + rcu_assign_pointer(*new_fds++, f); 19.704 + } 19.705 + spin_unlock(&oldf->file_lock); 19.706 + 19.707 + /* compute the remainder to be cleared */ 19.708 + size = (new_fdt->max_fds - open_files) * sizeof(struct file *); 19.709 + 19.710 + /* This is long word aligned thus could use a optimized version */ 19.711 + memset(new_fds, 0, size); 19.712 + 19.713 + if (new_fdt->max_fdset > open_files) { 19.714 + int left = (new_fdt->max_fdset-open_files)/8; 19.715 + int start = open_files / (8 * sizeof(unsigned long)); 19.716 + 19.717 + memset(&new_fdt->open_fds->fds_bits[start], 0, left); 19.718 + memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); 19.719 + } 19.720 + 19.721 +out: 19.722 + return newf; 19.723 + 19.724 +out_release: 19.725 + free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); 19.726 + free_fdset (new_fdt->open_fds, new_fdt->max_fdset); 19.727 + free_fd_array(new_fdt->fd, new_fdt->max_fds); 19.728 + kmem_cache_free(files_cachep, newf); 19.729 + return NULL; 19.730 +} 19.731 + 19.732 +static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 19.733 +{ 19.734 + struct files_struct *oldf, *newf; 19.735 + int error = 0; 19.736 + 19.737 + /* 19.738 + * A background process may not have any files ... 19.739 + */ 19.740 + oldf = current->files; 19.741 + if (!oldf) 19.742 + goto out; 19.743 + 19.744 + if (clone_flags & CLONE_FILES) { 19.745 + atomic_inc(&oldf->count); 19.746 + goto out; 19.747 + } 19.748 + 19.749 + /* 19.750 + * Note: we may be using current for both targets (See exec.c) 19.751 + * This works because we cache current->files (old) as oldf. Don't 19.752 + * break this. 19.753 + */ 19.754 + tsk->files = NULL; 19.755 + error = -ENOMEM; 19.756 + newf = dup_fd(oldf, &error); 19.757 + if (!newf) 19.758 + goto out; 19.759 + 19.760 + tsk->files = newf; 19.761 + error = 0; 19.762 +out: 19.763 + return error; 19.764 +} 19.765 + 19.766 +/* 19.767 + * Helper to unshare the files of the current task. 19.768 + * We don't want to expose copy_files internals to 19.769 + * the exec layer of the kernel. 
19.770 + */
19.771 +
19.772 +int unshare_files(void)
19.773 +{
19.774 + struct files_struct *files = current->files;
19.775 + int rc;
19.776 +
19.777 + if(!files)
19.778 + BUG();
19.779 +
19.780 + /* This can race but the race causes us to copy when we don't
19.781 + need to and drop the copy */
19.782 + if(atomic_read(&files->count) == 1)
19.783 + {
19.784 + atomic_inc(&files->count);
19.785 + return 0;
19.786 + }
19.787 + rc = copy_files(0, current);
19.788 + if(rc)
19.789 + current->files = files;
19.790 + return rc;
19.791 +}
19.792 +
19.793 +EXPORT_SYMBOL(unshare_files);
19.794 +
19.795 +void sighand_free_cb(struct rcu_head *rhp)
19.796 +{
19.797 + struct sighand_struct *sp;
19.798 +
19.799 + sp = container_of(rhp, struct sighand_struct, rcu);
19.800 + kmem_cache_free(sighand_cachep, sp);
19.801 +}
19.802 +
19.803 +static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
19.804 +{
19.805 + struct sighand_struct *sig;
19.806 +
19.807 + if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
19.808 + atomic_inc(&current->sighand->count);
19.809 + return 0;
19.810 + }
19.811 + sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
19.812 + rcu_assign_pointer(tsk->sighand, sig);
19.813 + if (!sig)
19.814 + return -ENOMEM;
19.815 + spin_lock_init(&sig->siglock);
19.816 + atomic_set(&sig->count, 1);
19.817 + memcpy(sig->action, current->sighand->action, sizeof(sig->action));
19.818 + return 0;
19.819 +}
19.820 +
19.821 +static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
19.822 +{
19.823 + struct signal_struct *sig;
19.824 + int ret;
19.825 +
19.826 + if (clone_flags & CLONE_THREAD) {
19.827 + atomic_inc(&current->signal->count);
19.828 + atomic_inc(&current->signal->live);
19.829 + return 0;
19.830 + }
19.831 + sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
19.832 + tsk->signal = sig;
19.833 + if (!sig)
19.834 + return -ENOMEM;
19.835 +
19.836 + ret = copy_thread_group_keys(tsk);
19.837 + if (ret < 0) {
19.838 + kmem_cache_free(signal_cachep, sig);
19.839 + return ret;
19.840 + }
19.841 +
19.842 + atomic_set(&sig->count, 1);
19.843 + atomic_set(&sig->live, 1);
19.844 + init_waitqueue_head(&sig->wait_chldexit);
19.845 + sig->flags = 0;
19.846 + sig->group_exit_code = 0;
19.847 + sig->group_exit_task = NULL;
19.848 + sig->group_stop_count = 0;
19.849 + sig->curr_target = NULL;
19.850 + init_sigpending(&sig->shared_pending);
19.851 + INIT_LIST_HEAD(&sig->posix_timers);
19.852 +
19.853 + hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
19.854 + sig->it_real_incr.tv64 = 0;
19.855 + sig->real_timer.function = it_real_fn;
19.856 + sig->real_timer.data = tsk;
19.857 +
19.858 + sig->it_virt_expires = cputime_zero;
19.859 + sig->it_virt_incr = cputime_zero;
19.860 + sig->it_prof_expires = cputime_zero;
19.861 + sig->it_prof_incr = cputime_zero;
19.862 +
19.863 + sig->leader = 0; /* session leadership doesn't inherit */
19.864 + sig->tty_old_pgrp = 0;
19.865 +
19.866 + sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
19.867 + sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
19.868 + sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
19.869 + sig->sched_time = 0;
19.870 + INIT_LIST_HEAD(&sig->cpu_timers[0]);
19.871 + INIT_LIST_HEAD(&sig->cpu_timers[1]);
19.872 + INIT_LIST_HEAD(&sig->cpu_timers[2]);
19.873 +
19.874 + task_lock(current->group_leader);
19.875 + memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
19.876 + task_unlock(current->group_leader);
19.877 +
19.878 + if
(sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { 19.879 + /* 19.880 + * New sole thread in the process gets an expiry time 19.881 + * of the whole CPU time limit. 19.882 + */ 19.883 + tsk->it_prof_expires = 19.884 + secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); 19.885 + } 19.886 + 19.887 + return 0; 19.888 +} 19.889 + 19.890 +static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) 19.891 +{ 19.892 + unsigned long new_flags = p->flags; 19.893 + 19.894 + new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE); 19.895 + new_flags |= PF_FORKNOEXEC; 19.896 + if (!(clone_flags & CLONE_PTRACE)) 19.897 + p->ptrace = 0; 19.898 + p->flags = new_flags; 19.899 +} 19.900 + 19.901 +asmlinkage long sys_set_tid_address(int __user *tidptr) 19.902 +{ 19.903 + current->clear_child_tid = tidptr; 19.904 + 19.905 + return current->pid; 19.906 +} 19.907 + 19.908 +/* 19.909 + * This creates a new process as a copy of the old one, 19.910 + * but does not actually start it yet. 19.911 + * 19.912 + * It copies the registers, and all the appropriate 19.913 + * parts of the process environment (as per the clone 19.914 + * flags). The actual kick-off is left to the caller. 19.915 + */ 19.916 +static task_t *copy_process(unsigned long clone_flags, 19.917 + unsigned long stack_start, 19.918 + struct pt_regs *regs, 19.919 + unsigned long stack_size, 19.920 + int __user *parent_tidptr, 19.921 + int __user *child_tidptr, 19.922 + int pid) 19.923 +{ 19.924 + int retval; 19.925 + struct task_struct *p = NULL; 19.926 + 19.927 + if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) 19.928 + return ERR_PTR(-EINVAL); 19.929 + 19.930 + /* 19.931 + * Thread groups must share signals as well, and detached threads 19.932 + * can only be started up within the thread group. 19.933 + */ 19.934 + if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) 19.935 + return ERR_PTR(-EINVAL); 19.936 + 19.937 + /* 19.938 + * Shared signal handlers imply shared VM. By way of the above, 19.939 + * thread groups also imply shared VM. Blocking this case allows 19.940 + * for various simplifications in other code. 19.941 + */ 19.942 + if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) 19.943 + return ERR_PTR(-EINVAL); 19.944 + 19.945 + retval = security_task_create(clone_flags); 19.946 + if (retval) 19.947 + goto fork_out; 19.948 + 19.949 + retval = -ENOMEM; 19.950 + p = dup_task_struct(current); 19.951 + if (!p) 19.952 + goto fork_out; 19.953 + 19.954 + retval = -EAGAIN; 19.955 + if (atomic_read(&p->user->processes) >= 19.956 + p->signal->rlim[RLIMIT_NPROC].rlim_cur) { 19.957 + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && 19.958 + p->user != &root_user) 19.959 + goto bad_fork_free; 19.960 + } 19.961 + 19.962 + atomic_inc(&p->user->__count); 19.963 + atomic_inc(&p->user->processes); 19.964 + get_group_info(p->group_info); 19.965 + 19.966 + /* 19.967 + * If multiple threads are within copy_process(), then this check 19.968 + * triggers too late. This doesn't hurt, the check is only there 19.969 + * to stop root fork bombs. 
19.970 + */ 19.971 + if (nr_threads >= max_threads) 19.972 + goto bad_fork_cleanup_count; 19.973 + 19.974 + if (!try_module_get(task_thread_info(p)->exec_domain->module)) 19.975 + goto bad_fork_cleanup_count; 19.976 + 19.977 + if (p->binfmt && !try_module_get(p->binfmt->module)) 19.978 + goto bad_fork_cleanup_put_domain; 19.979 + 19.980 + p->did_exec = 0; 19.981 + copy_flags(clone_flags, p); 19.982 + p->pid = pid; 19.983 + retval = -EFAULT; 19.984 + if (clone_flags & CLONE_PARENT_SETTID) 19.985 + if (put_user(p->pid, parent_tidptr)) 19.986 + goto bad_fork_cleanup; 19.987 + 19.988 + p->proc_dentry = NULL; 19.989 + 19.990 + INIT_LIST_HEAD(&p->children); 19.991 + INIT_LIST_HEAD(&p->sibling); 19.992 + p->vfork_done = NULL; 19.993 + spin_lock_init(&p->alloc_lock); 19.994 + spin_lock_init(&p->proc_lock); 19.995 + 19.996 + clear_tsk_thread_flag(p, TIF_SIGPENDING); 19.997 + init_sigpending(&p->pending); 19.998 + 19.999 + p->utime = cputime_zero; 19.1000 + p->stime = cputime_zero; 19.1001 + p->sched_time = 0; 19.1002 + p->rchar = 0; /* I/O counter: bytes read */ 19.1003 + p->wchar = 0; /* I/O counter: bytes written */ 19.1004 + p->syscr = 0; /* I/O counter: read syscalls */ 19.1005 + p->syscw = 0; /* I/O counter: write syscalls */ 19.1006 + acct_clear_integrals(p); 19.1007 + 19.1008 + p->it_virt_expires = cputime_zero; 19.1009 + p->it_prof_expires = cputime_zero; 19.1010 + p->it_sched_expires = 0; 19.1011 + INIT_LIST_HEAD(&p->cpu_timers[0]); 19.1012 + INIT_LIST_HEAD(&p->cpu_timers[1]); 19.1013 + INIT_LIST_HEAD(&p->cpu_timers[2]); 19.1014 + 19.1015 + p->lock_depth = -1; /* -1 = no lock */ 19.1016 + do_posix_clock_monotonic_gettime(&p->start_time); 19.1017 + p->security = NULL; 19.1018 + p->io_context = NULL; 19.1019 + p->io_wait = NULL; 19.1020 + p->audit_context = NULL; 19.1021 + cpuset_fork(p); 19.1022 +#ifdef CONFIG_NUMA 19.1023 + p->mempolicy = mpol_copy(p->mempolicy); 19.1024 + if (IS_ERR(p->mempolicy)) { 19.1025 + retval = PTR_ERR(p->mempolicy); 19.1026 + p->mempolicy = NULL; 19.1027 + goto bad_fork_cleanup_cpuset; 19.1028 + } 19.1029 +#endif 19.1030 + 19.1031 +#ifdef CONFIG_DEBUG_MUTEXES 19.1032 + p->blocked_on = NULL; /* not blocked yet */ 19.1033 +#endif 19.1034 + 19.1035 + p->tgid = p->pid; 19.1036 + if (clone_flags & CLONE_THREAD) 19.1037 + p->tgid = current->tgid; 19.1038 + 19.1039 + if ((retval = security_task_alloc(p))) 19.1040 + goto bad_fork_cleanup_policy; 19.1041 + if ((retval = audit_alloc(p))) 19.1042 + goto bad_fork_cleanup_security; 19.1043 + /* copy all the process information */ 19.1044 + if ((retval = copy_semundo(clone_flags, p))) 19.1045 + goto bad_fork_cleanup_audit; 19.1046 + if ((retval = copy_files(clone_flags, p))) 19.1047 + goto bad_fork_cleanup_semundo; 19.1048 + if ((retval = copy_fs(clone_flags, p))) 19.1049 + goto bad_fork_cleanup_files; 19.1050 + if ((retval = copy_sighand(clone_flags, p))) 19.1051 + goto bad_fork_cleanup_fs; 19.1052 + if ((retval = copy_signal(clone_flags, p))) 19.1053 + goto bad_fork_cleanup_sighand; 19.1054 + if ((retval = copy_mm(clone_flags, p))) 19.1055 + goto bad_fork_cleanup_signal; 19.1056 + if ((retval = copy_keys(clone_flags, p))) 19.1057 + goto bad_fork_cleanup_mm; 19.1058 + if ((retval = copy_namespace(clone_flags, p))) 19.1059 + goto bad_fork_cleanup_keys; 19.1060 + retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); 19.1061 + if (retval) 19.1062 + goto bad_fork_cleanup_namespace; 19.1063 + 19.1064 + p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? 
child_tidptr : NULL; 19.1065 + /* 19.1066 + * Clear TID on mm_release()? 19.1067 + */ 19.1068 + p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 19.1069 + 19.1070 + /* 19.1071 + * sigaltstack should be cleared when sharing the same VM 19.1072 + */ 19.1073 + if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) 19.1074 + p->sas_ss_sp = p->sas_ss_size = 0; 19.1075 + 19.1076 + /* 19.1077 + * Syscall tracing should be turned off in the child regardless 19.1078 + * of CLONE_PTRACE. 19.1079 + */ 19.1080 + clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); 19.1081 +#ifdef TIF_SYSCALL_EMU 19.1082 + clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); 19.1083 +#endif 19.1084 + 19.1085 + /* Our parent execution domain becomes current domain 19.1086 + These must match for thread signalling to apply */ 19.1087 + 19.1088 + p->parent_exec_id = p->self_exec_id; 19.1089 + 19.1090 + /* ok, now we should be set up.. */ 19.1091 + p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); 19.1092 + p->pdeath_signal = 0; 19.1093 + p->exit_state = 0; 19.1094 + 19.1095 + /* 19.1096 + * Ok, make it visible to the rest of the system. 19.1097 + * We dont wake it up yet. 19.1098 + */ 19.1099 + p->group_leader = p; 19.1100 + INIT_LIST_HEAD(&p->ptrace_children); 19.1101 + INIT_LIST_HEAD(&p->ptrace_list); 19.1102 + 19.1103 + /* Perform scheduler related setup. Assign this task to a CPU. */ 19.1104 + sched_fork(p, clone_flags); 19.1105 + 19.1106 + /* Need tasklist lock for parent etc handling! */ 19.1107 + write_lock_irq(&tasklist_lock); 19.1108 + 19.1109 + /* 19.1110 + * The task hasn't been attached yet, so its cpus_allowed mask will 19.1111 + * not be changed, nor will its assigned CPU. 19.1112 + * 19.1113 + * The cpus_allowed mask of the parent may have changed after it was 19.1114 + * copied first time - so re-copy it here, then check the child's CPU 19.1115 + * to ensure it is on a valid CPU (and if not, just force it back to 19.1116 + * parent's CPU). This avoids alot of nasty races. 19.1117 + */ 19.1118 + p->cpus_allowed = current->cpus_allowed; 19.1119 + if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || 19.1120 + !cpu_online(task_cpu(p)))) 19.1121 + set_task_cpu(p, smp_processor_id()); 19.1122 + 19.1123 + /* 19.1124 + * Check for pending SIGKILL! The new thread should not be allowed 19.1125 + * to slip out of an OOM kill. (or normal SIGKILL.) 19.1126 + */ 19.1127 + if (sigismember(¤t->pending.signal, SIGKILL)) { 19.1128 + write_unlock_irq(&tasklist_lock); 19.1129 + retval = -EINTR; 19.1130 + goto bad_fork_cleanup_namespace; 19.1131 + } 19.1132 + 19.1133 + /* CLONE_PARENT re-uses the old parent */ 19.1134 + if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) 19.1135 + p->real_parent = current->real_parent; 19.1136 + else 19.1137 + p->real_parent = current; 19.1138 + p->parent = p->real_parent; 19.1139 + 19.1140 + spin_lock(¤t->sighand->siglock); 19.1141 + if (clone_flags & CLONE_THREAD) { 19.1142 + /* 19.1143 + * Important: if an exit-all has been started then 19.1144 + * do not create this new thread - the whole thread 19.1145 + * group is supposed to exit anyway. 
19.1146 + */
19.1147 + if (current->signal->flags & SIGNAL_GROUP_EXIT) {
19.1148 + spin_unlock(&current->sighand->siglock);
19.1149 + write_unlock_irq(&tasklist_lock);
19.1150 + retval = -EAGAIN;
19.1151 + goto bad_fork_cleanup_namespace;
19.1152 + }
19.1153 + p->group_leader = current->group_leader;
19.1154 +
19.1155 + if (current->signal->group_stop_count > 0) {
19.1156 + /*
19.1157 + * There is an all-stop in progress for the group.
19.1158 + * We ourselves will stop as soon as we check signals.
19.1159 + * Make the new thread part of that group stop too.
19.1160 + */
19.1161 + current->signal->group_stop_count++;
19.1162 + set_tsk_thread_flag(p, TIF_SIGPENDING);
19.1163 + }
19.1164 +
19.1165 + if (!cputime_eq(current->signal->it_virt_expires,
19.1166 + cputime_zero) ||
19.1167 + !cputime_eq(current->signal->it_prof_expires,
19.1168 + cputime_zero) ||
19.1169 + current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
19.1170 + !list_empty(&current->signal->cpu_timers[0]) ||
19.1171 + !list_empty(&current->signal->cpu_timers[1]) ||
19.1172 + !list_empty(&current->signal->cpu_timers[2])) {
19.1173 + /*
19.1174 + * Have child wake up on its first tick to check
19.1175 + * for process CPU timers.
19.1176 + */
19.1177 + p->it_prof_expires = jiffies_to_cputime(1);
19.1178 + }
19.1179 + }
19.1180 +
19.1181 + /*
19.1182 + * inherit ioprio
19.1183 + */
19.1184 + p->ioprio = current->ioprio;
19.1185 +
19.1186 + SET_LINKS(p);
19.1187 + if (unlikely(p->ptrace & PT_PTRACED))
19.1188 + __ptrace_link(p, current->parent);
19.1189 +
19.1190 + if (thread_group_leader(p)) {
19.1191 + p->signal->tty = current->signal->tty;
19.1192 + p->signal->pgrp = process_group(current);
19.1193 + p->signal->session = current->signal->session;
19.1194 + attach_pid(p, PIDTYPE_PGID, process_group(p));
19.1195 + attach_pid(p, PIDTYPE_SID, p->signal->session);
19.1196 + if (p->pid)
19.1197 + __get_cpu_var(process_counts)++;
19.1198 + }
19.1199 + attach_pid(p, PIDTYPE_TGID, p->tgid);
19.1200 + attach_pid(p, PIDTYPE_PID, p->pid);
19.1201 +
19.1202 + nr_threads++;
19.1203 + total_forks++;
19.1204 + spin_unlock(&current->sighand->siglock);
19.1205 + write_unlock_irq(&tasklist_lock);
19.1206 + proc_fork_connector(p);
19.1207 + return p;
19.1208 +
19.1209 +bad_fork_cleanup_namespace:
19.1210 + exit_namespace(p);
19.1211 +bad_fork_cleanup_keys:
19.1212 + exit_keys(p);
19.1213 +bad_fork_cleanup_mm:
19.1214 + if (p->mm)
19.1215 + mmput(p->mm);
19.1216 +bad_fork_cleanup_signal:
19.1217 + exit_signal(p);
19.1218 +bad_fork_cleanup_sighand:
19.1219 + exit_sighand(p);
19.1220 +bad_fork_cleanup_fs:
19.1221 + exit_fs(p); /* blocking */
19.1222 +bad_fork_cleanup_files:
19.1223 + exit_files(p); /* blocking */
19.1224 +bad_fork_cleanup_semundo:
19.1225 + exit_sem(p);
19.1226 +bad_fork_cleanup_audit:
19.1227 + audit_free(p);
19.1228 +bad_fork_cleanup_security:
19.1229 + security_task_free(p);
19.1230 +bad_fork_cleanup_policy:
19.1231 +#ifdef CONFIG_NUMA
19.1232 + mpol_free(p->mempolicy);
19.1233 +bad_fork_cleanup_cpuset:
19.1234 +#endif
19.1235 + cpuset_exit(p);
19.1236 +bad_fork_cleanup:
19.1237 + if (p->binfmt)
19.1238 + module_put(p->binfmt->module);
19.1239 +bad_fork_cleanup_put_domain:
19.1240 + module_put(task_thread_info(p)->exec_domain->module);
19.1241 +bad_fork_cleanup_count:
19.1242 + put_group_info(p->group_info);
19.1243 + atomic_dec(&p->user->processes);
19.1244 + free_uid(p->user);
19.1245 +bad_fork_free:
19.1246 + free_task(p);
19.1247 +fork_out:
19.1248 + return ERR_PTR(retval);
19.1249 +}
19.1250 +
19.1251 +struct pt_regs * __devinit
__attribute__((weak)) idle_regs(struct pt_regs *regs) 19.1252 +{ 19.1253 + memset(regs, 0, sizeof(struct pt_regs)); 19.1254 + return regs; 19.1255 +} 19.1256 + 19.1257 +task_t * __devinit fork_idle(int cpu) 19.1258 +{ 19.1259 + task_t *task; 19.1260 + struct pt_regs regs; 19.1261 + 19.1262 + task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, 0); 19.1263 + if (!task) 19.1264 + return ERR_PTR(-ENOMEM); 19.1265 + init_idle(task, cpu); 19.1266 + unhash_process(task); 19.1267 + return task; 19.1268 +} 19.1269 + 19.1270 +static inline int fork_traceflag (unsigned clone_flags) 19.1271 +{ 19.1272 + if (clone_flags & CLONE_UNTRACED) 19.1273 + return 0; 19.1274 + else if (clone_flags & CLONE_VFORK) { 19.1275 + if (current->ptrace & PT_TRACE_VFORK) 19.1276 + return PTRACE_EVENT_VFORK; 19.1277 + } else if ((clone_flags & CSIGNAL) != SIGCHLD) { 19.1278 + if (current->ptrace & PT_TRACE_CLONE) 19.1279 + return PTRACE_EVENT_CLONE; 19.1280 + } else if (current->ptrace & PT_TRACE_FORK) 19.1281 + return PTRACE_EVENT_FORK; 19.1282 + 19.1283 + return 0; 19.1284 +} 19.1285 + 19.1286 +/* 19.1287 + * Ok, this is the main fork-routine. 19.1288 + * 19.1289 + * It copies the process, and if successful kick-starts 19.1290 + * it and waits for it to finish using the VM if required. 19.1291 + */ 19.1292 +long do_fork(unsigned long clone_flags, 19.1293 + unsigned long stack_start, 19.1294 + struct pt_regs *regs, 19.1295 + unsigned long stack_size, 19.1296 + int __user *parent_tidptr, 19.1297 + int __user *child_tidptr) 19.1298 +{ 19.1299 + struct task_struct *p; 19.1300 + int trace = 0; 19.1301 + long pid = alloc_pidmap(); 19.1302 + 19.1303 + if (pid < 0) 19.1304 + return -EAGAIN; 19.1305 + if (unlikely(current->ptrace)) { 19.1306 + trace = fork_traceflag (clone_flags); 19.1307 + if (trace) 19.1308 + clone_flags |= CLONE_PTRACE; 19.1309 + } 19.1310 + 19.1311 + p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid); 19.1312 + /* 19.1313 + * Do this prior waking up the new thread - the thread pointer 19.1314 + * might get invalid after that point, if the thread exits quickly. 19.1315 + */ 19.1316 + if (!IS_ERR(p)) { 19.1317 + struct completion vfork; 19.1318 + 19.1319 + if (clone_flags & CLONE_VFORK) { 19.1320 + p->vfork_done = &vfork; 19.1321 + init_completion(&vfork); 19.1322 + } 19.1323 + 19.1324 + if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { 19.1325 + /* 19.1326 + * We'll start up with an immediate SIGSTOP. 
19.1327 + */ 19.1328 + sigaddset(&p->pending.signal, SIGSTOP); 19.1329 + set_tsk_thread_flag(p, TIF_SIGPENDING); 19.1330 + } 19.1331 + 19.1332 + if (!(clone_flags & CLONE_STOPPED)) 19.1333 + wake_up_new_task(p, clone_flags); 19.1334 + else 19.1335 + p->state = TASK_STOPPED; 19.1336 + 19.1337 + if (unlikely (trace)) { 19.1338 + current->ptrace_message = pid; 19.1339 + ptrace_notify ((trace << 8) | SIGTRAP); 19.1340 + } 19.1341 + 19.1342 + if (clone_flags & CLONE_VFORK) { 19.1343 + wait_for_completion(&vfork); 19.1344 + if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) 19.1345 + ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); 19.1346 + } 19.1347 + } else { 19.1348 + free_pidmap(pid); 19.1349 + pid = PTR_ERR(p); 19.1350 + } 19.1351 + return pid; 19.1352 +} 19.1353 + 19.1354 +#ifndef ARCH_MIN_MMSTRUCT_ALIGN 19.1355 +#define ARCH_MIN_MMSTRUCT_ALIGN 0 19.1356 +#endif 19.1357 + 19.1358 +void __init proc_caches_init(void) 19.1359 +{ 19.1360 + sighand_cachep = kmem_cache_create("sighand_cache", 19.1361 + sizeof(struct sighand_struct), 0, 19.1362 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 19.1363 + signal_cachep = kmem_cache_create("signal_cache", 19.1364 + sizeof(struct signal_struct), 0, 19.1365 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 19.1366 + files_cachep = kmem_cache_create("files_cache", 19.1367 + sizeof(struct files_struct), 0, 19.1368 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 19.1369 + fs_cachep = kmem_cache_create("fs_cache", 19.1370 + sizeof(struct fs_struct), 0, 19.1371 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 19.1372 + vm_area_cachep = kmem_cache_create("vm_area_struct", 19.1373 + sizeof(struct vm_area_struct), 0, 19.1374 + SLAB_PANIC, NULL, NULL); 19.1375 + mm_cachep = kmem_cache_create("mm_struct", 19.1376 + sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 19.1377 + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 19.1378 +} 19.1379 + 19.1380 + 19.1381 +/* 19.1382 + * Check constraints on flags passed to the unshare system call and 19.1383 + * force unsharing of additional process context as appropriate. 19.1384 + */ 19.1385 +static inline void check_unshare_flags(unsigned long *flags_ptr) 19.1386 +{ 19.1387 + /* 19.1388 + * If unsharing a thread from a thread group, must also 19.1389 + * unshare vm. 19.1390 + */ 19.1391 + if (*flags_ptr & CLONE_THREAD) 19.1392 + *flags_ptr |= CLONE_VM; 19.1393 + 19.1394 + /* 19.1395 + * If unsharing vm, must also unshare signal handlers. 19.1396 + */ 19.1397 + if (*flags_ptr & CLONE_VM) 19.1398 + *flags_ptr |= CLONE_SIGHAND; 19.1399 + 19.1400 + /* 19.1401 + * If unsharing signal handlers and the task was created 19.1402 + * using CLONE_THREAD, then must unshare the thread 19.1403 + */ 19.1404 + if ((*flags_ptr & CLONE_SIGHAND) && 19.1405 + (atomic_read(¤t->signal->count) > 1)) 19.1406 + *flags_ptr |= CLONE_THREAD; 19.1407 + 19.1408 + /* 19.1409 + * If unsharing namespace, must also unshare filesystem information. 
19.1410 + */ 19.1411 + if (*flags_ptr & CLONE_NEWNS) 19.1412 + *flags_ptr |= CLONE_FS; 19.1413 +} 19.1414 + 19.1415 +/* 19.1416 + * Unsharing of tasks created with CLONE_THREAD is not supported yet 19.1417 + */ 19.1418 +static int unshare_thread(unsigned long unshare_flags) 19.1419 +{ 19.1420 + if (unshare_flags & CLONE_THREAD) 19.1421 + return -EINVAL; 19.1422 + 19.1423 + return 0; 19.1424 +} 19.1425 + 19.1426 +/* 19.1427 + * Unshare the filesystem structure if it is being shared 19.1428 + */ 19.1429 +static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) 19.1430 +{ 19.1431 + struct fs_struct *fs = current->fs; 19.1432 + 19.1433 + if ((unshare_flags & CLONE_FS) && 19.1434 + (fs && atomic_read(&fs->count) > 1)) { 19.1435 + *new_fsp = __copy_fs_struct(current->fs); 19.1436 + if (!*new_fsp) 19.1437 + return -ENOMEM; 19.1438 + } 19.1439 + 19.1440 + return 0; 19.1441 +} 19.1442 + 19.1443 +/* 19.1444 + * Unshare the namespace structure if it is being shared 19.1445 + */ 19.1446 +static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs) 19.1447 +{ 19.1448 + struct namespace *ns = current->namespace; 19.1449 + 19.1450 + if ((unshare_flags & CLONE_NEWNS) && 19.1451 + (ns && atomic_read(&ns->count) > 1)) { 19.1452 + if (!capable(CAP_SYS_ADMIN)) 19.1453 + return -EPERM; 19.1454 + 19.1455 + *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs); 19.1456 + if (!*new_nsp) 19.1457 + return -ENOMEM; 19.1458 + } 19.1459 + 19.1460 + return 0; 19.1461 +} 19.1462 + 19.1463 +/* 19.1464 + * Unsharing of sighand for tasks created with CLONE_SIGHAND is not 19.1465 + * supported yet 19.1466 + */ 19.1467 +static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) 19.1468 +{ 19.1469 + struct sighand_struct *sigh = current->sighand; 19.1470 + 19.1471 + if ((unshare_flags & CLONE_SIGHAND) && 19.1472 + (sigh && atomic_read(&sigh->count) > 1)) 19.1473 + return -EINVAL; 19.1474 + else 19.1475 + return 0; 19.1476 +} 19.1477 + 19.1478 +/* 19.1479 + * Unshare vm if it is being shared 19.1480 + */ 19.1481 +static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) 19.1482 +{ 19.1483 + struct mm_struct *mm = current->mm; 19.1484 + 19.1485 + if ((unshare_flags & CLONE_VM) && 19.1486 + (mm && atomic_read(&mm->mm_users) > 1)) { 19.1487 + return -EINVAL; 19.1488 + } 19.1489 + 19.1490 + return 0; 19.1491 +} 19.1492 + 19.1493 +/* 19.1494 + * Unshare file descriptor table if it is being shared 19.1495 + */ 19.1496 +static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) 19.1497 +{ 19.1498 + struct files_struct *fd = current->files; 19.1499 + int error = 0; 19.1500 + 19.1501 + if ((unshare_flags & CLONE_FILES) && 19.1502 + (fd && atomic_read(&fd->count) > 1)) { 19.1503 + *new_fdp = dup_fd(fd, &error); 19.1504 + if (!*new_fdp) 19.1505 + return error; 19.1506 + } 19.1507 + 19.1508 + return 0; 19.1509 +} 19.1510 + 19.1511 +/* 19.1512 + * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not 19.1513 + * supported yet 19.1514 + */ 19.1515 +static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp) 19.1516 +{ 19.1517 + if (unshare_flags & CLONE_SYSVSEM) 19.1518 + return -EINVAL; 19.1519 + 19.1520 + return 0; 19.1521 +} 19.1522 + 19.1523 +/* 19.1524 + * unshare allows a process to 'unshare' part of the process 19.1525 + * context which was originally shared using clone. 
copy_* 19.1526 + * functions used by do_fork() cannot be used here directly 19.1527 + * because they modify an inactive task_struct that is being 19.1528 + * constructed. Here we are modifying the current, active, 19.1529 + * task_struct. 19.1530 + */ 19.1531 +asmlinkage long sys_unshare(unsigned long unshare_flags) 19.1532 +{ 19.1533 + int err = 0; 19.1534 + struct fs_struct *fs, *new_fs = NULL; 19.1535 + struct namespace *ns, *new_ns = NULL; 19.1536 + struct sighand_struct *sigh, *new_sigh = NULL; 19.1537 + struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; 19.1538 + struct files_struct *fd, *new_fd = NULL; 19.1539 + struct sem_undo_list *new_ulist = NULL; 19.1540 + 19.1541 + check_unshare_flags(&unshare_flags); 19.1542 + 19.1543 + if ((err = unshare_thread(unshare_flags))) 19.1544 + goto bad_unshare_out; 19.1545 + if ((err = unshare_fs(unshare_flags, &new_fs))) 19.1546 + goto bad_unshare_cleanup_thread; 19.1547 + if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs))) 19.1548 + goto bad_unshare_cleanup_fs; 19.1549 + if ((err = unshare_sighand(unshare_flags, &new_sigh))) 19.1550 + goto bad_unshare_cleanup_ns; 19.1551 + if ((err = unshare_vm(unshare_flags, &new_mm))) 19.1552 + goto bad_unshare_cleanup_sigh; 19.1553 + if ((err = unshare_fd(unshare_flags, &new_fd))) 19.1554 + goto bad_unshare_cleanup_vm; 19.1555 + if ((err = unshare_semundo(unshare_flags, &new_ulist))) 19.1556 + goto bad_unshare_cleanup_fd; 19.1557 + 19.1558 + if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) { 19.1559 + 19.1560 + task_lock(current); 19.1561 + 19.1562 + if (new_fs) { 19.1563 + fs = current->fs; 19.1564 + current->fs = new_fs; 19.1565 + new_fs = fs; 19.1566 + } 19.1567 + 19.1568 + if (new_ns) { 19.1569 + ns = current->namespace; 19.1570 + current->namespace = new_ns; 19.1571 + new_ns = ns; 19.1572 + } 19.1573 + 19.1574 + if (new_sigh) { 19.1575 + sigh = current->sighand; 19.1576 + rcu_assign_pointer(current->sighand, new_sigh); 19.1577 + new_sigh = sigh; 19.1578 + } 19.1579 + 19.1580 + if (new_mm) { 19.1581 + mm = current->mm; 19.1582 + active_mm = current->active_mm; 19.1583 + current->mm = new_mm; 19.1584 + current->active_mm = new_mm; 19.1585 + activate_mm(active_mm, new_mm); 19.1586 + new_mm = mm; 19.1587 + } 19.1588 + 19.1589 + if (new_fd) { 19.1590 + fd = current->files; 19.1591 + current->files = new_fd; 19.1592 + new_fd = fd; 19.1593 + } 19.1594 + 19.1595 + task_unlock(current); 19.1596 + } 19.1597 + 19.1598 +bad_unshare_cleanup_fd: 19.1599 + if (new_fd) 19.1600 + put_files_struct(new_fd); 19.1601 + 19.1602 +bad_unshare_cleanup_vm: 19.1603 + if (new_mm) 19.1604 + mmput(new_mm); 19.1605 + 19.1606 +bad_unshare_cleanup_sigh: 19.1607 + if (new_sigh) 19.1608 + if (atomic_dec_and_test(&new_sigh->count)) 19.1609 + kmem_cache_free(sighand_cachep, new_sigh); 19.1610 + 19.1611 +bad_unshare_cleanup_ns: 19.1612 + if (new_ns) 19.1613 + put_namespace(new_ns); 19.1614 + 19.1615 +bad_unshare_cleanup_fs: 19.1616 + if (new_fs) 19.1617 + put_fs_struct(new_fs); 19.1618 + 19.1619 +bad_unshare_cleanup_thread: 19.1620 +bad_unshare_out: 19.1621 + return err; 19.1622 +}
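[Editor's illustration, not part of the changeset.] The sys_unshare() entry point added above becomes reachable from user space once the syscall number is wired up for the architecture. A minimal caller sketch follows; it assumes __NR_unshare is already defined by the toolchain's headers, and it hand-defines CLONE_NEWNS only in case <sched.h> does not expose it.

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef CLONE_NEWNS
    #define CLONE_NEWNS 0x00020000  /* new mount namespace; value from <linux/sched.h> */
    #endif

    int main(void)
    {
        /* Request a private mount namespace; per check_unshare_flags()
           above, CLONE_FS is implied when CLONE_NEWNS is set. */
        if (syscall(__NR_unshare, CLONE_NEWNS) == -1) {
            perror("unshare(CLONE_NEWNS)");
            return 1;
        }
        printf("mount namespace is now private to this process\n");
        return 0;
    }

Note that unshare_thread(), unshare_vm(), unshare_sighand() and unshare_semundo() above still reject their flags when the corresponding state is shared, so at this stage only CLONE_FS, CLONE_FILES and CLONE_NEWNS requests can actually succeed.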
20.1 --- a/tools/console/daemon/io.c Thu Jun 15 10:02:53 2006 -0600 20.2 +++ b/tools/console/daemon/io.c Thu Jun 15 10:23:57 2006 -0600 20.3 @@ -24,8 +24,8 @@ 20.4 #include "io.h" 20.5 #include <xenctrl.h> 20.6 #include <xs.h> 20.7 -#include <xen/linux/evtchn.h> 20.8 #include <xen/io/console.h> 20.9 +#include <xenctrl.h> 20.10 20.11 #include <malloc.h> 20.12 #include <stdlib.h> 20.13 @@ -36,7 +36,6 @@ 20.14 #include <unistd.h> 20.15 #include <termios.h> 20.16 #include <stdarg.h> 20.17 -#include <sys/ioctl.h> 20.18 #include <sys/mman.h> 20.19 20.20 #define MAX(a, b) (((a) > (b)) ? (a) : (b)) 20.21 @@ -64,19 +63,12 @@ struct domain 20.22 char *conspath; 20.23 int ring_ref; 20.24 evtchn_port_t local_port; 20.25 - int evtchn_fd; 20.26 + int xce_handle; 20.27 struct xencons_interface *interface; 20.28 }; 20.29 20.30 static struct domain *dom_head; 20.31 20.32 -static void evtchn_notify(struct domain *dom) 20.33 -{ 20.34 - struct ioctl_evtchn_notify notify; 20.35 - notify.port = dom->local_port; 20.36 - (void)ioctl(dom->evtchn_fd, IOCTL_EVTCHN_NOTIFY, ¬ify); 20.37 -} 20.38 - 20.39 static void buffer_append(struct domain *dom) 20.40 { 20.41 struct buffer *buffer = &dom->buffer; 20.42 @@ -106,7 +98,7 @@ static void buffer_append(struct domain 20.43 20.44 mb(); 20.45 intf->out_cons = cons; 20.46 - evtchn_notify(dom); 20.47 + xc_evtchn_notify(dom->xce_handle, dom->local_port); 20.48 20.49 if (buffer->max_capacity && 20.50 buffer->size > buffer->max_capacity) { 20.51 @@ -234,7 +226,6 @@ int xs_gather(struct xs_handle *xs, cons 20.52 static int domain_create_ring(struct domain *dom) 20.53 { 20.54 int err, remote_port, ring_ref, rc; 20.55 - struct ioctl_evtchn_bind_interdomain bind; 20.56 20.57 err = xs_gather(xs, dom->conspath, 20.58 "ring-ref", "%u", &ring_ref, 20.59 @@ -258,24 +249,24 @@ static int domain_create_ring(struct dom 20.60 } 20.61 20.62 dom->local_port = -1; 20.63 - if (dom->evtchn_fd != -1) 20.64 - close(dom->evtchn_fd); 20.65 + if (dom->xce_handle != -1) 20.66 + xc_evtchn_close(dom->xce_handle); 20.67 20.68 /* Opening evtchn independently for each console is a bit 20.69 * wasteful, but that's how the code is structured... 
*/ 20.70 - dom->evtchn_fd = open("/dev/xen/evtchn", O_RDWR); 20.71 - if (dom->evtchn_fd == -1) { 20.72 + dom->xce_handle = xc_evtchn_open(); 20.73 + if (dom->xce_handle == -1) { 20.74 err = errno; 20.75 goto out; 20.76 } 20.77 20.78 - bind.remote_domain = dom->domid; 20.79 - bind.remote_port = remote_port; 20.80 - rc = ioctl(dom->evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); 20.81 + rc = xc_evtchn_bind_interdomain(dom->xce_handle, 20.82 + dom->domid, remote_port); 20.83 + 20.84 if (rc == -1) { 20.85 err = errno; 20.86 - close(dom->evtchn_fd); 20.87 - dom->evtchn_fd = -1; 20.88 + xc_evtchn_close(dom->xce_handle); 20.89 + dom->xce_handle = -1; 20.90 goto out; 20.91 } 20.92 dom->local_port = rc; 20.93 @@ -285,8 +276,8 @@ static int domain_create_ring(struct dom 20.94 20.95 if (dom->tty_fd == -1) { 20.96 err = errno; 20.97 - close(dom->evtchn_fd); 20.98 - dom->evtchn_fd = -1; 20.99 + xc_evtchn_close(dom->xce_handle); 20.100 + dom->xce_handle = -1; 20.101 dom->local_port = -1; 20.102 goto out; 20.103 } 20.104 @@ -344,7 +335,7 @@ static struct domain *create_domain(int 20.105 dom->ring_ref = -1; 20.106 dom->local_port = -1; 20.107 dom->interface = NULL; 20.108 - dom->evtchn_fd = -1; 20.109 + dom->xce_handle = -1; 20.110 20.111 if (!watch_domain(dom, true)) 20.112 goto out; 20.113 @@ -409,9 +400,9 @@ static void shutdown_domain(struct domai 20.114 if (d->interface != NULL) 20.115 munmap(d->interface, getpagesize()); 20.116 d->interface = NULL; 20.117 - if (d->evtchn_fd != -1) 20.118 - close(d->evtchn_fd); 20.119 - d->evtchn_fd = -1; 20.120 + if (d->xce_handle != -1) 20.121 + xc_evtchn_close(d->xce_handle); 20.122 + d->xce_handle = -1; 20.123 cleanup_domain(d); 20.124 } 20.125 20.126 @@ -483,7 +474,7 @@ static void handle_tty_read(struct domai 20.127 } 20.128 wmb(); 20.129 intf->in_prod = prod; 20.130 - evtchn_notify(dom); 20.131 + xc_evtchn_notify(dom->xce_handle, dom->local_port); 20.132 } else { 20.133 close(dom->tty_fd); 20.134 dom->tty_fd = -1; 20.135 @@ -516,14 +507,14 @@ static void handle_tty_write(struct doma 20.136 20.137 static void handle_ring_read(struct domain *dom) 20.138 { 20.139 - evtchn_port_t v; 20.140 + evtchn_port_t port; 20.141 20.142 - if (!read_sync(dom->evtchn_fd, &v, sizeof(v))) 20.143 + if ((port = xc_evtchn_pending(dom->xce_handle)) == -1) 20.144 return; 20.145 20.146 buffer_append(dom); 20.147 20.148 - (void)write_sync(dom->evtchn_fd, &v, sizeof(v)); 20.149 + (void)xc_evtchn_unmask(dom->xce_handle, port); 20.150 } 20.151 20.152 static void handle_xs(void) 20.153 @@ -566,9 +557,10 @@ void handle_io(void) 20.154 max_fd = MAX(xs_fileno(xs), max_fd); 20.155 20.156 for (d = dom_head; d; d = d->next) { 20.157 - if (d->evtchn_fd != -1) { 20.158 - FD_SET(d->evtchn_fd, &readfds); 20.159 - max_fd = MAX(d->evtchn_fd, max_fd); 20.160 + if (d->xce_handle != -1) { 20.161 + int evtchn_fd = xc_evtchn_fd(d->xce_handle); 20.162 + FD_SET(evtchn_fd, &readfds); 20.163 + max_fd = MAX(evtchn_fd, max_fd); 20.164 } 20.165 20.166 if (d->tty_fd != -1) { 20.167 @@ -588,8 +580,8 @@ void handle_io(void) 20.168 20.169 for (d = dom_head; d; d = n) { 20.170 n = d->next; 20.171 - if (d->evtchn_fd != -1 && 20.172 - FD_ISSET(d->evtchn_fd, &readfds)) 20.173 + if (d->xce_handle != -1 && 20.174 + FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds)) 20.175 handle_ring_read(d); 20.176 20.177 if (d->tty_fd != -1) {
21.1 --- a/tools/console/daemon/utils.c Thu Jun 15 10:02:53 2006 -0600 21.2 +++ b/tools/console/daemon/utils.c Thu Jun 15 10:23:57 2006 -0600 21.3 @@ -39,32 +39,6 @@ 21.4 struct xs_handle *xs; 21.5 int xc; 21.6 21.7 -bool _read_write_sync(int fd, void *data, size_t size, bool do_read) 21.8 -{ 21.9 - size_t offset = 0; 21.10 - ssize_t len; 21.11 - 21.12 - while (offset < size) { 21.13 - if (do_read) { 21.14 - len = read(fd, data + offset, size - offset); 21.15 - } else { 21.16 - len = write(fd, data + offset, size - offset); 21.17 - } 21.18 - 21.19 - if (len < 1) { 21.20 - if (len == -1 && (errno == EAGAIN || errno == EINTR)) { 21.21 - continue; 21.22 - } else { 21.23 - return false; 21.24 - } 21.25 - } else { 21.26 - offset += len; 21.27 - } 21.28 - } 21.29 - 21.30 - return true; 21.31 -} 21.32 - 21.33 static void child_exit(int sig) 21.34 { 21.35 while (waitpid(-1, NULL, WNOHANG) > 0);
22.1 --- a/tools/console/daemon/utils.h Thu Jun 15 10:02:53 2006 -0600 22.2 +++ b/tools/console/daemon/utils.h Thu Jun 15 10:23:57 2006 -0600 22.3 @@ -29,9 +29,6 @@ 22.4 22.5 void daemonize(const char *pidfile); 22.6 bool xen_setup(void); 22.7 -#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true) 22.8 -#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false) 22.9 -bool _read_write_sync(int fd, void *data, size_t size, bool do_read); 22.10 22.11 extern struct xs_handle *xs; 22.12 extern int xc;
23.1 --- a/tools/ioemu/sdl.c Thu Jun 15 10:02:53 2006 -0600 23.2 +++ b/tools/ioemu/sdl.c Thu Jun 15 10:23:57 2006 -0600 23.3 @@ -376,13 +376,18 @@ static void sdl_update_caption(void) 23.4 23.5 static void sdl_hide_cursor(void) 23.6 { 23.7 - SDL_SetCursor(sdl_cursor_hidden); 23.8 + if (kbd_mouse_is_absolute()) { 23.9 + SDL_ShowCursor(1); 23.10 + SDL_SetCursor(sdl_cursor_hidden); 23.11 + } else { 23.12 + SDL_ShowCursor(0); 23.13 + } 23.14 } 23.15 23.16 static void sdl_show_cursor(void) 23.17 { 23.18 if (!kbd_mouse_is_absolute()) { 23.19 - SDL_SetCursor(sdl_cursor_normal); 23.20 + SDL_ShowCursor(1); 23.21 } 23.22 } 23.23
24.1 --- a/tools/ioemu/target-i386-dm/helper2.c Thu Jun 15 10:02:53 2006 -0600 24.2 +++ b/tools/ioemu/target-i386-dm/helper2.c Thu Jun 15 10:23:57 2006 -0600 24.3 @@ -47,11 +47,9 @@ 24.4 24.5 #include <limits.h> 24.6 #include <fcntl.h> 24.7 -#include <sys/ioctl.h> 24.8 24.9 #include <xenctrl.h> 24.10 #include <xen/hvm/ioreq.h> 24.11 -#include <xen/linux/evtchn.h> 24.12 24.13 #include "cpu.h" 24.14 #include "exec-all.h" 24.15 @@ -123,7 +121,7 @@ target_ulong cpu_get_phys_page_debug(CPU 24.16 } 24.17 24.18 //the evtchn fd for polling 24.19 -int evtchn_fd = -1; 24.20 +int xce_handle = -1; 24.21 24.22 //which vcpu we are serving 24.23 int send_vcpu = 0; 24.24 @@ -170,11 +168,10 @@ static ioreq_t* __cpu_get_ioreq(int vcpu 24.25 //retval--the number of ioreq packet 24.26 static ioreq_t* cpu_get_ioreq(void) 24.27 { 24.28 - int i, rc; 24.29 + int i; 24.30 evtchn_port_t port; 24.31 24.32 - rc = read(evtchn_fd, &port, sizeof(port)); 24.33 - if ( rc == sizeof(port) ) { 24.34 + if ( (port = xc_evtchn_pending(xce_handle)) != -1 ) { 24.35 for ( i = 0; i < vcpus; i++ ) 24.36 if ( shared_page->vcpu_iodata[i].dm_eport == port ) 24.37 break; 24.38 @@ -184,8 +181,7 @@ static ioreq_t* cpu_get_ioreq(void) 24.39 exit(1); 24.40 } 24.41 24.42 - // unmask the wanted port again 24.43 - write(evtchn_fd, &port, sizeof(port)); 24.44 + xc_evtchn_unmask(xce_handle, port); 24.45 24.46 //get the io packet from shared memory 24.47 send_vcpu = i; 24.48 @@ -436,6 +432,7 @@ int main_loop(void) 24.49 extern int shutdown_requested; 24.50 CPUState *env = global_env; 24.51 int retval; 24.52 + int evtchn_fd = xc_evtchn_fd(xce_handle); 24.53 extern void main_loop_wait(int); 24.54 24.55 /* Watch stdin (fd 0) to see when it has input. */ 24.56 @@ -475,11 +472,9 @@ int main_loop(void) 24.57 main_loop_wait(0); 24.58 24.59 if (env->send_event) { 24.60 - struct ioctl_evtchn_notify notify; 24.61 - 24.62 env->send_event = 0; 24.63 - notify.port = shared_page->vcpu_iodata[send_vcpu].dm_eport; 24.64 - (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, ¬ify); 24.65 + (void)xc_evtchn_notify(xce_handle, 24.66 + shared_page->vcpu_iodata[send_vcpu].dm_eport); 24.67 } 24.68 } 24.69 destroy_hvm_domain(); 24.70 @@ -511,7 +506,6 @@ static void qemu_hvm_reset(void *unused) 24.71 CPUState * cpu_init() 24.72 { 24.73 CPUX86State *env; 24.74 - struct ioctl_evtchn_bind_interdomain bind; 24.75 int i, rc; 24.76 24.77 cpu_exec_init(); 24.78 @@ -523,21 +517,19 @@ CPUState * cpu_init() 24.79 24.80 cpu_single_env = env; 24.81 24.82 - if (evtchn_fd != -1)//the evtchn has been opened by another cpu object 24.83 + if (xce_handle != -1)//the evtchn has been opened by another cpu object 24.84 return NULL; 24.85 24.86 - //use nonblock reading not polling, may change in future. 24.87 - evtchn_fd = open("/dev/xen/evtchn", O_RDWR|O_NONBLOCK); 24.88 - if (evtchn_fd == -1) { 24.89 + xce_handle = xc_evtchn_open(); 24.90 + if (xce_handle == -1) { 24.91 fprintf(logfile, "open evtchn device error %d\n", errno); 24.92 return NULL; 24.93 } 24.94 24.95 /* FIXME: how about if we overflow the page here? */ 24.96 - bind.remote_domain = domid; 24.97 for ( i = 0; i < vcpus; i++ ) { 24.98 - bind.remote_port = shared_page->vcpu_iodata[i].vp_eport; 24.99 - rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); 24.100 + rc = xc_evtchn_bind_interdomain(xce_handle, domid, 24.101 + shared_page->vcpu_iodata[i].vp_eport); 24.102 if ( rc == -1 ) { 24.103 fprintf(logfile, "bind interdomain ioctl error %d\n", errno); 24.104 return NULL;
25.1 --- a/tools/libxc/xc_elf.h Thu Jun 15 10:02:53 2006 -0600 25.2 +++ b/tools/libxc/xc_elf.h Thu Jun 15 10:23:57 2006 -0600 25.3 @@ -170,13 +170,14 @@ typedef struct { 25.4 #define EM_PARISC 15 /* HPPA */ 25.5 #define EM_SPARC32PLUS 18 /* Enhanced instruction set SPARC */ 25.6 #define EM_PPC 20 /* PowerPC */ 25.7 +#define EM_PPC64 21 /* PowerPC 64-bit */ 25.8 #define EM_ARM 40 /* Advanced RISC Machines ARM */ 25.9 #define EM_ALPHA 41 /* DEC ALPHA */ 25.10 #define EM_SPARCV9 43 /* SPARC version 9 */ 25.11 #define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */ 25.12 +#define EM_IA_64 50 /* Intel Merced */ 25.13 #define EM_X86_64 62 /* AMD x86-64 architecture */ 25.14 #define EM_VAX 75 /* DEC VAX */ 25.15 -#define EM_NUM 15 /* number of machine types */ 25.16 25.17 /* Version */ 25.18 #define EV_NONE 0 /* Invalid */
26.1 --- a/tools/libxc/xc_linux.c Thu Jun 15 10:02:53 2006 -0600 26.2 +++ b/tools/libxc/xc_linux.c Thu Jun 15 10:23:57 2006 -0600 26.3 @@ -103,6 +103,124 @@ int do_xen_hypercall(int xc_handle, priv 26.4 (unsigned long)hypercall); 26.5 } 26.6 26.7 +#define EVTCHN_DEV_NAME "/dev/xen/evtchn" 26.8 +#define EVTCHN_DEV_MAJOR 10 26.9 +#define EVTCHN_DEV_MINOR 201 26.10 + 26.11 +int xc_evtchn_open(void) 26.12 +{ 26.13 + struct stat st; 26.14 + int fd; 26.15 + 26.16 + /* Make sure any existing device file links to correct device. */ 26.17 + if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) || 26.18 + (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR))) 26.19 + (void)unlink(EVTCHN_DEV_NAME); 26.20 + 26.21 +reopen: 26.22 + if ( (fd = open(EVTCHN_DEV_NAME, O_RDWR)) == -1 ) 26.23 + { 26.24 + if ( (errno == ENOENT) && 26.25 + ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) && 26.26 + (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600, 26.27 + makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0) ) 26.28 + goto reopen; 26.29 + 26.30 + PERROR("Could not open event channel interface"); 26.31 + return -1; 26.32 + } 26.33 + 26.34 + return fd; 26.35 +} 26.36 + 26.37 +int xc_evtchn_close(int xce_handle) 26.38 +{ 26.39 + return close(xce_handle); 26.40 +} 26.41 + 26.42 +int xc_evtchn_fd(int xce_handle) 26.43 +{ 26.44 + return xce_handle; 26.45 +} 26.46 + 26.47 +int xc_evtchn_notify(int xce_handle, evtchn_port_t port) 26.48 +{ 26.49 + struct ioctl_evtchn_notify notify; 26.50 + 26.51 + notify.port = port; 26.52 + 26.53 + return ioctl(xce_handle, IOCTL_EVTCHN_NOTIFY, ¬ify); 26.54 +} 26.55 + 26.56 +evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid, 26.57 + evtchn_port_t remote_port) 26.58 +{ 26.59 + struct ioctl_evtchn_bind_interdomain bind; 26.60 + 26.61 + bind.remote_domain = domid; 26.62 + bind.remote_port = remote_port; 26.63 + 26.64 + return ioctl(xce_handle, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); 26.65 +} 26.66 + 26.67 +int xc_evtchn_unbind(int xce_handle, evtchn_port_t port) 26.68 +{ 26.69 + struct ioctl_evtchn_unbind unbind; 26.70 + 26.71 + unbind.port = port; 26.72 + 26.73 + return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind); 26.74 +} 26.75 + 26.76 +evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq) 26.77 +{ 26.78 + struct ioctl_evtchn_bind_virq bind; 26.79 + 26.80 + bind.virq = virq; 26.81 + 26.82 + return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind); 26.83 +} 26.84 + 26.85 +static int dorw(int fd, char *data, size_t size, int do_write) 26.86 +{ 26.87 + size_t offset = 0; 26.88 + ssize_t len; 26.89 + 26.90 + while ( offset < size ) 26.91 + { 26.92 + if (do_write) 26.93 + len = write(fd, data + offset, size - offset); 26.94 + else 26.95 + len = read(fd, data + offset, size - offset); 26.96 + 26.97 + if ( len == -1 ) 26.98 + { 26.99 + if ( errno == EINTR ) 26.100 + continue; 26.101 + return -1; 26.102 + } 26.103 + 26.104 + offset += len; 26.105 + } 26.106 + 26.107 + return 0; 26.108 +} 26.109 + 26.110 +evtchn_port_t xc_evtchn_pending(int xce_handle) 26.111 +{ 26.112 + evtchn_port_t port; 26.113 + 26.114 + if ( dorw(xce_handle, (char *)&port, sizeof(port), 0) == -1 ) 26.115 + return -1; 26.116 + 26.117 + return port; 26.118 +} 26.119 + 26.120 +int xc_evtchn_unmask(int xce_handle, evtchn_port_t port) 26.121 +{ 26.122 + return dorw(xce_handle, (char *)&port, sizeof(port), 1); 26.123 +} 26.124 + 26.125 /* 26.126 * Local variables: 26.127 * mode: C
27.1 --- a/tools/libxc/xc_linux_restore.c Thu Jun 15 10:02:53 2006 -0600 27.2 +++ b/tools/libxc/xc_linux_restore.c Thu Jun 15 10:23:57 2006 -0600 27.3 @@ -456,6 +456,15 @@ int xc_linux_restore(int xc_handle, int 27.4 n+= j; /* crude stats */ 27.5 } 27.6 27.7 + /* 27.8 + * Ensure we flush all machphys updates before potential PAE-specific 27.9 + * reallocations below. 27.10 + */ 27.11 + if (xc_finish_mmu_updates(xc_handle, mmu)) { 27.12 + ERR("Error doing finish_mmu_updates()"); 27.13 + goto out; 27.14 + } 27.15 + 27.16 DPRINTF("Received all pages (%d races)\n", nraces); 27.17 27.18 if ((pt_levels == 3) && !pae_extended_cr3) { 27.19 @@ -550,15 +559,12 @@ int xc_linux_restore(int xc_handle, int 27.20 } 27.21 } 27.22 27.23 + if (xc_finish_mmu_updates(xc_handle, mmu)) { 27.24 + ERR("Error doing finish_mmu_updates()"); 27.25 + goto out; 27.26 + } 27.27 } 27.28 27.29 - 27.30 - if (xc_finish_mmu_updates(xc_handle, mmu)) { 27.31 - ERR("Error doing finish_mmu_updates()"); 27.32 - goto out; 27.33 - } 27.34 - 27.35 - 27.36 /* 27.37 * Pin page tables. Do this after writing to them as otherwise Xen 27.38 * will barf when doing the type-checking.
28.1 --- a/tools/libxc/xc_load_elf.c Thu Jun 15 10:02:53 2006 -0600 28.2 +++ b/tools/libxc/xc_load_elf.c Thu Jun 15 10:23:57 2006 -0600 28.3 @@ -22,6 +22,24 @@ loadelfsymtab( 28.4 const char *image, int xch, uint32_t dom, xen_pfn_t *parray, 28.5 struct domain_setup_info *dsi); 28.6 28.7 +/* 28.8 + * Elf header attributes we require for each supported host platform. 28.9 + * These are checked in parseelfimage(). 28.10 + */ 28.11 +#if defined(__ia64__) 28.12 +#define ELFCLASS ELFCLASS64 28.13 +#define ELFDATA ELFDATA2LSB 28.14 +#define ELFMACHINE EM_IA_64 28.15 +#elif defined(__i386__) 28.16 +#define ELFCLASS ELFCLASS32 28.17 +#define ELFDATA ELFDATA2LSB 28.18 +#define ELFMACHINE EM_386 28.19 +#elif defined(__x86_64__) 28.20 +#define ELFCLASS ELFCLASS64 28.21 +#define ELFDATA ELFDATA2LSB 28.22 +#define ELFMACHINE EM_X86_64 28.23 +#endif 28.24 + 28.25 int probe_elf(const char *image, 28.26 unsigned long image_size, 28.27 struct load_funcs *load_funcs) 28.28 @@ -61,16 +79,10 @@ static int parseelfimage(const char *ima 28.29 return -EINVAL; 28.30 } 28.31 28.32 - if ( 28.33 -#if defined(__i386__) 28.34 - (ehdr->e_ident[EI_CLASS] != ELFCLASS32) || 28.35 - (ehdr->e_machine != EM_386) || 28.36 -#elif defined(__x86_64__) 28.37 - (ehdr->e_ident[EI_CLASS] != ELFCLASS64) || 28.38 - (ehdr->e_machine != EM_X86_64) || 28.39 -#endif 28.40 - (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) || 28.41 - (ehdr->e_type != ET_EXEC) ) 28.42 + if ( (ehdr->e_ident[EI_CLASS] != ELFCLASS) || 28.43 + (ehdr->e_machine != ELFMACHINE) || 28.44 + (ehdr->e_ident[EI_DATA] != ELFDATA) || 28.45 + (ehdr->e_type != ET_EXEC) ) 28.46 { 28.47 ERROR("Kernel not a Xen-compatible Elf image."); 28.48 return -EINVAL;
29.1 --- a/tools/libxc/xenctrl.h Thu Jun 15 10:02:53 2006 -0600 29.2 +++ b/tools/libxc/xenctrl.h Thu Jun 15 10:23:57 2006 -0600 29.3 @@ -604,4 +604,58 @@ int xc_finish_mmu_updates(int xc_handle, 29.4 29.5 int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size); 29.6 29.7 +/* 29.8 + * Return a handle to the event channel driver, or -1 on failure, in which case 29.9 + * errno will be set appropriately. 29.10 + */ 29.11 +int xc_evtchn_open(void); 29.12 + 29.13 +/* 29.14 + * Close a handle previously allocated with xc_evtchn_open(). 29.15 + */ 29.16 +int xc_evtchn_close(int xce_handle); 29.17 + 29.18 +/* 29.19 + * Return an fd that can be select()ed on for further calls to 29.20 + * xc_evtchn_pending(). 29.21 + */ 29.22 +int xc_evtchn_fd(int xce_handle); 29.23 + 29.24 +/* 29.25 + * Notify the given event channel. Returns -1 on failure, in which case 29.26 + * errno will be set appropriately. 29.27 + */ 29.28 +int xc_evtchn_notify(int xce_handle, evtchn_port_t port); 29.29 + 29.30 +/* 29.31 + * Returns a new event port bound to the remote port for the given domain ID, 29.32 + * or -1 on failure, in which case errno will be set appropriately. 29.33 + */ 29.34 +evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid, 29.35 + evtchn_port_t remote_port); 29.36 + 29.37 +/* 29.38 + * Unbind the given event channel. Returns -1 on failure, in which case errno 29.39 + * will be set appropriately. 29.40 + */ 29.41 +int xc_evtchn_unbind(int xce_handle, evtchn_port_t port); 29.42 + 29.43 +/* 29.44 + * Bind an event channel to the given VIRQ. Returns the event channel bound to 29.45 + * the VIRQ, or -1 on failure, in which case errno will be set appropriately. 29.46 + */ 29.47 +evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq); 29.48 + 29.49 +/* 29.50 + * Return the next event channel to become pending, or -1 on failure, in which 29.51 + * case errno will be set appropriately. 29.52 + */ 29.53 +evtchn_port_t xc_evtchn_pending(int xce_handle); 29.54 + 29.55 +/* 29.56 + * Unmask the given event channel. Returns -1 on failure, in which case errno 29.57 + * will be set appropriately. 29.58 + */ 29.59 +int xc_evtchn_unmask(int xce_handle, evtchn_port_t port); 29.60 + 29.61 #endif
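[Editor's illustration, not part of the changeset.] A libxc client of the interface declared above would typically follow the same open/bind/select/pending/unmask pattern that the console daemon and qemu-dm adopt in this changeset. A rough sketch, with a made-up function name, placeholder domain and port values, and minimal error handling:

    #include <stdio.h>
    #include <sys/select.h>
    #include <xenctrl.h>

    int wait_for_guest_event(int guest_domid, evtchn_port_t guest_port)
    {
        int xce, fd;
        evtchn_port_t local, pending;
        fd_set readfds;

        if ((xce = xc_evtchn_open()) == -1)
            return -1;

        local = xc_evtchn_bind_interdomain(xce, guest_domid, guest_port);
        if (local == -1) {
            xc_evtchn_close(xce);
            return -1;
        }

        /* The handle exposes an fd so callers can multiplex with select(). */
        fd = xc_evtchn_fd(xce);
        FD_ZERO(&readfds);
        FD_SET(fd, &readfds);

        if (select(fd + 1, &readfds, NULL, NULL, NULL) > 0 &&
            (pending = xc_evtchn_pending(xce)) != -1) {
            /* ... process the event signalled on 'pending' here ... */
            xc_evtchn_unmask(xce, pending);   /* re-enable delivery */
            xc_evtchn_notify(xce, local);     /* optionally kick the remote end */
        }

        xc_evtchn_unbind(xce, local);
        xc_evtchn_close(xce);
        return 0;
    }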
30.1 --- a/tools/python/xen/util/security.py Thu Jun 15 10:02:53 2006 -0600 30.2 +++ b/tools/python/xen/util/security.py Thu Jun 15 10:23:57 2006 -0600 30.3 @@ -52,7 +52,8 @@ empty_line_re = re.compile("^\s*$") 30.4 binary_name_re = re.compile(".*[chwall|ste|chwall_ste].*\.bin", re.IGNORECASE) 30.5 policy_name_re = re.compile(".*[chwall|ste|chwall_ste].*", re.IGNORECASE) 30.6 30.7 - 30.8 +#other global variables 30.9 +NULL_SSIDREF = 0 30.10 30.11 log = logging.getLogger("xend.util.security") 30.12 30.13 @@ -255,6 +256,8 @@ def ssidref2label(ssidref_var): 30.14 #2. get labelnames for both ssidref parts 30.15 pri_ssid = ssidref & 0xffff 30.16 sec_ssid = ssidref >> 16 30.17 + pri_null_ssid = NULL_SSIDREF & 0xffff 30.18 + sec_null_ssid = NULL_SSIDREF >> 16 30.19 pri_labels = [] 30.20 sec_labels = [] 30.21 labels = [] 30.22 @@ -270,7 +273,11 @@ def ssidref2label(ssidref_var): 30.23 f.close() 30.24 30.25 #3. get the label that is in both lists (combination must be a single label) 30.26 - if secondary == "NULL": 30.27 + if (primary == "CHWALL") and (pri_ssid == pri_null_ssid) and (sec_ssid != sec_null_ssid): 30.28 + labels = sec_labels 30.29 + elif (secondary == "CHWALL") and (pri_ssid != pri_null_ssid) and (sec_ssid == sec_null_ssid): 30.30 + labels = pri_labels 30.31 + elif secondary == "NULL": 30.32 labels = pri_labels 30.33 else: 30.34 for i in pri_labels: 30.35 @@ -285,7 +292,7 @@ def ssidref2label(ssidref_var): 30.36 30.37 30.38 30.39 -def label2ssidref(labelname, policyname): 30.40 +def label2ssidref(labelname, policyname, type): 30.41 """ 30.42 returns ssidref corresponding to labelname; 30.43 maps current policy to default directory 30.44 @@ -294,6 +301,14 @@ def label2ssidref(labelname, policyname) 30.45 if policyname in ['NULL', 'INACTIVE', 'DEFAULT']: 30.46 err("Cannot translate labels for \'" + policyname + "\' policy.") 30.47 30.48 + allowed_types = ['ANY'] 30.49 + if type == 'dom': 30.50 + allowed_types.append('VM') 30.51 + elif type == 'res': 30.52 + allowed_types.append('RES') 30.53 + else: 30.54 + err("Invalid type. Must specify 'dom' or 'res'.") 30.55 + 30.56 (primary, secondary, f, pol_exists) = getmapfile(policyname) 30.57 30.58 #2. get labelnames for ssidref parts and find a common label 30.59 @@ -303,11 +318,15 @@ def label2ssidref(labelname, policyname) 30.60 l = line.split() 30.61 if (len(l) < 5) or (l[0] != "LABEL->SSID"): 30.62 continue 30.63 - if primary and (l[2] == primary) and (l[3] == labelname): 30.64 + if primary and (l[1] in allowed_types) and (l[2] == primary) and (l[3] == labelname): 30.65 pri_ssid.append(int(l[4], 16)) 30.66 - if secondary and (l[2] == secondary) and (l[3] == labelname): 30.67 + if secondary and (l[1] in allowed_types) and (l[2] == secondary) and (l[3] == labelname): 30.68 sec_ssid.append(int(l[4], 16)) 30.69 f.close() 30.70 + if (type == 'res') and (primary == "CHWALL") and (len(pri_ssid) == 0): 30.71 + pri_ssid.append(NULL_SSIDREF) 30.72 + elif (type == 'res') and (secondary == "CHWALL") and (len(sec_ssid) == 0): 30.73 + sec_ssid.append(NULL_SSIDREF) 30.74 30.75 #3. 
sanity check and composition of ssidref 30.76 if (len(pri_ssid) == 0) or ((len(sec_ssid) == 0) and (secondary != "NULL")): 30.77 @@ -360,7 +379,7 @@ def refresh_ssidref(config): 30.78 err("Policy \'" + policyname + "\' in label does not match active policy \'" 30.79 + active_policy +"\'!") 30.80 30.81 - new_ssidref = label2ssidref(labelname, policyname) 30.82 + new_ssidref = label2ssidref(labelname, policyname, 'dom') 30.83 if not new_ssidref: 30.84 err("SSIDREF refresh failed!") 30.85 30.86 @@ -409,7 +428,7 @@ def get_decision(arg1, arg2): 30.87 enables domains to retrieve access control decisions from 30.88 the hypervisor Access Control Module. 30.89 IN: args format = ['domid', id] or ['ssidref', ssidref] 30.90 - or ['access_control', ['policy', policy], ['label', label]] 30.91 + or ['access_control', ['policy', policy], ['label', label], ['type', type]] 30.92 """ 30.93 30.94 if not on(): 30.95 @@ -417,14 +436,14 @@ def get_decision(arg1, arg2): 30.96 30.97 #translate labels before calling low-level function 30.98 if arg1[0] == 'access_control': 30.99 - if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') : 30.100 + if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') or (arg1[3][0] != 'type'): 30.101 err("Argument type not supported.") 30.102 - ssidref = label2ssidref(arg1[2][1], arg1[1][1]) 30.103 + ssidref = label2ssidref(arg1[2][1], arg1[1][1], arg1[3][1]) 30.104 arg1 = ['ssidref', str(ssidref)] 30.105 if arg2[0] == 'access_control': 30.106 - if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') : 30.107 + if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') or (arg2[3][0] != 'type'): 30.108 err("Argument type not supported.") 30.109 - ssidref = label2ssidref(arg2[2][1], arg2[1][1]) 30.110 + ssidref = label2ssidref(arg2[2][1], arg2[1][1], arg2[3][1]) 30.111 arg2 = ['ssidref', str(ssidref)] 30.112 30.113 # accept only int or string types for domid and ssidref
31.1 --- a/tools/python/xen/xm/addlabel.py Thu Jun 15 10:02:53 2006 -0600 31.2 +++ b/tools/python/xen/xm/addlabel.py Thu Jun 15 10:23:57 2006 -0600 31.3 @@ -50,7 +50,7 @@ def main(argv): 31.4 err("No active policy. Policy must be specified in command line.") 31.5 31.6 #sanity checks: make sure this label can be instantiated later on 31.7 - ssidref = label2ssidref(label, policyref) 31.8 + ssidref = label2ssidref(label, policyref, 'dom') 31.9 31.10 new_label = "access_control = ['policy=%s,label=%s']\n" % (policyref, label) 31.11 if not os.path.isfile(configfile):
32.1 --- a/tools/python/xen/xm/create.py Thu Jun 15 10:02:53 2006 -0600 32.2 +++ b/tools/python/xen/xm/create.py Thu Jun 15 10:23:57 2006 -0600 32.3 @@ -541,7 +541,7 @@ def configure_security(config, vals): 32.4 if sxp.child_value(config, 'ssidref'): 32.5 err("ERROR: SSIDREF and access_control are mutually exclusive but both specified!") 32.6 #else calculate ssidre from label 32.7 - ssidref = security.label2ssidref(label, policy) 32.8 + ssidref = security.label2ssidref(label, policy, 'dom') 32.9 if not ssidref : 32.10 err("ERROR calculating ssidref from access_control.") 32.11 security_label = ['security', [ config_access_control, ['ssidref' , ssidref ] ] ]
33.1 --- a/tools/python/xen/xm/main.py Thu Jun 15 10:02:53 2006 -0600 33.2 +++ b/tools/python/xen/xm/main.py Thu Jun 15 10:23:57 2006 -0600 33.3 @@ -1193,6 +1193,9 @@ def main(argv=sys.argv): 33.4 else: 33.5 print >>sys.stderr, "Error: %s" % ex.faultString 33.6 sys.exit(1) 33.7 + except (ValueError, OverflowError): 33.8 + err("Invalid argument.") 33.9 + usage(argv[1]) 33.10 except: 33.11 print "Unexpected error:", sys.exc_info()[0] 33.12 print
34.1 --- a/tools/security/Makefile Thu Jun 15 10:02:53 2006 -0600 34.2 +++ b/tools/security/Makefile Thu Jun 15 10:23:57 2006 -0600 34.3 @@ -33,7 +33,7 @@ OBJS_XML2BIN := $(patsubst %.c,%.o,$(fil 34.4 34.5 ACM_INST_TOOLS = xensec_tool xensec_xml2bin xensec_gen 34.6 ACM_OBJS = $(OBJS_TOOL) $(OBJS_XML2BIN) $(OBJS_GETD) 34.7 -ACM_SCRIPTS = python/xensec_tools/acm_getlabel python/xensec_tools/acm_getdecision 34.8 +ACM_SCRIPTS = python/xensec_tools/acm_getlabel 34.9 34.10 ACM_CONFIG_DIR = /etc/xen/acm-security 34.11 ACM_POLICY_DIR = $(ACM_CONFIG_DIR)/policies
35.1 --- a/tools/security/python/xensec_gen/cgi-bin/policy.cgi Thu Jun 15 10:02:53 2006 -0600 35.2 +++ b/tools/security/python/xensec_gen/cgi-bin/policy.cgi Thu Jun 15 10:23:57 2006 -0600 35.3 @@ -406,7 +406,7 @@ def parsePolicyXml( ): 35.4 msg = msg + 'Please validate the Policy file used.' 35.5 formatXmlError( msg ) 35.6 35.7 - allCSMTypes[csName][1] = csMemberList 35.8 + allCSMTypes[csName][1] = csMemberList 35.9 35.10 if pOrder != '': 35.11 formPolicyOrder[1] = pOrder
36.1 --- a/tools/security/python/xensec_tools/acm_getdecision Thu Jun 15 10:02:53 2006 -0600 36.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 36.3 @@ -1,55 +0,0 @@ 36.4 -#!/usr/bin/env python 36.5 -# -*- mode: python; -*- 36.6 -import sys 36.7 -import traceback 36.8 -import getopt 36.9 - 36.10 -# add fallback path for non-native python path installs if needed 36.11 -sys.path.insert(-1, '/usr/lib/python') 36.12 -sys.path.insert(-1, '/usr/lib64/python') 36.13 - 36.14 -from xen.util.security import ACMError, err, get_decision, active_policy 36.15 - 36.16 -def usage(): 36.17 - print "Usage: acm_getdecision -i domainid --label labelname" 36.18 - print " Test program illustrating the retrieval of" 36.19 - print " access control decisions from Xen. At this time," 36.20 - print " only sharing (STE) policy decisions are supported." 36.21 - print " Arguments are two paramters in any combination:" 36.22 - print "\t -i domain_id or --domid domain_id" 36.23 - print "\t -l labelname or --label labelname" 36.24 - print " Return value:" 36.25 - print "\t PERMITTED if access is permitted" 36.26 - print "\t DENIED if access is denied" 36.27 - print "\t ACMError -- e.g., unknown label or domain id" 36.28 - err("Usage") 36.29 - 36.30 -try: 36.31 - 36.32 - if len(sys.argv) != 5: 36.33 - usage() 36.34 - 36.35 - decision_args = [] 36.36 - 36.37 - for idx in range(1, len(sys.argv), 2): 36.38 - if sys.argv[idx] in ['-i', '--domid']: 36.39 - decision_args.append(['domid', sys.argv[idx+1]]) 36.40 - elif sys.argv[idx] in ['-l', '--label']: 36.41 - decision_args.append(['access_control', 36.42 - ['policy', active_policy], 36.43 - ['label', sys.argv[idx+1]] 36.44 - ]) 36.45 - else: 36.46 - print "unknown argument %s" % sys.argv[idx] 36.47 - usage() 36.48 - 36.49 - if len(decision_args) != 2: 36.50 - print "too many arguments" 36.51 - usage() 36.52 - 36.53 - print get_decision(decision_args[0], decision_args[1]) 36.54 - 36.55 -except ACMError: 36.56 - pass 36.57 -except: 36.58 - traceback.print_exc(limit=1)
37.1 --- a/tools/security/secpol_xml2bin.c Thu Jun 15 10:02:53 2006 -0600 37.2 +++ b/tools/security/secpol_xml2bin.c Thu Jun 15 10:23:57 2006 -0600 37.3 @@ -44,6 +44,8 @@ 37.4 37.5 #define DEBUG 0 37.6 37.7 +#define NULL_LABEL_NAME "__NULL_LABEL__" 37.8 + 37.9 /* primary / secondary policy component setting */ 37.10 enum policycomponent { CHWALL, STE, NULLPOLICY } 37.11 primary = NULLPOLICY, secondary = NULLPOLICY; 37.12 @@ -467,7 +469,7 @@ int init_ssid_queues(void) 37.13 return -ENOMEM; 37.14 37.15 /* default chwall ssid */ 37.16 - default_ssid_chwall->name = "DEFAULT"; 37.17 + default_ssid_chwall->name = NULL_LABEL_NAME; 37.18 default_ssid_chwall->num = max_chwall_ssids++; 37.19 default_ssid_chwall->is_ref = 0; 37.20 default_ssid_chwall->type = ANY; 37.21 @@ -484,7 +486,7 @@ int init_ssid_queues(void) 37.22 max_chwall_labels++; 37.23 37.24 /* default ste ssid */ 37.25 - default_ssid_ste->name = "DEFAULT"; 37.26 + default_ssid_ste->name = NULL_LABEL_NAME; 37.27 default_ssid_ste->num = max_ste_ssids++; 37.28 default_ssid_ste->is_ref = 0; 37.29 default_ssid_ste->type = ANY;
38.1 --- a/tools/xenmon/xenbaked.c Thu Jun 15 10:02:53 2006 -0600 38.2 +++ b/tools/xenmon/xenbaked.c Thu Jun 15 10:23:57 2006 -0600 38.3 @@ -33,9 +33,6 @@ 38.4 #include <stdlib.h> 38.5 #include <stdio.h> 38.6 #include <sys/mman.h> 38.7 -#include <sys/stat.h> 38.8 -#include <sys/types.h> 38.9 -#include <sys/ioctl.h> 38.10 #include <fcntl.h> 38.11 #include <unistd.h> 38.12 #include <errno.h> 38.13 @@ -45,7 +42,6 @@ 38.14 #include <xen/xen.h> 38.15 #include <string.h> 38.16 #include <sys/select.h> 38.17 -#include <xen/linux/evtchn.h> 38.18 38.19 #define PERROR(_m, _a...) \ 38.20 do { \ 38.21 @@ -256,51 +252,29 @@ void log_event(int event_id) 38.22 stat_map[0].event_count++; // other 38.23 } 38.24 38.25 -#define EVTCHN_DEV_NAME "/dev/xen/evtchn" 38.26 -#define EVTCHN_DEV_MAJOR 10 38.27 -#define EVTCHN_DEV_MINOR 201 38.28 - 38.29 int virq_port; 38.30 -int eventchn_fd = -1; 38.31 +int xce_handle = -1; 38.32 38.33 /* Returns the event channel handle. */ 38.34 /* Stolen from xenstore code */ 38.35 int eventchn_init(void) 38.36 { 38.37 - struct stat st; 38.38 - struct ioctl_evtchn_bind_virq bind; 38.39 int rc; 38.40 38.41 // to revert to old way: 38.42 if (0) 38.43 return -1; 38.44 38.45 - /* Make sure any existing device file links to correct device. */ 38.46 - if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) || 38.47 - (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR))) 38.48 - (void)unlink(EVTCHN_DEV_NAME); 38.49 - 38.50 - reopen: 38.51 - eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR); 38.52 - if (eventchn_fd == -1) { 38.53 - if ((errno == ENOENT) && 38.54 - ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) && 38.55 - (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600, 38.56 - makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0)) 38.57 - goto reopen; 38.58 - return -errno; 38.59 - } 38.60 - 38.61 - if (eventchn_fd < 0) 38.62 + xce_handle = xc_evtchn_open(); 38.63 + 38.64 + if (xce_handle < 0) 38.65 perror("Failed to open evtchn device"); 38.66 38.67 - bind.virq = VIRQ_TBUF; 38.68 - rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind); 38.69 - if (rc == -1) 38.70 + if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1) 38.71 perror("Failed to bind to domain exception virq port"); 38.72 virq_port = rc; 38.73 38.74 - return eventchn_fd; 38.75 + return xce_handle; 38.76 } 38.77 38.78 void wait_for_event(void) 38.79 @@ -309,27 +283,30 @@ void wait_for_event(void) 38.80 fd_set inset; 38.81 evtchn_port_t port; 38.82 struct timeval tv; 38.83 + int evtchn_fd; 38.84 38.85 - if (eventchn_fd < 0) { 38.86 + if (xce_handle < 0) { 38.87 nanosleep(&opts.poll_sleep, NULL); 38.88 return; 38.89 } 38.90 38.91 + evtchn_fd = xc_evtchn_fd(xce_handle); 38.92 + 38.93 FD_ZERO(&inset); 38.94 - FD_SET(eventchn_fd, &inset); 38.95 + FD_SET(evtchn_fd, &inset); 38.96 tv.tv_sec = 1; 38.97 tv.tv_usec = 0; 38.98 // tv = millis_to_timespec(&opts.poll_sleep); 38.99 - ret = select(eventchn_fd+1, &inset, NULL, NULL, &tv); 38.100 + ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv); 38.101 38.102 - if ( (ret == 1) && FD_ISSET(eventchn_fd, &inset)) { 38.103 - if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port)) 38.104 + if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) { 38.105 + if ((port = xc_evtchn_pending(xce_handle)) == -1) 38.106 perror("Failed to read from event fd"); 38.107 38.108 // if (port == virq_port) 38.109 // printf("got the event I was looking for\r\n"); 38.110 - 38.111 - if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port)) 38.112 + 38.113 + if (xc_evtchn_unmask(xce_handle, port) 
== -1) 38.114 perror("Failed to write to event fd"); 38.115 } 38.116 }
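
The xenbaked.c hunks above replace the raw /dev/xen/evtchn device node and its ioctls with the libxc event-channel wrappers. Below is a minimal sketch of the new calling pattern, restricted to the calls that actually appear in the diff (xc_evtchn_open, xc_evtchn_bind_virq, xc_evtchn_fd, xc_evtchn_pending, xc_evtchn_unmask); the example_* helper names are illustrative only, not part of the changeset.

    /* Sketch only: mirrors the xc_evtchn_* usage in the xenbaked.c hunks above.
     * The example_* names are hypothetical; error handling is abbreviated. */
    #include <stdio.h>
    #include <sys/select.h>
    #include <xenctrl.h>

    static int xce_handle = -1;

    /* Open the event-channel interface and bind a VIRQ; returns the local port. */
    static int example_bind_virq(void)
    {
        xce_handle = xc_evtchn_open();                      /* replaces open("/dev/xen/evtchn") */
        if (xce_handle < 0)
            return -1;
        return xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF);  /* replaces IOCTL_EVTCHN_BIND_VIRQ; -1 on failure */
    }

    /* Wait for one event on the underlying fd, then acknowledge it. */
    static void example_wait_once(void)
    {
        evtchn_port_t port;
        fd_set inset;
        int fd = xc_evtchn_fd(xce_handle);                  /* plain fd, usable with select() */

        FD_ZERO(&inset);
        FD_SET(fd, &inset);
        if (select(fd + 1, &inset, NULL, NULL, NULL) == 1 && FD_ISSET(fd, &inset)) {
            if ((port = xc_evtchn_pending(xce_handle)) == -1)   /* replaces read() of the port */
                perror("xc_evtchn_pending");
            else if (xc_evtchn_unmask(xce_handle, port) == -1)  /* replaces write() of the port */
                perror("xc_evtchn_unmask");
        }
    }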
39.1 --- a/tools/xenstat/libxenstat/src/xenstat.c Thu Jun 15 10:02:53 2006 -0600 39.2 +++ b/tools/xenstat/libxenstat/src/xenstat.c Thu Jun 15 10:23:57 2006 -0600 39.3 @@ -223,18 +223,20 @@ xenstat_node *xenstat_get_node(xenstat_h 39.4 39.5 num_domains = 0; 39.6 do { 39.7 - xenstat_domain *domain; 39.8 + xenstat_domain *domain, *tmp; 39.9 39.10 new_domains = xc_domain_getinfolist(handle->xc_handle, 39.11 num_domains, DOMAIN_CHUNK_SIZE, domaininfo); 39.12 39.13 - node->domains = realloc(node->domains, 39.14 - (num_domains + new_domains) 39.15 - * sizeof(xenstat_domain)); 39.16 - if (node->domains == NULL) { 39.17 + tmp = realloc(node->domains, 39.18 + (num_domains + new_domains) 39.19 + * sizeof(xenstat_domain)); 39.20 + if (tmp == NULL) { 39.21 + free(node->domains); 39.22 free(node); 39.23 return NULL; 39.24 } 39.25 + node->domains = tmp; 39.26 39.27 domain = node->domains + num_domains; 39.28 39.29 @@ -582,11 +584,14 @@ static int xenstat_collect_networks(xens 39.30 domain->num_networks = 1; 39.31 domain->networks = malloc(sizeof(xenstat_network)); 39.32 } else { 39.33 + struct xenstat_network *tmp; 39.34 domain->num_networks++; 39.35 - domain->networks = 39.36 - realloc(domain->networks, 39.37 - domain->num_networks * 39.38 - sizeof(xenstat_network)); 39.39 + tmp = realloc(domain->networks, 39.40 + domain->num_networks * 39.41 + sizeof(xenstat_network)); 39.42 + if (tmp == NULL) 39.43 + free(domain->networks); 39.44 + domain->networks = tmp; 39.45 } 39.46 if (domain->networks == NULL) 39.47 return 0;
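
The xenstat.c hunks above fix a common realloc() pitfall: assigning the result directly to the only pointer to the buffer loses that buffer when realloc() fails, leaking it. A minimal sketch of the pattern the change adopts follows; the grow_buffer helper is illustrative and not part of the source.

    #include <stdlib.h>

    /* Keep the old pointer reachable until realloc() is known to have succeeded
     * (illustrative helper, not from the changeset). */
    static int grow_buffer(void **buf, size_t new_size)
    {
        void *tmp = realloc(*buf, new_size);
        if (tmp == NULL) {
            free(*buf);      /* the old block is still ours; release it explicitly */
            *buf = NULL;
            return -1;       /* caller sees the failure instead of a silent leak */
        }
        *buf = tmp;
        return 0;
    }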
40.1 --- a/tools/xenstore/fake_libxc.c Thu Jun 15 10:02:53 2006 -0600 40.2 +++ b/tools/xenstore/fake_libxc.c Thu Jun 15 10:23:57 2006 -0600 40.3 @@ -37,7 +37,7 @@ static int xs_test_pid; 40.4 static evtchn_port_t port; 40.5 40.6 /* The event channel maps to a signal, shared page to an mmapped file. */ 40.7 -void evtchn_notify(int local_port) 40.8 +void xc_evtchn_notify(int xce_handle, int local_port) 40.9 { 40.10 assert(local_port == port); 40.11 if (kill(xs_test_pid, SIGUSR2) != 0) 40.12 @@ -124,7 +124,7 @@ void fake_ack_event(void) 40.13 signal(SIGUSR2, send_to_fd); 40.14 } 40.15 40.16 -int fake_open_eventchn(void) 40.17 +int xc_evtchn_open(void) 40.18 { 40.19 int fds[2]; 40.20
41.1 --- a/tools/xenstore/xenstored_core.c Thu Jun 15 10:02:53 2006 -0600 41.2 +++ b/tools/xenstore/xenstored_core.c Thu Jun 15 10:23:57 2006 -0600 41.3 @@ -54,7 +54,7 @@ 41.4 #include "hashtable.h" 41.5 41.6 41.7 -extern int eventchn_fd; /* in xenstored_domain.c */ 41.8 +extern int xce_handle; /* in xenstored_domain.c */ 41.9 41.10 static bool verbose = false; 41.11 LIST_HEAD(connections); 41.12 @@ -353,8 +353,11 @@ static int initialize_set(fd_set *inset, 41.13 41.14 set_fd(sock, inset, &max); 41.15 set_fd(ro_sock, inset, &max); 41.16 - set_fd(eventchn_fd, inset, &max); 41.17 set_fd(reopen_log_pipe[0], inset, &max); 41.18 + 41.19 + if (xce_handle != -1) 41.20 + set_fd(xc_evtchn_fd(xce_handle), inset, &max); 41.21 + 41.22 list_for_each_entry(i, &connections, list) { 41.23 if (i->domain) 41.24 continue; 41.25 @@ -1769,6 +1772,7 @@ int main(int argc, char *argv[]) 41.26 bool outputpid = false; 41.27 bool no_domain_init = false; 41.28 const char *pidfile = NULL; 41.29 + int evtchn_fd = -1; 41.30 41.31 while ((opt = getopt_long(argc, argv, "DE:F:HNPS:T:RLVW:", options, 41.32 NULL)) != -1) { 41.33 @@ -1907,6 +1911,9 @@ int main(int argc, char *argv[]) 41.34 signal(SIGUSR1, stop_failtest); 41.35 #endif 41.36 41.37 + if (xce_handle != -1) 41.38 + evtchn_fd = xc_evtchn_fd(xce_handle); 41.39 + 41.40 /* Get ready to listen to the tools. */ 41.41 max = initialize_set(&inset, &outset, *sock, *ro_sock); 41.42 41.43 @@ -1934,7 +1941,7 @@ int main(int argc, char *argv[]) 41.44 if (FD_ISSET(*ro_sock, &inset)) 41.45 accept_connection(*ro_sock, false); 41.46 41.47 - if (eventchn_fd > 0 && FD_ISSET(eventchn_fd, &inset)) 41.48 + if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset)) 41.49 handle_event(); 41.50 41.51 list_for_each_entry(i, &connections, list) {
42.1 --- a/tools/xenstore/xenstored_domain.c Thu Jun 15 10:02:53 2006 -0600 42.2 +++ b/tools/xenstore/xenstored_domain.c Thu Jun 15 10:23:57 2006 -0600 42.3 @@ -18,15 +18,10 @@ 42.4 */ 42.5 42.6 #include <stdio.h> 42.7 -#include <linux/ioctl.h> 42.8 -#include <sys/ioctl.h> 42.9 #include <sys/mman.h> 42.10 #include <unistd.h> 42.11 #include <stdlib.h> 42.12 #include <stdarg.h> 42.13 -#include <sys/types.h> 42.14 -#include <sys/stat.h> 42.15 -#include <fcntl.h> 42.16 42.17 //#define DEBUG 42.18 #include "utils.h" 42.19 @@ -37,12 +32,11 @@ 42.20 #include "xenstored_test.h" 42.21 42.22 #include <xenctrl.h> 42.23 -#include <xen/sys/evtchn.h> 42.24 42.25 static int *xc_handle; 42.26 static evtchn_port_t virq_port; 42.27 42.28 -int eventchn_fd = -1; 42.29 +int xce_handle = -1; 42.30 42.31 struct domain 42.32 { 42.33 @@ -83,19 +77,6 @@ struct domain 42.34 42.35 static LIST_HEAD(domains); 42.36 42.37 -#ifndef TESTING 42.38 -static void evtchn_notify(int port) 42.39 -{ 42.40 - int rc; 42.41 - 42.42 - struct ioctl_evtchn_notify notify; 42.43 - notify.port = port; 42.44 - rc = ioctl(eventchn_fd, IOCTL_EVTCHN_NOTIFY, ¬ify); 42.45 -} 42.46 -#else 42.47 -extern void evtchn_notify(int port); 42.48 -#endif 42.49 - 42.50 /* FIXME: Mark connection as broken (close it?) when this happens. */ 42.51 static bool check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) 42.52 { 42.53 @@ -146,7 +127,7 @@ static int writechn(struct connection *c 42.54 mb(); 42.55 intf->rsp_prod += len; 42.56 42.57 - evtchn_notify(conn->domain->port); 42.58 + xc_evtchn_notify(xce_handle, conn->domain->port); 42.59 42.60 return len; 42.61 } 42.62 @@ -176,7 +157,7 @@ static int readchn(struct connection *co 42.63 mb(); 42.64 intf->req_cons += len; 42.65 42.66 - evtchn_notify(conn->domain->port); 42.67 + xc_evtchn_notify(xce_handle, conn->domain->port); 42.68 42.69 return len; 42.70 } 42.71 @@ -184,13 +165,11 @@ static int readchn(struct connection *co 42.72 static int destroy_domain(void *_domain) 42.73 { 42.74 struct domain *domain = _domain; 42.75 - struct ioctl_evtchn_unbind unbind; 42.76 42.77 list_del(&domain->list); 42.78 42.79 if (domain->port) { 42.80 - unbind.port = domain->port; 42.81 - if (ioctl(eventchn_fd, IOCTL_EVTCHN_UNBIND, &unbind) == -1) 42.82 + if (xc_evtchn_unbind(xce_handle, domain->port) == -1) 42.83 eprintf("> Unbinding port %i failed!\n", domain->port); 42.84 } 42.85 42.86 @@ -231,14 +210,14 @@ void handle_event(void) 42.87 { 42.88 evtchn_port_t port; 42.89 42.90 - if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port)) 42.91 + if ((port = xc_evtchn_pending(xce_handle)) == -1) 42.92 barf_perror("Failed to read from event fd"); 42.93 42.94 if (port == virq_port) 42.95 domain_cleanup(); 42.96 42.97 #ifndef TESTING 42.98 - if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port)) 42.99 + if (xc_evtchn_unmask(xce_handle, port) == -1) 42.100 barf_perror("Failed to write to event fd"); 42.101 #endif 42.102 } 42.103 @@ -269,7 +248,6 @@ static struct domain *new_domain(void *c 42.104 int port) 42.105 { 42.106 struct domain *domain; 42.107 - struct ioctl_evtchn_bind_interdomain bind; 42.108 int rc; 42.109 42.110 42.111 @@ -283,9 +261,7 @@ static struct domain *new_domain(void *c 42.112 talloc_set_destructor(domain, destroy_domain); 42.113 42.114 /* Tell kernel we're interested in this event. 
*/ 42.115 - bind.remote_domain = domid; 42.116 - bind.remote_port = port; 42.117 - rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); 42.118 + rc = xc_evtchn_bind_interdomain(xce_handle, domid, port); 42.119 if (rc == -1) 42.120 return NULL; 42.121 domain->port = rc; 42.122 @@ -490,23 +466,14 @@ static int dom0_init(void) 42.123 42.124 talloc_steal(dom0->conn, dom0); 42.125 42.126 - evtchn_notify(dom0->port); 42.127 + xc_evtchn_notify(xce_handle, dom0->port); 42.128 42.129 return 0; 42.130 } 42.131 42.132 - 42.133 - 42.134 -#define EVTCHN_DEV_NAME "/dev/xen/evtchn" 42.135 -#define EVTCHN_DEV_MAJOR 10 42.136 -#define EVTCHN_DEV_MINOR 201 42.137 - 42.138 - 42.139 /* Returns the event channel handle. */ 42.140 int domain_init(void) 42.141 { 42.142 - struct stat st; 42.143 - struct ioctl_evtchn_bind_virq bind; 42.144 int rc; 42.145 42.146 xc_handle = talloc(talloc_autofree_context(), int); 42.147 @@ -519,39 +486,19 @@ int domain_init(void) 42.148 42.149 talloc_set_destructor(xc_handle, close_xc_handle); 42.150 42.151 -#ifdef TESTING 42.152 - eventchn_fd = fake_open_eventchn(); 42.153 - (void)&st; 42.154 -#else 42.155 - /* Make sure any existing device file links to correct device. */ 42.156 - if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) || 42.157 - (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR))) 42.158 - (void)unlink(EVTCHN_DEV_NAME); 42.159 + xce_handle = xc_evtchn_open(); 42.160 42.161 - reopen: 42.162 - eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR); 42.163 - if (eventchn_fd == -1) { 42.164 - if ((errno == ENOENT) && 42.165 - ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) && 42.166 - (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600, 42.167 - makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0)) 42.168 - goto reopen; 42.169 - return -errno; 42.170 - } 42.171 -#endif 42.172 - if (eventchn_fd < 0) 42.173 + if (xce_handle < 0) 42.174 barf_perror("Failed to open evtchn device"); 42.175 42.176 if (dom0_init() != 0) 42.177 barf_perror("Failed to initialize dom0 state"); 42.178 42.179 - bind.virq = VIRQ_DOM_EXC; 42.180 - rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind); 42.181 - if (rc == -1) 42.182 + if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_DOM_EXC)) == -1) 42.183 barf_perror("Failed to bind to domain exception virq port"); 42.184 virq_port = rc; 42.185 42.186 - return eventchn_fd; 42.187 + return xce_handle; 42.188 } 42.189 42.190 void domain_entry_inc(struct connection *conn)
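
xenstored_domain.c now drives the per-domain rings through the same libxc wrappers: binding to a port offered by a remote domain, notifying it after moving data, and unbinding on teardown. A small sketch of that pattern, using only the calls visible in the hunks above (xc_evtchn_bind_interdomain, xc_evtchn_notify, xc_evtchn_unbind); the example_* names are illustrative, not part of the source.

    #include <xenctrl.h>

    extern int xce_handle;   /* opened once via xc_evtchn_open(), as in domain_init() above */

    /* Bind to a remote domain's event channel; returns the local port, -1 on failure. */
    static int example_bind_domain(unsigned int domid, int remote_port)
    {
        return xc_evtchn_bind_interdomain(xce_handle, domid, remote_port);
    }

    /* Wake the remote end after producing or consuming ring data. */
    static void example_notify(int local_port)
    {
        xc_evtchn_notify(xce_handle, local_port);   /* replaces the IOCTL_EVTCHN_NOTIFY ioctl */
    }

    /* Tear down the binding when the domain goes away. */
    static int example_unbind(int local_port)
    {
        return xc_evtchn_unbind(xce_handle, local_port);   /* -1 on failure, as in destroy_domain() */
    }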
43.1 --- a/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py Thu Jun 15 10:02:53 2006 -0600 43.2 +++ b/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py Thu Jun 15 10:23:57 2006 -0600 43.3 @@ -31,7 +31,7 @@ traceCommand("cat /dev/urandom > /dev/ra 43.4 43.5 s, o = traceCommand("md5sum /dev/ram1") 43.6 43.7 -dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o) 43.8 +dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M) 43.9 43.10 block_attach(domain, "phy:ram1", "hda1") 43.11 43.12 @@ -40,7 +40,7 @@ try: 43.13 except ConsoleError, e: 43.14 FAIL(str(e)) 43.15 43.16 -domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"]) 43.17 +domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M) 43.18 43.19 domain.closeConsole() 43.20
44.1 --- a/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py Thu Jun 15 10:02:53 2006 -0600 44.2 +++ b/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py Thu Jun 15 10:23:57 2006 -0600 44.3 @@ -37,7 +37,7 @@ try: 44.4 except ConsoleError, e: 44.5 FAIL(str(e)) 44.6 44.7 -domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"]) 44.8 +domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M) 44.9 44.10 domain.closeConsole() 44.11 44.12 @@ -45,7 +45,7 @@ domain.stop() 44.13 44.14 s, o = traceCommand("md5sum /dev/ram1") 44.15 44.16 -dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o) 44.17 +dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M) 44.18 44.19 if domU_md5sum_match == None: 44.20 FAIL("Failed to get md5sum of data written in domU.")
45.1 --- a/xen/arch/x86/traps.c Thu Jun 15 10:02:53 2006 -0600 45.2 +++ b/xen/arch/x86/traps.c Thu Jun 15 10:23:57 2006 -0600 45.3 @@ -1279,7 +1279,7 @@ asmlinkage int do_general_protection(str 45.4 static void nmi_softirq(void) 45.5 { 45.6 /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */ 45.7 - evtchn_notify(dom0->vcpu[0]); 45.8 + vcpu_kick(dom0->vcpu[0]); 45.9 } 45.10 45.11 static void nmi_dom0_report(unsigned int reason_idx)
46.1 --- a/xen/common/event_channel.c Thu Jun 15 10:02:53 2006 -0600 46.2 +++ b/xen/common/event_channel.c Thu Jun 15 10:23:57 2006 -0600 46.3 @@ -493,10 +493,9 @@ void evtchn_set_pending(struct vcpu *v, 46.4 46.5 if ( !test_bit (port, s->evtchn_mask) && 46.6 !test_and_set_bit(port / BITS_PER_LONG, 46.7 - &v->vcpu_info->evtchn_pending_sel) && 46.8 - !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) ) 46.9 + &v->vcpu_info->evtchn_pending_sel) ) 46.10 { 46.11 - evtchn_notify(v); 46.12 + vcpu_mark_events_pending(v); 46.13 } 46.14 46.15 /* Check if some VCPU might be polling for this event. */ 46.16 @@ -682,10 +681,9 @@ static long evtchn_unmask(evtchn_unmask_ 46.17 if ( test_and_clear_bit(port, s->evtchn_mask) && 46.18 test_bit (port, s->evtchn_pending) && 46.19 !test_and_set_bit (port / BITS_PER_LONG, 46.20 - &v->vcpu_info->evtchn_pending_sel) && 46.21 - !test_and_set_bit (0, &v->vcpu_info->evtchn_upcall_pending) ) 46.22 + &v->vcpu_info->evtchn_pending_sel) ) 46.23 { 46.24 - evtchn_notify(v); 46.25 + vcpu_mark_events_pending(v); 46.26 } 46.27 46.28 spin_unlock(&d->evtchn_lock);
47.1 --- a/xen/include/asm-ia64/event.h Thu Jun 15 10:02:53 2006 -0600 47.2 +++ b/xen/include/asm-ia64/event.h Thu Jun 15 10:23:57 2006 -0600 47.3 @@ -12,7 +12,7 @@ 47.4 #include <public/arch-ia64.h> 47.5 #include <asm/vcpu.h> 47.6 47.7 -static inline void evtchn_notify(struct vcpu *v) 47.8 +static inline void vcpu_kick(struct vcpu *v) 47.9 { 47.10 /* 47.11 * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of 47.12 @@ -32,6 +32,12 @@ static inline void evtchn_notify(struct 47.13 vcpu_pend_interrupt(v, v->domain->shared_info->arch.evtchn_vector); 47.14 } 47.15 47.16 +static inline void vcpu_mark_events_pending(struct vcpu *v) 47.17 +{ 47.18 + if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) ) 47.19 + vcpu_kick(v); 47.20 +} 47.21 + 47.22 /* Note: Bitwise operations result in fast code with no branches. */ 47.23 #define event_pending(v) \ 47.24 (!!(v)->vcpu_info->evtchn_upcall_pending & \
48.1 --- a/xen/include/asm-x86/event.h Thu Jun 15 10:02:53 2006 -0600 48.2 +++ b/xen/include/asm-x86/event.h Thu Jun 15 10:23:57 2006 -0600 48.3 @@ -9,7 +9,7 @@ 48.4 #ifndef __ASM_EVENT_H__ 48.5 #define __ASM_EVENT_H__ 48.6 48.7 -static inline void evtchn_notify(struct vcpu *v) 48.8 +static inline void vcpu_kick(struct vcpu *v) 48.9 { 48.10 /* 48.11 * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of 48.12 @@ -26,6 +26,12 @@ static inline void evtchn_notify(struct 48.13 smp_send_event_check_cpu(v->processor); 48.14 } 48.15 48.16 +static inline void vcpu_mark_events_pending(struct vcpu *v) 48.17 +{ 48.18 + if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) ) 48.19 + vcpu_kick(v); 48.20 +} 48.21 + 48.22 static inline int local_events_need_delivery(void) 48.23 { 48.24 struct vcpu *v = current;
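
The two event.h hunks above, together with the traps.c and event_channel.c changes, split the old evtchn_notify() into vcpu_kick(), which only wakes the VCPU, and vcpu_mark_events_pending(), which sets evtchn_upcall_pending and kicks at most once. The sketch below shows how a delivery path composes with the new helpers; it is hypervisor-internal pseudocode mirroring the event_channel.c hunk, assumes the port's pending bit was already set earlier in evtchn_set_pending(), and the name deliver_to_vcpu is hypothetical.

    /* Hypothetical caller illustrating the new split (mirrors the event_channel.c hunk). */
    static void deliver_to_vcpu(struct vcpu *v, int port, shared_info_t *s)
    {
        if ( !test_bit(port, s->evtchn_mask) &&
             !test_and_set_bit(port / BITS_PER_LONG,
                               &v->vcpu_info->evtchn_pending_sel) )
            vcpu_mark_events_pending(v);   /* sets evtchn_upcall_pending, then vcpu_kick() */
    }

    /* Paths that only need to wake a VCPU, such as the NMI deferral in traps.c,
     * now call vcpu_kick(v) directly without claiming an event is pending. */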
49.1 --- a/xen/include/xen/elf.h Thu Jun 15 10:02:53 2006 -0600 49.2 +++ b/xen/include/xen/elf.h Thu Jun 15 10:23:57 2006 -0600 49.3 @@ -178,9 +178,9 @@ typedef struct { 49.4 #define EM_ALPHA 41 /* DEC ALPHA */ 49.5 #define EM_SPARCV9 43 /* SPARC version 9 */ 49.6 #define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */ 49.7 +#define EM_IA_64 50 /* Intel Merced */ 49.8 #define EM_X86_64 62 /* AMD x86-64 architecture */ 49.9 #define EM_VAX 75 /* DEC VAX */ 49.10 -#define EM_NUM 15 /* number of machine types */ 49.11 49.12 /* Version */ 49.13 #define EV_NONE 0 /* Invalid */