ia64/xen-unstable

changeset 10414:fbc0e953732e

merge with xen-unstable.hg
author awilliam@xenbuild.aw
date Thu Jun 15 10:23:57 2006 -0600 (2006-06-15)
parents 7f67c15e2c91 73c73fb8875c
children 08378b83ea1e
files tools/security/python/xensec_tools/acm_getdecision
     1.1 --- a/extras/mini-os/events.c	Thu Jun 15 10:02:53 2006 -0600
     1.2 +++ b/extras/mini-os/events.c	Thu Jun 15 10:23:57 2006 -0600
     1.3 @@ -35,24 +35,29 @@ int do_event(u32 port, struct pt_regs *r
     1.4      ev_action_t  *action;
     1.5      if (port >= NR_EVS) {
     1.6          printk("Port number too large: %d\n", port);
     1.7 -        return 0;
     1.8 +        goto out;
     1.9      }
    1.10  
    1.11      action = &ev_actions[port];
    1.12      action->count++;
    1.13  
    1.14      if (!action->handler)
    1.15 +    {
    1.16 +        printk("Spurious event on port %d\n", port);
    1.17          goto out;
    1.18 +    }
    1.19      
    1.20      if (action->status & EVS_DISABLED)
    1.21 +    {
    1.22 +        printk("Event on port %d disabled\n", port);
    1.23          goto out;
    1.24 +    }
    1.25      
    1.26      /* call the handler */
    1.27      action->handler(port, regs);
    1.28 -
    1.29 -	clear_evtchn(port);
    1.30      
    1.31   out:
    1.32 +	clear_evtchn(port);
    1.33      return 1;
    1.34  
    1.35  }
    1.36 @@ -135,6 +140,7 @@ void init_events(void)
    1.37      {
    1.38          ev_actions[i].status  = EVS_DISABLED;
    1.39          ev_actions[i].handler = default_handler;
    1.40 +        mask_evtchn(i);
    1.41      }
    1.42  }
    1.43  
     2.1 --- a/extras/mini-os/include/xenbus.h	Thu Jun 15 10:02:53 2006 -0600
     2.2 +++ b/extras/mini-os/include/xenbus.h	Thu Jun 15 10:23:57 2006 -0600
     2.3 @@ -1,6 +1,34 @@
     2.4  #ifndef XENBUS_H__
     2.5  #define XENBUS_H__
     2.6  
     2.7 +/* Initialize the XenBus system. */
     2.8  void init_xenbus(void);
     2.9  
    2.10 +/* Read the value associated with a path.  Returns a malloc'd error
    2.11 +   string on failure and sets *value to NULL.  On success, *value is
    2.12 +   set to a malloc'd copy of the value. */
    2.13 +char *xenbus_read(const char *path, char **value);
    2.14 +
    2.15 +/* Associates a value with a path.  Returns a malloc'd error string on
    2.16 +   failure. */
    2.17 +char *xenbus_write(const char *path, const char *value);
    2.18 +
    2.19 +/* Removes the value associated with a path.  Returns a malloc'd error
    2.20 +   string on failure. */
    2.21 +char *xenbus_rm(const char *path);
    2.22 +
    2.23 +/* List the contents of a directory.  Returns a malloc'd error string
    2.24 +   on failure and sets *contents to NULL.  On success, *contents is
    2.25 +   set to a malloc'd array of pointers to malloc'd strings.  The array
    2.26 +   is NULL terminated.  May block. */
    2.27 +char *xenbus_ls(const char *prefix, char ***contents);
    2.28 +
    2.29 +/* Reads permissions associated with a path.  Returns a malloc'd error
    2.30 +   string on failure and sets *value to NULL.  On success, *value is
    2.31 +   set to a malloc'd copy of the value. */
    2.32 +char *xenbus_get_perms(const char *path, char **value);
    2.33 +
    2.34 +/* Sets the permissions associated with a path.  Returns a malloc'd
    2.35 +   error string on failure. */
    2.36 +char *xenbus_set_perms(const char *path, domid_t dom, char perm);
    2.37  #endif /* XENBUS_H__ */
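
The interface added above reports errors by returning a malloc'd string (NULL on success), so callers must free both that string and any returned value. A minimal caller sketch under those conventions (the paths used are purely illustrative):

    /* Illustrative use of the new xenbus accessors; paths are examples only. */
    static void example_xenbus_usage(void)
    {
        char *err, *value, **entries;
        int i;

        err = xenbus_read("device/vif/0/mac", &value);
        if (err) {
            printk("xenbus_read failed: %s\n", err);
            free(err);
            return;
        }
        printk("mac = %s\n", value);
        free(value);

        err = xenbus_write("device/vif/0/flag", "1");
        if (err) {
            printk("xenbus_write failed: %s\n", err);
            free(err);
        }

        err = xenbus_ls("device/vif/0", &entries);
        if (err) {
            printk("xenbus_ls failed: %s\n", err);
            free(err);
            return;
        }
        for (i = 0; entries[i]; i++) {
            printk("  %s\n", entries[i]);
            free(entries[i]);
        }
        free(entries);
    }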
     3.1 --- a/extras/mini-os/kernel.c	Thu Jun 15 10:02:53 2006 -0600
     3.2 +++ b/extras/mini-os/kernel.c	Thu Jun 15 10:23:57 2006 -0600
     3.3 @@ -82,17 +82,6 @@ static shared_info_t *map_shared_info(un
     3.4  }
     3.5  
     3.6  
     3.7 -void test_xenbus(void);
     3.8 -
     3.9 -/* Do initialisation from a thread once the scheduler's available */
    3.10 -static void init_xs(void *ign)
    3.11 -{
    3.12 -    init_xenbus();
    3.13 -
    3.14 -    test_xenbus();
    3.15 -}
    3.16 -
    3.17 -
    3.18  u8 xen_features[XENFEAT_NR_SUBMAPS * 32];
    3.19  
    3.20  void setup_xen_features(void)
    3.21 @@ -111,10 +100,18 @@ void setup_xen_features(void)
    3.22      }
    3.23  }
    3.24  
    3.25 +void test_xenbus(void);
    3.26 +
    3.27 +void xenbus_tester(void *p)
    3.28 +{
    3.29 +    test_xenbus();
    3.30 +}
    3.31 +
    3.32  /* This should be overridden by the application we are linked against. */
    3.33  __attribute__((weak)) int app_main(start_info_t *si)
    3.34  {
    3.35      printk("Dummy main: start_info=%p\n", si);
    3.36 +    create_thread("xenbus_tester", xenbus_tester, si);
    3.37      return 0;
    3.38  }
    3.39  
    3.40 @@ -183,8 +180,8 @@ void start_kernel(start_info_t *si)
    3.41      /* Init scheduler. */
    3.42      init_sched();
    3.43   
    3.44 -    /* Init XenBus from a separate thread */
    3.45 -    create_thread("init_xs", init_xs, NULL);
    3.46 +    /* Init XenBus */
    3.47 +    init_xenbus();
    3.48  
    3.49      /* Call (possibly overridden) app_main() */
    3.50      app_main(&start_info);
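
Because app_main() is declared weak, an application linked against Mini-OS can supply its own entry point; the dummy version above only spawns the xenbus test thread once the scheduler is running. A sketch of an application-side override (the thread name and function are hypothetical):

    /* Hypothetical application entry point replacing the weak dummy app_main(). */
    static void my_app_thread(void *arg)
    {
        start_info_t *si = arg;
        printk("application thread running, start_info=%p\n", si);
        /* real application work would go here */
    }

    int app_main(start_info_t *si)
    {
        printk("app_main: start_info=%p\n", si);
        create_thread("my_app", my_app_thread, si);
        return 0;
    }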
     4.1 --- a/extras/mini-os/xenbus/xenbus.c	Thu Jun 15 10:02:53 2006 -0600
     4.2 +++ b/extras/mini-os/xenbus/xenbus.c	Thu Jun 15 10:23:57 2006 -0600
     4.3 @@ -3,11 +3,12 @@
     4.4   * (C) 2006 - Cambridge University
     4.5   ****************************************************************************
     4.6   *
     4.7 - *        File: mm.c
     4.8 + *        File: xenbus.c
     4.9   *      Author: Steven Smith (sos22@cam.ac.uk) 
    4.10   *     Changes: Grzegorz Milos (gm281@cam.ac.uk)
    4.11 + *     Changes: John D. Ramsdell
    4.12   *              
    4.13 - *        Date: Mar 2006, chages Aug 2005
    4.14 + *        Date: Jun 2006, changes Aug 2005
    4.15   * 
    4.16   * Environment: Xen Minimal OS
    4.17   * Description: Minimal implementation of xenbus
    4.18 @@ -167,6 +168,7 @@ static int allocate_xenbus_id(void)
    4.19  void init_xenbus(void)
    4.20  {
    4.21      int err;
    4.22 +    printk("Initialising xenbus\n");
    4.23      DEBUG("init_xenbus called.\n");
    4.24      xenstore_buf = mfn_to_virt(start_info.store_mfn);
    4.25      create_thread("xenstore", xenbus_thread_func, NULL);
    4.26 @@ -262,15 +264,15 @@ static void xb_write(int type, int req_i
    4.27  /* Send a message to xenbus, in the same fashion as xb_write, and
    4.28     block waiting for a reply.  The reply is malloced and should be
    4.29     freed by the caller. */
    4.30 -static void *xenbus_msg_reply(int type,
    4.31 +static struct xsd_sockmsg *
    4.32 +xenbus_msg_reply(int type,
    4.33          int trans,
    4.34          struct write_req *io,
    4.35          int nr_reqs)
    4.36  {
    4.37      int id;
    4.38      DEFINE_WAIT(w);
    4.39 -    void *rep;
    4.40 -    struct xsd_sockmsg *repmsg;
    4.41 +    struct xsd_sockmsg *rep;
    4.42  
    4.43      id = allocate_xenbus_id();
    4.44      add_waiter(w, req_info[id].waitq);
    4.45 @@ -281,13 +283,27 @@ static void *xenbus_msg_reply(int type,
    4.46      wake(current);
    4.47  
    4.48      rep = req_info[id].reply;
    4.49 -    repmsg = rep;
    4.50 -    BUG_ON(repmsg->req_id != id);
    4.51 +    BUG_ON(rep->req_id != id);
    4.52      release_xenbus_id(id);
    4.53 -
    4.54      return rep;
    4.55  }
    4.56  
    4.57 +static char *errmsg(struct xsd_sockmsg *rep)
    4.58 +{
    4.59 +    if (!rep) {
    4.60 +	char msg[] = "No reply";
    4.61 +	size_t len = strlen(msg) + 1;
    4.62 +	return memcpy(malloc(len), msg, len);
    4.63 +    }
    4.64 +    if (rep->type != XS_ERROR)
    4.65 +	return NULL;
    4.66 +    char *res = malloc(rep->len + 1);
    4.67 +    memcpy(res, rep + 1, rep->len);
    4.68 +    res[rep->len] = 0;
    4.69 +    free(rep);
    4.70 +    return res;
    4.71 +}	
    4.72 +
    4.73  /* Send a debug message to xenbus.  Can block. */
    4.74  static void xenbus_debug_msg(const char *msg)
    4.75  {
    4.76 @@ -296,27 +312,29 @@ static void xenbus_debug_msg(const char 
    4.77          { "print", sizeof("print") },
    4.78          { msg, len },
    4.79          { "", 1 }};
    4.80 -    void *reply;
    4.81 -    struct xsd_sockmsg *repmsg;
    4.82 +    struct xsd_sockmsg *reply;
    4.83  
    4.84 -    reply = xenbus_msg_reply(XS_DEBUG, 0, req, 3);
    4.85 -    repmsg = reply;
    4.86 +    reply = xenbus_msg_reply(XS_DEBUG, 0, req, ARRAY_SIZE(req));
    4.87      DEBUG("Got a reply, type %d, id %d, len %d.\n",
    4.88 -            repmsg->type, repmsg->req_id, repmsg->len);
    4.89 +            reply->type, reply->req_id, reply->len);
    4.90  }
    4.91  
    4.92  /* List the contents of a directory.  Returns a malloc()ed array of
    4.93     pointers to malloc()ed strings.  The array is NULL terminated.  May
    4.94     block. */
    4.95 -static char **xenbus_ls(const char *pre)
    4.96 +char *xenbus_ls(const char *pre, char ***contents)
    4.97  {
    4.98 -    void *reply;
    4.99 -    struct xsd_sockmsg *repmsg;
   4.100 +    struct xsd_sockmsg *reply, *repmsg;
   4.101      struct write_req req[] = { { pre, strlen(pre)+1 } };
   4.102      int nr_elems, x, i;
   4.103      char **res;
   4.104  
   4.105 -    repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, 1);
   4.106 +    repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, ARRAY_SIZE(req));
   4.107 +    char *msg = errmsg(repmsg);
   4.108 +    if (msg) {
   4.109 +	*contents = NULL;
   4.110 +	return msg;
   4.111 +    }
   4.112      reply = repmsg + 1;
   4.113      for (x = nr_elems = 0; x < repmsg->len; x++)
   4.114          nr_elems += (((char *)reply)[x] == 0);
   4.115 @@ -329,20 +347,91 @@ static char **xenbus_ls(const char *pre)
   4.116      }
   4.117      res[i] = NULL;
   4.118      free(repmsg);
   4.119 -    return res;
   4.120 +    *contents = res;
   4.121 +    return NULL;
   4.122  }
   4.123  
   4.124 -static char *xenbus_read(const char *path)
   4.125 +char *xenbus_read(const char *path, char **value)
   4.126  {
   4.127 -    struct write_req req[] = { {path, strlen(path) + 1}};
   4.128 +    struct write_req req[] = { {path, strlen(path) + 1} };
   4.129      struct xsd_sockmsg *rep;
   4.130      char *res;
   4.131 -    rep = xenbus_msg_reply(XS_READ, 0, req, 1);
   4.132 +    rep = xenbus_msg_reply(XS_READ, 0, req, ARRAY_SIZE(req));
   4.133 +    char *msg = errmsg(rep);
   4.134 +    if (msg) {
   4.135 +	*value = NULL;
   4.136 +	return msg;
   4.137 +    }
   4.138      res = malloc(rep->len + 1);
   4.139      memcpy(res, rep + 1, rep->len);
   4.140      res[rep->len] = 0;
   4.141      free(rep);
   4.142 -    return res;
   4.143 +    *value = res;
   4.144 +    return NULL;
   4.145 +}
   4.146 +
   4.147 +char *xenbus_write(const char *path, const char *value)
   4.148 +{
   4.149 +    struct write_req req[] = { 
   4.150 +	{path, strlen(path) + 1},
   4.151 +	{value, strlen(value) + 1},
   4.152 +    };
   4.153 +    struct xsd_sockmsg *rep;
   4.154 +    rep = xenbus_msg_reply(XS_WRITE, 0, req, ARRAY_SIZE(req));
   4.155 +    char *msg = errmsg(rep);
   4.156 +    if (msg)
   4.157 +	return msg;
   4.158 +    free(rep);
   4.159 +    return NULL;
   4.160 +}
   4.161 +
   4.162 +char *xenbus_rm(const char *path)
   4.163 +{
   4.164 +    struct write_req req[] = { {path, strlen(path) + 1} };
   4.165 +    struct xsd_sockmsg *rep;
   4.166 +    rep = xenbus_msg_reply(XS_RM, 0, req, ARRAY_SIZE(req));
   4.167 +    char *msg = errmsg(rep);
   4.168 +    if (msg)
   4.169 +	return msg;
   4.170 +    free(rep);
   4.171 +    return NULL;
   4.172 +}
   4.173 +
   4.174 +char *xenbus_get_perms(const char *path, char **value)
   4.175 +{
   4.176 +    struct write_req req[] = { {path, strlen(path) + 1} };
   4.177 +    struct xsd_sockmsg *rep;
   4.178 +    char *res;
   4.179 +    rep = xenbus_msg_reply(XS_GET_PERMS, 0, req, ARRAY_SIZE(req));
   4.180 +    char *msg = errmsg(rep);
   4.181 +    if (msg) {
   4.182 +	*value = NULL;
   4.183 +	return msg;
   4.184 +    }
   4.185 +    res = malloc(rep->len + 1);
   4.186 +    memcpy(res, rep + 1, rep->len);
   4.187 +    res[rep->len] = 0;
   4.188 +    free(rep);
   4.189 +    *value = res;
   4.190 +    return NULL;
   4.191 +}
   4.192 +
   4.193 +#define PERM_MAX_SIZE 32
   4.194 +char *xenbus_set_perms(const char *path, domid_t dom, char perm)
   4.195 +{
   4.196 +    char value[PERM_MAX_SIZE];
   4.197 +    snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom);
   4.198 +    struct write_req req[] = { 
   4.199 +	{path, strlen(path) + 1},
   4.200 +	{value, strlen(value) + 1},
   4.201 +    };
   4.202 +    struct xsd_sockmsg *rep;
   4.203 +    rep = xenbus_msg_reply(XS_SET_PERMS, 0, req, ARRAY_SIZE(req));
   4.204 +    char *msg = errmsg(rep);
   4.205 +    if (msg)
   4.206 +	return msg;
   4.207 +    free(rep);
   4.208 +    return NULL;
   4.209  }
   4.210  
   4.211  static void do_ls_test(const char *pre)
   4.212 @@ -351,7 +440,12 @@ static void do_ls_test(const char *pre)
   4.213      int x;
   4.214  
   4.215      DEBUG("ls %s...\n", pre);
   4.216 -    dirs = xenbus_ls(pre);
   4.217 +    char *msg = xenbus_ls(pre, &dirs);
   4.218 +    if (msg) {
   4.219 +	DEBUG("Error in xenbus ls: %s\n", msg);
   4.220 +	free(msg);
   4.221 +	return;
   4.222 +    }
   4.223      for (x = 0; dirs[x]; x++) 
   4.224      {
   4.225          DEBUG("ls %s[%d] -> %s\n", pre, x, dirs[x]);
   4.226 @@ -364,11 +458,40 @@ static void do_read_test(const char *pat
   4.227  {
   4.228      char *res;
   4.229      DEBUG("Read %s...\n", path);
   4.230 -    res = xenbus_read(path);
   4.231 +    char *msg = xenbus_read(path, &res);
   4.232 +    if (msg) {
   4.233 +	DEBUG("Error in xenbus read: %s\n", msg);
   4.234 +	free(msg);
   4.235 +	return;
   4.236 +    }
   4.237      DEBUG("Read %s -> %s.\n", path, res);
   4.238      free(res);
   4.239  }
   4.240  
   4.241 +static void do_write_test(const char *path, const char *val)
   4.242 +{
   4.243 +    DEBUG("Write %s to %s...\n", val, path);
   4.244 +    char *msg = xenbus_write(path, val);
   4.245 +    if (msg) {
   4.246 +	DEBUG("Result %s\n", msg);
   4.247 +	free(msg);
   4.248 +    } else {
   4.249 +	DEBUG("Success.\n");
   4.250 +    }
   4.251 +}
   4.252 +
   4.253 +static void do_rm_test(const char *path)
   4.254 +{
   4.255 +    DEBUG("rm %s...\n", path);
   4.256 +    char *msg = xenbus_rm(path);
   4.257 +    if (msg) {
   4.258 +	DEBUG("Result %s\n", msg);
   4.259 +	free(msg);
   4.260 +    } else {
   4.261 +	DEBUG("Success.\n");
   4.262 +    }
   4.263 +}
   4.264 +
   4.265  /* Simple testing thing */
   4.266  void test_xenbus(void)
   4.267  {
   4.268 @@ -383,5 +506,22 @@ void test_xenbus(void)
   4.269      DEBUG("Doing read test.\n");
   4.270      do_read_test("device/vif/0/mac");
   4.271      do_read_test("device/vif/0/backend");
   4.272 -    printk("Xenbus initialised.\n");
   4.273 +
   4.274 +    DEBUG("Doing write test.\n");
   4.275 +    do_write_test("device/vif/0/flibble", "flobble");
   4.276 +    do_read_test("device/vif/0/flibble");
   4.277 +    do_write_test("device/vif/0/flibble", "widget");
   4.278 +    do_read_test("device/vif/0/flibble");
   4.279 +
   4.280 +    DEBUG("Doing rm test.\n");
   4.281 +    do_rm_test("device/vif/0/flibble");
   4.282 +    do_read_test("device/vif/0/flibble");
   4.283 +    DEBUG("(Should have said ENOENT)\n");
   4.284  }
   4.285 +
   4.286 +/*
   4.287 + * Local variables:
   4.288 + * mode: C
   4.289 + * c-basic-offset: 4
   4.290 + * End:
   4.291 + */
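
Each of the wrappers added above follows the same pattern: build a write_req array, send it with xenbus_msg_reply(), let errmsg() convert an XS_ERROR reply into a malloc'd error string, and free the reply on success. A sketch of how a further operation could be layered on the same pattern (xenbus_mkdir is hypothetical here, and XS_MKDIR is assumed to come from the xenstore wire-protocol header):

    /* Hypothetical extra wrapper built on xenbus_msg_reply()/errmsg(). */
    char *xenbus_mkdir(const char *path)
    {
        struct write_req req[] = { {path, strlen(path) + 1} };
        struct xsd_sockmsg *rep;
        rep = xenbus_msg_reply(XS_MKDIR, 0, req, ARRAY_SIZE(req));
        char *msg = errmsg(rep);
        if (msg)
            return msg;        /* malloc'd error string; caller frees */
        free(rep);
        return NULL;           /* success */
    }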
     5.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c	Thu Jun 15 10:02:53 2006 -0600
     5.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c	Thu Jun 15 10:23:57 2006 -0600
     5.3 @@ -133,6 +133,7 @@ void xen_tlb_flush(void)
     5.4  	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
     5.5  	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
     5.6  }
     5.7 +EXPORT_SYMBOL(xen_tlb_flush);
     5.8  
     5.9  void xen_invlpg(unsigned long ptr)
    5.10  {
    5.11 @@ -141,6 +142,7 @@ void xen_invlpg(unsigned long ptr)
    5.12  	op.arg1.linear_addr = ptr & PAGE_MASK;
    5.13  	BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
    5.14  }
    5.15 +EXPORT_SYMBOL(xen_invlpg);
    5.16  
    5.17  #ifdef CONFIG_SMP
    5.18  
    5.19 @@ -363,7 +365,8 @@ void xen_destroy_contiguous_region(unsig
    5.20  	};
    5.21  	set_xen_guest_handle(reservation.extent_start, &frame);
    5.22  
    5.23 -	if (xen_feature(XENFEAT_auto_translated_physmap))
    5.24 +	if (xen_feature(XENFEAT_auto_translated_physmap) ||
    5.25 +	    !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
    5.26  		return;
    5.27  
    5.28  	scrub_pages(vstart, 1 << order);
     6.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c	Thu Jun 15 10:02:53 2006 -0600
     6.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c	Thu Jun 15 10:23:57 2006 -0600
     6.3 @@ -763,7 +763,7 @@ void __init pgtable_cache_init(void)
     6.4  #endif
     6.5  				0,
     6.6  				pgd_ctor,
     6.7 -				pgd_dtor);
     6.8 +				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
     6.9  	if (!pgd_cache)
    6.10  		panic("pgtable_cache_init(): Cannot create pgd cache");
    6.11  }
     7.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Thu Jun 15 10:02:53 2006 -0600
     7.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Thu Jun 15 10:23:57 2006 -0600
     7.3 @@ -300,11 +300,6 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
     7.4  	unsigned long flags;
     7.5  
     7.6  	if (PTRS_PER_PMD > 1) {
     7.7 -		if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
     7.8 -			int rc = xen_create_contiguous_region(
     7.9 -				(unsigned long)pgd, 0, 32);
    7.10 -			BUG_ON(rc);
    7.11 -		}
    7.12  		if (HAVE_SHARED_KERNEL_PMD)
    7.13  			clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
    7.14  					swapper_pg_dir + USER_PTRS_PER_PGD,
    7.15 @@ -320,69 +315,105 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
    7.16  	}
    7.17  }
    7.18  
    7.19 +/* never called when PTRS_PER_PMD > 1 */
    7.20  void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
    7.21  {
    7.22  	unsigned long flags; /* can be called from interrupt context */
    7.23  
    7.24 -	if (PTRS_PER_PMD > 1) {
    7.25 -		if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
    7.26 -			xen_destroy_contiguous_region((unsigned long)pgd, 0);
    7.27 -	} else {
    7.28 -		spin_lock_irqsave(&pgd_lock, flags);
    7.29 -		pgd_list_del(pgd);
    7.30 -		spin_unlock_irqrestore(&pgd_lock, flags);
    7.31 +	spin_lock_irqsave(&pgd_lock, flags);
    7.32 +	pgd_list_del(pgd);
    7.33 +	spin_unlock_irqrestore(&pgd_lock, flags);
    7.34  
    7.35 -		pgd_test_and_unpin(pgd);
    7.36 -	}
    7.37 +	pgd_test_and_unpin(pgd);
    7.38  }
    7.39  
    7.40  pgd_t *pgd_alloc(struct mm_struct *mm)
    7.41  {
    7.42  	int i;
    7.43  	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
    7.44 +	pmd_t **pmd;
    7.45 +	unsigned long flags;
    7.46  
    7.47  	pgd_test_and_unpin(pgd);
    7.48  
    7.49  	if (PTRS_PER_PMD == 1 || !pgd)
    7.50  		return pgd;
    7.51  
    7.52 -	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
    7.53 -		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
    7.54 -		if (!pmd)
    7.55 -			goto out_oom;
    7.56 -		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
    7.57 -	}
    7.58 -
    7.59 -	if (!HAVE_SHARED_KERNEL_PMD) {
    7.60 -		unsigned long flags;
    7.61 -
    7.62 -		for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
    7.63 +	if (HAVE_SHARED_KERNEL_PMD) {
    7.64 +		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
    7.65  			pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
    7.66  			if (!pmd)
    7.67  				goto out_oom;
    7.68  			set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
    7.69  		}
    7.70 +		return pgd;
    7.71 +	}
    7.72  
    7.73 -		spin_lock_irqsave(&pgd_lock, flags);
    7.74 -		for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
    7.75 -			unsigned long v = (unsigned long)i << PGDIR_SHIFT;
    7.76 -			pgd_t *kpgd = pgd_offset_k(v);
    7.77 -			pud_t *kpud = pud_offset(kpgd, v);
    7.78 -			pmd_t *kpmd = pmd_offset(kpud, v);
    7.79 -			pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
    7.80 -			memcpy(pmd, kpmd, PAGE_SIZE);
    7.81 -			make_lowmem_page_readonly(
    7.82 -				pmd, XENFEAT_writable_page_tables);
    7.83 +	/*
    7.84 +	 * We can race save/restore (if we sleep during a GFP_KERNEL memory
    7.85 +	 * allocation). We therefore store virtual addresses of pmds as they
    7.86 +	 * do not change across save/restore, and poke the machine addresses
    7.87 +	 * into the pgdir under the pgd_lock.
    7.88 +	 */
    7.89 +	pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
    7.90 +	if (!pmd) {
    7.91 +		kmem_cache_free(pgd_cache, pgd);
    7.92 +		return NULL;
    7.93 +	}
    7.94 +
    7.95 +	/* Allocate pmds, remember virtual addresses. */
    7.96 +	for (i = 0; i < PTRS_PER_PGD; ++i) {
    7.97 +		pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
    7.98 +		if (!pmd[i])
    7.99 +			goto out_oom;
   7.100 +	}
   7.101 +
   7.102 +	spin_lock_irqsave(&pgd_lock, flags);
   7.103 +
   7.104 +	/* Protect against save/restore: move below 4GB under pgd_lock. */
   7.105 +	if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
   7.106 +		int rc = xen_create_contiguous_region(
   7.107 +			(unsigned long)pgd, 0, 32);
   7.108 +		if (rc) {
   7.109 +			spin_unlock_irqrestore(&pgd_lock, flags);
   7.110 +			goto out_oom;
   7.111  		}
   7.112 -		pgd_list_add(pgd);
   7.113 -		spin_unlock_irqrestore(&pgd_lock, flags);
   7.114 +	}
   7.115 +
   7.116 +	/* Copy kernel pmd contents and write-protect the new pmds. */
   7.117 +	for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
   7.118 +		unsigned long v = (unsigned long)i << PGDIR_SHIFT;
   7.119 +		pgd_t *kpgd = pgd_offset_k(v);
   7.120 +		pud_t *kpud = pud_offset(kpgd, v);
   7.121 +		pmd_t *kpmd = pmd_offset(kpud, v);
   7.122 +		memcpy(pmd[i], kpmd, PAGE_SIZE);
   7.123 +		make_lowmem_page_readonly(
   7.124 +			pmd[i], XENFEAT_writable_page_tables);
   7.125  	}
   7.126  
   7.127 +	/* It is safe to poke machine addresses of pmds under the pmd_lock. */
   7.128 +	for (i = 0; i < PTRS_PER_PGD; i++)
   7.129 +		set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));
   7.130 +
   7.131 +	/* Ensure this pgd gets picked up and pinned on save/restore. */
   7.132 +	pgd_list_add(pgd);
   7.133 +
   7.134 +	spin_unlock_irqrestore(&pgd_lock, flags);
   7.135 +
   7.136 +	kfree(pmd);
   7.137 +
   7.138  	return pgd;
   7.139  
   7.140  out_oom:
   7.141 -	for (i--; i >= 0; i--)
   7.142 -		kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
   7.143 +	if (HAVE_SHARED_KERNEL_PMD) {
   7.144 +		for (i--; i >= 0; i--)
   7.145 +			kmem_cache_free(pmd_cache,
   7.146 +					(void *)__va(pgd_val(pgd[i])-1));
   7.147 +	} else {
   7.148 +		for (i--; i >= 0; i--)
   7.149 +			kmem_cache_free(pmd_cache, pmd[i]);
   7.150 +		kfree(pmd);
   7.151 +	}
   7.152  	kmem_cache_free(pgd_cache, pgd);
   7.153  	return NULL;
   7.154  }
   7.155 @@ -391,6 +422,14 @@ void pgd_free(pgd_t *pgd)
   7.156  {
   7.157  	int i;
   7.158  
   7.159 +	/*
   7.160 +	 * After this the pgd should not be pinned for the duration of this
   7.161 +	 * function's execution. We should never sleep and thus never race:
   7.162 +	 *  1. User pmds will not become write-protected under our feet due
   7.163 +	 *     to a concurrent mm_pin_all().
   7.164 +	 *  2. The machine addresses in PGD entries will not become invalid
   7.165 +	 *     due to a concurrent save/restore.
   7.166 +	 */
   7.167  	pgd_test_and_unpin(pgd);
   7.168  
   7.169  	/* in the PAE case user pgd entries are overwritten before usage */
   7.170 @@ -399,11 +438,13 @@ void pgd_free(pgd_t *pgd)
   7.171  			pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
   7.172  			kmem_cache_free(pmd_cache, pmd);
   7.173  		}
   7.174 +
   7.175  		if (!HAVE_SHARED_KERNEL_PMD) {
   7.176  			unsigned long flags;
   7.177  			spin_lock_irqsave(&pgd_lock, flags);
   7.178  			pgd_list_del(pgd);
   7.179  			spin_unlock_irqrestore(&pgd_lock, flags);
   7.180 +
   7.181  			for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
   7.182  				pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
   7.183  				make_lowmem_page_writable(
   7.184 @@ -411,8 +452,13 @@ void pgd_free(pgd_t *pgd)
   7.185  				memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
   7.186  				kmem_cache_free(pmd_cache, pmd);
   7.187  			}
   7.188 +
   7.189 +			if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
   7.190 +				xen_destroy_contiguous_region(
   7.191 +					(unsigned long)pgd, 0);
   7.192  		}
   7.193  	}
   7.194 +
   7.195  	/* in the non-PAE case, free_pgtables() clears user pgd entries */
   7.196  	kmem_cache_free(pgd_cache, pgd);
   7.197  }
   7.198 @@ -588,7 +634,7 @@ static void pgd_test_and_unpin(pgd_t *pg
   7.199  void mm_pin(struct mm_struct *mm)
   7.200  {
   7.201  	if (xen_feature(XENFEAT_writable_page_tables))
   7.202 -	    return;
   7.203 +		return;
   7.204  	spin_lock(&mm->page_table_lock);
   7.205  	__pgd_pin(mm->pgd);
   7.206  	spin_unlock(&mm->page_table_lock);
   7.207 @@ -597,7 +643,7 @@ void mm_pin(struct mm_struct *mm)
   7.208  void mm_unpin(struct mm_struct *mm)
   7.209  {
   7.210  	if (xen_feature(XENFEAT_writable_page_tables))
   7.211 -	    return;
   7.212 +		return;
   7.213  	spin_lock(&mm->page_table_lock);
   7.214  	__pgd_unpin(mm->pgd);
   7.215  	spin_unlock(&mm->page_table_lock);
   7.216 @@ -607,13 +653,19 @@ void mm_pin_all(void)
   7.217  {
   7.218  	struct page *page;
   7.219  	if (xen_feature(XENFEAT_writable_page_tables))
   7.220 -	    return;
   7.221 +		return;
   7.222  	for (page = pgd_list; page; page = (struct page *)page->index) {
   7.223  		if (!test_bit(PG_pinned, &page->flags))
   7.224  			__pgd_pin((pgd_t *)page_address(page));
   7.225  	}
   7.226  }
   7.227  
   7.228 +void _arch_dup_mmap(struct mm_struct *mm)
   7.229 +{
   7.230 +	if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
   7.231 +		mm_pin(mm);
   7.232 +}
   7.233 +
   7.234  void _arch_exit_mmap(struct mm_struct *mm)
   7.235  {
   7.236  	struct task_struct *tsk = current;
     8.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Thu Jun 15 10:02:53 2006 -0600
     8.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Thu Jun 15 10:23:57 2006 -0600
     8.3 @@ -130,6 +130,12 @@ void mm_pin_all(void)
     8.4  				  context.unpinned));
     8.5  }
     8.6  
     8.7 +void _arch_dup_mmap(struct mm_struct *mm)
     8.8 +{
     8.9 +    if (!mm->context.pinned)
    8.10 +        mm_pin(mm);
    8.11 +}
    8.12 +
    8.13  void _arch_exit_mmap(struct mm_struct *mm)
    8.14  {
    8.15      struct task_struct *tsk = current;
     9.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Thu Jun 15 10:02:53 2006 -0600
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Thu Jun 15 10:23:57 2006 -0600
     9.3 @@ -342,9 +342,21 @@ static void connect(struct blkfront_info
     9.4  static void blkfront_closing(struct xenbus_device *dev)
     9.5  {
     9.6  	struct blkfront_info *info = dev->dev.driver_data;
     9.7 +	unsigned long flags;
     9.8  
     9.9  	DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
    9.10  
    9.11 +	if (info->rq == NULL)
    9.12 +		return;
    9.13 +
    9.14 +	spin_lock_irqsave(&blkif_io_lock, flags);
    9.15 +	/* No more blkif_request(). */
    9.16 +	blk_stop_queue(info->rq);
    9.17 +	/* No more gnttab callback work. */
    9.18 +	gnttab_cancel_free_callback(&info->callback);
    9.19 +	flush_scheduled_work();
    9.20 +	spin_unlock_irqrestore(&blkif_io_lock, flags);
    9.21 +
    9.22  	xlvbd_del(info);
    9.23  
    9.24  	xenbus_switch_state(dev, XenbusStateClosed);
    9.25 @@ -407,7 +419,8 @@ static void blkif_restart_queue(void *ar
    9.26  {
    9.27  	struct blkfront_info *info = (struct blkfront_info *)arg;
    9.28  	spin_lock_irq(&blkif_io_lock);
    9.29 -	kick_pending_request_queues(info);
    9.30 +	if (info->connected == BLKIF_STATE_CONNECTED)
    9.31 +		kick_pending_request_queues(info);
    9.32  	spin_unlock_irq(&blkif_io_lock);
    9.33  }
    9.34  
    9.35 @@ -695,6 +708,12 @@ static void blkif_free(struct blkfront_i
    9.36  	spin_lock_irq(&blkif_io_lock);
    9.37  	info->connected = suspend ?
    9.38  		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
    9.39 +	/* No more blkif_request(). */
    9.40 +	if (info->rq)
    9.41 +		blk_stop_queue(info->rq);
    9.42 +	/* No more gnttab callback work. */
    9.43 +	gnttab_cancel_free_callback(&info->callback);
    9.44 +	flush_scheduled_work();
    9.45  	spin_unlock_irq(&blkif_io_lock);
    9.46  
    9.47  	/* Free resources associated with old device channel. */
    9.48 @@ -768,17 +787,17 @@ static void blkif_recover(struct blkfron
    9.49  
    9.50  	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
    9.51  
    9.52 +	spin_lock_irq(&blkif_io_lock);
    9.53 +
    9.54  	/* Now safe for us to use the shared ring */
    9.55 -	spin_lock_irq(&blkif_io_lock);
    9.56  	info->connected = BLKIF_STATE_CONNECTED;
    9.57 -	spin_unlock_irq(&blkif_io_lock);
    9.58  
    9.59  	/* Send off requeued requests */
    9.60  	flush_requests(info);
    9.61  
    9.62  	/* Kick any other new requests queued since we resumed */
    9.63 -	spin_lock_irq(&blkif_io_lock);
    9.64  	kick_pending_request_queues(info);
    9.65 +
    9.66  	spin_unlock_irq(&blkif_io_lock);
    9.67  }
    9.68  
    10.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu Jun 15 10:02:53 2006 -0600
    10.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Thu Jun 15 10:23:57 2006 -0600
    10.3 @@ -334,6 +334,21 @@ out:
    10.4  }
    10.5  EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
    10.6  
    10.7 +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
    10.8 +{
    10.9 +	struct gnttab_free_callback **pcb;
   10.10 +	unsigned long flags;
   10.11 +
   10.12 +	spin_lock_irqsave(&gnttab_list_lock, flags);
   10.13 +	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
   10.14 +		if (*pcb == callback) {
   10.15 +			*pcb = callback->next;
   10.16 +			break;
   10.17 +		}
   10.18 +	}
   10.19 +	spin_unlock_irqrestore(&gnttab_list_lock, flags);
   10.20 +}
   10.21 +
   10.22  #ifndef __ia64__
   10.23  static int map_pte_fn(pte_t *pte, struct page *pmd_page,
   10.24  		      unsigned long addr, void *data)
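
gnttab_cancel_free_callback() unlinks a still-pending callback from gnttab_free_callback_list, so a frontend can tear down without a stale "out of grant references" callback firing later. A rough sketch of the intended request/cancel pairing, modelled on the blkfront change above (the info structure and function names are illustrative):

    /* Illustrative pairing of request and cancel for a driver-private callback. */
    struct my_front_info {
        struct gnttab_free_callback callback;
        /* other driver state */
    };

    static void my_restart(void *arg)
    {
        /* re-kick the driver's request queue now that grants are available */
    }

    static void my_front_wait_for_grants(struct my_front_info *info, u16 needed)
    {
        /* called when grant reference allocation fails */
        gnttab_request_free_callback(&info->callback, my_restart, info, needed);
    }

    static void my_front_teardown(struct my_front_info *info)
    {
        /* ensure the callback cannot fire after this point */
        gnttab_cancel_free_callback(&info->callback);
        flush_scheduled_work();
    }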
    11.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c	Thu Jun 15 10:02:53 2006 -0600
    11.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c	Thu Jun 15 10:23:57 2006 -0600
    11.3 @@ -121,8 +121,15 @@ static int __init skbuff_init(void)
    11.4  	for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
    11.5  		size = PAGE_SIZE << order;
    11.6  		sprintf(name[order], "xen-skb-%lu", size);
    11.7 -		skbuff_order_cachep[order] = kmem_cache_create(
    11.8 -			name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
    11.9 +		if (is_running_on_xen() &&
   11.10 +		    (xen_start_info->flags & SIF_PRIVILEGED))
   11.11 +			skbuff_order_cachep[order] = kmem_cache_create(
   11.12 +				name[order], size, size, 0,
   11.13 +				skbuff_ctor, skbuff_dtor);
   11.14 +		else
   11.15 +			skbuff_order_cachep[order] = kmem_cache_create(
   11.16 +				name[order], size, size, 0, NULL, NULL);
   11.17 +			
   11.18  	}
   11.19  
   11.20  	skbuff_cachep = skbuff_order_cachep[0];
    12.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu Jun 15 10:02:53 2006 -0600
    12.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu Jun 15 10:23:57 2006 -0600
    12.3 @@ -1072,68 +1072,39 @@ static void xennet_set_features(struct n
    12.4  
    12.5  static void network_connect(struct net_device *dev)
    12.6  {
    12.7 -	struct netfront_info *np;
    12.8 +	struct netfront_info *np = netdev_priv(dev);
    12.9  	int i, requeue_idx;
   12.10 -	struct netif_tx_request *tx;
   12.11  	struct sk_buff *skb;
   12.12  
   12.13  	xennet_set_features(dev);
   12.14  
   12.15 -	np = netdev_priv(dev);
   12.16  	spin_lock_irq(&np->tx_lock);
   12.17  	spin_lock(&np->rx_lock);
   12.18  
   12.19 -	/* Recovery procedure: */
   12.20 -
   12.21  	/*
   12.22 -	 * Step 1: Rebuild the RX and TX ring contents.
   12.23 -	 * NB. We could just free the queued TX packets now but we hope
   12.24 -	 * that sending them out might do some good.  We have to rebuild
   12.25 -	 * the RX ring because some of our pages are currently flipped out
   12.26 -	 * so we can't just free the RX skbs.
   12.27 -	 * NB2. Freelist index entries are always going to be less than
   12.28 +         * Recovery procedure:
   12.29 +	 *  NB. Freelist index entries are always going to be less than
   12.30  	 *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
   12.31 -	 * greater than PAGE_OFFSET: we use this property to distinguish
   12.32 -	 * them.
   12.33 -	 */
   12.34 +	 *  greater than PAGE_OFFSET: we use this property to distinguish
   12.35 +	 *  them.
   12.36 +         */
   12.37  
   12.38 -	/*
   12.39 -	 * Rebuild the TX buffer freelist and the TX ring itself.
   12.40 -	 * NB. This reorders packets.  We could keep more private state
   12.41 -	 * to avoid this but maybe it doesn't matter so much given the
   12.42 -	 * interface has been down.
   12.43 -	 */
   12.44 +	/* Step 1: Discard all pending TX packet fragments. */
   12.45  	for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
   12.46  		if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
   12.47  			continue;
   12.48  
   12.49  		skb = np->tx_skbs[i];
   12.50 -
   12.51 -		tx = RING_GET_REQUEST(&np->tx, requeue_idx);
   12.52 -		requeue_idx++;
   12.53 -
   12.54 -		tx->id = i;
   12.55 -		gnttab_grant_foreign_access_ref(
   12.56 -			np->grant_tx_ref[i], np->xbdev->otherend_id,
   12.57 -			virt_to_mfn(np->tx_skbs[i]->data),
   12.58 -			GNTMAP_readonly);
   12.59 -		tx->gref = np->grant_tx_ref[i];
   12.60 -		tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
   12.61 -		tx->size = skb->len;
   12.62 -		tx->flags = 0;
   12.63 -		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
   12.64 -			tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
   12.65 -		if (skb->proto_data_valid) /* remote but checksummed? */
   12.66 -			tx->flags |= NETTXF_data_validated;
   12.67 -
   12.68 -		np->stats.tx_bytes += skb->len;
   12.69 -		np->stats.tx_packets++;
   12.70 +		gnttab_end_foreign_access_ref(
   12.71 +			np->grant_tx_ref[i], GNTMAP_readonly);
   12.72 +		gnttab_release_grant_reference(
   12.73 +			&np->gref_tx_head, np->grant_tx_ref[i]);
   12.74 +		np->grant_tx_ref[i] = GRANT_INVALID_REF;
   12.75 +		add_id_to_freelist(np->tx_skbs, i);
   12.76 +		dev_kfree_skb_irq(skb);
   12.77  	}
   12.78  
   12.79 -	np->tx.req_prod_pvt = requeue_idx;
   12.80 -	RING_PUSH_REQUESTS(&np->tx);
   12.81 -
   12.82 -	/* Rebuild the RX buffer freelist and the RX ring itself. */
   12.83 +	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
   12.84  	for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) {
   12.85  		if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET)
   12.86  			continue;
   12.87 @@ -1150,7 +1121,7 @@ static void network_connect(struct net_d
   12.88  	RING_PUSH_REQUESTS(&np->rx);
   12.89  
   12.90  	/*
   12.91 -	 * Step 2: All public and private state should now be sane.  Get
   12.92 +	 * Step 3: All public and private state should now be sane.  Get
   12.93  	 * ready to start sending and receiving packets and give the driver
   12.94  	 * domain a kick because we've probably just requeued some
   12.95  	 * packets.
   12.96 @@ -1158,6 +1129,7 @@ static void network_connect(struct net_d
   12.97  	netif_carrier_on(dev);
   12.98  	notify_remote_via_irq(np->irq);
   12.99  	network_tx_buf_gc(dev);
  12.100 +	network_alloc_rx_buffers(dev);
  12.101  
  12.102  	spin_unlock(&np->rx_lock);
  12.103  	spin_unlock_irq(&np->tx_lock);
    13.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h	Thu Jun 15 10:02:53 2006 -0600
    13.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu.h	Thu Jun 15 10:23:57 2006 -0600
    13.3 @@ -18,4 +18,8 @@ typedef struct {
    13.4  extern void _arch_exit_mmap(struct mm_struct *mm);
    13.5  #define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
    13.6  
    13.7 +/* kernel/fork.c:dup_mmap hook */
    13.8 +extern void _arch_dup_mmap(struct mm_struct *mm);
    13.9 +#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
   13.10 +
   13.11  #endif
    14.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h	Thu Jun 15 10:02:53 2006 -0600
    14.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/mmu_context.h	Thu Jun 15 10:23:57 2006 -0600
    14.3 @@ -51,8 +51,7 @@ static inline void switch_mm(struct mm_s
    14.4  	struct mmuext_op _op[2], *op = _op;
    14.5  
    14.6  	if (likely(prev != next)) {
    14.7 -		if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
    14.8 -			mm_pin(next);
    14.9 +		BUG_ON(!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags));
   14.10  
   14.11  		/* stop flush ipis for the previous mm */
   14.12  		cpu_clear(cpu, prev->cpu_vm_mask);
   14.13 @@ -99,7 +98,11 @@ static inline void switch_mm(struct mm_s
   14.14  #define deactivate_mm(tsk, mm) \
   14.15  	asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
   14.16  
   14.17 -#define activate_mm(prev, next) \
   14.18 -	switch_mm((prev),(next),NULL)
   14.19 +static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
   14.20 +{
   14.21 +	if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
   14.22 +		mm_pin(next);
   14.23 +	switch_mm(prev, next, NULL);
   14.24 +}
   14.25  
   14.26  #endif
    15.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h	Thu Jun 15 10:02:53 2006 -0600
    15.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h	Thu Jun 15 10:23:57 2006 -0600
    15.3 @@ -25,9 +25,9 @@ static char * __init machine_specific_me
    15.4  	if ( rc == -ENOSYS ) {
    15.5  		memmap.nr_entries = 1;
    15.6  		map[0].addr = 0ULL;
    15.7 -		map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
    15.8 +		map[0].size = PFN_PHYS(xen_start_info->nr_pages);
    15.9  		/* 8MB slack (to balance backend allocations). */
   15.10 -		map[0].size += 8 << 20;
   15.11 +		map[0].size += 8ULL << 20;
   15.12  		map[0].type = E820_RAM;
   15.13  		rc = 0;
   15.14  	}
    16.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h	Thu Jun 15 10:02:53 2006 -0600
    16.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu.h	Thu Jun 15 10:23:57 2006 -0600
    16.3 @@ -28,6 +28,10 @@ extern spinlock_t mm_unpinned_lock;
    16.4  /* mm/memory.c:exit_mmap hook */
    16.5  extern void _arch_exit_mmap(struct mm_struct *mm);
    16.6  #define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
    16.7 +
    16.8 +/* kernel/fork.c:dup_mmap hook */
    16.9 +extern void _arch_dup_mmap(struct mm_struct *mm);
   16.10 +#define arch_dup_mmap(mm, oldmm) ((void)(oldmm), _arch_dup_mmap(mm))
   16.11  #endif
   16.12  
   16.13  #endif
    17.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h	Thu Jun 15 10:02:53 2006 -0600
    17.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/mmu_context.h	Thu Jun 15 10:23:57 2006 -0600
    17.3 @@ -73,8 +73,7 @@ static inline void switch_mm(struct mm_s
    17.4  	struct mmuext_op _op[3], *op = _op;
    17.5  
    17.6  	if (likely(prev != next)) {
    17.7 -		if (!next->context.pinned)
    17.8 -			mm_pin(next);
    17.9 +		BUG_ON(!next->context.pinned);
   17.10  
   17.11  		/* stop flush ipis for the previous mm */
   17.12  		clear_bit(cpu, &prev->cpu_vm_mask);
   17.13 @@ -127,8 +126,11 @@ static inline void switch_mm(struct mm_s
   17.14  	asm volatile("movl %0,%%fs"::"r"(0));  \
   17.15  } while(0)
   17.16  
   17.17 -#define activate_mm(prev, next) do {		\
   17.18 -	switch_mm((prev),(next),NULL);		\
   17.19 -} while (0)
   17.20 +static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
   17.21 +{
   17.22 +	if (!next->context.pinned)
   17.23 +		mm_pin(next);
   17.24 +	switch_mm(prev, next, NULL);
   17.25 +}
   17.26  
   17.27  #endif
    18.1 --- a/linux-2.6-xen-sparse/include/xen/gnttab.h	Thu Jun 15 10:02:53 2006 -0600
    18.2 +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h	Thu Jun 15 10:23:57 2006 -0600
    18.3 @@ -100,6 +100,7 @@ void gnttab_release_grant_reference(gran
    18.4  
    18.5  void gnttab_request_free_callback(struct gnttab_free_callback *callback,
    18.6  				  void (*fn)(void *), void *arg, u16 count);
    18.7 +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
    18.8  
    18.9  void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
   18.10  				     unsigned long frame, int readonly);
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/linux-2.6-xen-sparse/kernel/fork.c	Thu Jun 15 10:23:57 2006 -0600
    19.3 @@ -0,0 +1,1619 @@
    19.4 +/*
    19.5 + *  linux/kernel/fork.c
    19.6 + *
    19.7 + *  Copyright (C) 1991, 1992  Linus Torvalds
    19.8 + */
    19.9 +
   19.10 +/*
   19.11 + *  'fork.c' contains the help-routines for the 'fork' system call
   19.12 + * (see also entry.S and others).
   19.13 + * Fork is rather simple, once you get the hang of it, but the memory
   19.14 + * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
   19.15 + */
   19.16 +
   19.17 +#include <linux/config.h>
   19.18 +#include <linux/slab.h>
   19.19 +#include <linux/init.h>
   19.20 +#include <linux/unistd.h>
   19.21 +#include <linux/smp_lock.h>
   19.22 +#include <linux/module.h>
   19.23 +#include <linux/vmalloc.h>
   19.24 +#include <linux/completion.h>
   19.25 +#include <linux/namespace.h>
   19.26 +#include <linux/personality.h>
   19.27 +#include <linux/mempolicy.h>
   19.28 +#include <linux/sem.h>
   19.29 +#include <linux/file.h>
   19.30 +#include <linux/key.h>
   19.31 +#include <linux/binfmts.h>
   19.32 +#include <linux/mman.h>
   19.33 +#include <linux/fs.h>
   19.34 +#include <linux/capability.h>
   19.35 +#include <linux/cpu.h>
   19.36 +#include <linux/cpuset.h>
   19.37 +#include <linux/security.h>
   19.38 +#include <linux/swap.h>
   19.39 +#include <linux/syscalls.h>
   19.40 +#include <linux/jiffies.h>
   19.41 +#include <linux/futex.h>
   19.42 +#include <linux/rcupdate.h>
   19.43 +#include <linux/ptrace.h>
   19.44 +#include <linux/mount.h>
   19.45 +#include <linux/audit.h>
   19.46 +#include <linux/profile.h>
   19.47 +#include <linux/rmap.h>
   19.48 +#include <linux/acct.h>
   19.49 +#include <linux/cn_proc.h>
   19.50 +
   19.51 +#include <asm/pgtable.h>
   19.52 +#include <asm/pgalloc.h>
   19.53 +#include <asm/uaccess.h>
   19.54 +#include <asm/mmu_context.h>
   19.55 +#include <asm/cacheflush.h>
   19.56 +#include <asm/tlbflush.h>
   19.57 +
   19.58 +/*
   19.59 + * Protected counters by write_lock_irq(&tasklist_lock)
   19.60 + */
   19.61 +unsigned long total_forks;	/* Handle normal Linux uptimes. */
   19.62 +int nr_threads; 		/* The idle threads do not count.. */
   19.63 +
   19.64 +int max_threads;		/* tunable limit on nr_threads */
   19.65 +
   19.66 +DEFINE_PER_CPU(unsigned long, process_counts) = 0;
   19.67 +
   19.68 + __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
   19.69 +
   19.70 +EXPORT_SYMBOL(tasklist_lock);
   19.71 +
   19.72 +int nr_processes(void)
   19.73 +{
   19.74 +	int cpu;
   19.75 +	int total = 0;
   19.76 +
   19.77 +	for_each_online_cpu(cpu)
   19.78 +		total += per_cpu(process_counts, cpu);
   19.79 +
   19.80 +	return total;
   19.81 +}
   19.82 +
   19.83 +#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
   19.84 +# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
   19.85 +# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
   19.86 +static kmem_cache_t *task_struct_cachep;
   19.87 +#endif
   19.88 +
   19.89 +/* SLAB cache for signal_struct structures (tsk->signal) */
   19.90 +kmem_cache_t *signal_cachep;
   19.91 +
   19.92 +/* SLAB cache for sighand_struct structures (tsk->sighand) */
   19.93 +kmem_cache_t *sighand_cachep;
   19.94 +
   19.95 +/* SLAB cache for files_struct structures (tsk->files) */
   19.96 +kmem_cache_t *files_cachep;
   19.97 +
   19.98 +/* SLAB cache for fs_struct structures (tsk->fs) */
   19.99 +kmem_cache_t *fs_cachep;
  19.100 +
  19.101 +/* SLAB cache for vm_area_struct structures */
  19.102 +kmem_cache_t *vm_area_cachep;
  19.103 +
  19.104 +/* SLAB cache for mm_struct structures (tsk->mm) */
  19.105 +static kmem_cache_t *mm_cachep;
  19.106 +
  19.107 +void free_task(struct task_struct *tsk)
  19.108 +{
  19.109 +	free_thread_info(tsk->thread_info);
  19.110 +	free_task_struct(tsk);
  19.111 +}
  19.112 +EXPORT_SYMBOL(free_task);
  19.113 +
  19.114 +void __put_task_struct_cb(struct rcu_head *rhp)
  19.115 +{
  19.116 +	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
  19.117 +
  19.118 +	WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
  19.119 +	WARN_ON(atomic_read(&tsk->usage));
  19.120 +	WARN_ON(tsk == current);
  19.121 +
  19.122 +	if (unlikely(tsk->audit_context))
  19.123 +		audit_free(tsk);
  19.124 +	security_task_free(tsk);
  19.125 +	free_uid(tsk->user);
  19.126 +	put_group_info(tsk->group_info);
  19.127 +
  19.128 +	if (!profile_handoff_task(tsk))
  19.129 +		free_task(tsk);
  19.130 +}
  19.131 +
  19.132 +void __init fork_init(unsigned long mempages)
  19.133 +{
  19.134 +#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
  19.135 +#ifndef ARCH_MIN_TASKALIGN
  19.136 +#define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
  19.137 +#endif
  19.138 +	/* create a slab on which task_structs can be allocated */
  19.139 +	task_struct_cachep =
  19.140 +		kmem_cache_create("task_struct", sizeof(struct task_struct),
  19.141 +			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
  19.142 +#endif
  19.143 +
  19.144 +	/*
  19.145 +	 * The default maximum number of threads is set to a safe
  19.146 +	 * value: the thread structures can take up at most half
  19.147 +	 * of memory.
  19.148 +	 */
  19.149 +	max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
  19.150 +
  19.151 +	/*
  19.152 +	 * we need to allow at least 20 threads to boot a system
  19.153 +	 */
  19.154 +	if(max_threads < 20)
  19.155 +		max_threads = 20;
  19.156 +
  19.157 +	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
  19.158 +	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
  19.159 +	init_task.signal->rlim[RLIMIT_SIGPENDING] =
  19.160 +		init_task.signal->rlim[RLIMIT_NPROC];
  19.161 +}
  19.162 +
  19.163 +static struct task_struct *dup_task_struct(struct task_struct *orig)
  19.164 +{
  19.165 +	struct task_struct *tsk;
  19.166 +	struct thread_info *ti;
  19.167 +
  19.168 +	prepare_to_copy(orig);
  19.169 +
  19.170 +	tsk = alloc_task_struct();
  19.171 +	if (!tsk)
  19.172 +		return NULL;
  19.173 +
  19.174 +	ti = alloc_thread_info(tsk);
  19.175 +	if (!ti) {
  19.176 +		free_task_struct(tsk);
  19.177 +		return NULL;
  19.178 +	}
  19.179 +
  19.180 +	*tsk = *orig;
  19.181 +	tsk->thread_info = ti;
  19.182 +	setup_thread_stack(tsk, orig);
  19.183 +
  19.184 +	/* One for us, one for whoever does the "release_task()" (usually parent) */
  19.185 +	atomic_set(&tsk->usage,2);
  19.186 +	atomic_set(&tsk->fs_excl, 0);
  19.187 +	return tsk;
  19.188 +}
  19.189 +
  19.190 +#ifdef CONFIG_MMU
  19.191 +static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
  19.192 +{
  19.193 +	struct vm_area_struct *mpnt, *tmp, **pprev;
  19.194 +	struct rb_node **rb_link, *rb_parent;
  19.195 +	int retval;
  19.196 +	unsigned long charge;
  19.197 +	struct mempolicy *pol;
  19.198 +
  19.199 +	down_write(&oldmm->mmap_sem);
  19.200 +	flush_cache_mm(oldmm);
  19.201 +	down_write(&mm->mmap_sem);
  19.202 +
  19.203 +	mm->locked_vm = 0;
  19.204 +	mm->mmap = NULL;
  19.205 +	mm->mmap_cache = NULL;
  19.206 +	mm->free_area_cache = oldmm->mmap_base;
  19.207 +	mm->cached_hole_size = ~0UL;
  19.208 +	mm->map_count = 0;
  19.209 +	cpus_clear(mm->cpu_vm_mask);
  19.210 +	mm->mm_rb = RB_ROOT;
  19.211 +	rb_link = &mm->mm_rb.rb_node;
  19.212 +	rb_parent = NULL;
  19.213 +	pprev = &mm->mmap;
  19.214 +
  19.215 +	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
  19.216 +		struct file *file;
  19.217 +
  19.218 +		if (mpnt->vm_flags & VM_DONTCOPY) {
  19.219 +			long pages = vma_pages(mpnt);
  19.220 +			mm->total_vm -= pages;
  19.221 +			vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
  19.222 +								-pages);
  19.223 +			continue;
  19.224 +		}
  19.225 +		charge = 0;
  19.226 +		if (mpnt->vm_flags & VM_ACCOUNT) {
  19.227 +			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
  19.228 +			if (security_vm_enough_memory(len))
  19.229 +				goto fail_nomem;
  19.230 +			charge = len;
  19.231 +		}
  19.232 +		tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
  19.233 +		if (!tmp)
  19.234 +			goto fail_nomem;
  19.235 +		*tmp = *mpnt;
  19.236 +		pol = mpol_copy(vma_policy(mpnt));
  19.237 +		retval = PTR_ERR(pol);
  19.238 +		if (IS_ERR(pol))
  19.239 +			goto fail_nomem_policy;
  19.240 +		vma_set_policy(tmp, pol);
  19.241 +		tmp->vm_flags &= ~VM_LOCKED;
  19.242 +		tmp->vm_mm = mm;
  19.243 +		tmp->vm_next = NULL;
  19.244 +		anon_vma_link(tmp);
  19.245 +		file = tmp->vm_file;
  19.246 +		if (file) {
  19.247 +			struct inode *inode = file->f_dentry->d_inode;
  19.248 +			get_file(file);
  19.249 +			if (tmp->vm_flags & VM_DENYWRITE)
  19.250 +				atomic_dec(&inode->i_writecount);
  19.251 +      
  19.252 +			/* insert tmp into the share list, just after mpnt */
  19.253 +			spin_lock(&file->f_mapping->i_mmap_lock);
  19.254 +			tmp->vm_truncate_count = mpnt->vm_truncate_count;
  19.255 +			flush_dcache_mmap_lock(file->f_mapping);
  19.256 +			vma_prio_tree_add(tmp, mpnt);
  19.257 +			flush_dcache_mmap_unlock(file->f_mapping);
  19.258 +			spin_unlock(&file->f_mapping->i_mmap_lock);
  19.259 +		}
  19.260 +
  19.261 +		/*
  19.262 +		 * Link in the new vma and copy the page table entries.
  19.263 +		 */
  19.264 +		*pprev = tmp;
  19.265 +		pprev = &tmp->vm_next;
  19.266 +
  19.267 +		__vma_link_rb(mm, tmp, rb_link, rb_parent);
  19.268 +		rb_link = &tmp->vm_rb.rb_right;
  19.269 +		rb_parent = &tmp->vm_rb;
  19.270 +
  19.271 +		mm->map_count++;
  19.272 +		retval = copy_page_range(mm, oldmm, mpnt);
  19.273 +
  19.274 +		if (tmp->vm_ops && tmp->vm_ops->open)
  19.275 +			tmp->vm_ops->open(tmp);
  19.276 +
  19.277 +		if (retval)
  19.278 +			goto out;
  19.279 +	}
  19.280 +#ifdef arch_dup_mmap
  19.281 +	arch_dup_mmap(mm, oldmm);
  19.282 +#endif
  19.283 +	retval = 0;
  19.284 +out:
  19.285 +	up_write(&mm->mmap_sem);
  19.286 +	flush_tlb_mm(oldmm);
  19.287 +	up_write(&oldmm->mmap_sem);
  19.288 +	return retval;
  19.289 +fail_nomem_policy:
  19.290 +	kmem_cache_free(vm_area_cachep, tmp);
  19.291 +fail_nomem:
  19.292 +	retval = -ENOMEM;
  19.293 +	vm_unacct_memory(charge);
  19.294 +	goto out;
  19.295 +}
  19.296 +
  19.297 +static inline int mm_alloc_pgd(struct mm_struct * mm)
  19.298 +{
  19.299 +	mm->pgd = pgd_alloc(mm);
  19.300 +	if (unlikely(!mm->pgd))
  19.301 +		return -ENOMEM;
  19.302 +	return 0;
  19.303 +}
  19.304 +
  19.305 +static inline void mm_free_pgd(struct mm_struct * mm)
  19.306 +{
  19.307 +	pgd_free(mm->pgd);
  19.308 +}
  19.309 +#else
  19.310 +#define dup_mmap(mm, oldmm)	(0)
  19.311 +#define mm_alloc_pgd(mm)	(0)
  19.312 +#define mm_free_pgd(mm)
  19.313 +#endif /* CONFIG_MMU */
  19.314 +
  19.315 + __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
  19.316 +
  19.317 +#define allocate_mm()	(kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
  19.318 +#define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
  19.319 +
  19.320 +#include <linux/init_task.h>
  19.321 +
  19.322 +static struct mm_struct * mm_init(struct mm_struct * mm)
  19.323 +{
  19.324 +	atomic_set(&mm->mm_users, 1);
  19.325 +	atomic_set(&mm->mm_count, 1);
  19.326 +	init_rwsem(&mm->mmap_sem);
  19.327 +	INIT_LIST_HEAD(&mm->mmlist);
  19.328 +	mm->core_waiters = 0;
  19.329 +	mm->nr_ptes = 0;
  19.330 +	set_mm_counter(mm, file_rss, 0);
  19.331 +	set_mm_counter(mm, anon_rss, 0);
  19.332 +	spin_lock_init(&mm->page_table_lock);
  19.333 +	rwlock_init(&mm->ioctx_list_lock);
  19.334 +	mm->ioctx_list = NULL;
  19.335 +	mm->free_area_cache = TASK_UNMAPPED_BASE;
  19.336 +	mm->cached_hole_size = ~0UL;
  19.337 +
  19.338 +	if (likely(!mm_alloc_pgd(mm))) {
  19.339 +		mm->def_flags = 0;
  19.340 +		return mm;
  19.341 +	}
  19.342 +	free_mm(mm);
  19.343 +	return NULL;
  19.344 +}
  19.345 +
  19.346 +/*
  19.347 + * Allocate and initialize an mm_struct.
  19.348 + */
  19.349 +struct mm_struct * mm_alloc(void)
  19.350 +{
  19.351 +	struct mm_struct * mm;
  19.352 +
  19.353 +	mm = allocate_mm();
  19.354 +	if (mm) {
  19.355 +		memset(mm, 0, sizeof(*mm));
  19.356 +		mm = mm_init(mm);
  19.357 +	}
  19.358 +	return mm;
  19.359 +}
  19.360 +
  19.361 +/*
  19.362 + * Called when the last reference to the mm
  19.363 + * is dropped: either by a lazy thread or by
  19.364 + * mmput. Free the page directory and the mm.
  19.365 + */
  19.366 +void fastcall __mmdrop(struct mm_struct *mm)
  19.367 +{
  19.368 +	BUG_ON(mm == &init_mm);
  19.369 +	mm_free_pgd(mm);
  19.370 +	destroy_context(mm);
  19.371 +	free_mm(mm);
  19.372 +}
  19.373 +
  19.374 +/*
  19.375 + * Decrement the use count and release all resources for an mm.
  19.376 + */
  19.377 +void mmput(struct mm_struct *mm)
  19.378 +{
  19.379 +	if (atomic_dec_and_test(&mm->mm_users)) {
  19.380 +		exit_aio(mm);
  19.381 +		exit_mmap(mm);
  19.382 +		if (!list_empty(&mm->mmlist)) {
  19.383 +			spin_lock(&mmlist_lock);
  19.384 +			list_del(&mm->mmlist);
  19.385 +			spin_unlock(&mmlist_lock);
  19.386 +		}
  19.387 +		put_swap_token(mm);
  19.388 +		mmdrop(mm);
  19.389 +	}
  19.390 +}
  19.391 +EXPORT_SYMBOL_GPL(mmput);
  19.392 +
  19.393 +/**
  19.394 + * get_task_mm - acquire a reference to the task's mm
  19.395 + *
  19.396 + * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
  19.397 + * this kernel workthread has transiently adopted a user mm with use_mm,
  19.398 + * to do its AIO) is not set and if so returns a reference to it, after
  19.399 + * bumping up the use count.  User must release the mm via mmput()
  19.400 + * after use.  Typically used by /proc and ptrace.
  19.401 + */
  19.402 +struct mm_struct *get_task_mm(struct task_struct *task)
  19.403 +{
  19.404 +	struct mm_struct *mm;
  19.405 +
  19.406 +	task_lock(task);
  19.407 +	mm = task->mm;
  19.408 +	if (mm) {
  19.409 +		if (task->flags & PF_BORROWED_MM)
  19.410 +			mm = NULL;
  19.411 +		else
  19.412 +			atomic_inc(&mm->mm_users);
  19.413 +	}
  19.414 +	task_unlock(task);
  19.415 +	return mm;
  19.416 +}
  19.417 +EXPORT_SYMBOL_GPL(get_task_mm);
  19.418 +
  19.419 +/* Please note the differences between mmput and mm_release.
  19.420 + * mmput is called whenever we stop holding onto a mm_struct,
  19.421 + * error success whatever.
  19.422 + *
  19.423 + * mm_release is called after a mm_struct has been removed
  19.424 + * from the current process.
  19.425 + *
  19.426 + * This difference is important for error handling, when we
  19.427 + * only half set up a mm_struct for a new process and need to restore
  19.428 + * the old one.  Because we mmput the new mm_struct before
  19.429 + * restoring the old one. . .
  19.430 + * Eric Biederman 10 January 1998
  19.431 + */
  19.432 +void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  19.433 +{
  19.434 +	struct completion *vfork_done = tsk->vfork_done;
  19.435 +
  19.436 +	/* Get rid of any cached register state */
  19.437 +	deactivate_mm(tsk, mm);
  19.438 +
  19.439 +	/* notify parent sleeping on vfork() */
  19.440 +	if (vfork_done) {
  19.441 +		tsk->vfork_done = NULL;
  19.442 +		complete(vfork_done);
  19.443 +	}
  19.444 +	if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
  19.445 +		u32 __user * tidptr = tsk->clear_child_tid;
  19.446 +		tsk->clear_child_tid = NULL;
  19.447 +
  19.448 +		/*
  19.449 +		 * We don't check the error code - if userspace has
  19.450 +		 * not set up a proper pointer then tough luck.
  19.451 +		 */
  19.452 +		put_user(0, tidptr);
  19.453 +		sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
  19.454 +	}
  19.455 +}
  19.456 +
  19.457 +/*
  19.458 + * Allocate a new mm structure and copy contents from the
  19.459 + * mm structure of the passed in task structure.
  19.460 + */
  19.461 +static struct mm_struct *dup_mm(struct task_struct *tsk)
  19.462 +{
  19.463 +	struct mm_struct *mm, *oldmm = current->mm;
  19.464 +	int err;
  19.465 +
  19.466 +	if (!oldmm)
  19.467 +		return NULL;
  19.468 +
  19.469 +	mm = allocate_mm();
  19.470 +	if (!mm)
  19.471 +		goto fail_nomem;
  19.472 +
  19.473 +	memcpy(mm, oldmm, sizeof(*mm));
  19.474 +
  19.475 +	if (!mm_init(mm))
  19.476 +		goto fail_nomem;
  19.477 +
  19.478 +	if (init_new_context(tsk, mm))
  19.479 +		goto fail_nocontext;
  19.480 +
  19.481 +	err = dup_mmap(mm, oldmm);
  19.482 +	if (err)
  19.483 +		goto free_pt;
  19.484 +
  19.485 +	mm->hiwater_rss = get_mm_rss(mm);
  19.486 +	mm->hiwater_vm = mm->total_vm;
  19.487 +
  19.488 +	return mm;
  19.489 +
  19.490 +free_pt:
  19.491 +	mmput(mm);
  19.492 +
  19.493 +fail_nomem:
  19.494 +	return NULL;
  19.495 +
  19.496 +fail_nocontext:
  19.497 +	/*
  19.498 +	 * If init_new_context() failed, we cannot use mmput() to free the mm
  19.499 +	 * because it calls destroy_context()
  19.500 +	 */
  19.501 +	mm_free_pgd(mm);
  19.502 +	free_mm(mm);
  19.503 +	return NULL;
  19.504 +}
  19.505 +
  19.506 +static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
  19.507 +{
  19.508 +	struct mm_struct * mm, *oldmm;
  19.509 +	int retval;
  19.510 +
  19.511 +	tsk->min_flt = tsk->maj_flt = 0;
  19.512 +	tsk->nvcsw = tsk->nivcsw = 0;
  19.513 +
  19.514 +	tsk->mm = NULL;
  19.515 +	tsk->active_mm = NULL;
  19.516 +
  19.517 +	/*
  19.518 +	 * Are we cloning a kernel thread?
  19.519 +	 *
   19.520 +	 * We need to steal an active VM for that.
  19.521 +	 */
  19.522 +	oldmm = current->mm;
  19.523 +	if (!oldmm)
  19.524 +		return 0;
  19.525 +
  19.526 +	if (clone_flags & CLONE_VM) {
  19.527 +		atomic_inc(&oldmm->mm_users);
  19.528 +		mm = oldmm;
  19.529 +		goto good_mm;
  19.530 +	}
  19.531 +
  19.532 +	retval = -ENOMEM;
  19.533 +	mm = dup_mm(tsk);
  19.534 +	if (!mm)
  19.535 +		goto fail_nomem;
  19.536 +
  19.537 +good_mm:
  19.538 +	tsk->mm = mm;
  19.539 +	tsk->active_mm = mm;
  19.540 +	return 0;
  19.541 +
  19.542 +fail_nomem:
  19.543 +	return retval;
  19.544 +}
  19.545 +
  19.546 +static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
  19.547 +{
  19.548 +	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
  19.549 +	/* We don't need to lock fs - think why ;-) */
  19.550 +	if (fs) {
  19.551 +		atomic_set(&fs->count, 1);
  19.552 +		rwlock_init(&fs->lock);
  19.553 +		fs->umask = old->umask;
  19.554 +		read_lock(&old->lock);
  19.555 +		fs->rootmnt = mntget(old->rootmnt);
  19.556 +		fs->root = dget(old->root);
  19.557 +		fs->pwdmnt = mntget(old->pwdmnt);
  19.558 +		fs->pwd = dget(old->pwd);
  19.559 +		if (old->altroot) {
  19.560 +			fs->altrootmnt = mntget(old->altrootmnt);
  19.561 +			fs->altroot = dget(old->altroot);
  19.562 +		} else {
  19.563 +			fs->altrootmnt = NULL;
  19.564 +			fs->altroot = NULL;
  19.565 +		}
  19.566 +		read_unlock(&old->lock);
  19.567 +	}
  19.568 +	return fs;
  19.569 +}
  19.570 +
  19.571 +struct fs_struct *copy_fs_struct(struct fs_struct *old)
  19.572 +{
  19.573 +	return __copy_fs_struct(old);
  19.574 +}
  19.575 +
  19.576 +EXPORT_SYMBOL_GPL(copy_fs_struct);
  19.577 +
  19.578 +static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
  19.579 +{
  19.580 +	if (clone_flags & CLONE_FS) {
  19.581 +		atomic_inc(&current->fs->count);
  19.582 +		return 0;
  19.583 +	}
  19.584 +	tsk->fs = __copy_fs_struct(current->fs);
  19.585 +	if (!tsk->fs)
  19.586 +		return -ENOMEM;
  19.587 +	return 0;
  19.588 +}
  19.589 +
  19.590 +static int count_open_files(struct fdtable *fdt)
  19.591 +{
  19.592 +	int size = fdt->max_fdset;
  19.593 +	int i;
  19.594 +
  19.595 +	/* Find the last open fd */
  19.596 +	for (i = size/(8*sizeof(long)); i > 0; ) {
  19.597 +		if (fdt->open_fds->fds_bits[--i])
  19.598 +			break;
  19.599 +	}
  19.600 +	i = (i+1) * 8 * sizeof(long);
  19.601 +	return i;
  19.602 +}
  19.603 +
  19.604 +static struct files_struct *alloc_files(void)
  19.605 +{
  19.606 +	struct files_struct *newf;
  19.607 +	struct fdtable *fdt;
  19.608 +
  19.609 +	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
  19.610 +	if (!newf)
  19.611 +		goto out;
  19.612 +
  19.613 +	atomic_set(&newf->count, 1);
  19.614 +
  19.615 +	spin_lock_init(&newf->file_lock);
  19.616 +	fdt = &newf->fdtab;
  19.617 +	fdt->next_fd = 0;
  19.618 +	fdt->max_fds = NR_OPEN_DEFAULT;
  19.619 +	fdt->max_fdset = __FD_SETSIZE;
  19.620 +	fdt->close_on_exec = &newf->close_on_exec_init;
  19.621 +	fdt->open_fds = &newf->open_fds_init;
  19.622 +	fdt->fd = &newf->fd_array[0];
  19.623 +	INIT_RCU_HEAD(&fdt->rcu);
  19.624 +	fdt->free_files = NULL;
  19.625 +	fdt->next = NULL;
  19.626 +	rcu_assign_pointer(newf->fdt, fdt);
  19.627 +out:
  19.628 +	return newf;
  19.629 +}
  19.630 +
  19.631 +/*
  19.632 + * Allocate a new files structure and copy contents from the
  19.633 + * passed in files structure.
  19.634 + */
  19.635 +static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
  19.636 +{
  19.637 +	struct files_struct *newf;
  19.638 +	struct file **old_fds, **new_fds;
  19.639 +	int open_files, size, i, expand;
  19.640 +	struct fdtable *old_fdt, *new_fdt;
  19.641 +
  19.642 +	newf = alloc_files();
  19.643 +	if (!newf)
  19.644 +		goto out;
  19.645 +
  19.646 +	spin_lock(&oldf->file_lock);
  19.647 +	old_fdt = files_fdtable(oldf);
  19.648 +	new_fdt = files_fdtable(newf);
  19.649 +	size = old_fdt->max_fdset;
  19.650 +	open_files = count_open_files(old_fdt);
  19.651 +	expand = 0;
  19.652 +
  19.653 +	/*
  19.654 +	 * Check whether we need to allocate a larger fd array or fd set.
   19.655 +	 * Note: we're not a clone task, so the open count won't change.
  19.656 +	 */
  19.657 +	if (open_files > new_fdt->max_fdset) {
  19.658 +		new_fdt->max_fdset = 0;
  19.659 +		expand = 1;
  19.660 +	}
  19.661 +	if (open_files > new_fdt->max_fds) {
  19.662 +		new_fdt->max_fds = 0;
  19.663 +		expand = 1;
  19.664 +	}
  19.665 +
  19.666 +	/* if the old fdset gets grown now, we'll only copy up to "size" fds */
  19.667 +	if (expand) {
  19.668 +		spin_unlock(&oldf->file_lock);
  19.669 +		spin_lock(&newf->file_lock);
  19.670 +		*errorp = expand_files(newf, open_files-1);
  19.671 +		spin_unlock(&newf->file_lock);
  19.672 +		if (*errorp < 0)
  19.673 +			goto out_release;
  19.674 +		new_fdt = files_fdtable(newf);
  19.675 +		/*
   19.676 +		 * Reacquire the oldf lock and a pointer to its fd table;
   19.677 +		 * it may have grown into a new, bigger fd table in the
   19.678 +		 * meantime, so we need the latest pointer.
  19.679 +		 */
  19.680 +		spin_lock(&oldf->file_lock);
  19.681 +		old_fdt = files_fdtable(oldf);
  19.682 +	}
  19.683 +
  19.684 +	old_fds = old_fdt->fd;
  19.685 +	new_fds = new_fdt->fd;
  19.686 +
  19.687 +	memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8);
  19.688 +	memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8);
  19.689 +
  19.690 +	for (i = open_files; i != 0; i--) {
  19.691 +		struct file *f = *old_fds++;
  19.692 +		if (f) {
  19.693 +			get_file(f);
  19.694 +		} else {
  19.695 +			/*
  19.696 +			 * The fd may be claimed in the fd bitmap but not yet
  19.697 +			 * instantiated in the files array if a sibling thread
  19.698 +			 * is partway through open().  So make sure that this
  19.699 +			 * fd is available to the new process.
  19.700 +			 */
  19.701 +			FD_CLR(open_files - i, new_fdt->open_fds);
  19.702 +		}
  19.703 +		rcu_assign_pointer(*new_fds++, f);
  19.704 +	}
  19.705 +	spin_unlock(&oldf->file_lock);
  19.706 +
  19.707 +	/* compute the remainder to be cleared */
  19.708 +	size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
  19.709 +
   19.710 +	/* This is long-word aligned, thus could use an optimized version */
  19.711 +	memset(new_fds, 0, size); 
  19.712 +
  19.713 +	if (new_fdt->max_fdset > open_files) {
  19.714 +		int left = (new_fdt->max_fdset-open_files)/8;
  19.715 +		int start = open_files / (8 * sizeof(unsigned long));
  19.716 +
  19.717 +		memset(&new_fdt->open_fds->fds_bits[start], 0, left);
  19.718 +		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
  19.719 +	}
  19.720 +
  19.721 +out:
  19.722 +	return newf;
  19.723 +
  19.724 +out_release:
  19.725 +	free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
  19.726 +	free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
  19.727 +	free_fd_array(new_fdt->fd, new_fdt->max_fds);
  19.728 +	kmem_cache_free(files_cachep, newf);
  19.729 +	return NULL;
  19.730 +}
  19.731 +
  19.732 +static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
  19.733 +{
  19.734 +	struct files_struct *oldf, *newf;
  19.735 +	int error = 0;
  19.736 +
  19.737 +	/*
  19.738 +	 * A background process may not have any files ...
  19.739 +	 */
  19.740 +	oldf = current->files;
  19.741 +	if (!oldf)
  19.742 +		goto out;
  19.743 +
  19.744 +	if (clone_flags & CLONE_FILES) {
  19.745 +		atomic_inc(&oldf->count);
  19.746 +		goto out;
  19.747 +	}
  19.748 +
  19.749 +	/*
  19.750 +	 * Note: we may be using current for both targets (See exec.c)
  19.751 +	 * This works because we cache current->files (old) as oldf. Don't
  19.752 +	 * break this.
  19.753 +	 */
  19.754 +	tsk->files = NULL;
  19.755 +	error = -ENOMEM;
  19.756 +	newf = dup_fd(oldf, &error);
  19.757 +	if (!newf)
  19.758 +		goto out;
  19.759 +
  19.760 +	tsk->files = newf;
  19.761 +	error = 0;
  19.762 +out:
  19.763 +	return error;
  19.764 +}
  19.765 +
  19.766 +/*
  19.767 + *	Helper to unshare the files of the current task.
  19.768 + *	We don't want to expose copy_files internals to
  19.769 + *	the exec layer of the kernel.
  19.770 + */
  19.771 +
  19.772 +int unshare_files(void)
  19.773 +{
  19.774 +	struct files_struct *files  = current->files;
  19.775 +	int rc;
  19.776 +
  19.777 +	if(!files)
  19.778 +		BUG();
  19.779 +
   19.780 +	/* This can race, but the race merely causes us to make a copy
   19.781 +	   when we don't need to, and then drop it. */
  19.782 +	if(atomic_read(&files->count) == 1)
  19.783 +	{
  19.784 +		atomic_inc(&files->count);
  19.785 +		return 0;
  19.786 +	}
  19.787 +	rc = copy_files(0, current);
  19.788 +	if(rc)
  19.789 +		current->files = files;
  19.790 +	return rc;
  19.791 +}
  19.792 +
  19.793 +EXPORT_SYMBOL(unshare_files);
  19.794 +
  19.795 +void sighand_free_cb(struct rcu_head *rhp)
  19.796 +{
  19.797 +	struct sighand_struct *sp;
  19.798 +
  19.799 +	sp = container_of(rhp, struct sighand_struct, rcu);
  19.800 +	kmem_cache_free(sighand_cachep, sp);
  19.801 +}
  19.802 +
  19.803 +static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
  19.804 +{
  19.805 +	struct sighand_struct *sig;
  19.806 +
  19.807 +	if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
  19.808 +		atomic_inc(&current->sighand->count);
  19.809 +		return 0;
  19.810 +	}
  19.811 +	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
  19.812 +	rcu_assign_pointer(tsk->sighand, sig);
  19.813 +	if (!sig)
  19.814 +		return -ENOMEM;
  19.815 +	spin_lock_init(&sig->siglock);
  19.816 +	atomic_set(&sig->count, 1);
  19.817 +	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
  19.818 +	return 0;
  19.819 +}
  19.820 +
  19.821 +static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
  19.822 +{
  19.823 +	struct signal_struct *sig;
  19.824 +	int ret;
  19.825 +
  19.826 +	if (clone_flags & CLONE_THREAD) {
  19.827 +		atomic_inc(&current->signal->count);
  19.828 +		atomic_inc(&current->signal->live);
  19.829 +		return 0;
  19.830 +	}
  19.831 +	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
  19.832 +	tsk->signal = sig;
  19.833 +	if (!sig)
  19.834 +		return -ENOMEM;
  19.835 +
  19.836 +	ret = copy_thread_group_keys(tsk);
  19.837 +	if (ret < 0) {
  19.838 +		kmem_cache_free(signal_cachep, sig);
  19.839 +		return ret;
  19.840 +	}
  19.841 +
  19.842 +	atomic_set(&sig->count, 1);
  19.843 +	atomic_set(&sig->live, 1);
  19.844 +	init_waitqueue_head(&sig->wait_chldexit);
  19.845 +	sig->flags = 0;
  19.846 +	sig->group_exit_code = 0;
  19.847 +	sig->group_exit_task = NULL;
  19.848 +	sig->group_stop_count = 0;
  19.849 +	sig->curr_target = NULL;
  19.850 +	init_sigpending(&sig->shared_pending);
  19.851 +	INIT_LIST_HEAD(&sig->posix_timers);
  19.852 +
  19.853 +	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
  19.854 +	sig->it_real_incr.tv64 = 0;
  19.855 +	sig->real_timer.function = it_real_fn;
  19.856 +	sig->real_timer.data = tsk;
  19.857 +
  19.858 +	sig->it_virt_expires = cputime_zero;
  19.859 +	sig->it_virt_incr = cputime_zero;
  19.860 +	sig->it_prof_expires = cputime_zero;
  19.861 +	sig->it_prof_incr = cputime_zero;
  19.862 +
  19.863 +	sig->leader = 0;	/* session leadership doesn't inherit */
  19.864 +	sig->tty_old_pgrp = 0;
  19.865 +
  19.866 +	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
  19.867 +	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
  19.868 +	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
  19.869 +	sig->sched_time = 0;
  19.870 +	INIT_LIST_HEAD(&sig->cpu_timers[0]);
  19.871 +	INIT_LIST_HEAD(&sig->cpu_timers[1]);
  19.872 +	INIT_LIST_HEAD(&sig->cpu_timers[2]);
  19.873 +
  19.874 +	task_lock(current->group_leader);
  19.875 +	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
  19.876 +	task_unlock(current->group_leader);
  19.877 +
  19.878 +	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
  19.879 +		/*
  19.880 +		 * New sole thread in the process gets an expiry time
  19.881 +		 * of the whole CPU time limit.
  19.882 +		 */
  19.883 +		tsk->it_prof_expires =
  19.884 +			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
  19.885 +	}
  19.886 +
  19.887 +	return 0;
  19.888 +}
  19.889 +
  19.890 +static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
  19.891 +{
  19.892 +	unsigned long new_flags = p->flags;
  19.893 +
  19.894 +	new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
  19.895 +	new_flags |= PF_FORKNOEXEC;
  19.896 +	if (!(clone_flags & CLONE_PTRACE))
  19.897 +		p->ptrace = 0;
  19.898 +	p->flags = new_flags;
  19.899 +}
  19.900 +
  19.901 +asmlinkage long sys_set_tid_address(int __user *tidptr)
  19.902 +{
  19.903 +	current->clear_child_tid = tidptr;
  19.904 +
  19.905 +	return current->pid;
  19.906 +}
  19.907 +
  19.908 +/*
  19.909 + * This creates a new process as a copy of the old one,
  19.910 + * but does not actually start it yet.
  19.911 + *
  19.912 + * It copies the registers, and all the appropriate
  19.913 + * parts of the process environment (as per the clone
  19.914 + * flags). The actual kick-off is left to the caller.
  19.915 + */
  19.916 +static task_t *copy_process(unsigned long clone_flags,
  19.917 +				 unsigned long stack_start,
  19.918 +				 struct pt_regs *regs,
  19.919 +				 unsigned long stack_size,
  19.920 +				 int __user *parent_tidptr,
  19.921 +				 int __user *child_tidptr,
  19.922 +				 int pid)
  19.923 +{
  19.924 +	int retval;
  19.925 +	struct task_struct *p = NULL;
  19.926 +
  19.927 +	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
  19.928 +		return ERR_PTR(-EINVAL);
  19.929 +
  19.930 +	/*
  19.931 +	 * Thread groups must share signals as well, and detached threads
  19.932 +	 * can only be started up within the thread group.
  19.933 +	 */
  19.934 +	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
  19.935 +		return ERR_PTR(-EINVAL);
  19.936 +
  19.937 +	/*
  19.938 +	 * Shared signal handlers imply shared VM. By way of the above,
  19.939 +	 * thread groups also imply shared VM. Blocking this case allows
  19.940 +	 * for various simplifications in other code.
  19.941 +	 */
  19.942 +	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
  19.943 +		return ERR_PTR(-EINVAL);
  19.944 +
  19.945 +	retval = security_task_create(clone_flags);
  19.946 +	if (retval)
  19.947 +		goto fork_out;
  19.948 +
  19.949 +	retval = -ENOMEM;
  19.950 +	p = dup_task_struct(current);
  19.951 +	if (!p)
  19.952 +		goto fork_out;
  19.953 +
  19.954 +	retval = -EAGAIN;
  19.955 +	if (atomic_read(&p->user->processes) >=
  19.956 +			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
  19.957 +		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
  19.958 +				p->user != &root_user)
  19.959 +			goto bad_fork_free;
  19.960 +	}
  19.961 +
  19.962 +	atomic_inc(&p->user->__count);
  19.963 +	atomic_inc(&p->user->processes);
  19.964 +	get_group_info(p->group_info);
  19.965 +
  19.966 +	/*
  19.967 +	 * If multiple threads are within copy_process(), then this check
   19.968 +	 * triggers too late. This doesn't hurt; the check is only there
  19.969 +	 * to stop root fork bombs.
  19.970 +	 */
  19.971 +	if (nr_threads >= max_threads)
  19.972 +		goto bad_fork_cleanup_count;
  19.973 +
  19.974 +	if (!try_module_get(task_thread_info(p)->exec_domain->module))
  19.975 +		goto bad_fork_cleanup_count;
  19.976 +
  19.977 +	if (p->binfmt && !try_module_get(p->binfmt->module))
  19.978 +		goto bad_fork_cleanup_put_domain;
  19.979 +
  19.980 +	p->did_exec = 0;
  19.981 +	copy_flags(clone_flags, p);
  19.982 +	p->pid = pid;
  19.983 +	retval = -EFAULT;
  19.984 +	if (clone_flags & CLONE_PARENT_SETTID)
  19.985 +		if (put_user(p->pid, parent_tidptr))
  19.986 +			goto bad_fork_cleanup;
  19.987 +
  19.988 +	p->proc_dentry = NULL;
  19.989 +
  19.990 +	INIT_LIST_HEAD(&p->children);
  19.991 +	INIT_LIST_HEAD(&p->sibling);
  19.992 +	p->vfork_done = NULL;
  19.993 +	spin_lock_init(&p->alloc_lock);
  19.994 +	spin_lock_init(&p->proc_lock);
  19.995 +
  19.996 +	clear_tsk_thread_flag(p, TIF_SIGPENDING);
  19.997 +	init_sigpending(&p->pending);
  19.998 +
  19.999 +	p->utime = cputime_zero;
 19.1000 +	p->stime = cputime_zero;
 19.1001 + 	p->sched_time = 0;
 19.1002 +	p->rchar = 0;		/* I/O counter: bytes read */
 19.1003 +	p->wchar = 0;		/* I/O counter: bytes written */
 19.1004 +	p->syscr = 0;		/* I/O counter: read syscalls */
 19.1005 +	p->syscw = 0;		/* I/O counter: write syscalls */
 19.1006 +	acct_clear_integrals(p);
 19.1007 +
 19.1008 + 	p->it_virt_expires = cputime_zero;
 19.1009 +	p->it_prof_expires = cputime_zero;
 19.1010 + 	p->it_sched_expires = 0;
 19.1011 + 	INIT_LIST_HEAD(&p->cpu_timers[0]);
 19.1012 + 	INIT_LIST_HEAD(&p->cpu_timers[1]);
 19.1013 + 	INIT_LIST_HEAD(&p->cpu_timers[2]);
 19.1014 +
 19.1015 +	p->lock_depth = -1;		/* -1 = no lock */
 19.1016 +	do_posix_clock_monotonic_gettime(&p->start_time);
 19.1017 +	p->security = NULL;
 19.1018 +	p->io_context = NULL;
 19.1019 +	p->io_wait = NULL;
 19.1020 +	p->audit_context = NULL;
 19.1021 +	cpuset_fork(p);
 19.1022 +#ifdef CONFIG_NUMA
 19.1023 + 	p->mempolicy = mpol_copy(p->mempolicy);
 19.1024 + 	if (IS_ERR(p->mempolicy)) {
 19.1025 + 		retval = PTR_ERR(p->mempolicy);
 19.1026 + 		p->mempolicy = NULL;
 19.1027 + 		goto bad_fork_cleanup_cpuset;
 19.1028 + 	}
 19.1029 +#endif
 19.1030 +
 19.1031 +#ifdef CONFIG_DEBUG_MUTEXES
 19.1032 +	p->blocked_on = NULL; /* not blocked yet */
 19.1033 +#endif
 19.1034 +
 19.1035 +	p->tgid = p->pid;
 19.1036 +	if (clone_flags & CLONE_THREAD)
 19.1037 +		p->tgid = current->tgid;
 19.1038 +
 19.1039 +	if ((retval = security_task_alloc(p)))
 19.1040 +		goto bad_fork_cleanup_policy;
 19.1041 +	if ((retval = audit_alloc(p)))
 19.1042 +		goto bad_fork_cleanup_security;
 19.1043 +	/* copy all the process information */
 19.1044 +	if ((retval = copy_semundo(clone_flags, p)))
 19.1045 +		goto bad_fork_cleanup_audit;
 19.1046 +	if ((retval = copy_files(clone_flags, p)))
 19.1047 +		goto bad_fork_cleanup_semundo;
 19.1048 +	if ((retval = copy_fs(clone_flags, p)))
 19.1049 +		goto bad_fork_cleanup_files;
 19.1050 +	if ((retval = copy_sighand(clone_flags, p)))
 19.1051 +		goto bad_fork_cleanup_fs;
 19.1052 +	if ((retval = copy_signal(clone_flags, p)))
 19.1053 +		goto bad_fork_cleanup_sighand;
 19.1054 +	if ((retval = copy_mm(clone_flags, p)))
 19.1055 +		goto bad_fork_cleanup_signal;
 19.1056 +	if ((retval = copy_keys(clone_flags, p)))
 19.1057 +		goto bad_fork_cleanup_mm;
 19.1058 +	if ((retval = copy_namespace(clone_flags, p)))
 19.1059 +		goto bad_fork_cleanup_keys;
 19.1060 +	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
 19.1061 +	if (retval)
 19.1062 +		goto bad_fork_cleanup_namespace;
 19.1063 +
 19.1064 +	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 19.1065 +	/*
 19.1066 +	 * Clear TID on mm_release()?
 19.1067 +	 */
 19.1068 +	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
 19.1069 +
 19.1070 +	/*
 19.1071 +	 * sigaltstack should be cleared when sharing the same VM
 19.1072 +	 */
 19.1073 +	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
 19.1074 +		p->sas_ss_sp = p->sas_ss_size = 0;
 19.1075 +
 19.1076 +	/*
 19.1077 +	 * Syscall tracing should be turned off in the child regardless
 19.1078 +	 * of CLONE_PTRACE.
 19.1079 +	 */
 19.1080 +	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 19.1081 +#ifdef TIF_SYSCALL_EMU
 19.1082 +	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
 19.1083 +#endif
 19.1084 +
  19.1085 +	/* Our parent's execution domain becomes the current domain.
  19.1086 +	   These must match for thread signalling to apply */
 19.1087 +	   
 19.1088 +	p->parent_exec_id = p->self_exec_id;
 19.1089 +
 19.1090 +	/* ok, now we should be set up.. */
 19.1091 +	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
 19.1092 +	p->pdeath_signal = 0;
 19.1093 +	p->exit_state = 0;
 19.1094 +
 19.1095 +	/*
 19.1096 +	 * Ok, make it visible to the rest of the system.
  19.1097 +	 * We don't wake it up yet.
 19.1098 +	 */
 19.1099 +	p->group_leader = p;
 19.1100 +	INIT_LIST_HEAD(&p->ptrace_children);
 19.1101 +	INIT_LIST_HEAD(&p->ptrace_list);
 19.1102 +
 19.1103 +	/* Perform scheduler related setup. Assign this task to a CPU. */
 19.1104 +	sched_fork(p, clone_flags);
 19.1105 +
 19.1106 +	/* Need tasklist lock for parent etc handling! */
 19.1107 +	write_lock_irq(&tasklist_lock);
 19.1108 +
 19.1109 +	/*
 19.1110 +	 * The task hasn't been attached yet, so its cpus_allowed mask will
 19.1111 +	 * not be changed, nor will its assigned CPU.
 19.1112 +	 *
  19.1113 +	 * The cpus_allowed mask of the parent may have changed after it was
  19.1114 +	 * first copied - so re-copy it here, then check the child's CPU to
  19.1115 +	 * ensure it is on a valid CPU (and if not, just force it back to the
  19.1116 +	 * parent's CPU).  This avoids a lot of nasty races.
 19.1117 +	 */
 19.1118 +	p->cpus_allowed = current->cpus_allowed;
 19.1119 +	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
 19.1120 +			!cpu_online(task_cpu(p))))
 19.1121 +		set_task_cpu(p, smp_processor_id());
 19.1122 +
 19.1123 +	/*
  19.1124 +	 * Check for pending SIGKILL! The new thread should not be allowed
  19.1125 +	 * to slip out of an OOM kill (or a normal SIGKILL).
 19.1126 +	 */
 19.1127 +	if (sigismember(&current->pending.signal, SIGKILL)) {
 19.1128 +		write_unlock_irq(&tasklist_lock);
 19.1129 +		retval = -EINTR;
 19.1130 +		goto bad_fork_cleanup_namespace;
 19.1131 +	}
 19.1132 +
 19.1133 +	/* CLONE_PARENT re-uses the old parent */
 19.1134 +	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
 19.1135 +		p->real_parent = current->real_parent;
 19.1136 +	else
 19.1137 +		p->real_parent = current;
 19.1138 +	p->parent = p->real_parent;
 19.1139 +
 19.1140 +	spin_lock(&current->sighand->siglock);
 19.1141 +	if (clone_flags & CLONE_THREAD) {
 19.1142 +		/*
 19.1143 +		 * Important: if an exit-all has been started then
 19.1144 +		 * do not create this new thread - the whole thread
 19.1145 +		 * group is supposed to exit anyway.
 19.1146 +		 */
 19.1147 +		if (current->signal->flags & SIGNAL_GROUP_EXIT) {
 19.1148 +			spin_unlock(&current->sighand->siglock);
 19.1149 +			write_unlock_irq(&tasklist_lock);
 19.1150 +			retval = -EAGAIN;
 19.1151 +			goto bad_fork_cleanup_namespace;
 19.1152 +		}
 19.1153 +		p->group_leader = current->group_leader;
 19.1154 +
 19.1155 +		if (current->signal->group_stop_count > 0) {
 19.1156 +			/*
 19.1157 +			 * There is an all-stop in progress for the group.
 19.1158 +			 * We ourselves will stop as soon as we check signals.
 19.1159 +			 * Make the new thread part of that group stop too.
 19.1160 +			 */
 19.1161 +			current->signal->group_stop_count++;
 19.1162 +			set_tsk_thread_flag(p, TIF_SIGPENDING);
 19.1163 +		}
 19.1164 +
 19.1165 +		if (!cputime_eq(current->signal->it_virt_expires,
 19.1166 +				cputime_zero) ||
 19.1167 +		    !cputime_eq(current->signal->it_prof_expires,
 19.1168 +				cputime_zero) ||
 19.1169 +		    current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
 19.1170 +		    !list_empty(&current->signal->cpu_timers[0]) ||
 19.1171 +		    !list_empty(&current->signal->cpu_timers[1]) ||
 19.1172 +		    !list_empty(&current->signal->cpu_timers[2])) {
 19.1173 +			/*
 19.1174 +			 * Have child wake up on its first tick to check
 19.1175 +			 * for process CPU timers.
 19.1176 +			 */
 19.1177 +			p->it_prof_expires = jiffies_to_cputime(1);
 19.1178 +		}
 19.1179 +	}
 19.1180 +
 19.1181 +	/*
 19.1182 +	 * inherit ioprio
 19.1183 +	 */
 19.1184 +	p->ioprio = current->ioprio;
 19.1185 +
 19.1186 +	SET_LINKS(p);
 19.1187 +	if (unlikely(p->ptrace & PT_PTRACED))
 19.1188 +		__ptrace_link(p, current->parent);
 19.1189 +
 19.1190 +	if (thread_group_leader(p)) {
 19.1191 +		p->signal->tty = current->signal->tty;
 19.1192 +		p->signal->pgrp = process_group(current);
 19.1193 +		p->signal->session = current->signal->session;
 19.1194 +		attach_pid(p, PIDTYPE_PGID, process_group(p));
 19.1195 +		attach_pid(p, PIDTYPE_SID, p->signal->session);
 19.1196 +		if (p->pid)
 19.1197 +			__get_cpu_var(process_counts)++;
 19.1198 +	}
 19.1199 +	attach_pid(p, PIDTYPE_TGID, p->tgid);
 19.1200 +	attach_pid(p, PIDTYPE_PID, p->pid);
 19.1201 +
 19.1202 +	nr_threads++;
 19.1203 +	total_forks++;
 19.1204 +	spin_unlock(&current->sighand->siglock);
 19.1205 +	write_unlock_irq(&tasklist_lock);
 19.1206 +	proc_fork_connector(p);
 19.1207 +	return p;
 19.1208 +
 19.1209 +bad_fork_cleanup_namespace:
 19.1210 +	exit_namespace(p);
 19.1211 +bad_fork_cleanup_keys:
 19.1212 +	exit_keys(p);
 19.1213 +bad_fork_cleanup_mm:
 19.1214 +	if (p->mm)
 19.1215 +		mmput(p->mm);
 19.1216 +bad_fork_cleanup_signal:
 19.1217 +	exit_signal(p);
 19.1218 +bad_fork_cleanup_sighand:
 19.1219 +	exit_sighand(p);
 19.1220 +bad_fork_cleanup_fs:
 19.1221 +	exit_fs(p); /* blocking */
 19.1222 +bad_fork_cleanup_files:
 19.1223 +	exit_files(p); /* blocking */
 19.1224 +bad_fork_cleanup_semundo:
 19.1225 +	exit_sem(p);
 19.1226 +bad_fork_cleanup_audit:
 19.1227 +	audit_free(p);
 19.1228 +bad_fork_cleanup_security:
 19.1229 +	security_task_free(p);
 19.1230 +bad_fork_cleanup_policy:
 19.1231 +#ifdef CONFIG_NUMA
 19.1232 +	mpol_free(p->mempolicy);
 19.1233 +bad_fork_cleanup_cpuset:
 19.1234 +#endif
 19.1235 +	cpuset_exit(p);
 19.1236 +bad_fork_cleanup:
 19.1237 +	if (p->binfmt)
 19.1238 +		module_put(p->binfmt->module);
 19.1239 +bad_fork_cleanup_put_domain:
 19.1240 +	module_put(task_thread_info(p)->exec_domain->module);
 19.1241 +bad_fork_cleanup_count:
 19.1242 +	put_group_info(p->group_info);
 19.1243 +	atomic_dec(&p->user->processes);
 19.1244 +	free_uid(p->user);
 19.1245 +bad_fork_free:
 19.1246 +	free_task(p);
 19.1247 +fork_out:
 19.1248 +	return ERR_PTR(retval);
 19.1249 +}
 19.1250 +
 19.1251 +struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
 19.1252 +{
 19.1253 +	memset(regs, 0, sizeof(struct pt_regs));
 19.1254 +	return regs;
 19.1255 +}
 19.1256 +
 19.1257 +task_t * __devinit fork_idle(int cpu)
 19.1258 +{
 19.1259 +	task_t *task;
 19.1260 +	struct pt_regs regs;
 19.1261 +
 19.1262 +	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
 19.1263 +	if (!task)
 19.1264 +		return ERR_PTR(-ENOMEM);
 19.1265 +	init_idle(task, cpu);
 19.1266 +	unhash_process(task);
 19.1267 +	return task;
 19.1268 +}
 19.1269 +
 19.1270 +static inline int fork_traceflag (unsigned clone_flags)
 19.1271 +{
 19.1272 +	if (clone_flags & CLONE_UNTRACED)
 19.1273 +		return 0;
 19.1274 +	else if (clone_flags & CLONE_VFORK) {
 19.1275 +		if (current->ptrace & PT_TRACE_VFORK)
 19.1276 +			return PTRACE_EVENT_VFORK;
 19.1277 +	} else if ((clone_flags & CSIGNAL) != SIGCHLD) {
 19.1278 +		if (current->ptrace & PT_TRACE_CLONE)
 19.1279 +			return PTRACE_EVENT_CLONE;
 19.1280 +	} else if (current->ptrace & PT_TRACE_FORK)
 19.1281 +		return PTRACE_EVENT_FORK;
 19.1282 +
 19.1283 +	return 0;
 19.1284 +}
 19.1285 +
 19.1286 +/*
 19.1287 + *  Ok, this is the main fork-routine.
 19.1288 + *
 19.1289 + * It copies the process, and if successful kick-starts
 19.1290 + * it and waits for it to finish using the VM if required.
 19.1291 + */
 19.1292 +long do_fork(unsigned long clone_flags,
 19.1293 +	      unsigned long stack_start,
 19.1294 +	      struct pt_regs *regs,
 19.1295 +	      unsigned long stack_size,
 19.1296 +	      int __user *parent_tidptr,
 19.1297 +	      int __user *child_tidptr)
 19.1298 +{
 19.1299 +	struct task_struct *p;
 19.1300 +	int trace = 0;
 19.1301 +	long pid = alloc_pidmap();
 19.1302 +
 19.1303 +	if (pid < 0)
 19.1304 +		return -EAGAIN;
 19.1305 +	if (unlikely(current->ptrace)) {
 19.1306 +		trace = fork_traceflag (clone_flags);
 19.1307 +		if (trace)
 19.1308 +			clone_flags |= CLONE_PTRACE;
 19.1309 +	}
 19.1310 +
 19.1311 +	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
 19.1312 +	/*
  19.1313 +	 * Do this prior to waking up the new thread - the thread pointer
 19.1314 +	 * might get invalid after that point, if the thread exits quickly.
 19.1315 +	 */
 19.1316 +	if (!IS_ERR(p)) {
 19.1317 +		struct completion vfork;
 19.1318 +
 19.1319 +		if (clone_flags & CLONE_VFORK) {
 19.1320 +			p->vfork_done = &vfork;
 19.1321 +			init_completion(&vfork);
 19.1322 +		}
 19.1323 +
 19.1324 +		if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
 19.1325 +			/*
 19.1326 +			 * We'll start up with an immediate SIGSTOP.
 19.1327 +			 */
 19.1328 +			sigaddset(&p->pending.signal, SIGSTOP);
 19.1329 +			set_tsk_thread_flag(p, TIF_SIGPENDING);
 19.1330 +		}
 19.1331 +
 19.1332 +		if (!(clone_flags & CLONE_STOPPED))
 19.1333 +			wake_up_new_task(p, clone_flags);
 19.1334 +		else
 19.1335 +			p->state = TASK_STOPPED;
 19.1336 +
 19.1337 +		if (unlikely (trace)) {
 19.1338 +			current->ptrace_message = pid;
 19.1339 +			ptrace_notify ((trace << 8) | SIGTRAP);
 19.1340 +		}
 19.1341 +
 19.1342 +		if (clone_flags & CLONE_VFORK) {
 19.1343 +			wait_for_completion(&vfork);
 19.1344 +			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
 19.1345 +				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
 19.1346 +		}
 19.1347 +	} else {
 19.1348 +		free_pidmap(pid);
 19.1349 +		pid = PTR_ERR(p);
 19.1350 +	}
 19.1351 +	return pid;
 19.1352 +}
 19.1353 +
 19.1354 +#ifndef ARCH_MIN_MMSTRUCT_ALIGN
 19.1355 +#define ARCH_MIN_MMSTRUCT_ALIGN 0
 19.1356 +#endif
 19.1357 +
 19.1358 +void __init proc_caches_init(void)
 19.1359 +{
 19.1360 +	sighand_cachep = kmem_cache_create("sighand_cache",
 19.1361 +			sizeof(struct sighand_struct), 0,
 19.1362 +			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 19.1363 +	signal_cachep = kmem_cache_create("signal_cache",
 19.1364 +			sizeof(struct signal_struct), 0,
 19.1365 +			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 19.1366 +	files_cachep = kmem_cache_create("files_cache", 
 19.1367 +			sizeof(struct files_struct), 0,
 19.1368 +			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 19.1369 +	fs_cachep = kmem_cache_create("fs_cache", 
 19.1370 +			sizeof(struct fs_struct), 0,
 19.1371 +			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 19.1372 +	vm_area_cachep = kmem_cache_create("vm_area_struct",
 19.1373 +			sizeof(struct vm_area_struct), 0,
 19.1374 +			SLAB_PANIC, NULL, NULL);
 19.1375 +	mm_cachep = kmem_cache_create("mm_struct",
 19.1376 +			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
 19.1377 +			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 19.1378 +}
 19.1379 +
 19.1380 +
 19.1381 +/*
 19.1382 + * Check constraints on flags passed to the unshare system call and
 19.1383 + * force unsharing of additional process context as appropriate.
 19.1384 + */
 19.1385 +static inline void check_unshare_flags(unsigned long *flags_ptr)
 19.1386 +{
 19.1387 +	/*
 19.1388 +	 * If unsharing a thread from a thread group, must also
 19.1389 +	 * unshare vm.
 19.1390 +	 */
 19.1391 +	if (*flags_ptr & CLONE_THREAD)
 19.1392 +		*flags_ptr |= CLONE_VM;
 19.1393 +
 19.1394 +	/*
 19.1395 +	 * If unsharing vm, must also unshare signal handlers.
 19.1396 +	 */
 19.1397 +	if (*flags_ptr & CLONE_VM)
 19.1398 +		*flags_ptr |= CLONE_SIGHAND;
 19.1399 +
 19.1400 +	/*
 19.1401 +	 * If unsharing signal handlers and the task was created
  19.1402 +	 * using CLONE_THREAD, then the thread group must be unshared too.
  19.1403 +	 */
 19.1404 +	if ((*flags_ptr & CLONE_SIGHAND) &&
 19.1405 +	    (atomic_read(&current->signal->count) > 1))
 19.1406 +		*flags_ptr |= CLONE_THREAD;
 19.1407 +
 19.1408 +	/*
 19.1409 +	 * If unsharing namespace, must also unshare filesystem information.
 19.1410 +	 */
 19.1411 +	if (*flags_ptr & CLONE_NEWNS)
 19.1412 +		*flags_ptr |= CLONE_FS;
 19.1413 +}
 19.1414 +
 19.1415 +/*
 19.1416 + * Unsharing of tasks created with CLONE_THREAD is not supported yet
 19.1417 + */
 19.1418 +static int unshare_thread(unsigned long unshare_flags)
 19.1419 +{
 19.1420 +	if (unshare_flags & CLONE_THREAD)
 19.1421 +		return -EINVAL;
 19.1422 +
 19.1423 +	return 0;
 19.1424 +}
 19.1425 +
 19.1426 +/*
 19.1427 + * Unshare the filesystem structure if it is being shared
 19.1428 + */
 19.1429 +static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 19.1430 +{
 19.1431 +	struct fs_struct *fs = current->fs;
 19.1432 +
 19.1433 +	if ((unshare_flags & CLONE_FS) &&
 19.1434 +	    (fs && atomic_read(&fs->count) > 1)) {
 19.1435 +		*new_fsp = __copy_fs_struct(current->fs);
 19.1436 +		if (!*new_fsp)
 19.1437 +			return -ENOMEM;
 19.1438 +	}
 19.1439 +
 19.1440 +	return 0;
 19.1441 +}
 19.1442 +
 19.1443 +/*
 19.1444 + * Unshare the namespace structure if it is being shared
 19.1445 + */
 19.1446 +static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
 19.1447 +{
 19.1448 +	struct namespace *ns = current->namespace;
 19.1449 +
 19.1450 +	if ((unshare_flags & CLONE_NEWNS) &&
 19.1451 +	    (ns && atomic_read(&ns->count) > 1)) {
 19.1452 +		if (!capable(CAP_SYS_ADMIN))
 19.1453 +			return -EPERM;
 19.1454 +
 19.1455 +		*new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
 19.1456 +		if (!*new_nsp)
 19.1457 +			return -ENOMEM;
 19.1458 +	}
 19.1459 +
 19.1460 +	return 0;
 19.1461 +}
 19.1462 +
 19.1463 +/*
 19.1464 + * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
 19.1465 + * supported yet
 19.1466 + */
 19.1467 +static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
 19.1468 +{
 19.1469 +	struct sighand_struct *sigh = current->sighand;
 19.1470 +
 19.1471 +	if ((unshare_flags & CLONE_SIGHAND) &&
 19.1472 +	    (sigh && atomic_read(&sigh->count) > 1))
 19.1473 +		return -EINVAL;
 19.1474 +	else
 19.1475 +		return 0;
 19.1476 +}
 19.1477 +
 19.1478 +/*
 19.1479 + * Unshare vm if it is being shared
 19.1480 + */
 19.1481 +static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
 19.1482 +{
 19.1483 +	struct mm_struct *mm = current->mm;
 19.1484 +
 19.1485 +	if ((unshare_flags & CLONE_VM) &&
 19.1486 +	    (mm && atomic_read(&mm->mm_users) > 1)) {
 19.1487 +		return -EINVAL;
 19.1488 +	}
 19.1489 +
 19.1490 +	return 0;
 19.1491 +}
 19.1492 +
 19.1493 +/*
 19.1494 + * Unshare file descriptor table if it is being shared
 19.1495 + */
 19.1496 +static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
 19.1497 +{
 19.1498 +	struct files_struct *fd = current->files;
 19.1499 +	int error = 0;
 19.1500 +
 19.1501 +	if ((unshare_flags & CLONE_FILES) &&
 19.1502 +	    (fd && atomic_read(&fd->count) > 1)) {
 19.1503 +		*new_fdp = dup_fd(fd, &error);
 19.1504 +		if (!*new_fdp)
 19.1505 +			return error;
 19.1506 +	}
 19.1507 +
 19.1508 +	return 0;
 19.1509 +}
 19.1510 +
 19.1511 +/*
 19.1512 + * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
 19.1513 + * supported yet
 19.1514 + */
 19.1515 +static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
 19.1516 +{
 19.1517 +	if (unshare_flags & CLONE_SYSVSEM)
 19.1518 +		return -EINVAL;
 19.1519 +
 19.1520 +	return 0;
 19.1521 +}
 19.1522 +
 19.1523 +/*
 19.1524 + * unshare allows a process to 'unshare' part of the process
 19.1525 + * context which was originally shared using clone.  copy_*
 19.1526 + * functions used by do_fork() cannot be used here directly
 19.1527 + * because they modify an inactive task_struct that is being
 19.1528 + * constructed. Here we are modifying the current, active,
 19.1529 + * task_struct.
 19.1530 + */
 19.1531 +asmlinkage long sys_unshare(unsigned long unshare_flags)
 19.1532 +{
 19.1533 +	int err = 0;
 19.1534 +	struct fs_struct *fs, *new_fs = NULL;
 19.1535 +	struct namespace *ns, *new_ns = NULL;
 19.1536 +	struct sighand_struct *sigh, *new_sigh = NULL;
 19.1537 +	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 19.1538 +	struct files_struct *fd, *new_fd = NULL;
 19.1539 +	struct sem_undo_list *new_ulist = NULL;
 19.1540 +
 19.1541 +	check_unshare_flags(&unshare_flags);
 19.1542 +
 19.1543 +	if ((err = unshare_thread(unshare_flags)))
 19.1544 +		goto bad_unshare_out;
 19.1545 +	if ((err = unshare_fs(unshare_flags, &new_fs)))
 19.1546 +		goto bad_unshare_cleanup_thread;
 19.1547 +	if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
 19.1548 +		goto bad_unshare_cleanup_fs;
 19.1549 +	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
 19.1550 +		goto bad_unshare_cleanup_ns;
 19.1551 +	if ((err = unshare_vm(unshare_flags, &new_mm)))
 19.1552 +		goto bad_unshare_cleanup_sigh;
 19.1553 +	if ((err = unshare_fd(unshare_flags, &new_fd)))
 19.1554 +		goto bad_unshare_cleanup_vm;
 19.1555 +	if ((err = unshare_semundo(unshare_flags, &new_ulist)))
 19.1556 +		goto bad_unshare_cleanup_fd;
 19.1557 +
 19.1558 +	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
 19.1559 +
 19.1560 +		task_lock(current);
 19.1561 +
 19.1562 +		if (new_fs) {
 19.1563 +			fs = current->fs;
 19.1564 +			current->fs = new_fs;
 19.1565 +			new_fs = fs;
 19.1566 +		}
 19.1567 +
 19.1568 +		if (new_ns) {
 19.1569 +			ns = current->namespace;
 19.1570 +			current->namespace = new_ns;
 19.1571 +			new_ns = ns;
 19.1572 +		}
 19.1573 +
 19.1574 +		if (new_sigh) {
 19.1575 +			sigh = current->sighand;
 19.1576 +			rcu_assign_pointer(current->sighand, new_sigh);
 19.1577 +			new_sigh = sigh;
 19.1578 +		}
 19.1579 +
 19.1580 +		if (new_mm) {
 19.1581 +			mm = current->mm;
 19.1582 +			active_mm = current->active_mm;
 19.1583 +			current->mm = new_mm;
 19.1584 +			current->active_mm = new_mm;
 19.1585 +			activate_mm(active_mm, new_mm);
 19.1586 +			new_mm = mm;
 19.1587 +		}
 19.1588 +
 19.1589 +		if (new_fd) {
 19.1590 +			fd = current->files;
 19.1591 +			current->files = new_fd;
 19.1592 +			new_fd = fd;
 19.1593 +		}
 19.1594 +
 19.1595 +		task_unlock(current);
 19.1596 +	}
 19.1597 +
 19.1598 +bad_unshare_cleanup_fd:
 19.1599 +	if (new_fd)
 19.1600 +		put_files_struct(new_fd);
 19.1601 +
 19.1602 +bad_unshare_cleanup_vm:
 19.1603 +	if (new_mm)
 19.1604 +		mmput(new_mm);
 19.1605 +
 19.1606 +bad_unshare_cleanup_sigh:
 19.1607 +	if (new_sigh)
 19.1608 +		if (atomic_dec_and_test(&new_sigh->count))
 19.1609 +			kmem_cache_free(sighand_cachep, new_sigh);
 19.1610 +
 19.1611 +bad_unshare_cleanup_ns:
 19.1612 +	if (new_ns)
 19.1613 +		put_namespace(new_ns);
 19.1614 +
 19.1615 +bad_unshare_cleanup_fs:
 19.1616 +	if (new_fs)
 19.1617 +		put_fs_struct(new_fs);
 19.1618 +
 19.1619 +bad_unshare_cleanup_thread:
 19.1620 +bad_unshare_out:
 19.1621 +	return err;
 19.1622 +}
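
The sys_unshare() entry point added above only swaps out the pieces of process
context that are actually shared, and the cleanup labels then release whichever
old copies were swapped out. A minimal, hypothetical userspace sketch of how the
call is typically exercised follows; it is not part of this changeset and assumes
a libc that exposes the unshare(2) wrapper and the CLONE_* flags via <sched.h>
(otherwise the raw syscall would have to be invoked directly).

    #define _GNU_SOURCE
    #include <sched.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* Ask for a private copy of the fd table; unshare_fd() above only
         * duplicates it (via dup_fd()) if it is actually shared. */
        if (unshare(CLONE_FILES) == -1) {
            perror("unshare(CLONE_FILES)");
            return 1;
        }

        /* Descriptors opened from here on are invisible to former sharers. */
        int fd = open("/tmp/unshare-example", O_CREAT | O_WRONLY, 0600);
        if (fd != -1)
            close(fd);
        return 0;
    }
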
    20.1 --- a/tools/console/daemon/io.c	Thu Jun 15 10:02:53 2006 -0600
    20.2 +++ b/tools/console/daemon/io.c	Thu Jun 15 10:23:57 2006 -0600
    20.3 @@ -24,8 +24,8 @@
    20.4  #include "io.h"
    20.5  #include <xenctrl.h>
    20.6  #include <xs.h>
    20.7 -#include <xen/linux/evtchn.h>
    20.8  #include <xen/io/console.h>
    20.9 +#include <xenctrl.h>
   20.10  
   20.11  #include <malloc.h>
   20.12  #include <stdlib.h>
   20.13 @@ -36,7 +36,6 @@
   20.14  #include <unistd.h>
   20.15  #include <termios.h>
   20.16  #include <stdarg.h>
   20.17 -#include <sys/ioctl.h>
   20.18  #include <sys/mman.h>
   20.19  
   20.20  #define MAX(a, b) (((a) > (b)) ? (a) : (b))
   20.21 @@ -64,19 +63,12 @@ struct domain
   20.22  	char *conspath;
   20.23  	int ring_ref;
   20.24  	evtchn_port_t local_port;
   20.25 -	int evtchn_fd;
   20.26 +	int xce_handle;
   20.27  	struct xencons_interface *interface;
   20.28  };
   20.29  
   20.30  static struct domain *dom_head;
   20.31  
   20.32 -static void evtchn_notify(struct domain *dom)
   20.33 -{
   20.34 -	struct ioctl_evtchn_notify notify;
   20.35 -	notify.port = dom->local_port;
   20.36 -	(void)ioctl(dom->evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
   20.37 -}
   20.38 -
   20.39  static void buffer_append(struct domain *dom)
   20.40  {
   20.41  	struct buffer *buffer = &dom->buffer;
   20.42 @@ -106,7 +98,7 @@ static void buffer_append(struct domain 
   20.43  
   20.44  	mb();
   20.45  	intf->out_cons = cons;
   20.46 -	evtchn_notify(dom);
   20.47 +	xc_evtchn_notify(dom->xce_handle, dom->local_port);
   20.48  
   20.49  	if (buffer->max_capacity &&
   20.50  	    buffer->size > buffer->max_capacity) {
   20.51 @@ -234,7 +226,6 @@ int xs_gather(struct xs_handle *xs, cons
   20.52  static int domain_create_ring(struct domain *dom)
   20.53  {
   20.54  	int err, remote_port, ring_ref, rc;
   20.55 -	struct ioctl_evtchn_bind_interdomain bind;
   20.56  
   20.57  	err = xs_gather(xs, dom->conspath,
   20.58  			"ring-ref", "%u", &ring_ref,
   20.59 @@ -258,24 +249,24 @@ static int domain_create_ring(struct dom
   20.60  	}
   20.61  
   20.62  	dom->local_port = -1;
   20.63 -	if (dom->evtchn_fd != -1)
   20.64 -		close(dom->evtchn_fd);
   20.65 +	if (dom->xce_handle != -1)
   20.66 +		xc_evtchn_close(dom->xce_handle);
   20.67  
   20.68  	/* Opening evtchn independently for each console is a bit
   20.69  	 * wasteful, but that's how the code is structured... */
   20.70 -	dom->evtchn_fd = open("/dev/xen/evtchn", O_RDWR);
   20.71 -	if (dom->evtchn_fd == -1) {
   20.72 +	dom->xce_handle = xc_evtchn_open();
   20.73 +	if (dom->xce_handle == -1) {
   20.74  		err = errno;
   20.75  		goto out;
   20.76  	}
   20.77   
   20.78 -	bind.remote_domain = dom->domid;
   20.79 -	bind.remote_port   = remote_port;
   20.80 -	rc = ioctl(dom->evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
   20.81 +	rc = xc_evtchn_bind_interdomain(dom->xce_handle,
   20.82 +		dom->domid, remote_port);
   20.83 +
   20.84  	if (rc == -1) {
   20.85  		err = errno;
   20.86 -		close(dom->evtchn_fd);
   20.87 -		dom->evtchn_fd = -1;
   20.88 +		xc_evtchn_close(dom->xce_handle);
   20.89 +		dom->xce_handle = -1;
   20.90  		goto out;
   20.91  	}
   20.92  	dom->local_port = rc;
   20.93 @@ -285,8 +276,8 @@ static int domain_create_ring(struct dom
   20.94  
   20.95  		if (dom->tty_fd == -1) {
   20.96  			err = errno;
   20.97 -			close(dom->evtchn_fd);
   20.98 -			dom->evtchn_fd = -1;
   20.99 +			xc_evtchn_close(dom->xce_handle);
  20.100 +			dom->xce_handle = -1;
  20.101  			dom->local_port = -1;
  20.102  			goto out;
  20.103  		}
  20.104 @@ -344,7 +335,7 @@ static struct domain *create_domain(int 
  20.105  	dom->ring_ref = -1;
  20.106  	dom->local_port = -1;
  20.107  	dom->interface = NULL;
  20.108 -	dom->evtchn_fd = -1;
  20.109 +	dom->xce_handle = -1;
  20.110  
  20.111  	if (!watch_domain(dom, true))
  20.112  		goto out;
  20.113 @@ -409,9 +400,9 @@ static void shutdown_domain(struct domai
  20.114  	if (d->interface != NULL)
  20.115  		munmap(d->interface, getpagesize());
  20.116  	d->interface = NULL;
  20.117 -	if (d->evtchn_fd != -1)
  20.118 -		close(d->evtchn_fd);
  20.119 -	d->evtchn_fd = -1;
  20.120 +	if (d->xce_handle != -1)
  20.121 +		xc_evtchn_close(d->xce_handle);
  20.122 +	d->xce_handle = -1;
  20.123  	cleanup_domain(d);
  20.124  }
  20.125  
  20.126 @@ -483,7 +474,7 @@ static void handle_tty_read(struct domai
  20.127  		}
  20.128  		wmb();
  20.129  		intf->in_prod = prod;
  20.130 -		evtchn_notify(dom);
  20.131 +		xc_evtchn_notify(dom->xce_handle, dom->local_port);
  20.132  	} else {
  20.133  		close(dom->tty_fd);
  20.134  		dom->tty_fd = -1;
  20.135 @@ -516,14 +507,14 @@ static void handle_tty_write(struct doma
  20.136  
  20.137  static void handle_ring_read(struct domain *dom)
  20.138  {
  20.139 -	evtchn_port_t v;
  20.140 +	evtchn_port_t port;
  20.141  
  20.142 -	if (!read_sync(dom->evtchn_fd, &v, sizeof(v)))
  20.143 +	if ((port = xc_evtchn_pending(dom->xce_handle)) == -1)
  20.144  		return;
  20.145  
  20.146  	buffer_append(dom);
  20.147  
  20.148 -	(void)write_sync(dom->evtchn_fd, &v, sizeof(v));
  20.149 +	(void)xc_evtchn_unmask(dom->xce_handle, port);
  20.150  }
  20.151  
  20.152  static void handle_xs(void)
  20.153 @@ -566,9 +557,10 @@ void handle_io(void)
  20.154  		max_fd = MAX(xs_fileno(xs), max_fd);
  20.155  
  20.156  		for (d = dom_head; d; d = d->next) {
  20.157 -			if (d->evtchn_fd != -1) {
  20.158 -				FD_SET(d->evtchn_fd, &readfds);
  20.159 -				max_fd = MAX(d->evtchn_fd, max_fd);
  20.160 +			if (d->xce_handle != -1) {
  20.161 +				int evtchn_fd = xc_evtchn_fd(d->xce_handle);
  20.162 +				FD_SET(evtchn_fd, &readfds);
  20.163 +				max_fd = MAX(evtchn_fd, max_fd);
  20.164  			}
  20.165  
  20.166  			if (d->tty_fd != -1) {
  20.167 @@ -588,8 +580,8 @@ void handle_io(void)
  20.168  
  20.169  		for (d = dom_head; d; d = n) {
  20.170  			n = d->next;
  20.171 -			if (d->evtchn_fd != -1 &&
  20.172 -			    FD_ISSET(d->evtchn_fd, &readfds))
  20.173 +			if (d->xce_handle != -1 &&
  20.174 +			    FD_ISSET(xc_evtchn_fd(d->xce_handle), &readfds))
  20.175  				handle_ring_read(d);
  20.176  
  20.177  			if (d->tty_fd != -1) {
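
The io.c changes above replace the raw /dev/xen/evtchn ioctls with the new libxc
xc_evtchn_* helpers (added in tools/libxc/xc_linux.c further down). Below is a
condensed, hypothetical sketch of the resulting event-loop pattern: open a handle,
bind to the remote console port, select() on the handle's fd, then consume and
unmask the pending port. The domain id and remote port are placeholders that a
real caller, like domain_create_ring() above, reads from xenstore.

    #include <xenctrl.h>
    #include <sys/select.h>

    static void poll_console_evtchn(int domid, evtchn_port_t remote_port)
    {
        int xce = xc_evtchn_open();
        if (xce == -1)
            return;

        evtchn_port_t local = xc_evtchn_bind_interdomain(xce, domid, remote_port);
        if (local == -1) {
            xc_evtchn_close(xce);
            return;
        }

        int fd = xc_evtchn_fd(xce);          /* pollable fd behind the handle */
        fd_set readfds;
        FD_ZERO(&readfds);
        FD_SET(fd, &readfds);

        if (select(fd + 1, &readfds, NULL, NULL, NULL) > 0 &&
            FD_ISSET(fd, &readfds)) {
            evtchn_port_t port = xc_evtchn_pending(xce);  /* which port fired */
            if (port != -1) {
                /* ... drain the console ring here ... */
                xc_evtchn_unmask(xce, port);              /* re-arm the port   */
                xc_evtchn_notify(xce, local);             /* kick the frontend */
            }
        }

        xc_evtchn_close(xce);
    }
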
    21.1 --- a/tools/console/daemon/utils.c	Thu Jun 15 10:02:53 2006 -0600
    21.2 +++ b/tools/console/daemon/utils.c	Thu Jun 15 10:23:57 2006 -0600
    21.3 @@ -39,32 +39,6 @@
    21.4  struct xs_handle *xs;
    21.5  int xc;
    21.6  
    21.7 -bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
    21.8 -{
    21.9 -	size_t offset = 0;
   21.10 -	ssize_t len;
   21.11 -
   21.12 -	while (offset < size) {
   21.13 -		if (do_read) {
   21.14 -			len = read(fd, data + offset, size - offset);
   21.15 -		} else {
   21.16 -			len = write(fd, data + offset, size - offset);
   21.17 -		}
   21.18 -
   21.19 -		if (len < 1) {
   21.20 -			if (len == -1 && (errno == EAGAIN || errno == EINTR)) {
   21.21 -				continue;
   21.22 -			} else {
   21.23 -				return false;
   21.24 -			}
   21.25 -		} else {
   21.26 -			offset += len;
   21.27 -		}
   21.28 -	}
   21.29 -
   21.30 -	return true;
   21.31 -}
   21.32 -
   21.33  static void child_exit(int sig)
   21.34  {
   21.35  	while (waitpid(-1, NULL, WNOHANG) > 0);
    22.1 --- a/tools/console/daemon/utils.h	Thu Jun 15 10:02:53 2006 -0600
    22.2 +++ b/tools/console/daemon/utils.h	Thu Jun 15 10:23:57 2006 -0600
    22.3 @@ -29,9 +29,6 @@
    22.4  
    22.5  void daemonize(const char *pidfile);
    22.6  bool xen_setup(void);
    22.7 -#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true)
    22.8 -#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
    22.9 -bool _read_write_sync(int fd, void *data, size_t size, bool do_read);
   22.10  
   22.11  extern struct xs_handle *xs;
   22.12  extern int xc;
    23.1 --- a/tools/ioemu/sdl.c	Thu Jun 15 10:02:53 2006 -0600
    23.2 +++ b/tools/ioemu/sdl.c	Thu Jun 15 10:23:57 2006 -0600
    23.3 @@ -376,13 +376,18 @@ static void sdl_update_caption(void)
    23.4  
    23.5  static void sdl_hide_cursor(void)
    23.6  {
    23.7 -    SDL_SetCursor(sdl_cursor_hidden);
    23.8 +    if (kbd_mouse_is_absolute()) {
    23.9 +	SDL_ShowCursor(1);
   23.10 +	SDL_SetCursor(sdl_cursor_hidden);
   23.11 +    } else {
   23.12 +	SDL_ShowCursor(0);
   23.13 +    }
   23.14  }
   23.15  
   23.16  static void sdl_show_cursor(void)
   23.17  {
   23.18      if (!kbd_mouse_is_absolute()) {
   23.19 -	SDL_SetCursor(sdl_cursor_normal);
   23.20 +	SDL_ShowCursor(1);
   23.21      }
   23.22  }
   23.23  
    24.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Thu Jun 15 10:02:53 2006 -0600
    24.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Thu Jun 15 10:23:57 2006 -0600
    24.3 @@ -47,11 +47,9 @@
    24.4  
    24.5  #include <limits.h>
    24.6  #include <fcntl.h>
    24.7 -#include <sys/ioctl.h>
    24.8  
    24.9  #include <xenctrl.h>
   24.10  #include <xen/hvm/ioreq.h>
   24.11 -#include <xen/linux/evtchn.h>
   24.12  
   24.13  #include "cpu.h"
   24.14  #include "exec-all.h"
   24.15 @@ -123,7 +121,7 @@ target_ulong cpu_get_phys_page_debug(CPU
   24.16  }
   24.17  
   24.18  //the evtchn fd for polling
   24.19 -int evtchn_fd = -1;
   24.20 +int xce_handle = -1;
   24.21  
   24.22  //which vcpu we are serving
   24.23  int send_vcpu = 0;
   24.24 @@ -170,11 +168,10 @@ static ioreq_t* __cpu_get_ioreq(int vcpu
   24.25  //retval--the number of ioreq packet
   24.26  static ioreq_t* cpu_get_ioreq(void)
   24.27  {
   24.28 -    int i, rc;
   24.29 +    int i;
   24.30      evtchn_port_t port;
   24.31  
   24.32 -    rc = read(evtchn_fd, &port, sizeof(port));
   24.33 -    if ( rc == sizeof(port) ) {
   24.34 +    if ( (port = xc_evtchn_pending(xce_handle)) != -1 ) {
   24.35          for ( i = 0; i < vcpus; i++ )
   24.36              if ( shared_page->vcpu_iodata[i].dm_eport == port )
   24.37                  break;
   24.38 @@ -184,8 +181,7 @@ static ioreq_t* cpu_get_ioreq(void)
   24.39              exit(1);
   24.40          }
   24.41  
   24.42 -        // unmask the wanted port again
   24.43 -        write(evtchn_fd, &port, sizeof(port));
   24.44 +	xc_evtchn_unmask(xce_handle, port);
   24.45  
   24.46          //get the io packet from shared memory
   24.47          send_vcpu = i;
   24.48 @@ -436,6 +432,7 @@ int main_loop(void)
   24.49      extern int shutdown_requested;
   24.50      CPUState *env = global_env;
   24.51      int retval;
   24.52 +    int evtchn_fd = xc_evtchn_fd(xce_handle);
   24.53      extern void main_loop_wait(int);
   24.54  
   24.55      /* Watch stdin (fd 0) to see when it has input. */
   24.56 @@ -475,11 +472,9 @@ int main_loop(void)
   24.57          main_loop_wait(0);
   24.58  
   24.59          if (env->send_event) {
   24.60 -            struct ioctl_evtchn_notify notify;
   24.61 -
   24.62              env->send_event = 0;
   24.63 -            notify.port = shared_page->vcpu_iodata[send_vcpu].dm_eport;
   24.64 -            (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
   24.65 +            (void)xc_evtchn_notify(xce_handle,
   24.66 +                 shared_page->vcpu_iodata[send_vcpu].dm_eport);
   24.67          }
   24.68      }
   24.69      destroy_hvm_domain();
   24.70 @@ -511,7 +506,6 @@ static void qemu_hvm_reset(void *unused)
   24.71  CPUState * cpu_init()
   24.72  {
   24.73      CPUX86State *env;
   24.74 -    struct ioctl_evtchn_bind_interdomain bind;
   24.75      int i, rc;
   24.76  
   24.77      cpu_exec_init();
   24.78 @@ -523,21 +517,19 @@ CPUState * cpu_init()
   24.79  
   24.80      cpu_single_env = env;
   24.81  
   24.82 -    if (evtchn_fd != -1)//the evtchn has been opened by another cpu object
   24.83 +    if (xce_handle != -1)//the evtchn has been opened by another cpu object
   24.84          return NULL;
   24.85  
   24.86 -    //use nonblock reading not polling, may change in future.
   24.87 -    evtchn_fd = open("/dev/xen/evtchn", O_RDWR|O_NONBLOCK);
   24.88 -    if (evtchn_fd == -1) {
   24.89 +    xce_handle = xc_evtchn_open();
   24.90 +    if (xce_handle == -1) {
   24.91          fprintf(logfile, "open evtchn device error %d\n", errno);
   24.92          return NULL;
   24.93      }
   24.94  
   24.95      /* FIXME: how about if we overflow the page here? */
   24.96 -    bind.remote_domain = domid;
   24.97      for ( i = 0; i < vcpus; i++ ) {
   24.98 -        bind.remote_port = shared_page->vcpu_iodata[i].vp_eport;
   24.99 -        rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
  24.100 +        rc = xc_evtchn_bind_interdomain(xce_handle, domid,
  24.101 +            shared_page->vcpu_iodata[i].vp_eport);
  24.102          if ( rc == -1 ) {
  24.103              fprintf(logfile, "bind interdomain ioctl error %d\n", errno);
  24.104              return NULL;
    25.1 --- a/tools/libxc/xc_elf.h	Thu Jun 15 10:02:53 2006 -0600
    25.2 +++ b/tools/libxc/xc_elf.h	Thu Jun 15 10:23:57 2006 -0600
    25.3 @@ -170,13 +170,14 @@ typedef struct {
    25.4  #define EM_PARISC	15		/* HPPA */
    25.5  #define EM_SPARC32PLUS	18		/* Enhanced instruction set SPARC */
    25.6  #define EM_PPC		20		/* PowerPC */
    25.7 +#define EM_PPC64	21		/* PowerPC 64-bit */
    25.8  #define EM_ARM		40		/* Advanced RISC Machines ARM */
    25.9  #define EM_ALPHA	41		/* DEC ALPHA */
   25.10  #define EM_SPARCV9	43		/* SPARC version 9 */
   25.11  #define EM_ALPHA_EXP	0x9026		/* DEC ALPHA */
   25.12 +#define EM_IA_64	50		/* Intel Merced */
   25.13  #define EM_X86_64	62		/* AMD x86-64 architecture */
   25.14  #define EM_VAX		75		/* DEC VAX */
   25.15 -#define EM_NUM		15		/* number of machine types */
   25.16  
   25.17  /* Version */
   25.18  #define EV_NONE		0		/* Invalid */
    26.1 --- a/tools/libxc/xc_linux.c	Thu Jun 15 10:02:53 2006 -0600
    26.2 +++ b/tools/libxc/xc_linux.c	Thu Jun 15 10:23:57 2006 -0600
    26.3 @@ -103,6 +103,124 @@ int do_xen_hypercall(int xc_handle, priv
    26.4                        (unsigned long)hypercall);
    26.5  }
    26.6  
    26.7 +#define EVTCHN_DEV_NAME  "/dev/xen/evtchn"
    26.8 +#define EVTCHN_DEV_MAJOR 10
    26.9 +#define EVTCHN_DEV_MINOR 201
   26.10 +
   26.11 +int xc_evtchn_open(void)
   26.12 +{
   26.13 +    struct stat st;
   26.14 +    int fd;
   26.15 +
   26.16 +    /* Make sure any existing device file links to correct device. */
   26.17 +    if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
   26.18 +        (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
   26.19 +        (void)unlink(EVTCHN_DEV_NAME);
   26.20 +
   26.21 +reopen:
   26.22 +    if ( (fd = open(EVTCHN_DEV_NAME, O_RDWR)) == -1 )
   26.23 +    {
   26.24 +        if ( (errno == ENOENT) &&
   26.25 +            ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
   26.26 +            (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
   26.27 +            makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0) )
   26.28 +            goto reopen;
   26.29 +
   26.30 +        PERROR("Could not open event channel interface");
   26.31 +        return -1;
   26.32 +    }
   26.33 +
   26.34 +    return fd;
   26.35 +}
   26.36 +
   26.37 +int xc_evtchn_close(int xce_handle)
   26.38 +{
   26.39 +    return close(xce_handle);
   26.40 +}
   26.41 +
   26.42 +int xc_evtchn_fd(int xce_handle)
   26.43 +{
   26.44 +    return xce_handle;
   26.45 +}
   26.46 +
   26.47 +int xc_evtchn_notify(int xce_handle, evtchn_port_t port)
   26.48 +{
   26.49 +    struct ioctl_evtchn_notify notify;
   26.50 +
   26.51 +    notify.port = port;
   26.52 +
   26.53 +    return ioctl(xce_handle, IOCTL_EVTCHN_NOTIFY, &notify);
   26.54 +}
   26.55 +
   26.56 +evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
   26.57 +    evtchn_port_t remote_port)
   26.58 +{
   26.59 +    struct ioctl_evtchn_bind_interdomain bind;
   26.60 +
   26.61 +    bind.remote_domain = domid;
   26.62 +    bind.remote_port = remote_port;
   26.63 +
   26.64 +    return ioctl(xce_handle, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
   26.65 +}
   26.66 +
   26.67 +int xc_evtchn_unbind(int xce_handle, evtchn_port_t port)
   26.68 +{
   26.69 +    struct ioctl_evtchn_unbind unbind;
   26.70 +
   26.71 +    unbind.port = port;
   26.72 +
   26.73 +    return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind);
   26.74 +}
   26.75 +
   26.76 +evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq)
   26.77 +{
   26.78 +    struct ioctl_evtchn_bind_virq bind;
   26.79 +
   26.80 +    bind.virq = virq;
   26.81 +
   26.82 +    return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind);
   26.83 +}
   26.84 +
   26.85 +static int dorw(int fd, char *data, size_t size, int do_write)
   26.86 +{
   26.87 +    size_t offset = 0;
   26.88 +    ssize_t len;
   26.89 +
   26.90 +    while ( offset < size )
   26.91 +    {
   26.92 +        if (do_write)
   26.93 +            len = write(fd, data + offset, size - offset);
   26.94 +        else
   26.95 +            len = read(fd, data + offset, size - offset);
   26.96 +
   26.97 +        if ( len == -1 )
   26.98 +        {
   26.99 +             if ( errno == EINTR )
  26.100 +                 continue;
  26.101 +             return -1;
  26.102 +        }
  26.103 +
  26.104 +        offset += len;
  26.105 +    }
  26.106 +
  26.107 +    return 0;
  26.108 +}
  26.109 +
  26.110 +evtchn_port_t xc_evtchn_pending(int xce_handle)
  26.111 +{
  26.112 +    evtchn_port_t port;
  26.113 +
  26.114 +    if ( dorw(xce_handle, (char *)&port, sizeof(port), 0) == -1 )
  26.115 +        return -1;
  26.116 +
  26.117 +    return port;
  26.118 +}
  26.119 +
  26.120 +int xc_evtchn_unmask(int xce_handle, evtchn_port_t port)
  26.121 +{
  26.122 +    return dorw(xce_handle, (char *)&port, sizeof(port), 1);
  26.123 +}
  26.124 +
  26.125  /*
  26.126   * Local variables:
  26.127   * mode: C
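[Note] The xc_evtchn_* wrappers added above replace direct ioctl()/read()/write() access to /dev/xen/evtchn with a small library interface. A minimal caller sketch follows, assuming only the functions declared in this changeset; notify_remote(), remote_domid and remote_port are illustrative placeholders, not code from the tree.

    /* Minimal sketch: bind to a remote domain's port and notify it.  The
     * function and parameter names here are hypothetical. */
    #include <xenctrl.h>

    int notify_remote(int remote_domid, evtchn_port_t remote_port)
    {
        int xce = xc_evtchn_open();              /* handle onto /dev/xen/evtchn */
        evtchn_port_t local;

        if ( xce == -1 )
            return -1;

        /* Bind a new local port to the remote domain's port. */
        local = xc_evtchn_bind_interdomain(xce, remote_domid, remote_port);
        if ( local == (evtchn_port_t)-1 )
        {
            xc_evtchn_close(xce);
            return -1;
        }

        xc_evtchn_notify(xce, local);            /* kick the other end */
        xc_evtchn_unbind(xce, local);
        return xc_evtchn_close(xce);
    }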
    27.1 --- a/tools/libxc/xc_linux_restore.c	Thu Jun 15 10:02:53 2006 -0600
    27.2 +++ b/tools/libxc/xc_linux_restore.c	Thu Jun 15 10:23:57 2006 -0600
    27.3 @@ -456,6 +456,15 @@ int xc_linux_restore(int xc_handle, int 
    27.4          n+= j; /* crude stats */
    27.5      }
    27.6  
    27.7 +    /*
    27.8 +     * Ensure we flush all machphys updates before potential PAE-specific
    27.9 +     * reallocations below.
   27.10 +     */
   27.11 +    if (xc_finish_mmu_updates(xc_handle, mmu)) {
   27.12 +        ERR("Error doing finish_mmu_updates()");
   27.13 +        goto out;
   27.14 +    }
   27.15 +
   27.16      DPRINTF("Received all pages (%d races)\n", nraces);
   27.17  
   27.18      if ((pt_levels == 3) && !pae_extended_cr3) {
   27.19 @@ -550,15 +559,12 @@ int xc_linux_restore(int xc_handle, int 
   27.20              }
   27.21          }
   27.22  
   27.23 +        if (xc_finish_mmu_updates(xc_handle, mmu)) {
   27.24 +            ERR("Error doing finish_mmu_updates()");
   27.25 +            goto out;
   27.26 +        }
   27.27      }
   27.28  
   27.29 -
   27.30 -    if (xc_finish_mmu_updates(xc_handle, mmu)) {
   27.31 -        ERR("Error doing finish_mmu_updates()");
   27.32 -        goto out;
   27.33 -    }
   27.34 -
   27.35 -
   27.36      /*
   27.37       * Pin page tables. Do this after writing to them as otherwise Xen
   27.38       * will barf when doing the type-checking.
    28.1 --- a/tools/libxc/xc_load_elf.c	Thu Jun 15 10:02:53 2006 -0600
    28.2 +++ b/tools/libxc/xc_load_elf.c	Thu Jun 15 10:23:57 2006 -0600
    28.3 @@ -22,6 +22,24 @@ loadelfsymtab(
    28.4      const char *image, int xch, uint32_t dom, xen_pfn_t *parray,
    28.5      struct domain_setup_info *dsi);
    28.6  
    28.7 +/*
    28.8 + * Elf header attributes we require for each supported host platform.
    28.9 + * These are checked in parseelfimage().
   28.10 + */
   28.11 +#if defined(__ia64__)
   28.12 +#define ELFCLASS   ELFCLASS64
   28.13 +#define ELFDATA    ELFDATA2LSB
   28.14 +#define ELFMACHINE EM_IA_64
   28.15 +#elif defined(__i386__)
   28.16 +#define ELFCLASS   ELFCLASS32
   28.17 +#define ELFDATA    ELFDATA2LSB
   28.18 +#define ELFMACHINE EM_386
   28.19 +#elif defined(__x86_64__)
   28.20 +#define ELFCLASS   ELFCLASS64
   28.21 +#define ELFDATA    ELFDATA2LSB
   28.22 +#define ELFMACHINE EM_X86_64
   28.23 +#endif
   28.24 +
   28.25  int probe_elf(const char *image,
   28.26                unsigned long image_size,
   28.27                struct load_funcs *load_funcs)
   28.28 @@ -61,16 +79,10 @@ static int parseelfimage(const char *ima
   28.29          return -EINVAL;
   28.30      }
   28.31  
   28.32 -    if (
   28.33 -#if defined(__i386__)
   28.34 -        (ehdr->e_ident[EI_CLASS] != ELFCLASS32) ||
   28.35 -        (ehdr->e_machine != EM_386) ||
   28.36 -#elif defined(__x86_64__)
   28.37 -        (ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
   28.38 -        (ehdr->e_machine != EM_X86_64) ||
   28.39 -#endif
   28.40 -        (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) ||
   28.41 -        (ehdr->e_type != ET_EXEC) )
   28.42 +    if ( (ehdr->e_ident[EI_CLASS] != ELFCLASS) ||
   28.43 +         (ehdr->e_machine != ELFMACHINE) ||
   28.44 +         (ehdr->e_ident[EI_DATA] != ELFDATA) ||
   28.45 +         (ehdr->e_type != ET_EXEC) )
   28.46      {
   28.47          ERROR("Kernel not a Xen-compatible Elf image.");
   28.48          return -EINVAL;
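[Note] The ELFCLASS/ELFDATA/ELFMACHINE trio centralizes the per-platform header checks that parseelfimage() previously open-coded per architecture. A purely hypothetical sketch of how one further platform could slot into the same chain, reusing the EM_PPC64 constant this changeset adds to the ELF headers; no such port is part of this changeset, and ELFDATA2MSB is the standard big-endian ELF data encoding.

    /* Hypothetical extension only -- not part of this changeset.  A further
     * platform would follow the same pattern as the ia64/i386/x86_64 cases. */
    #elif defined(__powerpc64__)
    #define ELFCLASS   ELFCLASS64
    #define ELFDATA    ELFDATA2MSB      /* big-endian, unlike the LSB cases above */
    #define ELFMACHINE EM_PPC64
    #endif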
    29.1 --- a/tools/libxc/xenctrl.h	Thu Jun 15 10:02:53 2006 -0600
    29.2 +++ b/tools/libxc/xenctrl.h	Thu Jun 15 10:23:57 2006 -0600
    29.3 @@ -604,4 +604,58 @@ int xc_finish_mmu_updates(int xc_handle,
    29.4  
    29.5  int xc_acm_op(int xc_handle, int cmd, void *arg, size_t arg_size);
    29.6  
    29.7 +/*
    29.8 + * Return a handle to the event channel driver, or -1 on failure, in which case
    29.9 + * errno will be set appropriately.
   29.10 + */
   29.11 +int xc_evtchn_open(void);
   29.12 +
   29.13 +/*
   29.14 + * Close a handle previously allocated with xc_evtchn_open().
   29.15 + */
   29.16 +int xc_evtchn_close(int xce_handle);
   29.17 +
   29.18 +/*
   29.19 + * Return an fd that can be select()ed on for further calls to
   29.20 + * xc_evtchn_pending().
   29.21 + */
   29.22 +int xc_evtchn_fd(int xce_handle);
   29.23 +
   29.24 +/*
   29.25 + * Notify the given event channel. Returns -1 on failure, in which case
   29.26 + * errno will be set appropriately.
   29.27 + */
   29.28 +int xc_evtchn_notify(int xce_handle, evtchn_port_t port);
   29.29 +
   29.30 +/*
   29.31 + * Returns a new event port bound to the remote port for the given domain ID,
   29.32 + * or -1 on failure, in which case errno will be set appropriately.
   29.33 + */
   29.34 +evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid,
   29.35 +    evtchn_port_t remote_port);
   29.36 +
   29.37 +/*
   29.38 + * Unbind the given event channel. Returns -1 on failure, in which case errno
   29.39 + * will be set appropriately.
   29.40 + */
   29.41 +int xc_evtchn_unbind(int xce_handle, evtchn_port_t port);
   29.42 +
   29.43 +/*
   29.44 + * Bind an event channel to the given VIRQ. Returns the event channel bound to
   29.45 + * the VIRQ, or -1 on failure, in which case errno will be set appropriately.
   29.46 + */
   29.47 +evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq);
   29.48 +
   29.49 +/*
   29.50 + * Return the next event channel to become pending, or -1 on failure, in which
   29.51 + * case errno will be set appropriately.  
   29.52 + */
   29.53 +evtchn_port_t xc_evtchn_pending(int xce_handle);
   29.54 +
   29.55 +/*
   29.56 + * Unmask the given event channel. Returns -1 on failure, in which case errno
   29.57 + * will be set appropriately.
   29.58 + */
   29.59 +int xc_evtchn_unmask(int xce_handle, evtchn_port_t port);
   29.60 +
   29.61  #endif
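[Note] The comment on xc_evtchn_fd() notes that the returned descriptor is meant to be select()ed on before calling xc_evtchn_pending(). A minimal sketch of that wait loop, assuming only the interface declared above; VIRQ_TBUF (from xen/xen.h) and handle_port() are illustrative choices, and error handling is abbreviated. The xenbaked.c and xenstored changes later in this changeset follow the same pattern.

    /* Minimal sketch of the intended wait loop; handle_port() is a placeholder
     * for consumer-specific work. */
    #include <sys/select.h>
    #include <xenctrl.h>

    static void handle_port(evtchn_port_t port) { (void)port; /* consumer work */ }

    int wait_loop(void)
    {
        int xce = xc_evtchn_open(), fd;
        evtchn_port_t port;
        fd_set inset;

        if ( xce == -1 )
            return -1;

        xc_evtchn_bind_virq(xce, VIRQ_TBUF);     /* e.g. the trace-buffer VIRQ */
        fd = xc_evtchn_fd(xce);

        for ( ;; )
        {
            FD_ZERO(&inset);
            FD_SET(fd, &inset);
            if ( select(fd + 1, &inset, NULL, NULL, NULL) < 1 )
                continue;
            if ( (port = xc_evtchn_pending(xce)) == (evtchn_port_t)-1 )
                break;                           /* errno describes the failure */
            handle_port(port);
            xc_evtchn_unmask(xce, port);         /* re-enable delivery on port */
        }
        return xc_evtchn_close(xce);
    }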
    30.1 --- a/tools/python/xen/util/security.py	Thu Jun 15 10:02:53 2006 -0600
    30.2 +++ b/tools/python/xen/util/security.py	Thu Jun 15 10:23:57 2006 -0600
    30.3 @@ -52,7 +52,8 @@ empty_line_re = re.compile("^\s*$")
    30.4  binary_name_re = re.compile(".*[chwall|ste|chwall_ste].*\.bin", re.IGNORECASE)
    30.5  policy_name_re = re.compile(".*[chwall|ste|chwall_ste].*", re.IGNORECASE)
    30.6  
    30.7 -
    30.8 +#other global variables
    30.9 +NULL_SSIDREF = 0
   30.10  
   30.11  log = logging.getLogger("xend.util.security")
   30.12  
   30.13 @@ -255,6 +256,8 @@ def ssidref2label(ssidref_var):
   30.14      #2. get labelnames for both ssidref parts
   30.15      pri_ssid = ssidref & 0xffff
   30.16      sec_ssid = ssidref >> 16
   30.17 +    pri_null_ssid = NULL_SSIDREF & 0xffff
   30.18 +    sec_null_ssid = NULL_SSIDREF >> 16
   30.19      pri_labels = []
   30.20      sec_labels = []
   30.21      labels = []
   30.22 @@ -270,7 +273,11 @@ def ssidref2label(ssidref_var):
   30.23      f.close()
   30.24  
   30.25      #3. get the label that is in both lists (combination must be a single label)
   30.26 -    if secondary == "NULL":
   30.27 +    if (primary == "CHWALL") and (pri_ssid == pri_null_ssid) and (sec_ssid != sec_null_ssid):
   30.28 +        labels = sec_labels
   30.29 +    elif (secondary == "CHWALL") and (pri_ssid != pri_null_ssid) and (sec_ssid == sec_null_ssid):
   30.30 +        labels = pri_labels
   30.31 +    elif secondary == "NULL":
   30.32          labels = pri_labels
   30.33      else:
   30.34          for i in pri_labels:
   30.35 @@ -285,7 +292,7 @@ def ssidref2label(ssidref_var):
   30.36  
   30.37  
   30.38  
   30.39 -def label2ssidref(labelname, policyname):
   30.40 +def label2ssidref(labelname, policyname, type):
   30.41      """
   30.42      returns ssidref corresponding to labelname;
   30.43      maps current policy to default directory
   30.44 @@ -294,6 +301,14 @@ def label2ssidref(labelname, policyname)
   30.45      if policyname in ['NULL', 'INACTIVE', 'DEFAULT']:
   30.46          err("Cannot translate labels for \'" + policyname + "\' policy.")
   30.47  
   30.48 +    allowed_types = ['ANY']
   30.49 +    if type == 'dom':
   30.50 +        allowed_types.append('VM')
   30.51 +    elif type == 'res':
   30.52 +        allowed_types.append('RES')
   30.53 +    else:
   30.54 +        err("Invalid type.  Must specify 'dom' or 'res'.")
   30.55 +
   30.56      (primary, secondary, f, pol_exists) = getmapfile(policyname)
   30.57  
   30.58      #2. get labelnames for ssidref parts and find a common label
   30.59 @@ -303,11 +318,15 @@ def label2ssidref(labelname, policyname)
   30.60          l = line.split()
   30.61          if (len(l) < 5) or (l[0] != "LABEL->SSID"):
   30.62              continue
   30.63 -        if primary and (l[2] == primary) and (l[3] == labelname):
   30.64 +        if primary and (l[1] in allowed_types) and (l[2] == primary) and (l[3] == labelname):
   30.65              pri_ssid.append(int(l[4], 16))
   30.66 -        if secondary and (l[2] == secondary) and (l[3] == labelname):
   30.67 +        if secondary and (l[1] in allowed_types) and (l[2] == secondary) and (l[3] == labelname):
   30.68              sec_ssid.append(int(l[4], 16))
   30.69      f.close()
   30.70 +    if (type == 'res') and (primary == "CHWALL") and (len(pri_ssid) == 0):
   30.71 +        pri_ssid.append(NULL_SSIDREF)
   30.72 +    elif (type == 'res') and (secondary == "CHWALL") and (len(sec_ssid) == 0):
   30.73 +        sec_ssid.append(NULL_SSIDREF)
   30.74  
   30.75      #3. sanity check and composition of ssidref
   30.76      if (len(pri_ssid) == 0) or ((len(sec_ssid) == 0) and (secondary != "NULL")):
   30.77 @@ -360,7 +379,7 @@ def refresh_ssidref(config):
   30.78          err("Policy \'" + policyname + "\' in label does not match active policy \'"
   30.79              + active_policy +"\'!")
   30.80  
   30.81 -    new_ssidref = label2ssidref(labelname, policyname)
   30.82 +    new_ssidref = label2ssidref(labelname, policyname, 'dom')
   30.83      if not new_ssidref:
   30.84          err("SSIDREF refresh failed!")
   30.85  
   30.86 @@ -409,7 +428,7 @@ def get_decision(arg1, arg2):
   30.87      enables domains to retrieve access control decisions from
   30.88      the hypervisor Access Control Module.
   30.89      IN: args format = ['domid', id] or ['ssidref', ssidref]
   30.90 -    or ['access_control', ['policy', policy], ['label', label]]
   30.91 +    or ['access_control', ['policy', policy], ['label', label], ['type', type]]
   30.92      """
   30.93  
   30.94      if not on():
   30.95 @@ -417,14 +436,14 @@ def get_decision(arg1, arg2):
   30.96  
   30.97      #translate labels before calling low-level function
   30.98      if arg1[0] == 'access_control':
   30.99 -        if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') :
  30.100 +        if (arg1[1][0] != 'policy') or (arg1[2][0] != 'label') or (arg1[3][0] != 'type'):
  30.101              err("Argument type not supported.")
  30.102 -        ssidref = label2ssidref(arg1[2][1], arg1[1][1])
  30.103 +        ssidref = label2ssidref(arg1[2][1], arg1[1][1], arg1[3][1])
  30.104          arg1 = ['ssidref', str(ssidref)]
  30.105      if arg2[0] == 'access_control':
  30.106 -        if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') :
  30.107 +        if (arg2[1][0] != 'policy') or (arg2[2][0] != 'label') or (arg2[3][0] != 'type'):
  30.108              err("Argument type not supported.")
  30.109 -        ssidref = label2ssidref(arg2[2][1], arg2[1][1])
  30.110 +        ssidref = label2ssidref(arg2[2][1], arg2[1][1], arg2[3][1])
  30.111          arg2 = ['ssidref', str(ssidref)]
  30.112  
  30.113      # accept only int or string types for domid and ssidref
    31.1 --- a/tools/python/xen/xm/addlabel.py	Thu Jun 15 10:02:53 2006 -0600
    31.2 +++ b/tools/python/xen/xm/addlabel.py	Thu Jun 15 10:23:57 2006 -0600
    31.3 @@ -50,7 +50,7 @@ def main(argv):
    31.4              err("No active policy. Policy must be specified in command line.")
    31.5  
    31.6          #sanity checks: make sure this label can be instantiated later on
    31.7 -        ssidref = label2ssidref(label, policyref)
    31.8 +        ssidref = label2ssidref(label, policyref, 'dom')
    31.9  
   31.10          new_label = "access_control = ['policy=%s,label=%s']\n" % (policyref, label)
   31.11          if not os.path.isfile(configfile):
    32.1 --- a/tools/python/xen/xm/create.py	Thu Jun 15 10:02:53 2006 -0600
    32.2 +++ b/tools/python/xen/xm/create.py	Thu Jun 15 10:23:57 2006 -0600
    32.3 @@ -541,7 +541,7 @@ def configure_security(config, vals):
    32.4          if sxp.child_value(config, 'ssidref'):
    32.5              err("ERROR: SSIDREF and access_control are mutually exclusive but both specified!")
    32.6          #else calculate ssidre from label
    32.7 -        ssidref = security.label2ssidref(label, policy)
    32.8 +        ssidref = security.label2ssidref(label, policy, 'dom')
    32.9          if not ssidref :
   32.10              err("ERROR calculating ssidref from access_control.")
   32.11          security_label = ['security', [ config_access_control, ['ssidref' , ssidref ] ] ]
    33.1 --- a/tools/python/xen/xm/main.py	Thu Jun 15 10:02:53 2006 -0600
    33.2 +++ b/tools/python/xen/xm/main.py	Thu Jun 15 10:23:57 2006 -0600
    33.3 @@ -1193,6 +1193,9 @@ def main(argv=sys.argv):
    33.4              else:
    33.5                  print  >>sys.stderr, "Error: %s" % ex.faultString
    33.6              sys.exit(1)
    33.7 +        except (ValueError, OverflowError):
    33.8 +            err("Invalid argument.")
    33.9 +            usage(argv[1])
   33.10          except:
   33.11              print "Unexpected error:", sys.exc_info()[0]
   33.12              print
    34.1 --- a/tools/security/Makefile	Thu Jun 15 10:02:53 2006 -0600
    34.2 +++ b/tools/security/Makefile	Thu Jun 15 10:23:57 2006 -0600
    34.3 @@ -33,7 +33,7 @@ OBJS_XML2BIN := $(patsubst %.c,%.o,$(fil
    34.4  
    34.5  ACM_INST_TOOLS    = xensec_tool xensec_xml2bin xensec_gen
    34.6  ACM_OBJS          = $(OBJS_TOOL) $(OBJS_XML2BIN) $(OBJS_GETD)
    34.7 -ACM_SCRIPTS       = python/xensec_tools/acm_getlabel python/xensec_tools/acm_getdecision
    34.8 +ACM_SCRIPTS       = python/xensec_tools/acm_getlabel
    34.9  
   34.10  ACM_CONFIG_DIR    = /etc/xen/acm-security
   34.11  ACM_POLICY_DIR    = $(ACM_CONFIG_DIR)/policies
    35.1 --- a/tools/security/python/xensec_gen/cgi-bin/policy.cgi	Thu Jun 15 10:02:53 2006 -0600
    35.2 +++ b/tools/security/python/xensec_gen/cgi-bin/policy.cgi	Thu Jun 15 10:23:57 2006 -0600
    35.3 @@ -406,7 +406,7 @@ def parsePolicyXml( ):
    35.4  					msg = msg + 'Please validate the Policy file used.'
    35.5  					formatXmlError( msg )
    35.6  
    35.7 -					allCSMTypes[csName][1] = csMemberList
    35.8 +				allCSMTypes[csName][1] = csMemberList
    35.9  
   35.10  	if pOrder != '':
   35.11  		formPolicyOrder[1] = pOrder
    36.1 --- a/tools/security/python/xensec_tools/acm_getdecision	Thu Jun 15 10:02:53 2006 -0600
    36.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.3 @@ -1,55 +0,0 @@
    36.4 -#!/usr/bin/env python
    36.5 -#  -*- mode: python; -*-
    36.6 -import sys
    36.7 -import traceback
    36.8 -import getopt
    36.9 -
   36.10 -# add fallback path for non-native python path installs if needed
   36.11 -sys.path.insert(-1, '/usr/lib/python')
   36.12 -sys.path.insert(-1, '/usr/lib64/python')
   36.13 -
   36.14 -from xen.util.security import ACMError, err, get_decision, active_policy
   36.15 -
   36.16 -def usage():
   36.17 -    print "Usage: acm_getdecision -i domainid --label labelname"
   36.18 -    print "  Test program illustrating the retrieval of"
   36.19 -    print "  access control decisions from Xen. At this time,"
   36.20 -    print "  only sharing (STE) policy decisions are supported."
   36.21 -    print "  Arguments are two paramters in any combination:"
   36.22 -    print "\t -i domain_id or --domid domain_id"
   36.23 -    print "\t -l labelname or --label labelname"
   36.24 -    print "  Return value:"
   36.25 -    print "\t PERMITTED if access is permitted"
   36.26 -    print "\t DENIED if access is denied"
   36.27 -    print "\t ACMError -- e.g., unknown label or domain id"
   36.28 -    err("Usage")
   36.29 -
   36.30 -try:
   36.31 -
   36.32 -    if len(sys.argv) != 5:
   36.33 -        usage()
   36.34 -
   36.35 -    decision_args = []
   36.36 -
   36.37 -    for idx in range(1, len(sys.argv), 2):
   36.38 -        if sys.argv[idx] in ['-i', '--domid']:
   36.39 -            decision_args.append(['domid', sys.argv[idx+1]])
   36.40 -        elif sys.argv[idx] in ['-l', '--label']:
   36.41 -            decision_args.append(['access_control',
   36.42 -                                  ['policy', active_policy],
   36.43 -                                  ['label', sys.argv[idx+1]]
   36.44 -                                  ])
   36.45 -        else:
   36.46 -            print "unknown argument %s" % sys.argv[idx]
   36.47 -            usage()
   36.48 -
   36.49 -    if len(decision_args) != 2:
   36.50 -        print "too many arguments"
   36.51 -        usage()
   36.52 -
   36.53 -    print get_decision(decision_args[0], decision_args[1])
   36.54 -
   36.55 -except ACMError:
   36.56 -	pass
   36.57 -except:
   36.58 -    traceback.print_exc(limit=1)
    37.1 --- a/tools/security/secpol_xml2bin.c	Thu Jun 15 10:02:53 2006 -0600
    37.2 +++ b/tools/security/secpol_xml2bin.c	Thu Jun 15 10:23:57 2006 -0600
    37.3 @@ -44,6 +44,8 @@
    37.4  
    37.5  #define DEBUG    0
    37.6  
    37.7 +#define NULL_LABEL_NAME "__NULL_LABEL__"
    37.8 +
    37.9  /* primary / secondary policy component setting */
   37.10  enum policycomponent { CHWALL, STE, NULLPOLICY }
   37.11      primary = NULLPOLICY, secondary = NULLPOLICY;
   37.12 @@ -467,7 +469,7 @@ int init_ssid_queues(void)
   37.13          return -ENOMEM;
   37.14  
   37.15      /* default chwall ssid */
   37.16 -    default_ssid_chwall->name = "DEFAULT";
   37.17 +    default_ssid_chwall->name = NULL_LABEL_NAME;
   37.18      default_ssid_chwall->num = max_chwall_ssids++;
   37.19      default_ssid_chwall->is_ref = 0;
   37.20      default_ssid_chwall->type = ANY;
   37.21 @@ -484,7 +486,7 @@ int init_ssid_queues(void)
   37.22      max_chwall_labels++;
   37.23  
   37.24      /* default ste ssid */
   37.25 -    default_ssid_ste->name = "DEFAULT";
   37.26 +    default_ssid_ste->name = NULL_LABEL_NAME;
   37.27      default_ssid_ste->num = max_ste_ssids++;
   37.28      default_ssid_ste->is_ref = 0;
   37.29      default_ssid_ste->type = ANY;
    38.1 --- a/tools/xenmon/xenbaked.c	Thu Jun 15 10:02:53 2006 -0600
    38.2 +++ b/tools/xenmon/xenbaked.c	Thu Jun 15 10:23:57 2006 -0600
    38.3 @@ -33,9 +33,6 @@
    38.4  #include <stdlib.h>
    38.5  #include <stdio.h>
    38.6  #include <sys/mman.h>
    38.7 -#include <sys/stat.h>
    38.8 -#include <sys/types.h>
    38.9 -#include <sys/ioctl.h>
   38.10  #include <fcntl.h>
   38.11  #include <unistd.h>
   38.12  #include <errno.h>
   38.13 @@ -45,7 +42,6 @@
   38.14  #include <xen/xen.h>
   38.15  #include <string.h>
   38.16  #include <sys/select.h>
   38.17 -#include <xen/linux/evtchn.h>
   38.18  
   38.19  #define PERROR(_m, _a...)                                       \
   38.20  do {                                                            \
   38.21 @@ -256,51 +252,29 @@ void log_event(int event_id)
   38.22          stat_map[0].event_count++;	// other
   38.23  }
   38.24  
   38.25 -#define EVTCHN_DEV_NAME  "/dev/xen/evtchn"
   38.26 -#define EVTCHN_DEV_MAJOR 10
   38.27 -#define EVTCHN_DEV_MINOR 201
   38.28 -
   38.29  int virq_port;
   38.30 -int eventchn_fd = -1;
   38.31 +int xce_handle = -1;
   38.32  
   38.33  /* Returns the event channel handle. */
   38.34  /* Stolen from xenstore code */
   38.35  int eventchn_init(void)
   38.36  {
   38.37 -  struct stat st;
   38.38 -  struct ioctl_evtchn_bind_virq bind;
   38.39    int rc;
   38.40    
   38.41    // to revert to old way:
   38.42    if (0)
   38.43      return -1;
   38.44    
   38.45 -  /* Make sure any existing device file links to correct device. */
   38.46 -  if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
   38.47 -      (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
   38.48 -    (void)unlink(EVTCHN_DEV_NAME);
   38.49 -  
   38.50 - reopen:
   38.51 -  eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
   38.52 -  if (eventchn_fd == -1) {
   38.53 -    if ((errno == ENOENT) &&
   38.54 -	((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
   38.55 -	(mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
   38.56 -	       makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0))
   38.57 -      goto reopen;
   38.58 -    return -errno;
   38.59 -  }
   38.60 -  
   38.61 -  if (eventchn_fd < 0)
   38.62 +  xce_handle = xc_evtchn_open();
   38.63 +
   38.64 +  if (xce_handle < 0)
   38.65      perror("Failed to open evtchn device");
   38.66    
   38.67 -  bind.virq = VIRQ_TBUF;
   38.68 -  rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);
   38.69 -  if (rc == -1)
   38.70 +  if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
   38.71      perror("Failed to bind to domain exception virq port");
   38.72    virq_port = rc;
   38.73    
   38.74 -  return eventchn_fd;
   38.75 +  return xce_handle;
   38.76  }
   38.77  
   38.78  void wait_for_event(void)
   38.79 @@ -309,27 +283,30 @@ void wait_for_event(void)
   38.80    fd_set inset;
   38.81    evtchn_port_t port;
   38.82    struct timeval tv;
   38.83 +  int evtchn_fd;
   38.84    
   38.85 -  if (eventchn_fd < 0) {
   38.86 +  if (xce_handle < 0) {
   38.87      nanosleep(&opts.poll_sleep, NULL);
   38.88      return;
   38.89    }
   38.90  
   38.91 +  evtchn_fd = xc_evtchn_fd(xce_handle);
   38.92 +
   38.93    FD_ZERO(&inset);
   38.94 -  FD_SET(eventchn_fd, &inset);
   38.95 +  FD_SET(evtchn_fd, &inset);
   38.96    tv.tv_sec = 1;
   38.97    tv.tv_usec = 0;
   38.98    // tv = millis_to_timespec(&opts.poll_sleep);
   38.99 -  ret = select(eventchn_fd+1, &inset, NULL, NULL, &tv);
  38.100 +  ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);
  38.101    
  38.102 -  if ( (ret == 1) && FD_ISSET(eventchn_fd, &inset)) {
  38.103 -    if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
  38.104 +  if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset)) {
  38.105 +    if ((port = xc_evtchn_pending(xce_handle)) == -1)
  38.106        perror("Failed to read from event fd");
  38.107      
  38.108      //    if (port == virq_port)
  38.109      //      printf("got the event I was looking for\r\n");
  38.110 -    
  38.111 -    if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port))
  38.112 +
  38.113 +    if (xc_evtchn_unmask(xce_handle, port) == -1)
  38.114        perror("Failed to write to event fd");
  38.115    }
  38.116  }
    39.1 --- a/tools/xenstat/libxenstat/src/xenstat.c	Thu Jun 15 10:02:53 2006 -0600
    39.2 +++ b/tools/xenstat/libxenstat/src/xenstat.c	Thu Jun 15 10:23:57 2006 -0600
    39.3 @@ -223,18 +223,20 @@ xenstat_node *xenstat_get_node(xenstat_h
    39.4  
    39.5  	num_domains = 0;
    39.6  	do {
    39.7 -		xenstat_domain *domain;
    39.8 +		xenstat_domain *domain, *tmp;
    39.9  
   39.10  		new_domains = xc_domain_getinfolist(handle->xc_handle,
   39.11  			num_domains, DOMAIN_CHUNK_SIZE, domaininfo);
   39.12  
   39.13 -		node->domains = realloc(node->domains,
   39.14 -					(num_domains + new_domains)
   39.15 -					* sizeof(xenstat_domain));
   39.16 -		if (node->domains == NULL) {
   39.17 +		tmp = realloc(node->domains,
   39.18 +			      (num_domains + new_domains)
   39.19 +			      * sizeof(xenstat_domain));
   39.20 +		if (tmp == NULL) {
   39.21 +			free(node->domains);
   39.22  			free(node);
   39.23  			return NULL;
   39.24  		}
   39.25 +		node->domains = tmp;
   39.26  
   39.27  		domain = node->domains + num_domains;
   39.28  
   39.29 @@ -582,11 +584,14 @@ static int xenstat_collect_networks(xens
   39.30  			domain->num_networks = 1;
   39.31  			domain->networks = malloc(sizeof(xenstat_network));
   39.32  		} else {
   39.33 +			struct xenstat_network *tmp;
   39.34  			domain->num_networks++;
   39.35 -			domain->networks =
   39.36 -			    realloc(domain->networks,
   39.37 -				    domain->num_networks *
   39.38 -				    sizeof(xenstat_network));
   39.39 +			tmp = realloc(domain->networks,
   39.40 +				      domain->num_networks *
   39.41 +				      sizeof(xenstat_network));
   39.42 +			if (tmp == NULL)
   39.43 +				free(domain->networks);
   39.44 +			domain->networks = tmp;
   39.45  		}
   39.46  		if (domain->networks == NULL)
   39.47  			return 0;
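[Note] Both hunks above replace the p = realloc(p, ...) idiom, which loses the only pointer to the old block when realloc() fails, with an assignment through a temporary. The idiom in isolation, as a small self-contained sketch; grow_array() is illustrative, not a libxenstat function.

    /* Illustrative only: the realloc pattern the two hunks above adopt. */
    #include <stdlib.h>

    int grow_array(int **arr, size_t new_count)
    {
        int *tmp = realloc(*arr, new_count * sizeof(**arr));

        if (tmp == NULL) {
            free(*arr);          /* old block is still valid here; release it */
            *arr = NULL;
            return -1;
        }
        *arr = tmp;              /* overwrite the caller's pointer only on success */
        return 0;
    }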
    40.1 --- a/tools/xenstore/fake_libxc.c	Thu Jun 15 10:02:53 2006 -0600
    40.2 +++ b/tools/xenstore/fake_libxc.c	Thu Jun 15 10:23:57 2006 -0600
    40.3 @@ -37,7 +37,7 @@ static int xs_test_pid;
    40.4  static evtchn_port_t port;
    40.5  
    40.6  /* The event channel maps to a signal, shared page to an mmapped file. */
    40.7 -void evtchn_notify(int local_port)
    40.8 +void xc_evtchn_notify(int xce_handle, int local_port)
    40.9  {
   40.10  	assert(local_port == port);
   40.11  	if (kill(xs_test_pid, SIGUSR2) != 0)
   40.12 @@ -124,7 +124,7 @@ void fake_ack_event(void)
   40.13  	signal(SIGUSR2, send_to_fd);
   40.14  }
   40.15  
   40.16 -int fake_open_eventchn(void)
   40.17 +int xc_evtchn_open(void)
   40.18  {
   40.19  	int fds[2];
   40.20  
    41.1 --- a/tools/xenstore/xenstored_core.c	Thu Jun 15 10:02:53 2006 -0600
    41.2 +++ b/tools/xenstore/xenstored_core.c	Thu Jun 15 10:23:57 2006 -0600
    41.3 @@ -54,7 +54,7 @@
    41.4  #include "hashtable.h"
    41.5  
    41.6  
    41.7 -extern int eventchn_fd; /* in xenstored_domain.c */
    41.8 +extern int xce_handle; /* in xenstored_domain.c */
    41.9  
   41.10  static bool verbose = false;
   41.11  LIST_HEAD(connections);
   41.12 @@ -353,8 +353,11 @@ static int initialize_set(fd_set *inset,
   41.13  
   41.14  	set_fd(sock,               inset, &max);
   41.15  	set_fd(ro_sock,            inset, &max);
   41.16 -	set_fd(eventchn_fd,        inset, &max);
   41.17  	set_fd(reopen_log_pipe[0], inset, &max);
   41.18 +
   41.19 +	if (xce_handle != -1)
   41.20 +		set_fd(xc_evtchn_fd(xce_handle), inset, &max);
   41.21 +
   41.22  	list_for_each_entry(i, &connections, list) {
   41.23  		if (i->domain)
   41.24  			continue;
   41.25 @@ -1769,6 +1772,7 @@ int main(int argc, char *argv[])
   41.26  	bool outputpid = false;
   41.27  	bool no_domain_init = false;
   41.28  	const char *pidfile = NULL;
   41.29 +	int evtchn_fd = -1;
   41.30  
   41.31  	while ((opt = getopt_long(argc, argv, "DE:F:HNPS:T:RLVW:", options,
   41.32  				  NULL)) != -1) {
   41.33 @@ -1907,6 +1911,9 @@ int main(int argc, char *argv[])
   41.34  	signal(SIGUSR1, stop_failtest);
   41.35  #endif
   41.36  
   41.37 +	if (xce_handle != -1)
   41.38 +		evtchn_fd = xc_evtchn_fd(xce_handle);
   41.39 +
   41.40  	/* Get ready to listen to the tools. */
   41.41  	max = initialize_set(&inset, &outset, *sock, *ro_sock);
   41.42  
   41.43 @@ -1934,7 +1941,7 @@ int main(int argc, char *argv[])
   41.44  		if (FD_ISSET(*ro_sock, &inset))
   41.45  			accept_connection(*ro_sock, false);
   41.46  
   41.47 -		if (eventchn_fd > 0 && FD_ISSET(eventchn_fd, &inset))
   41.48 +		if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
   41.49  			handle_event();
   41.50  
   41.51  		list_for_each_entry(i, &connections, list) {
    42.1 --- a/tools/xenstore/xenstored_domain.c	Thu Jun 15 10:02:53 2006 -0600
    42.2 +++ b/tools/xenstore/xenstored_domain.c	Thu Jun 15 10:23:57 2006 -0600
    42.3 @@ -18,15 +18,10 @@
    42.4  */
    42.5  
    42.6  #include <stdio.h>
    42.7 -#include <linux/ioctl.h>
    42.8 -#include <sys/ioctl.h>
    42.9  #include <sys/mman.h>
   42.10  #include <unistd.h>
   42.11  #include <stdlib.h>
   42.12  #include <stdarg.h>
   42.13 -#include <sys/types.h>
   42.14 -#include <sys/stat.h>
   42.15 -#include <fcntl.h>
   42.16  
   42.17  //#define DEBUG
   42.18  #include "utils.h"
   42.19 @@ -37,12 +32,11 @@
   42.20  #include "xenstored_test.h"
   42.21  
   42.22  #include <xenctrl.h>
   42.23 -#include <xen/sys/evtchn.h>
   42.24  
   42.25  static int *xc_handle;
   42.26  static evtchn_port_t virq_port;
   42.27  
   42.28 -int eventchn_fd = -1; 
   42.29 +int xce_handle = -1; 
   42.30  
   42.31  struct domain
   42.32  {
   42.33 @@ -83,19 +77,6 @@ struct domain
   42.34  
   42.35  static LIST_HEAD(domains);
   42.36  
   42.37 -#ifndef TESTING
   42.38 -static void evtchn_notify(int port)
   42.39 -{
   42.40 -	int rc; 
   42.41 -
   42.42 -	struct ioctl_evtchn_notify notify;
   42.43 -	notify.port = port;
   42.44 -	rc = ioctl(eventchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
   42.45 -}
   42.46 -#else
   42.47 -extern void evtchn_notify(int port);
   42.48 -#endif
   42.49 -
   42.50  /* FIXME: Mark connection as broken (close it?) when this happens. */
   42.51  static bool check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
   42.52  {
   42.53 @@ -146,7 +127,7 @@ static int writechn(struct connection *c
   42.54  	mb();
   42.55  	intf->rsp_prod += len;
   42.56  
   42.57 -	evtchn_notify(conn->domain->port);
   42.58 +	xc_evtchn_notify(xce_handle, conn->domain->port);
   42.59  
   42.60  	return len;
   42.61  }
   42.62 @@ -176,7 +157,7 @@ static int readchn(struct connection *co
   42.63  	mb();
   42.64  	intf->req_cons += len;
   42.65  
   42.66 -	evtchn_notify(conn->domain->port);
   42.67 +	xc_evtchn_notify(xce_handle, conn->domain->port);
   42.68  
   42.69  	return len;
   42.70  }
   42.71 @@ -184,13 +165,11 @@ static int readchn(struct connection *co
   42.72  static int destroy_domain(void *_domain)
   42.73  {
   42.74  	struct domain *domain = _domain;
   42.75 -	struct ioctl_evtchn_unbind unbind;
   42.76  
   42.77  	list_del(&domain->list);
   42.78  
   42.79  	if (domain->port) {
   42.80 -		unbind.port = domain->port;
   42.81 -		if (ioctl(eventchn_fd, IOCTL_EVTCHN_UNBIND, &unbind) == -1)
   42.82 +		if (xc_evtchn_unbind(xce_handle, domain->port) == -1)
   42.83  			eprintf("> Unbinding port %i failed!\n", domain->port);
   42.84  	}
   42.85  
   42.86 @@ -231,14 +210,14 @@ void handle_event(void)
   42.87  {
   42.88  	evtchn_port_t port;
   42.89  
   42.90 -	if (read(eventchn_fd, &port, sizeof(port)) != sizeof(port))
   42.91 +	if ((port = xc_evtchn_pending(xce_handle)) == -1)
   42.92  		barf_perror("Failed to read from event fd");
   42.93  
   42.94  	if (port == virq_port)
   42.95  		domain_cleanup();
   42.96  
   42.97  #ifndef TESTING
   42.98 -	if (write(eventchn_fd, &port, sizeof(port)) != sizeof(port))
   42.99 +	if (xc_evtchn_unmask(xce_handle, port) == -1)
  42.100  		barf_perror("Failed to write to event fd");
  42.101  #endif
  42.102  }
  42.103 @@ -269,7 +248,6 @@ static struct domain *new_domain(void *c
  42.104  				 int port)
  42.105  {
  42.106  	struct domain *domain;
  42.107 -	struct ioctl_evtchn_bind_interdomain bind;
  42.108  	int rc;
  42.109  
  42.110  
  42.111 @@ -283,9 +261,7 @@ static struct domain *new_domain(void *c
  42.112  	talloc_set_destructor(domain, destroy_domain);
  42.113  
  42.114  	/* Tell kernel we're interested in this event. */
  42.115 -	bind.remote_domain = domid;
  42.116 -	bind.remote_port   = port;
  42.117 -	rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
  42.118 +	rc = xc_evtchn_bind_interdomain(xce_handle, domid, port);
  42.119  	if (rc == -1)
  42.120  	    return NULL;
  42.121  	domain->port = rc;
  42.122 @@ -490,23 +466,14 @@ static int dom0_init(void)
  42.123  
  42.124  	talloc_steal(dom0->conn, dom0); 
  42.125  
  42.126 -	evtchn_notify(dom0->port); 
  42.127 +	xc_evtchn_notify(xce_handle, dom0->port); 
  42.128  
  42.129  	return 0; 
  42.130  }
  42.131  
  42.132 -
  42.133 -
  42.134 -#define EVTCHN_DEV_NAME  "/dev/xen/evtchn"
  42.135 -#define EVTCHN_DEV_MAJOR 10
  42.136 -#define EVTCHN_DEV_MINOR 201
  42.137 -
  42.138 -
  42.139  /* Returns the event channel handle. */
  42.140  int domain_init(void)
  42.141  {
  42.142 -	struct stat st;
  42.143 -	struct ioctl_evtchn_bind_virq bind;
  42.144  	int rc;
  42.145  
  42.146  	xc_handle = talloc(talloc_autofree_context(), int);
  42.147 @@ -519,39 +486,19 @@ int domain_init(void)
  42.148  
  42.149  	talloc_set_destructor(xc_handle, close_xc_handle);
  42.150  
  42.151 -#ifdef TESTING
  42.152 -	eventchn_fd = fake_open_eventchn();
  42.153 -	(void)&st;
  42.154 -#else
  42.155 -	/* Make sure any existing device file links to correct device. */
  42.156 -	if ((lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
  42.157 -	    (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)))
  42.158 -		(void)unlink(EVTCHN_DEV_NAME);
  42.159 +	xce_handle = xc_evtchn_open();
  42.160  
  42.161 - reopen:
  42.162 -	eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
  42.163 -	if (eventchn_fd == -1) {
  42.164 -		if ((errno == ENOENT) &&
  42.165 -		    ((mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST)) &&
  42.166 -		    (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
  42.167 -			   makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) == 0))
  42.168 -			goto reopen;
  42.169 -		return -errno;
  42.170 -	}
  42.171 -#endif
  42.172 -	if (eventchn_fd < 0)
  42.173 +	if (xce_handle < 0)
  42.174  		barf_perror("Failed to open evtchn device");
  42.175  
  42.176  	if (dom0_init() != 0) 
  42.177  		barf_perror("Failed to initialize dom0 state"); 
  42.178  
  42.179 -	bind.virq = VIRQ_DOM_EXC;
  42.180 -	rc = ioctl(eventchn_fd, IOCTL_EVTCHN_BIND_VIRQ, &bind);
  42.181 -	if (rc == -1)
  42.182 +	if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_DOM_EXC)) == -1)
  42.183  		barf_perror("Failed to bind to domain exception virq port");
  42.184  	virq_port = rc;
  42.185  
  42.186 -	return eventchn_fd;
  42.187 +	return xce_handle;
  42.188  }
  42.189  
  42.190  void domain_entry_inc(struct connection *conn)
    43.1 --- a/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py	Thu Jun 15 10:02:53 2006 -0600
    43.2 +++ b/tools/xm-test/tests/block-integrity/01_block_device_read_verify.py	Thu Jun 15 10:23:57 2006 -0600
    43.3 @@ -31,7 +31,7 @@ traceCommand("cat /dev/urandom > /dev/ra
    43.4  
    43.5  s, o = traceCommand("md5sum /dev/ram1")
    43.6  
    43.7 -dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
    43.8 +dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M)
    43.9  
   43.10  block_attach(domain, "phy:ram1", "hda1")
   43.11  
   43.12 @@ -40,7 +40,7 @@ try:
   43.13  except ConsoleError, e:
   43.14      FAIL(str(e))
   43.15  
   43.16 -domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
   43.17 +domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M)
   43.18  
   43.19  domain.closeConsole()
   43.20  
    44.1 --- a/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py	Thu Jun 15 10:02:53 2006 -0600
    44.2 +++ b/tools/xm-test/tests/block-integrity/02_block_device_write_verify.py	Thu Jun 15 10:23:57 2006 -0600
    44.3 @@ -37,7 +37,7 @@ try:
    44.4  except ConsoleError, e:
    44.5      FAIL(str(e))
    44.6  
    44.7 -domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"])
    44.8 +domU_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", run["output"], re.M)
    44.9  
   44.10  domain.closeConsole()
   44.11  
   44.12 @@ -45,7 +45,7 @@ domain.stop()
   44.13  
   44.14  s, o = traceCommand("md5sum /dev/ram1")
   44.15  
   44.16 -dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o)
   44.17 +dom0_md5sum_match = re.search(r"^[\dA-Fa-f]{32}", o, re.M)
   44.18  
   44.19  if domU_md5sum_match == None:
   44.20      FAIL("Failed to get md5sum of data written in domU.")
    45.1 --- a/xen/arch/x86/traps.c	Thu Jun 15 10:02:53 2006 -0600
    45.2 +++ b/xen/arch/x86/traps.c	Thu Jun 15 10:23:57 2006 -0600
    45.3 @@ -1279,7 +1279,7 @@ asmlinkage int do_general_protection(str
    45.4  static void nmi_softirq(void)
    45.5  {
    45.6      /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
    45.7 -    evtchn_notify(dom0->vcpu[0]);
    45.8 +    vcpu_kick(dom0->vcpu[0]);
    45.9  }
   45.10  
   45.11  static void nmi_dom0_report(unsigned int reason_idx)
    46.1 --- a/xen/common/event_channel.c	Thu Jun 15 10:02:53 2006 -0600
    46.2 +++ b/xen/common/event_channel.c	Thu Jun 15 10:23:57 2006 -0600
    46.3 @@ -493,10 +493,9 @@ void evtchn_set_pending(struct vcpu *v, 
    46.4  
    46.5      if ( !test_bit        (port, s->evtchn_mask) &&
    46.6           !test_and_set_bit(port / BITS_PER_LONG,
    46.7 -                           &v->vcpu_info->evtchn_pending_sel) &&
    46.8 -         !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
    46.9 +                           &v->vcpu_info->evtchn_pending_sel) )
   46.10      {
   46.11 -        evtchn_notify(v);
   46.12 +        vcpu_mark_events_pending(v);
   46.13      }
   46.14      
   46.15      /* Check if some VCPU might be polling for this event. */
   46.16 @@ -682,10 +681,9 @@ static long evtchn_unmask(evtchn_unmask_
   46.17      if ( test_and_clear_bit(port, s->evtchn_mask) &&
   46.18           test_bit          (port, s->evtchn_pending) &&
   46.19           !test_and_set_bit (port / BITS_PER_LONG,
   46.20 -                            &v->vcpu_info->evtchn_pending_sel) &&
   46.21 -         !test_and_set_bit (0, &v->vcpu_info->evtchn_upcall_pending) )
   46.22 +                            &v->vcpu_info->evtchn_pending_sel) )
   46.23      {
   46.24 -        evtchn_notify(v);
   46.25 +        vcpu_mark_events_pending(v);
   46.26      }
   46.27  
   46.28      spin_unlock(&d->evtchn_lock);
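[Note] evtchn_set_pending() and evtchn_unmask() no longer touch evtchn_upcall_pending directly: setting that bit and kicking the vcpu on the 0-to-1 transition now lives in vcpu_mark_events_pending() (added to the asm-ia64 and asm-x86 event.h headers below), while callers that only need a wakeup, such as the dom0 NMI path in traps.c, use the renamed vcpu_kick(). A condensed user-space sketch of that split, assuming a plain flag in place of Xen's atomic test_and_set_bit() and a stub in place of the architecture-specific notification.

    /* Condensed, user-space sketch of the split; a plain flag stands in for the
     * atomic test_and_set_bit() and arch_notify() for the arch-specific IPI. */
    #include <stdio.h>

    struct vcpu { int evtchn_upcall_pending; };

    static void arch_notify(struct vcpu *v) { printf("kick %p\n", (void *)v); }

    /* Renamed from evtchn_notify(): wake the vcpu, implying nothing about events. */
    static void vcpu_kick(struct vcpu *v)
    {
        arch_notify(v);
    }

    /* New helper: mark an event upcall pending and kick only on the 0->1 edge. */
    static void vcpu_mark_events_pending(struct vcpu *v)
    {
        if ( !v->evtchn_upcall_pending )
        {
            v->evtchn_upcall_pending = 1;
            vcpu_kick(v);
        }
    }

    int main(void)
    {
        struct vcpu v = { 0 };
        vcpu_mark_events_pending(&v);   /* first event: kicks the vcpu */
        vcpu_mark_events_pending(&v);   /* already pending: no second kick */
        return 0;
    }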
    47.1 --- a/xen/include/asm-ia64/event.h	Thu Jun 15 10:02:53 2006 -0600
    47.2 +++ b/xen/include/asm-ia64/event.h	Thu Jun 15 10:23:57 2006 -0600
    47.3 @@ -12,7 +12,7 @@
    47.4  #include <public/arch-ia64.h>
    47.5  #include <asm/vcpu.h>
    47.6  
    47.7 -static inline void evtchn_notify(struct vcpu *v)
    47.8 +static inline void vcpu_kick(struct vcpu *v)
    47.9  {
   47.10      /*
   47.11       * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
   47.12 @@ -32,6 +32,12 @@ static inline void evtchn_notify(struct 
   47.13          vcpu_pend_interrupt(v, v->domain->shared_info->arch.evtchn_vector);
   47.14  }
   47.15  
   47.16 +static inline void vcpu_mark_events_pending(struct vcpu *v)
   47.17 +{
   47.18 +    if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
   47.19 +        vcpu_kick(v);
   47.20 +}
   47.21 +
   47.22  /* Note: Bitwise operations result in fast code with no branches. */
   47.23  #define event_pending(v)                        \
   47.24      (!!(v)->vcpu_info->evtchn_upcall_pending &  \
    48.1 --- a/xen/include/asm-x86/event.h	Thu Jun 15 10:02:53 2006 -0600
    48.2 +++ b/xen/include/asm-x86/event.h	Thu Jun 15 10:23:57 2006 -0600
    48.3 @@ -9,7 +9,7 @@
    48.4  #ifndef __ASM_EVENT_H__
    48.5  #define __ASM_EVENT_H__
    48.6  
    48.7 -static inline void evtchn_notify(struct vcpu *v)
    48.8 +static inline void vcpu_kick(struct vcpu *v)
    48.9  {
   48.10      /*
   48.11       * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
   48.12 @@ -26,6 +26,12 @@ static inline void evtchn_notify(struct 
   48.13          smp_send_event_check_cpu(v->processor);
   48.14  }
   48.15  
   48.16 +static inline void vcpu_mark_events_pending(struct vcpu *v)
   48.17 +{
   48.18 +    if ( !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
   48.19 +        vcpu_kick(v);
   48.20 +}
   48.21 +
   48.22  static inline int local_events_need_delivery(void)
   48.23  {
   48.24      struct vcpu *v = current;
    49.1 --- a/xen/include/xen/elf.h	Thu Jun 15 10:02:53 2006 -0600
    49.2 +++ b/xen/include/xen/elf.h	Thu Jun 15 10:23:57 2006 -0600
    49.3 @@ -178,9 +178,9 @@ typedef struct {
    49.4  #define EM_ALPHA	41		/* DEC ALPHA */
    49.5  #define EM_SPARCV9	43		/* SPARC version 9 */
    49.6  #define EM_ALPHA_EXP	0x9026		/* DEC ALPHA */
    49.7 +#define EM_IA_64	50		/* Intel Merced */
    49.8  #define EM_X86_64	62		/* AMD x86-64 architecture */
    49.9  #define EM_VAX		75		/* DEC VAX */
   49.10 -#define EM_NUM		15		/* number of machine types */
   49.11  
   49.12  /* Version */
   49.13  #define EV_NONE		0		/* Invalid */