ia64/xen-unstable

changeset 14058:97826d77bd4d

Use RCU for domain_list and domain_hash.

Signed-off-by: Jose Renato Santos <jsantos@hpl.hp.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Feb 21 16:13:49 2007 +0000 (2007-02-21)
parents 3c581edac93a
children af3281c66e73
files xen/acm/acm_chinesewall_hooks.c xen/acm/acm_simple_type_enforcement_hooks.c xen/arch/ia64/linux-xen/mca.c xen/arch/ia64/linux-xen/perfmon.c xen/arch/powerpc/audit.c xen/arch/x86/hvm/svm/vmcb.c xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/time.c xen/common/domain.c xen/common/domctl.c xen/common/keyhandler.c xen/common/sched_sedf.c xen/common/sysctl.c xen/include/xen/rcupdate.h xen/include/xen/sched.h
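
This changeset replaces the reader/writer lock protecting the domain list
(domlist_lock) with RCU: readers now enter a free RCU read-side critical
section, while updaters serialize on a new spinlock, domlist_update_lock.
A minimal sketch of the reader-side idiom used throughout the patch, where
process_domain() is a hypothetical stand-in for each call site's work:

    struct domain *d;

    rcu_read_lock(&domlist_read_lock);   /* begin read-side critical section */
    for_each_domain ( d )                /* walks the list via rcu_dereference() */
        process_domain(d);               /* must not sleep; use get_domain(d)
                                          * to keep d beyond the section */
    rcu_read_unlock(&domlist_read_lock);
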
line diff
     1.1 --- a/xen/acm/acm_chinesewall_hooks.c	Wed Feb 21 14:44:09 2007 +0000
     1.2 +++ b/xen/acm/acm_chinesewall_hooks.c	Wed Feb 21 16:13:49 2007 +0000
     1.3 @@ -194,19 +194,18 @@ chwall_init_state(struct acm_chwall_poli
     1.4      int violation = 0, i, j;
     1.5      struct chwall_ssid *chwall_ssid;
     1.6      ssidref_t chwall_ssidref;
     1.7 -    struct domain **pd;
     1.8 +    struct domain *d;
     1.9  
    1.10 -    write_lock(&domlist_lock);
    1.11 +    spin_lock(&domlist_update_lock);
    1.12      /* go through all domains and adjust policy as if this domain was started now */
    1.13 -    pd = &domain_list;
    1.14 -    for (pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list)
    1.15 +    for_each_domain ( d )
    1.16      {
    1.17          chwall_ssid =
    1.18              GET_SSIDP(ACM_CHINESE_WALL_POLICY,
    1.19 -                      (struct acm_ssid_domain *) (*pd)->ssid);
    1.20 +                      (struct acm_ssid_domain *)d->ssid);
    1.21          chwall_ssidref = chwall_ssid->chwall_ssidref;
    1.22          traceprintk("%s: validating policy for domain %x (chwall-REF=%x).\n",
    1.23 -                    __func__, (*pd)->domain_id, chwall_ssidref);
    1.24 +                    __func__, d->domain_id, chwall_ssidref);
    1.25          /* a) adjust types ref-count for running domains */
    1.26          for (i = 0; i < chwall_buf->chwall_max_types; i++)
    1.27              running_types[i] +=
    1.28 @@ -247,7 +246,7 @@ chwall_init_state(struct acm_chwall_poli
    1.29          }
    1.30      }
    1.31   out:
    1.32 -    write_unlock(&domlist_lock);
    1.33 +    spin_unlock(&domlist_update_lock);
    1.34      return violation;
    1.35      /* returning "violation != 0" means that the currently running set of domains would
    1.36       * not be possible if the new policy had been enforced before starting them; for chinese
     2.1 --- a/xen/acm/acm_simple_type_enforcement_hooks.c	Wed Feb 21 14:44:09 2007 +0000
     2.2 +++ b/xen/acm/acm_simple_type_enforcement_hooks.c	Wed Feb 21 16:13:49 2007 +0000
     2.3 @@ -175,36 +175,37 @@ ste_init_state(struct acm_ste_policy_buf
     2.4      int violation = 1;
     2.5      struct ste_ssid *ste_ssid, *ste_rssid;
     2.6      ssidref_t ste_ssidref, ste_rssidref;
     2.7 -    struct domain **pd, *rdom;
     2.8 +    struct domain *d, *rdom;
     2.9      domid_t rdomid;
    2.10      struct grant_entry sha_copy;
    2.11      int port, i;
    2.12  
    2.13 -    read_lock(&domlist_lock); /* go by domain? or directly by global? event/grant list */
    2.14 +    rcu_read_lock(&domlist_read_lock);
    2.15 +    /* go by domain? or directly by global? event/grant list */
    2.16      /* go through all domains and adjust policy as if this domain was started now */
    2.17 -    pd = &domain_list;
    2.18 -    for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
    2.19 +    for_each_domain ( d )
    2.20 +    {
    2.21          ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, 
    2.22 -                             (struct acm_ssid_domain *)(*pd)->ssid);
    2.23 +                             (struct acm_ssid_domain *)d->ssid);
    2.24          ste_ssidref = ste_ssid->ste_ssidref;
    2.25          traceprintk("%s: validating policy for eventch domain %x (ste-Ref=%x).\n",
    2.26 -                    __func__, (*pd)->domain_id, ste_ssidref);
    2.27 +                    __func__, d->domain_id, ste_ssidref);
    2.28          /* a) check for event channel conflicts */
    2.29          for (port=0; port < NR_EVTCHN_BUCKETS; port++) {
    2.30 -            spin_lock(&(*pd)->evtchn_lock);
    2.31 -            if ((*pd)->evtchn[port] == NULL) {
    2.32 -                spin_unlock(&(*pd)->evtchn_lock);
    2.33 +            spin_lock(&d->evtchn_lock);
    2.34 +            if (d->evtchn[port] == NULL) {
    2.35 +                spin_unlock(&d->evtchn_lock);
    2.36                  continue;
    2.37              }
    2.38 -            if ((*pd)->evtchn[port]->state == ECS_INTERDOMAIN) {
    2.39 -                rdom = (*pd)->evtchn[port]->u.interdomain.remote_dom;
    2.40 +            if (d->evtchn[port]->state == ECS_INTERDOMAIN) {
    2.41 +                rdom = d->evtchn[port]->u.interdomain.remote_dom;
    2.42                  rdomid = rdom->domain_id;
    2.43                  /* rdom now has remote domain */
    2.44                  ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, 
    2.45                                        (struct acm_ssid_domain *)(rdom->ssid));
    2.46                  ste_rssidref = ste_rssid->ste_ssidref;
    2.47 -            } else if ((*pd)->evtchn[port]->state == ECS_UNBOUND) {
    2.48 -                rdomid = (*pd)->evtchn[port]->u.unbound.remote_domid;
    2.49 +            } else if (d->evtchn[port]->state == ECS_UNBOUND) {
    2.50 +                rdomid = d->evtchn[port]->u.unbound.remote_domid;
    2.51                  if ((rdom = get_domain_by_id(rdomid)) == NULL) {
    2.52                      printk("%s: Error finding domain to id %x!\n", __func__, rdomid);
    2.53                      goto out;
    2.54 @@ -215,36 +216,36 @@ ste_init_state(struct acm_ste_policy_buf
    2.55                  ste_rssidref = ste_rssid->ste_ssidref;
    2.56                  put_domain(rdom);
    2.57              } else {
    2.58 -                spin_unlock(&(*pd)->evtchn_lock);
    2.59 +                spin_unlock(&d->evtchn_lock);
    2.60                  continue; /* port unused */
    2.61              }
    2.62 -            spin_unlock(&(*pd)->evtchn_lock);
    2.63 +            spin_unlock(&d->evtchn_lock);
    2.64  
    2.65              /* rdom now has remote domain */
    2.66              ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, 
    2.67                                    (struct acm_ssid_domain *)(rdom->ssid));
    2.68              ste_rssidref = ste_rssid->ste_ssidref;
    2.69              traceprintk("%s: eventch: domain %x (ssidref %x) --> domain %x (rssidref %x) used (port %x).\n", 
    2.70 -                        __func__, (*pd)->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port);  
    2.71 +                        __func__, d->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port);  
    2.72              /* check whether on subj->ssid, obj->ssid share a common type*/
    2.73              if (!have_common_type(ste_ssidref, ste_rssidref)) {
    2.74                  printkd("%s: Policy violation in event channel domain %x -> domain %x.\n",
    2.75 -                        __func__, (*pd)->domain_id, rdomid);
    2.76 +                        __func__, d->domain_id, rdomid);
    2.77                  goto out;
    2.78              }
    2.79          } 
    2.80          /* b) check for grant table conflicts on shared pages */
    2.81 -        spin_lock(&(*pd)->grant_table->lock);
    2.82 -        for ( i = 0; i < nr_grant_entries((*pd)->grant_table); i++ ) {
    2.83 +        spin_lock(&d->grant_table->lock);
    2.84 +        for ( i = 0; i < nr_grant_entries(d->grant_table); i++ ) {
    2.85  #define SPP (PAGE_SIZE / sizeof(struct grant_entry))
    2.86 -            sha_copy = (*pd)->grant_table->shared[i/SPP][i%SPP];
    2.87 +            sha_copy = d->grant_table->shared[i/SPP][i%SPP];
    2.88              if ( sha_copy.flags ) {
    2.89                  printkd("%s: grant dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) frame:(%lx)\n",
    2.90 -                        __func__, (*pd)->domain_id, i, sha_copy.flags, sha_copy.domid, 
    2.91 +                        __func__, d->domain_id, i, sha_copy.flags, sha_copy.domid, 
    2.92                          (unsigned long)sha_copy.frame);
    2.93                  rdomid = sha_copy.domid;
    2.94                  if ((rdom = get_domain_by_id(rdomid)) == NULL) {
    2.95 -                    spin_unlock(&(*pd)->grant_table->lock);
    2.96 +                    spin_unlock(&d->grant_table->lock);
    2.97                      printkd("%s: domain not found ERROR!\n", __func__);
    2.98                      goto out;
    2.99                  };
   2.100 @@ -254,18 +255,18 @@ ste_init_state(struct acm_ste_policy_buf
   2.101                  ste_rssidref = ste_rssid->ste_ssidref;
   2.102                  put_domain(rdom);
   2.103                  if (!have_common_type(ste_ssidref, ste_rssidref)) {
   2.104 -                    spin_unlock(&(*pd)->grant_table->lock);
   2.105 +                    spin_unlock(&d->grant_table->lock);
   2.106                      printkd("%s: Policy violation in grant table sharing domain %x -> domain %x.\n",
   2.107 -                            __func__, (*pd)->domain_id, rdomid);
   2.108 +                            __func__, d->domain_id, rdomid);
   2.109                      goto out;
   2.110                  }
   2.111              }
   2.112          }
   2.113 -        spin_unlock(&(*pd)->grant_table->lock);
   2.114 +        spin_unlock(&d->grant_table->lock);
   2.115      }
   2.116      violation = 0;
   2.117   out:
   2.118 -    read_unlock(&domlist_lock);
   2.119 +    rcu_read_unlock(&domlist_read_lock);
   2.120      return violation;
   2.121      /* returning "violation != 0" means that existing sharing between domains would not 
   2.122       * have been allowed if the new policy had been enforced before the sharing; for ste, 
   2.123 @@ -281,7 +282,7 @@ ste_set_policy(u8 *buf, u32 buf_size)
   2.124      struct acm_ste_policy_buffer *ste_buf = (struct acm_ste_policy_buffer *)buf;
   2.125      void *ssidrefsbuf;
   2.126      struct ste_ssid *ste_ssid;
   2.127 -    struct domain **pd;
   2.128 +    struct domain *d;
   2.129      int i;
   2.130  
   2.131      if (buf_size < sizeof(struct acm_ste_policy_buffer))
   2.132 @@ -326,15 +327,14 @@ ste_set_policy(u8 *buf, u32 buf_size)
   2.133      ste_bin_pol.ssidrefs = (domaintype_t *)ssidrefsbuf;
   2.134  
   2.135      /* clear all ste caches */
   2.136 -    read_lock(&domlist_lock);
   2.137 -    pd = &domain_list;
   2.138 -    for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
   2.139 +    rcu_read_lock(&domlist_read_lock);
   2.140 +    for_each_domain ( d ) {
   2.141          ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, 
   2.142 -                             (struct acm_ssid_domain *)(*pd)->ssid);
    2.143 +                             (struct acm_ssid_domain *)d->ssid);
   2.144          for (i=0; i<ACM_TE_CACHE_SIZE; i++)
   2.145              ste_ssid->ste_cache[i].valid = ACM_STE_free;
   2.146      }
   2.147 -    read_unlock(&domlist_lock);
   2.148 +    rcu_read_unlock(&domlist_read_lock);
   2.149      return ACM_OK;
   2.150  
   2.151   error_free:
   2.152 @@ -436,14 +436,14 @@ clean_id_from_cache(domid_t id)
   2.153  {
   2.154      struct ste_ssid *ste_ssid;
   2.155      int i;
   2.156 -    struct domain **pd;
   2.157 +    struct domain *d;
   2.158      struct acm_ssid_domain *ssid;
   2.159  
   2.160      printkd("deleting cache for dom %x.\n", id);
   2.161 -    read_lock(&domlist_lock); /* look through caches of all domains */
   2.162 -    pd = &domain_list;
   2.163 -    for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
   2.164 -        ssid = (struct acm_ssid_domain *)((*pd)->ssid);
   2.165 +    rcu_read_lock(&domlist_read_lock);
   2.166 +    /* look through caches of all domains */
   2.167 +    for_each_domain ( d ) {
   2.168 +        ssid = (struct acm_ssid_domain *)(d->ssid);
   2.169  
   2.170          if (ssid == NULL)
   2.171              continue; /* hanging domain structure, no ssid any more ... */
   2.172 @@ -459,7 +459,7 @@ clean_id_from_cache(domid_t id)
   2.173                  ste_ssid->ste_cache[i].valid = ACM_STE_free;
   2.174      }
   2.175   out:
   2.176 -    read_unlock(&domlist_lock);
   2.177 +    rcu_read_unlock(&domlist_read_lock);
   2.178  }
   2.179  
   2.180  /***************************
     3.1 --- a/xen/arch/ia64/linux-xen/mca.c	Wed Feb 21 14:44:09 2007 +0000
     3.2 +++ b/xen/arch/ia64/linux-xen/mca.c	Wed Feb 21 16:13:49 2007 +0000
     3.3 @@ -790,6 +790,7 @@ init_handler_platform (pal_min_state_are
     3.4  			/* this route is for dump routine */
     3.5  			unw_init_running(try_crashdump, pt);
     3.6  		} else {
     3.7 +			rcu_read_lock(&domlist_read_lock);
     3.8  			for_each_domain(d) {
     3.9  				for_each_vcpu(d, v) {
    3.10  					printk("Backtrace of current vcpu "
    3.11 @@ -798,6 +799,7 @@ init_handler_platform (pal_min_state_are
    3.12  					show_stack(v, NULL);
    3.13  				}
    3.14  			}
    3.15 +			rcu_read_unlock(&domlist_read_lock);
    3.16  		}
    3.17  	}
    3.18  	unw_init_running(freeze_cpu_osinit, NULL);
     4.1 --- a/xen/arch/ia64/linux-xen/perfmon.c	Wed Feb 21 14:44:09 2007 +0000
     4.2 +++ b/xen/arch/ia64/linux-xen/perfmon.c	Wed Feb 21 16:13:49 2007 +0000
     4.3 @@ -7225,7 +7225,6 @@ DEFINE_PER_CPU(pfm_context_t*, xenpfm_co
     4.4  /*
     4.5   * note: some functions mask interrupt with this lock held
     4.6   * so that this lock can't be locked from interrupt handler.
     4.7 - * lock order domlist_lock => xenpfm_context_lock
     4.8   */
     4.9  DEFINE_SPINLOCK(xenpfm_context_lock);
    4.10  
    4.11 @@ -7507,10 +7506,8 @@ xenpfm_context_unload(void)
    4.12  		arg.error[cpu] = 0;
    4.13  
    4.14  	BUG_ON(in_irq());
    4.15 -	read_lock(&domlist_lock);
    4.16  	spin_lock(&xenpfm_context_lock);
    4.17  	error = xenpfm_start_stop_locked(0);
    4.18 -	read_unlock(&domlist_lock);
    4.19  	if (error) {
    4.20  		spin_unlock(&xenpfm_context_lock);
    4.21  		return error;
    4.22 @@ -7688,10 +7685,11 @@ xenpfm_start_stop_locked(int is_start)
    4.23  	while (atomic_read(&arg.started) != cpus)
    4.24  		cpu_relax();
    4.25  
    4.26 -	for_each_domain(d) {
    4.27 +	rcu_read_lock(&domlist_read_lock);
    4.28 +	for_each_domain(d)
    4.29  		for_each_vcpu(d, v)
    4.30  			xenpfm_start_stop_vcpu(v, is_start);
    4.31 -	}
    4.32 +	rcu_read_unlock(&domlist_read_lock);
    4.33  
    4.34  	arg.error[smp_processor_id()] = __xenpfm_start_stop(is_start);
    4.35  	atomic_inc(&arg.finished);
    4.36 @@ -7716,11 +7714,9 @@ xenpfm_start_stop(int is_start)
    4.37  	int error;
    4.38  	
    4.39  	BUG_ON(in_irq());
    4.40 -	read_lock(&domlist_lock);
    4.41  	spin_lock(&xenpfm_context_lock);
    4.42 -	error =xenpfm_start_stop_locked(is_start);
    4.43 +	error = xenpfm_start_stop_locked(is_start);
    4.44  	spin_unlock(&xenpfm_context_lock);
    4.45 -	read_unlock(&domlist_lock);
    4.46  
    4.47  	return error;
    4.48  }
     5.1 --- a/xen/arch/powerpc/audit.c	Wed Feb 21 14:44:09 2007 +0000
     5.2 +++ b/xen/arch/powerpc/audit.c	Wed Feb 21 16:13:49 2007 +0000
     5.3 @@ -34,8 +34,10 @@ void audit_domain(struct domain *d)
     5.4  void audit_domains(void)
     5.5  {
     5.6      struct domain *d;
     5.7 +    rcu_read_lock(&domlist_read_lock);
     5.8      for_each_domain ( d )
     5.9          audit_domain(d);
    5.10 +    rcu_read_unlock(&domlist_read_lock);
    5.11  }
    5.12  
    5.13  void audit_domains_key(unsigned char key)
     6.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Wed Feb 21 14:44:09 2007 +0000
     6.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Wed Feb 21 16:13:49 2007 +0000
     6.3 @@ -330,6 +330,9 @@ static void vmcb_dump(unsigned char ch)
     6.4      struct vcpu *v;
     6.5      
     6.6      printk("*********** VMCB Areas **************\n");
     6.7 +
     6.8 +    rcu_read_lock(&domlist_read_lock);
     6.9 +
    6.10      for_each_domain ( d )
    6.11      {
    6.12          if ( !is_hvm_domain(d) )
    6.13 @@ -342,6 +345,8 @@ static void vmcb_dump(unsigned char ch)
    6.14          }
    6.15      }
    6.16  
    6.17 +    rcu_read_unlock(&domlist_read_lock);
    6.18 +
    6.19      printk("**************************************\n");
    6.20  }
    6.21  
     7.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Wed Feb 21 14:44:09 2007 +0000
     7.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Wed Feb 21 16:13:49 2007 +0000
     7.3 @@ -567,6 +567,9 @@ static void vmcs_dump(unsigned char ch)
     7.4      struct vcpu *v;
     7.5      
     7.6      printk("*********** VMCS Areas **************\n");
     7.7 +
     7.8 +    rcu_read_lock(&domlist_read_lock);
     7.9 +
    7.10      for_each_domain ( d )
    7.11      {
    7.12          if ( !is_hvm_domain(d) )
    7.13 @@ -581,6 +584,8 @@ static void vmcs_dump(unsigned char ch)
    7.14          }
    7.15      }
    7.16  
    7.17 +    rcu_read_unlock(&domlist_read_lock);
    7.18 +
    7.19      printk("**************************************\n");
    7.20  }
    7.21  
     8.1 --- a/xen/arch/x86/mm/shadow/common.c	Wed Feb 21 14:44:09 2007 +0000
     8.2 +++ b/xen/arch/x86/mm/shadow/common.c	Wed Feb 21 16:13:49 2007 +0000
     8.3 @@ -890,13 +890,17 @@ static void shadow_blow_all_tables(unsig
     8.4  {
     8.5      struct domain *d;
     8.6      printk("'%c' pressed -> blowing all shadow tables\n", c);
     8.7 +    rcu_read_lock(&domlist_read_lock);
     8.8      for_each_domain(d)
     8.9 +    {
    8.10          if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL )
    8.11          {
    8.12              shadow_lock(d);
    8.13              shadow_blow_tables(d);
    8.14              shadow_unlock(d);
    8.15          }
    8.16 +    }
    8.17 +    rcu_read_unlock(&domlist_read_lock);
    8.18  }
    8.19  
    8.20  /* Register this function in the Xen console keypress table */
     9.1 --- a/xen/arch/x86/time.c	Wed Feb 21 14:44:09 2007 +0000
     9.2 +++ b/xen/arch/x86/time.c	Wed Feb 21 16:13:49 2007 +0000
     9.3 @@ -720,10 +720,10 @@ void do_settime(unsigned long secs, unsi
     9.4      wc_nsec = _wc_nsec = (u32)y;
     9.5      spin_unlock(&wc_lock);
     9.6  
     9.7 -    read_lock(&domlist_lock);
     9.8 +    rcu_read_lock(&domlist_read_lock);
     9.9      for_each_domain ( d )
    9.10          update_domain_wallclock_time(d);
    9.11 -    read_unlock(&domlist_lock);
    9.12 +    rcu_read_unlock(&domlist_read_lock);
    9.13  }
    9.14  
    9.15  static void local_time_calibration(void *unused)
    10.1 --- a/xen/common/domain.c	Wed Feb 21 14:44:09 2007 +0000
    10.2 +++ b/xen/common/domain.c	Wed Feb 21 16:13:49 2007 +0000
    10.3 @@ -24,13 +24,18 @@
    10.4  #include <xen/shutdown.h>
    10.5  #include <xen/percpu.h>
    10.6  #include <xen/multicall.h>
    10.7 +#include <xen/rcupdate.h>
    10.8  #include <asm/debugger.h>
    10.9  #include <public/sched.h>
   10.10  #include <public/vcpu.h>
   10.11  
   10.12 -/* Both these structures are protected by the domlist_lock. */
   10.13 -DEFINE_RWLOCK(domlist_lock);
   10.14 -struct domain *domain_hash[DOMAIN_HASH_SIZE];
   10.15 +/* Protect updates/reads (resp.) of domain_list and domain_hash. */
   10.16 +DEFINE_SPINLOCK(domlist_update_lock);
   10.17 +DEFINE_RCU_READ_LOCK(domlist_read_lock);
   10.18 +
   10.19 +#define DOMAIN_HASH_SIZE 256
   10.20 +#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
   10.21 +static struct domain *domain_hash[DOMAIN_HASH_SIZE];
   10.22  struct domain *domain_list;
   10.23  
   10.24  struct domain *dom0;
   10.25 @@ -174,16 +179,20 @@ struct domain *domain_create(domid_t dom
   10.26  
   10.27      if ( !is_idle_domain(d) )
   10.28      {
   10.29 -        write_lock(&domlist_lock);
   10.30 +        spin_lock(&domlist_update_lock);
   10.31          pd = &domain_list; /* NB. domain_list maintained in order of domid. */
   10.32          for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
   10.33              if ( (*pd)->domain_id > d->domain_id )
   10.34                  break;
   10.35          d->next_in_list = *pd;
   10.36 -        *pd = d;
   10.37          d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
   10.38 -        domain_hash[DOMAIN_HASH(domid)] = d;
   10.39 -        write_unlock(&domlist_lock);
    10.40 +        /* The two RCU assignments are not atomic, so readers may
    10.41 +         * see an inconsistent domain_list and domain_hash. That is
    10.42 +         * OK as long as each RCU read-side critical section uses
    10.43 +         * only one of them. */
   10.44 +        rcu_assign_pointer(*pd, d);
   10.45 +        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
   10.46 +        spin_unlock(&domlist_update_lock);
   10.47      }
   10.48  
   10.49      return d;
   10.50 @@ -207,8 +216,8 @@ struct domain *get_domain_by_id(domid_t 
   10.51  {
   10.52      struct domain *d;
   10.53  
   10.54 -    read_lock(&domlist_lock);
   10.55 -    d = domain_hash[DOMAIN_HASH(dom)];
   10.56 +    rcu_read_lock(&domlist_read_lock);
   10.57 +    d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
   10.58      while ( d != NULL )
   10.59      {
   10.60          if ( d->domain_id == dom )
   10.61 @@ -217,9 +226,9 @@ struct domain *get_domain_by_id(domid_t 
   10.62                  d = NULL;
   10.63              break;
   10.64          }
   10.65 -        d = d->next_in_hashbucket;
   10.66 +        d = rcu_dereference(d->next_in_hashbucket);
   10.67      }
   10.68 -    read_unlock(&domlist_lock);
   10.69 +    rcu_read_unlock(&domlist_read_lock);
   10.70  
   10.71      return d;
   10.72  }
   10.73 @@ -314,6 +323,23 @@ void domain_pause_for_debugger(void)
   10.74      send_guest_global_virq(dom0, VIRQ_DEBUGGER);
   10.75  }
   10.76  
    10.77 +/* Complete the domain destroy once no RCU readers can still be
    10.78 +   holding old references to the domain. */
   10.79 +static void complete_domain_destroy(struct rcu_head *head)
   10.80 +{
   10.81 +    struct domain *d = container_of(head, struct domain, rcu);
   10.82 +
   10.83 +    rangeset_domain_destroy(d);
   10.84 +
   10.85 +    evtchn_destroy(d);
   10.86 +    grant_table_destroy(d);
   10.87 +
   10.88 +    arch_domain_destroy(d);
   10.89 +
   10.90 +    free_domain(d);
   10.91 +
   10.92 +    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
   10.93 +}
   10.94  
   10.95  /* Release resources belonging to task @p. */
   10.96  void domain_destroy(struct domain *d)
   10.97 @@ -331,27 +357,19 @@ void domain_destroy(struct domain *d)
   10.98          return;
   10.99  
  10.100      /* Delete from task list and task hashtable. */
  10.101 -    write_lock(&domlist_lock);
  10.102 +    spin_lock(&domlist_update_lock);
  10.103      pd = &domain_list;
  10.104      while ( *pd != d ) 
  10.105          pd = &(*pd)->next_in_list;
  10.106 -    *pd = d->next_in_list;
  10.107 +    rcu_assign_pointer(*pd, d->next_in_list);
  10.108      pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
  10.109      while ( *pd != d ) 
  10.110          pd = &(*pd)->next_in_hashbucket;
  10.111 -    *pd = d->next_in_hashbucket;
  10.112 -    write_unlock(&domlist_lock);
  10.113 -
  10.114 -    rangeset_domain_destroy(d);
  10.115 +    rcu_assign_pointer(*pd, d->next_in_hashbucket);
  10.116 +    spin_unlock(&domlist_update_lock);
  10.117  
  10.118 -    evtchn_destroy(d);
  10.119 -    grant_table_destroy(d);
  10.120 -
  10.121 -    arch_domain_destroy(d);
  10.122 -
  10.123 -    free_domain(d);
  10.124 -
  10.125 -    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
   10.126 +    /* Schedule asynchronous completion of the domain destroy via RCU. */
  10.127 +    call_rcu(&d->rcu, complete_domain_destroy);
  10.128  }
  10.129  
  10.130  static void vcpu_pause_setup(struct vcpu *v)
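
The update side above follows the usual RCU list discipline: a new domain
is fully initialized before rcu_assign_pointer() publishes it (the macro
supplies the required write barrier), and a dying domain is unlinked under
domlist_update_lock but freed only via call_rcu(), after every reader that
might still see it has left its critical section. A self-contained sketch
of the same discipline on a hypothetical singly-linked list (struct item,
item_list and the helpers are illustrative, not part of this patch):

    struct item {
        struct item *next;
        struct rcu_head rcu;
    };

    static struct item *item_list;
    static DEFINE_SPINLOCK(item_update_lock);

    static void item_insert(struct item *it)
    {
        spin_lock(&item_update_lock);
        it->next = item_list;               /* initialize fully first ... */
        rcu_assign_pointer(item_list, it);  /* ... then publish (barriered) */
        spin_unlock(&item_update_lock);
    }

    static void item_reclaim(struct rcu_head *head)
    {
        xfree(container_of(head, struct item, rcu));
    }

    static void item_remove(struct item *it)
    {
        struct item **pi;

        spin_lock(&item_update_lock);
        for ( pi = &item_list; *pi != it; pi = &(*pi)->next )
            ;
        rcu_assign_pointer(*pi, it->next);  /* readers may still hold it */
        spin_unlock(&item_update_lock);

        call_rcu(&it->rcu, item_reclaim);   /* free after a grace period */
    }
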
    11.1 --- a/xen/common/domctl.c	Wed Feb 21 14:44:09 2007 +0000
    11.2 +++ b/xen/common/domctl.c	Wed Feb 21 16:13:49 2007 +0000
    11.3 @@ -17,6 +17,7 @@
    11.4  #include <xen/trace.h>
    11.5  #include <xen/console.h>
    11.6  #include <xen/iocap.h>
    11.7 +#include <xen/rcupdate.h>
    11.8  #include <xen/guest_access.h>
    11.9  #include <xen/bitmap.h>
   11.10  #include <asm/current.h>
   11.11 @@ -140,12 +141,12 @@ static unsigned int default_vcpu0_locati
   11.12      cpumask_t      cpu_exclude_map;
   11.13  
   11.14      /* Do an initial CPU placement. Pick the least-populated CPU. */
   11.15 -    read_lock(&domlist_lock);
   11.16 +    rcu_read_lock(&domlist_read_lock);
   11.17      for_each_domain ( d )
   11.18          for_each_vcpu ( d, v )
   11.19          if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
   11.20              cnt[v->processor]++;
   11.21 -    read_unlock(&domlist_lock);
   11.22 +    rcu_read_unlock(&domlist_read_lock);
   11.23  
   11.24      /*
   11.25       * If we're on a HT system, we only auto-allocate to a non-primary HT. We 
   11.26 @@ -480,7 +481,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   11.27          if ( dom == DOMID_SELF )
   11.28              dom = current->domain->domain_id;
   11.29  
   11.30 -        read_lock(&domlist_lock);
   11.31 +        rcu_read_lock(&domlist_read_lock);
   11.32  
   11.33          for_each_domain ( d )
   11.34          {
   11.35 @@ -490,12 +491,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   11.36  
   11.37          if ( (d == NULL) || !get_domain(d) )
   11.38          {
   11.39 -            read_unlock(&domlist_lock);
   11.40 +            rcu_read_unlock(&domlist_read_lock);
   11.41              ret = -ESRCH;
   11.42              break;
   11.43          }
   11.44  
   11.45 -        read_unlock(&domlist_lock);
   11.46 +        rcu_read_unlock(&domlist_read_lock);
   11.47  
   11.48          getdomaininfo(d, &op->u.getdomaininfo);
   11.49  
    12.1 --- a/xen/common/keyhandler.c	Wed Feb 21 14:44:09 2007 +0000
    12.2 +++ b/xen/common/keyhandler.c	Wed Feb 21 16:13:49 2007 +0000
    12.3 @@ -145,7 +145,7 @@ static void dump_domains(unsigned char k
    12.4      printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key,
    12.5             (u32)(now>>32), (u32)now);
    12.6  
    12.7 -    read_lock(&domlist_lock);
    12.8 +    rcu_read_lock(&domlist_read_lock);
    12.9  
   12.10      for_each_domain ( d )
   12.11      {
   12.12 @@ -196,7 +196,7 @@ static void dump_domains(unsigned char k
   12.13          }
   12.14      }
   12.15  
   12.16 -    read_unlock(&domlist_lock);
   12.17 +    rcu_read_unlock(&domlist_read_lock);
   12.18  }
   12.19  
   12.20  static cpumask_t read_clocks_cpumask = CPU_MASK_NONE;
    13.1 --- a/xen/common/sched_sedf.c	Wed Feb 21 14:44:09 2007 +0000
    13.2 +++ b/xen/common/sched_sedf.c	Wed Feb 21 16:13:49 2007 +0000
    13.3 @@ -1277,6 +1277,7 @@ static void sedf_dump_cpu_state(int i)
    13.4      loop = 0;
    13.5      printk("\nnot on Q\n");
    13.6  
    13.7 +    rcu_read_lock(&domlist_read_lock);
    13.8      for_each_domain ( d )
    13.9      {
   13.10          for_each_vcpu(d, ed)
   13.11 @@ -1288,6 +1289,7 @@ static void sedf_dump_cpu_state(int i)
   13.12              }
   13.13          }
   13.14      }
   13.15 +    rcu_read_unlock(&domlist_read_lock);
   13.16  }
   13.17  
   13.18  
   13.19 @@ -1298,8 +1300,9 @@ static int sedf_adjust_weights(struct xe
   13.20      struct domain      *d;
   13.21      int                 sumw[NR_CPUS] = { 0 };
   13.22      s_time_t            sumt[NR_CPUS] = { 0 };
   13.23 - 
   13.24 +
   13.25      /* Sum across all weights. */
   13.26 +    rcu_read_lock(&domlist_read_lock);
   13.27      for_each_domain( d )
   13.28      {
   13.29          for_each_vcpu( d, p )
   13.30 @@ -1323,8 +1326,10 @@ static int sedf_adjust_weights(struct xe
   13.31              }
   13.32          }
   13.33      }
   13.34 +    rcu_read_unlock(&domlist_read_lock);
   13.35  
   13.36      /* Adjust all slices (and periods) to the new weight. */
   13.37 +    rcu_read_lock(&domlist_read_lock);
   13.38      for_each_domain( d )
   13.39      {
   13.40          for_each_vcpu ( d, p )
   13.41 @@ -1341,6 +1346,7 @@ static int sedf_adjust_weights(struct xe
   13.42              }
   13.43          }
   13.44      }
   13.45 +    rcu_read_unlock(&domlist_read_lock);
   13.46  
   13.47      return 0;
   13.48  }
    14.1 --- a/xen/common/sysctl.c	Wed Feb 21 14:44:09 2007 +0000
    14.2 +++ b/xen/common/sysctl.c	Wed Feb 21 16:13:49 2007 +0000
    14.3 @@ -78,7 +78,7 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
    14.4          struct xen_domctl_getdomaininfo info;
    14.5          u32 num_domains = 0;
    14.6  
    14.7 -        read_lock(&domlist_lock);
    14.8 +        rcu_read_lock(&domlist_read_lock);
    14.9  
   14.10          for_each_domain ( d )
   14.11          {
   14.12 @@ -106,7 +106,7 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
   14.13              num_domains++;
   14.14          }
   14.15          
   14.16 -        read_unlock(&domlist_lock);
   14.17 +        rcu_read_unlock(&domlist_read_lock);
   14.18          
   14.19          if ( ret != 0 )
   14.20              break;
    15.1 --- a/xen/include/xen/rcupdate.h	Wed Feb 21 14:44:09 2007 +0000
    15.2 +++ b/xen/include/xen/rcupdate.h	Wed Feb 21 16:13:49 2007 +0000
    15.3 @@ -111,6 +111,59 @@ extern struct rcu_ctrlblk rcu_ctrlblk;
    15.4  int rcu_pending(int cpu);
    15.5  int rcu_needs_cpu(int cpu);
    15.6  
    15.7 +/*
    15.8 + * Dummy lock type for passing to rcu_read_{lock,unlock}. Currently exists
    15.9 + * only to document the reason for rcu_read_lock() critical sections.
   15.10 + */
   15.11 +struct _rcu_read_lock {};
   15.12 +typedef struct _rcu_read_lock rcu_read_lock_t;
   15.13 +#define DEFINE_RCU_READ_LOCK(x) rcu_read_lock_t x
   15.14 +
   15.15 +/**
   15.16 + * rcu_read_lock - mark the beginning of an RCU read-side critical section.
   15.17 + *
   15.18 + * When call_rcu() is invoked
   15.19 + * on one CPU while other CPUs are within RCU read-side critical
   15.20 + * sections, invocation of the corresponding RCU callback is deferred
    15.21 + * until after all the other CPUs exit their critical sections.
   15.22 + *
   15.23 + * Note, however, that RCU callbacks are permitted to run concurrently
   15.24 + * with RCU read-side critical sections.  One way that this can happen
   15.25 + * is via the following sequence of events: (1) CPU 0 enters an RCU
   15.26 + * read-side critical section, (2) CPU 1 invokes call_rcu() to register
   15.27 + * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
    15.28 + * (4) CPU 2 enters an RCU read-side critical section, (5) the RCU
   15.29 + * callback is invoked.  This is legal, because the RCU read-side critical
   15.30 + * section that was running concurrently with the call_rcu() (and which
   15.31 + * therefore might be referencing something that the corresponding RCU
   15.32 + * callback would free up) has completed before the corresponding
   15.33 + * RCU callback is invoked.
   15.34 + *
   15.35 + * RCU read-side critical sections may be nested.  Any deferred actions
   15.36 + * will be deferred until the outermost RCU read-side critical section
   15.37 + * completes.
   15.38 + *
   15.39 + * It is illegal to block while in an RCU read-side critical section.
   15.40 + */
   15.41 +#define rcu_read_lock(x)       do { } while (0)
   15.42 +
   15.43 +/**
   15.44 + * rcu_read_unlock - marks the end of an RCU read-side critical section.
   15.45 + *
   15.46 + * See rcu_read_lock() for more information.
   15.47 + */
   15.48 +#define rcu_read_unlock(x)     do { } while (0)
   15.49 +
   15.50 +/*
   15.51 + * So where is rcu_write_lock()?  It does not exist, as there is no
   15.52 + * way for writers to lock out RCU readers.  This is a feature, not
   15.53 + * a bug -- this property is what provides RCU's performance benefits.
   15.54 + * Of course, writers must coordinate with each other.  The normal
   15.55 + * spinlock primitives work well for this, but any other technique may be
   15.56 + * used as well.  RCU does not care how the writers keep out of each
   15.57 + * others' way, as long as they do so.
   15.58 + */
   15.59 +
   15.60  /**
   15.61   * rcu_dereference - fetch an RCU-protected pointer in an
   15.62   * RCU read-side critical section.  This pointer may later
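
The rcu_read_lock_t dummy type has no runtime cost (both macros expand to
nothing in this non-preemptive hypervisor), but it forces every read-side
critical section to name the structure it protects, keeping the locking
discipline auditable. A sketch of declaring and using a read lock for a
new RCU-protected list, reusing the hypothetical struct item from the
sketch above (mylist and walk_mylist() are likewise illustrative):

    static struct item *mylist;             /* published via rcu_assign_pointer() */
    DEFINE_RCU_READ_LOCK(mylist_read_lock);

    static void walk_mylist(void (*fn)(struct item *))
    {
        struct item *it;

        rcu_read_lock(&mylist_read_lock);   /* compiles away; documents intent */
        for ( it = rcu_dereference(mylist); it != NULL;
              it = rcu_dereference(it->next) )
            fn(it);                         /* must not sleep */
        rcu_read_unlock(&mylist_read_lock);
    }
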
    16.1 --- a/xen/include/xen/sched.h	Wed Feb 21 14:44:09 2007 +0000
    16.2 +++ b/xen/include/xen/sched.h	Wed Feb 21 16:13:49 2007 +0000
    16.3 @@ -16,6 +16,7 @@
    16.4  #include <xen/rangeset.h>
    16.5  #include <asm/domain.h>
    16.6  #include <xen/xenoprof.h>
    16.7 +#include <xen/rcupdate.h>
    16.8  #include <xen/irq.h>
    16.9  
   16.10  #ifdef CONFIG_COMPAT
   16.11 @@ -24,7 +25,6 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_in
   16.12  #endif
   16.13  
   16.14  extern unsigned long volatile jiffies;
   16.15 -extern rwlock_t domlist_lock;
   16.16  
   16.17  /* A global pointer to the initial domain (DOM0). */
   16.18  extern struct domain *dom0;
   16.19 @@ -193,6 +193,8 @@ struct domain
   16.20      /* OProfile support. */
   16.21      struct xenoprof *xenoprof;
   16.22      int32_t time_offset_seconds;
   16.23 +
   16.24 +    struct rcu_head rcu;
   16.25  };
   16.26  
   16.27  struct domain_setup_info
   16.28 @@ -356,16 +358,17 @@ unsigned long hypercall_create_continuat
   16.29          local_events_need_delivery()            \
   16.30      ))
   16.31  
   16.32 -/* This domain_hash and domain_list are protected by the domlist_lock. */
   16.33 -#define DOMAIN_HASH_SIZE 256
   16.34 -#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
   16.35 -extern struct domain *domain_hash[DOMAIN_HASH_SIZE];
   16.36 +/* Protect updates/reads (resp.) of domain_list and domain_hash. */
   16.37 +extern spinlock_t domlist_update_lock;
   16.38 +extern rcu_read_lock_t domlist_read_lock;
   16.39 +
   16.40  extern struct domain *domain_list;
   16.41  
   16.42 +/* Caller must hold the domlist_read_lock or domlist_update_lock. */
   16.43  #define for_each_domain(_d)                     \
   16.44 - for ( (_d) = domain_list;                      \
   16.45 + for ( (_d) = rcu_dereference(domain_list);     \
   16.46         (_d) != NULL;                            \
   16.47 -       (_d) = (_d)->next_in_list )
    16.48 +       (_d) = rcu_dereference((_d)->next_in_list) )
   16.49  
   16.50  #define for_each_vcpu(_d,_v)                    \
    16.51   for ( (_v) = (_d)->vcpu[0];                    \
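
A pointer obtained inside a read-side section is only guaranteed valid
until rcu_read_unlock(); to use a domain beyond that point, the caller
must convert it to a refcounted reference with get_domain(), exactly as
the XEN_DOMCTL_getdomaininfo handler above does. A sketch of that pattern
(find_domain_ge() is a hypothetical helper):

    static struct domain *find_domain_ge(domid_t min_id)
    {
        struct domain *d;

        rcu_read_lock(&domlist_read_lock);

        for_each_domain ( d )
            if ( d->domain_id >= min_id )
                break;

        if ( (d != NULL) && !get_domain(d) )  /* may fail for a dying domain */
            d = NULL;

        rcu_read_unlock(&domlist_read_lock);

        return d;  /* caller must put_domain(d) when finished */
    }
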