ia64/xen-unstable

changeset 19696:4294a04b24bc

tmem: shared ephemeral (SE) pool (clustering) fixes

Tmem can share clean page cache pages for Linux domains
in a virtual cluster (currently only the ocfs2 filesystem
has a patch on the Linux side). So when one domain
"puts" (evicts) a page, any domain in the cluster can
"get" it, thus saving disk reads. This functionality
is already present; these are only bug fixes.

- fix bugs when an SE pool is destroyed
- fixes in parsing tool for xm tmem-list output for SE pools
- fix incorrect locking in one case when destroying an SE pool
- clearer verbosity for transfer when an SE pool is destroyed
- minor cleanup: merge routines that are mostly duplicate

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jun 01 18:37:27 2009 +0100 (2009-06-01)
parents 027f19e97e28
children 42fe00c6f8b4
files tools/misc/xen-tmem-list-parse.c xen/common/tmem.c
line diff
     1.1 --- a/tools/misc/xen-tmem-list-parse.c	Mon Jun 01 15:52:19 2009 +0100
     1.2 +++ b/tools/misc/xen-tmem-list-parse.c	Mon Jun 01 18:37:27 2009 +0100
     1.3 @@ -29,6 +29,20 @@ unsigned long long parse(char *s,char *m
     1.4      return ret;
     1.5  }
     1.6  
     1.7 +unsigned long long parse_hex(char *s,char *match)
     1.8 +{
     1.9 +    char *s1 = strstr(s,match);
    1.10 +    unsigned long long ret;
    1.11 +
    1.12 +    if ( s1 == NULL )
    1.13 +        return 0LL;
    1.14 +    s1 += 2;
    1.15 +    if ( *s1++ != ':' )
    1.16 +        return 0LL;
    1.17 +    sscanf(s1,"%llx",&ret);
    1.18 +    return ret;
    1.19 +}
    1.20 +
    1.21  unsigned long long parse2(char *s,char *match1, char *match2)
    1.22  {
    1.23      char match[3];
    1.24 @@ -64,7 +78,7 @@ void parse_sharers(char *s, char *match,
    1.25          s1 += 2;
    1.26          if (*s1++ != ':')
    1.27              return;
    1.28 -        while (*s1 <= '0' && *s1 <= '9')
    1.29 +        while (*s1 >= '0' && *s1 <= '9')
    1.30              *b++ = *s1++;
    1.31          *b++ = ',';
    1.32          s1 = strstr(s1,match);
    1.33 @@ -196,6 +210,8 @@ void parse_pool(char *s)
    1.34      unsigned long long flush_objs = parse(s,"ot");
    1.35  
    1.36      parse_string(s,"PT",pool_type,2);
    1.37 +    if (pool_type[1] == 'S')
    1.38 +        return; /* no need to repeat print data for shared pools */
    1.39      printf("domid%lu,id%lu[%s]:pgp=%llu(max=%llu) obj=%llu(%llu) "
    1.40             "objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) "
    1.41             "gets=%llu/%llu(%llu%%) "
    1.42 @@ -216,8 +232,8 @@ void parse_shared_pool(char *s)
    1.43      char pool_type[3];
    1.44      char buf[BUFSIZE];
    1.45      unsigned long pool_id = parse(s,"PI");
    1.46 -    unsigned long long uid0 = parse(s,"U0");
    1.47 -    unsigned long long uid1 = parse(s,"U1");
    1.48 +    unsigned long long uid0 = parse_hex(s,"U0");
    1.49 +    unsigned long long uid1 = parse_hex(s,"U1");
    1.50      unsigned long long pgp_count = parse(s,"Pc");
    1.51      unsigned long long max_pgp_count = parse(s,"Pm");
    1.52      unsigned long long obj_count = parse(s,"Oc");
    1.53 @@ -238,7 +254,7 @@ void parse_shared_pool(char *s)
    1.54  
    1.55      parse_string(s,"PT",pool_type,2);
    1.56      parse_sharers(s,"SC",buf,BUFSIZE);
    1.57 -    printf("poolid=%lu[%s] uuid=%llu.%llu, shared-by:%s: "
    1.58 +    printf("poolid=%lu[%s] uuid=%llx.%llx, shared-by:%s: "
    1.59             "pgp=%llu(max=%llu) obj=%llu(%llu) "
    1.60             "objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) "
    1.61             "gets=%llu/%llu(%llu%%) "
     2.1 --- a/xen/common/tmem.c	Mon Jun 01 15:52:19 2009 +0100
     2.2 +++ b/xen/common/tmem.c	Mon Jun 01 18:37:27 2009 +0100
     2.3 @@ -581,21 +581,6 @@ static NOINLINE void obj_free(obj_t *obj
     2.4      tmem_free(obj,sizeof(obj_t),pool);
     2.5  }
     2.6  
     2.7 -static NOINLINE void obj_rb_destroy_node(struct rb_node *node)
     2.8 -{
     2.9 -    obj_t * obj;
    2.10 -
    2.11 -    if ( node == NULL )
    2.12 -        return;
    2.13 -    obj_rb_destroy_node(node->rb_left);
    2.14 -    obj_rb_destroy_node(node->rb_right);
    2.15 -    obj = container_of(node, obj_t, rb_tree_node);
    2.16 -    tmem_spin_lock(&obj->obj_spinlock);
    2.17 -    ASSERT(obj->no_evict == 0);
    2.18 -    radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free);
    2.19 -    obj_free(obj,1);
    2.20 -}
    2.21 -
    2.22  static NOINLINE int obj_rb_insert(struct rb_root *root, obj_t *obj)
    2.23  {
    2.24      struct rb_node **new, *parent = NULL;
    2.25 @@ -650,26 +635,15 @@ static NOINLINE obj_t * obj_new(pool_t *
    2.26  }
    2.27  
    2.28  /* free an object after destroying any pgps in it */
    2.29 -static NOINLINE void obj_destroy(obj_t *obj)
    2.30 +static NOINLINE void obj_destroy(obj_t *obj, int no_rebalance)
    2.31  {
    2.32      ASSERT_WRITELOCK(&obj->pool->pool_rwlock);
    2.33      radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free);
    2.34 -    obj_free(obj,0);
    2.35 +    obj_free(obj,no_rebalance);
    2.36  }
    2.37  
    2.38 -/* destroy all objects in a pool */
    2.39 -static NOINLINE void obj_rb_destroy_all(pool_t *pool)
    2.40 -{
    2.41 -    int i;
    2.42 -
    2.43 -    tmem_write_lock(&pool->pool_rwlock);
    2.44 -    for (i = 0; i < OBJ_HASH_BUCKETS; i++)
    2.45 -        obj_rb_destroy_node(pool->obj_rb_root[i].rb_node);
    2.46 -    tmem_write_unlock(&pool->pool_rwlock);
    2.47 -}
    2.48 -
    2.49 -/* destroys all objects in a pool that have last_client set to cli_id */
    2.50 -static void obj_free_selective(pool_t *pool, cli_id_t cli_id)
    2.51 +/* destroys all objs in a pool, or only if obj->last_client matches cli_id */
    2.52 +static void pool_destroy_objs(pool_t *pool, bool_t selective, cli_id_t cli_id)
    2.53  {
    2.54      struct rb_node *node;
    2.55      obj_t *obj;
    2.56 @@ -684,8 +658,11 @@ static void obj_free_selective(pool_t *p
    2.57              obj = container_of(node, obj_t, rb_tree_node);
    2.58              tmem_spin_lock(&obj->obj_spinlock);
    2.59              node = rb_next(node);
    2.60 -            if ( obj->last_client == cli_id )
    2.61 -                obj_destroy(obj);
    2.62 +            ASSERT(obj->no_evict == 0);
    2.63 +            if ( !selective )
    2.64 +                obj_destroy(obj,1);
    2.65 +            else if ( obj->last_client == cli_id )
    2.66 +                obj_destroy(obj,0);
    2.67              else
    2.68                  tmem_spin_unlock(&obj->obj_spinlock);
    2.69          }
    2.70 @@ -740,8 +717,9 @@ static int shared_pool_join(pool_t *pool
    2.71          return -1;
    2.72      sl->client = new_client;
    2.73      list_add_tail(&sl->share_list, &pool->share_list);
    2.74 -    printk("adding new %s %d to shared pool owned by %s %d\n",
    2.75 -        client_str, new_client->cli_id, client_str, pool->client->cli_id);
    2.76 +    if ( new_client->cli_id != pool->client->cli_id )
    2.77 +        printk("adding new %s %d to shared pool owned by %s %d\n",
    2.78 +            client_str, new_client->cli_id, client_str, pool->client->cli_id);
    2.79      return ++pool->shared_count;
    2.80  }
    2.81  
    2.82 @@ -766,6 +744,10 @@ static NOINLINE void shared_pool_reassig
    2.83          if (new_client->pools[poolid] == pool)
    2.84              break;
    2.85      ASSERT(poolid != MAX_POOLS_PER_DOMAIN);
    2.86 +    new_client->eph_count += _atomic_read(pool->pgp_count);
    2.87 +    old_client->eph_count -= _atomic_read(pool->pgp_count);
    2.88 +    list_splice_init(&old_client->ephemeral_page_list,
    2.89 +                     &new_client->ephemeral_page_list);
    2.90      printk("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n",
    2.91          cli_id_str, old_client->cli_id, cli_id_str, new_client->cli_id, poolid);
    2.92      pool->pool_id = poolid;
    2.93 @@ -781,7 +763,8 @@ static NOINLINE int shared_pool_quit(poo
    2.94      ASSERT(is_shared(pool));
    2.95      ASSERT(pool->client != NULL);
    2.96      
    2.97 -    obj_free_selective(pool,cli_id);
    2.98 +    ASSERT_WRITELOCK(&tmem_rwlock);
    2.99 +    pool_destroy_objs(pool,1,cli_id);
   2.100      list_for_each_entry(sl,&pool->share_list, share_list)
   2.101      {
   2.102          if (sl->client->cli_id != cli_id)
   2.103 @@ -812,15 +795,15 @@ static void pool_flush(pool_t *pool, cli
   2.104      ASSERT(pool != NULL);
   2.105      if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) )
   2.106      {
   2.107 -        printk("tmem: unshared shared pool %d from %s=%d\n",
   2.108 -           pool->pool_id, cli_id_str,pool->client->cli_id);
   2.109 +        printk("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n",
   2.110 +           cli_id_str, cli_id, pool->pool_id, cli_id_str,pool->client->cli_id);
   2.111          return;
   2.112      }
   2.113      printk("%s %s-%s tmem pool ",destroy?"destroying":"flushing",
   2.114          is_persistent(pool) ? "persistent" : "ephemeral" ,
   2.115          is_shared(pool) ? "shared" : "private");
   2.116      printk("%s=%d pool_id=%d\n", cli_id_str,pool->client->cli_id,pool->pool_id);
   2.117 -    obj_rb_destroy_all(pool);
   2.118 +    pool_destroy_objs(pool,0,CLI_ID_NULL);
   2.119      if ( destroy )
   2.120      {
   2.121          pool->client->pools[pool->pool_id] = NULL;
   2.122 @@ -1378,7 +1361,7 @@ static NOINLINE int do_tmem_flush_object
   2.123      if ( obj == NULL )
   2.124          goto out;
   2.125      tmem_write_lock(&pool->pool_rwlock);
   2.126 -    obj_destroy(obj);
   2.127 +    obj_destroy(obj,0);
   2.128      pool->flush_objs_found++;
   2.129      tmem_write_unlock(&pool->pool_rwlock);
   2.130  
   2.131 @@ -1455,7 +1438,7 @@ static NOINLINE int do_tmem_new_pool(uin
   2.132              {
   2.133                  if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi )
   2.134                  {
   2.135 -                    printk("(matches shared pool uuid=%"PRIx64".%"PRIu64") ",
   2.136 +                    printk("(matches shared pool uuid=%"PRIx64".%"PRIx64") ",
   2.137                          uuid_hi, uuid_lo);
   2.138                      printk("pool_id=%d\n",d_poolid);
   2.139                      client->pools[d_poolid] = global_shared_pools[s_poolid];
   2.140 @@ -1507,10 +1490,8 @@ static int tmemc_freeze_pools(int cli_id
   2.141      if ( cli_id == CLI_ID_NULL )
   2.142      {
   2.143          list_for_each_entry(client,&global_client_list,client_list)
   2.144 -        {
   2.145              client->frozen = freeze;
   2.146 -            printk("tmem: all pools %s for all %ss\n",s,client_str);
   2.147 -        }
   2.148 +        printk("tmem: all pools %s for all %ss\n",s,client_str);
   2.149      }
   2.150      else
   2.151      {
   2.152 @@ -1878,7 +1859,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
   2.153          }
   2.154      }
   2.155  
   2.156 -    if ( op.cmd == TMEM_NEW_POOL )
   2.157 +    if ( op.cmd == TMEM_NEW_POOL || op.cmd == TMEM_DESTROY_POOL )
   2.158      {
   2.159          if ( !tmem_write_lock_set )
   2.160          {