the monitoring service. When the domain doesn't need to be monitored any more,
detach the domain id from the monitoring service.
+Intel Broadwell and later server platforms also offer total/local memory
+bandwidth monitoring. Xen supports per-domain monitoring for these two
+additional monitoring types. Both memory bandwidth monitoring and L3 cache
+occupancy monitoring share the same underlying monitoring service. Once
+a domain is attached to the monitoring service, monitoring data can be shown
+for any of these monitoring types.
+
=over 4
=item B<psr-cmt-attach> [I<domain-id>]
Show monitoring data for a certain domain or all domains. Current supported
monitor types are:
- - "cache-occupancy": showing the L3 cache occupancy.
+ - "cache_occupancy": showing the L3 cache occupancy (KB).
+ - "total_mem_bandwidth": showing the total memory bandwidth (KB/s).
+ - "local_mem_bandwidth": showing the local memory bandwidth (KB/s).
=back
L3 cache occupancy.
* `cmt` instructs Xen to enable/disable Cache Monitoring Technology.
* `rmid_max` indicates the max value for rmid.
+* Memory Bandwidth Monitoring (Broadwell and later). Information regarding the
+ total/local memory bandwidth. It uses the same options as Cache Monitoring
+ Technology.
### reboot
> `= t[riple] | k[bd] | a[cpi] | p[ci] | n[o] [, [w]arm | [c]old]`
#if defined(__i386__) || defined(__x86_64__)
enum xc_psr_cmt_type {
XC_PSR_CMT_L3_OCCUPANCY,
+ XC_PSR_CMT_TOTAL_MEM_COUNT,
+ XC_PSR_CMT_LOCAL_MEM_COUNT,
};
typedef enum xc_psr_cmt_type xc_psr_cmt_type;
int xc_psr_cmt_attach(xc_interface *xch, uint32_t domid);
int xc_psr_cmt_get_total_rmid(xc_interface *xch, uint32_t *total_rmid);
int xc_psr_cmt_get_l3_upscaling_factor(xc_interface *xch,
uint32_t *upscaling_factor);
+int xc_psr_cmt_get_l3_event_mask(xc_interface *xch, uint32_t *event_mask);
int xc_psr_cmt_get_l3_cache_size(xc_interface *xch, uint32_t cpu,
uint32_t *l3_cache_size);
int xc_psr_cmt_get_data(xc_interface *xch, uint32_t rmid, uint32_t cpu,
- uint32_t psr_cmt_type, uint64_t *monitor_data);
+ uint32_t psr_cmt_type, uint64_t *monitor_data,
+ uint64_t *tsc);
int xc_psr_cmt_enabled(xc_interface *xch);
#endif
#ifndef XC_MSR_X86_H
#define XC_MSR_X86_H
+#define MSR_IA32_TSC 0x00000010
#define MSR_IA32_CMT_EVTSEL 0x00000c8d
#define MSR_IA32_CMT_CTR 0x00000c8e
* GNU Lesser General Public License for more details.
*/
+#include <assert.h>
#include "xc_private.h"
#include "xc_msr_x86.h"
#define IA32_CMT_CTR_ERROR_MASK (0x3ull << 62)
#define EVTID_L3_OCCUPANCY 0x1
+#define EVTID_TOTAL_MEM_COUNT 0x2
+#define EVTID_LOCAL_MEM_COUNT 0x3
int xc_psr_cmt_attach(xc_interface *xch, uint32_t domid)
{
return rc;
}
+/*
+ * Fetch the L3 monitoring event mask from the hypervisor.
+ *
+ * Issues the XEN_SYSCTL_PSR_CMT_get_l3_event_mask sub-command of
+ * XEN_SYSCTL_psr_cmt_op. On success returns 0 and stores the reported value
+ * in *event_mask; otherwise returns the non-zero result of xc_sysctl() and
+ * leaves *event_mask untouched.
+ */
+int xc_psr_cmt_get_l3_event_mask(xc_interface *xch, uint32_t *event_mask)
+{
+    int ret;
+    DECLARE_SYSCTL;
+
+    sysctl.cmd = XEN_SYSCTL_psr_cmt_op;
+    sysctl.u.psr_cmt_op.flags = 0;
+    sysctl.u.psr_cmt_op.cmd = XEN_SYSCTL_PSR_CMT_get_l3_event_mask;
+
+    ret = xc_sysctl(xch, &sysctl);
+    if ( ret )
+        return ret;
+
+    /* Only publish the mask once the hypercall has succeeded. */
+    *event_mask = sysctl.u.psr_cmt_op.u.data;
+    return 0;
+}
+
int xc_psr_cmt_get_l3_cache_size(xc_interface *xch, uint32_t cpu,
uint32_t *l3_cache_size)
{
}
int xc_psr_cmt_get_data(xc_interface *xch, uint32_t rmid, uint32_t cpu,
- xc_psr_cmt_type type, uint64_t *monitor_data)
+ xc_psr_cmt_type type, uint64_t *monitor_data,
+ uint64_t *tsc)
{
xc_resource_op_t op;
- xc_resource_entry_t entries[2];
+ xc_resource_entry_t entries[3];
+ xc_resource_entry_t *tsc_entry = NULL;
uint32_t evtid, nr = 0;
int rc;
case XC_PSR_CMT_L3_OCCUPANCY:
evtid = EVTID_L3_OCCUPANCY;
break;
+ case XC_PSR_CMT_TOTAL_MEM_COUNT:
+ evtid = EVTID_TOTAL_MEM_COUNT;
+ break;
+ case XC_PSR_CMT_LOCAL_MEM_COUNT:
+ evtid = EVTID_LOCAL_MEM_COUNT;
+ break;
default:
return -1;
}
entries[nr].rsvd = 0;
nr++;
+ if ( tsc != NULL )
+ {
+ tsc_entry = &entries[nr];
+ entries[nr].u.cmd = XEN_RESOURCE_OP_MSR_READ;
+ entries[nr].idx = MSR_IA32_TSC;
+ entries[nr].val = 0;
+ entries[nr].rsvd = 0;
+ nr++;
+ }
+
+ assert(nr <= ARRAY_SIZE(entries));
+
op.cpu = cpu;
op.nr_entries = nr;
op.entries = entries;
*monitor_data = entries[1].val;
+ if ( tsc_entry != NULL )
+ *tsc = tsc_entry->val;
+
return 0;
}
* If this is defined, the Cache Monitoring Technology feature is supported.
*/
#define LIBXL_HAVE_PSR_CMT 1
+
+/*
+ * LIBXL_HAVE_PSR_MBM
+ *
+ * If this is defined, the Memory Bandwidth Monitoring feature is supported.
+ */
+#define LIBXL_HAVE_PSR_MBM 1
#endif
typedef char **libxl_string_list;
uint32_t *l3_cache_occupancy);
#endif
+#ifdef LIBXL_HAVE_PSR_MBM
+/*
+ * Returns non-zero if the bit for `type` is set in the L3 event mask reported
+ * by the hypervisor, and 0 if it is clear or the mask could not be read.
+ */
+int libxl_psr_cmt_type_supported(libxl_ctx *ctx, libxl_psr_cmt_type type);
+/*
+ * Read one monitoring sample of the given type for domain `domid`.
+ *
+ * `scope` is the socket id used to pick the CPU on which the counter is read.
+ * On success *sample_r receives the raw counter value multiplied by the
+ * upscaling factor. `tsc_r` may be NULL; when it is not, it receives the
+ * timestamp read together with the counter.
+ * Returns 0 on success and an error code (e.g. ERROR_FAIL) on failure.
+ */
+int libxl_psr_cmt_get_sample(libxl_ctx *ctx,
+ uint32_t domid,
+ libxl_psr_cmt_type type,
+ uint64_t scope,
+ uint64_t *sample_r,
+ uint64_t *tsc_r);
+#endif
+
/* misc */
/* Each of these sets or clears the flag according to whether the
return rc;
}
-int libxl_psr_cmt_get_cache_occupancy(libxl_ctx *ctx,
- uint32_t domid,
- uint32_t socketid,
- uint32_t *l3_cache_occupancy)
+/*
+ * Report whether monitoring type `type` is available.
+ *
+ * Reads the L3 event mask via xc_psr_cmt_get_l3_event_mask(). If that call
+ * fails, the error is logged and 0 is returned; otherwise the result is the
+ * mask bit corresponding to `type` (non-zero when set).
+ */
+int libxl_psr_cmt_type_supported(libxl_ctx *ctx, libxl_psr_cmt_type type)
{
GC_INIT(ctx);
+ uint32_t event_mask;
+ int rc;
+ rc = xc_psr_cmt_get_l3_event_mask(ctx->xch, &event_mask);
+ if (rc < 0) {
+ libxl__psr_cmt_log_err_msg(gc, errno);
+ rc = 0;
+ } else {
+ /* libxl_psr_cmt_type values start at 1, so type N maps to mask bit N-1. */
+ rc = event_mask & (1 << (type - 1));
+ }
+
+ GC_FREE;
+ return rc;
+}
+
+int libxl_psr_cmt_get_sample(libxl_ctx *ctx,
+ uint32_t domid,
+ libxl_psr_cmt_type type,
+ uint64_t scope,
+ uint64_t *sample_r,
+ uint64_t *tsc_r)
+{
+ GC_INIT(ctx);
unsigned int rmid;
uint32_t upscaling_factor;
uint64_t monitor_data;
int cpu, rc;
- xc_psr_cmt_type type;
rc = xc_psr_cmt_get_domain_rmid(ctx->xch, domid, &rmid);
if (rc < 0 || rmid == 0) {
goto out;
}
- cpu = libxl__pick_socket_cpu(gc, socketid);
+ cpu = libxl__pick_socket_cpu(gc, scope);
if (cpu < 0) {
LOGE(ERROR, "failed to get socket cpu");
rc = ERROR_FAIL;
goto out;
}
- type = XC_PSR_CMT_L3_OCCUPANCY;
- rc = xc_psr_cmt_get_data(ctx->xch, rmid, cpu, type, &monitor_data);
+ rc = xc_psr_cmt_get_data(ctx->xch, rmid, cpu, type - 1,
+ &monitor_data, tsc_r);
if (rc < 0) {
LOGE(ERROR, "failed to get monitoring data");
rc = ERROR_FAIL;
goto out;
}
- *l3_cache_occupancy = upscaling_factor * monitor_data / 1024;
- rc = 0;
+ *sample_r = monitor_data * upscaling_factor;
out:
GC_FREE;
return rc;
}
+/*
+ * Convenience wrapper around libxl_psr_cmt_get_sample() for L3 cache
+ * occupancy.
+ *
+ * Requests a LIBXL_PSR_CMT_TYPE_CACHE_OCCUPANCY sample for `domid` on socket
+ * `socketid` (no timestamp is requested) and stores the sample divided by
+ * 1024 in *l3_cache_occupancy. If the underlying call returns a negative
+ * value, that value is returned and *l3_cache_occupancy is left untouched.
+ */
+int libxl_psr_cmt_get_cache_occupancy(libxl_ctx *ctx,
+                                      uint32_t domid,
+                                      uint32_t socketid,
+                                      uint32_t *l3_cache_occupancy)
+{
+    uint64_t sample;
+    int rc = libxl_psr_cmt_get_sample(ctx, domid,
+                                      LIBXL_PSR_CMT_TYPE_CACHE_OCCUPANCY,
+                                      socketid, &sample, NULL);
+
+    if (rc >= 0)
+        *l3_cache_occupancy = sample / 1024;
+
+    return rc;
+}
+
/*
* Local variables:
* mode: C
libxl_psr_cmt_type = Enumeration("psr_cmt_type", [
(1, "CACHE_OCCUPANCY"),
+ (2, "TOTAL_MEM_COUNT"),
+ (3, "LOCAL_MEM_COUNT"),
])
}
#ifdef LIBXL_HAVE_PSR_CMT
+
+#define MBM_SAMPLE_RETRY_MAX 4
+#define MBM_NS_PER_SEC 1000000000ULL
+
+/*
+ * Estimate the memory bandwidth (KB/s) of a domain on one socket.
+ *
+ * Two counter samples are taken about 10ms apart and the rate is derived
+ * from their difference and the timestamps returned with them. A pair is
+ * only used when the samples are less than one second apart and the counter
+ * did not go backwards; otherwise the pair is discarded and sampling is
+ * retried up to MBM_SAMPLE_RETRY_MAX times.
+ *
+ * Returns 0 and stores the result in *bandwidth_r on success. Returns a
+ * negative value if a sample cannot be read, if the timestamp did not
+ * advance between the two samples, or if no usable pair was obtained.
+ */
+static int psr_cmt_get_mem_bandwidth(uint32_t domid,
+                                     libxl_psr_cmt_type type,
+                                     uint32_t socketid,
+                                     uint64_t *bandwidth_r)
+{
+    uint64_t sample1, sample2;
+    uint64_t tsc1, tsc2;
+    uint64_t delta, elapsed, bytes_per_sec;
+    int retry_attempts = 0;
+    int rc;
+
+    while (1) {
+        rc = libxl_psr_cmt_get_sample(ctx, domid, type, socketid,
+                                      &sample1, &tsc1);
+        if (rc < 0)
+            return rc;
+
+        usleep(10000);
+
+        rc = libxl_psr_cmt_get_sample(ctx, domid, type, socketid,
+                                      &sample2, &tsc2);
+        if (rc < 0)
+            return rc;
+
+        /* A non-advancing timestamp would make the rate undefined. */
+        if (tsc2 <= tsc1)
+            return -1;
+
+        /*
+         * Hardware guarantees at most 1 overflow can happen if the duration
+         * between two samples is less than 1 second. Note that tsc returned
+         * from hypervisor is already-scaled time(ns).
+         */
+        if (tsc2 - tsc1 < MBM_NS_PER_SEC && sample2 >= sample1)
+            break;
+
+        if (retry_attempts < MBM_SAMPLE_RETRY_MAX) {
+            retry_attempts++;
+        } else {
+            fprintf(stderr, "event counter overflowed\n");
+            return -1;
+        }
+    }
+
+    delta = sample2 - sample1;
+    elapsed = tsc2 - tsc1;
+
+    /*
+     * Compute delta * 1e9 / elapsed without forming delta * 1e9 directly:
+     * that product overflows 64 bits once delta exceeds about 18.4e9 bytes,
+     * which is reachable when the two samples end up far apart. Splitting
+     * into quotient and remainder gives the same integer result, and the
+     * remainder term is bounded because elapsed < 1e9.
+     */
+    bytes_per_sec = delta / elapsed * MBM_NS_PER_SEC +
+                    delta % elapsed * MBM_NS_PER_SEC / elapsed;
+
+    *bandwidth_r = bytes_per_sec / 1024;
+    return 0;
+}
+
static void psr_cmt_print_domain_info(libxl_dominfo *dominfo,
libxl_psr_cmt_type type,
uint32_t nr_sockets)
{
char *domain_name;
uint32_t socketid;
- uint32_t l3_cache_occupancy;
+ uint64_t monitor_data;
if (!libxl_psr_cmt_domain_attached(ctx, dominfo->domid))
return;
for (socketid = 0; socketid < nr_sockets; socketid++) {
switch (type) {
case LIBXL_PSR_CMT_TYPE_CACHE_OCCUPANCY:
- if (!libxl_psr_cmt_get_cache_occupancy(ctx,
- dominfo->domid,
- socketid,
- &l3_cache_occupancy))
- printf("%13u KB", l3_cache_occupancy);
+ if (!libxl_psr_cmt_get_sample(ctx, dominfo->domid, type, socketid,
+ &monitor_data, NULL))
+ printf("%13"PRIu64" KB", monitor_data / 1024);
+ break;
+ case LIBXL_PSR_CMT_TYPE_TOTAL_MEM_COUNT:
+ case LIBXL_PSR_CMT_TYPE_LOCAL_MEM_COUNT:
+ if (!psr_cmt_get_mem_bandwidth(dominfo->domid, type, socketid,
+ &monitor_data))
+ printf("%11"PRIu64" KB/s", monitor_data);
break;
default:
return;
return -1;
}
+ if (!libxl_psr_cmt_type_supported(ctx, type)) {
+ fprintf(stderr, "Monitor type '%s' is not supported in the system\n",
+ libxl_psr_cmt_type_to_string(type));
+ return -1;
+ }
+
libxl_physinfo_init(&info);
rc = libxl_get_physinfo(ctx, &info);
if (rc < 0) {
/* No options */
}
- libxl_psr_cmt_type_from_string(argv[optind], &type);
+ if (!strcmp(argv[optind], "cache_occupancy"))
+ type = LIBXL_PSR_CMT_TYPE_CACHE_OCCUPANCY;
+ else if (!strcmp(argv[optind], "total_mem_bandwidth"))
+ type = LIBXL_PSR_CMT_TYPE_TOTAL_MEM_COUNT;
+ else if (!strcmp(argv[optind], "local_mem_bandwidth"))
+ type = LIBXL_PSR_CMT_TYPE_LOCAL_MEM_COUNT;
+ else {
+ help("psr-cmt-show");
+ return 2;
+ }
if (optind + 1 >= argc)
domid = INVALID_DOMID;
return 2;
}
- switch (type) {
- case LIBXL_PSR_CMT_TYPE_CACHE_OCCUPANCY:
- ret = psr_cmt_show(type, domid);
- break;
- default:
- help("psr-cmt-show");
- return 2;
- }
+ ret = psr_cmt_show(type, domid);
return ret;
}
"Show Cache Monitoring Technology information",
"<PSR-CMT-Type> <Domain>",
"Available monitor types:\n"
- "\"cache_occupancy\": Show L3 cache occupancy\n",
+ "\"cache_occupancy\": Show L3 cache occupancy(KB)\n"
+ "\"total_mem_bandwidth\": Show total memory bandwidth(KB/s)\n"
+ "\"local_mem_bandwidth\": Show local memory bandwidth(KB/s)\n",
},
#endif
};