...for receive-side packets.
My recent patch to include/xen/interface/io/netif.h defines a set of
control messages that can be used by a VM frontend driver to configure
toeplitz hashing of receive-side packets and consequent steering of those
packets to particular queues.
This patch introduces an implementation of toeplitz hashing into
xen-netback and allows it to be configured using the new control messages.
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
#define XEN_NETBK_MCAST_MAX 64
+/* Largest Toeplitz key (in bytes) a frontend may set; shorter keys
+ * are zero-padded when the hash is computed.
+ */
+#define XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE 40
+
+/* The hash-to-queue mapping table holds up to 2^7 = 128 entries */
+#define XEN_NETBK_MAX_TOEPLITZ_MAPPING_ORDER 7
+#define XEN_NETBK_MAX_TOEPLITZ_MAPPING_SIZE \
+ BIT(XEN_NETBK_MAX_TOEPLITZ_MAPPING_ORDER)
+
struct xenvif {
/* Unique identifier for this interface. */
domid_t domid;
unsigned int num_queues; /* active queues, resource allocated */
unsigned int stalled_queues;
+ struct {
+ u32 flags;
+ u8 key[XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE];
+ u32 mapping[XEN_NETBK_MAX_TOEPLITZ_MAPPING_SIZE];
+ unsigned int order;
+ } toeplitz;
+
struct xenbus_watch credit_watch;
struct xenbus_watch mcast_ctrl_watch;
netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
}
/* Compute the Toeplitz hash of the dlen-byte buffer d using the
 * klen-byte key k (zero-padded where the sliding window runs past
 * the end of the key).
 *
 * A 64-bit window (prefix) holds, in its upper 32 bits, the 32-bit
 * slice of the key aligned with the current input bit; for every set
 * bit of the input the window is XORed into a 64-bit accumulator,
 * whose upper 32 bits are the final hash.
 */
static u32 toeplitz_hash(const u8 *k, unsigned int klen,
			 const u8 *d, unsigned int dlen)
{
	unsigned int di, ki;
	u64 prefix = 0;
	u64 hash = 0;

	/* Pre-load prefix with the first 8 bytes of the key. The
	 * shift must precede the OR so that k[0] lands in the top
	 * byte; OR-ing first would shift k[0] off the top of the u64
	 * and leave the bottom byte clear, corrupting every hash.
	 */
	for (ki = 0; ki < 8; ki++) {
		prefix <<= 8;
		prefix |= (ki < klen) ? k[ki] : 0;
	}

	for (di = 0; di < dlen; di++) {
		u8 byte = d[di];
		unsigned int bit;

		for (bit = 0; bit < 8; bit++) {
			if (byte & 0x80)
				hash ^= prefix;
			byte <<= 1;
			prefix <<= 1;
		}

		/* prefix has now been left-shifted by 8, so OR in
		 * the next byte.
		 */
		prefix |= (ki < klen) ? k[ki] : 0;
		ki++;
	}

	/* The valid part of the hash is in the upper 32 bits. */
	return hash >> 32;
}
+
+static void xenvif_set_toeplitz_hash(struct xenvif *vif, struct sk_buff *skb)
+{
+ struct flow_keys flow;
+ u32 hash = 0;
+ enum pkt_hash_types type = PKT_HASH_TYPE_NONE;
+ const u8 *key = vif->toeplitz.key;
+ const unsigned int len = XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE;
+
+ /* Quick rejection test: If the network protocol doesn't
+ * correspond to any enabled hash type then there's no point
+ * in parsing the packet header.
+ */
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (vif->toeplitz.flags &
+ (XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4_TCP |
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4))
+ break;
+
+ goto done;
+
+ case htons(ETH_P_IPV6):
+ if (vif->toeplitz.flags &
+ (XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6_TCP |
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6))
+ break;
+
+ goto done;
+
+ default:
+ goto done;
+ }
+
+ memset(&flow, 0, sizeof(flow));
+ if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
+ goto done;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if ((vif->toeplitz.flags &
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4_TCP) &&
+ flow.basic.ip_proto == IPPROTO_TCP) {
+ u8 data[12];
+
+ memcpy(&data[0], &flow.addrs.v4addrs.src, 4);
+ memcpy(&data[4], &flow.addrs.v4addrs.dst, 4);
+ memcpy(&data[8], &flow.ports.src, 2);
+ memcpy(&data[10], &flow.ports.dst, 2);
+
+ hash = toeplitz_hash(key, len,
+ data, sizeof(data));
+ type = PKT_HASH_TYPE_L4;
+ } else if (vif->toeplitz.flags &
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4) {
+ u8 data[8];
+
+ memcpy(&data[0], &flow.addrs.v4addrs.src, 4);
+ memcpy(&data[4], &flow.addrs.v4addrs.dst, 4);
+
+ hash = toeplitz_hash(key, len,
+ data, sizeof(data));
+ type = PKT_HASH_TYPE_L3;
+ }
+
+ break;
+
+ case htons(ETH_P_IPV6):
+ if ((vif->toeplitz.flags &
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6_TCP) &&
+ flow.basic.ip_proto == IPPROTO_TCP) {
+ u8 data[36];
+
+ memcpy(&data[0], &flow.addrs.v6addrs.src, 16);
+ memcpy(&data[16], &flow.addrs.v6addrs.dst, 16);
+ memcpy(&data[32], &flow.ports.src, 2);
+ memcpy(&data[34], &flow.ports.dst, 2);
+
+ hash = toeplitz_hash(key, len,
+ data, sizeof(data));
+ type = PKT_HASH_TYPE_L4;
+ } else if (vif->toeplitz.flags &
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6) {
+ u8 data[32];
+
+ memcpy(&data[0], &flow.addrs.v6addrs.src, 16);
+ memcpy(&data[16], &flow.addrs.v6addrs.dst, 16);
+
+ hash = toeplitz_hash(key, len,
+ data, sizeof(data));
+ type = PKT_HASH_TYPE_L3;
+ }
+
+ break;
+ }
+
+done:
+ skb_set_hash(skb, hash, type);
+}
+
+static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct xenvif *vif = netdev_priv(dev);
+ unsigned int mask = (1u << vif->toeplitz.order) - 1;
+
+ if (vif->toeplitz.flags == 0)
+ return fallback(dev, skb) % dev->real_num_tx_queues;
+
+ xenvif_set_toeplitz_hash(vif, skb);
+
+ return vif->toeplitz.mapping[skb_get_hash_raw(skb) & mask];
+}
+
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
};
static const struct net_device_ops xenvif_netdev_ops = {
+ .ndo_select_queue = xenvif_select_queue,
.ndo_start_xmit = xenvif_start_xmit,
.ndo_get_stats = xenvif_get_stats,
.ndo_open = xenvif_open,
return 0;
}
+static u32 xenvif_set_toeplitz_flags(struct xenvif *vif, u32 flags)
+{
+ if (!(flags & (XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4 |
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4_TCP |
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6 |
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6_TCP)))
+ return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+ vif->toeplitz.flags = flags;
+
+ return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+static u32 xenvif_set_toeplitz_key(struct xenvif *vif, u32 gref, u32 len)
+{
+ u8 *key = vif->toeplitz.key;
+ struct gnttab_copy copy_op = {
+ .source.u.ref = gref,
+ .source.domid = vif->domid,
+ .dest.u.gmfn = virt_to_gfn(key),
+ .dest.domid = DOMID_SELF,
+ .dest.offset = offset_in_page(key) & ~XEN_PAGE_MASK,
+ .len = len,
+ .flags = GNTCOPY_source_gref
+ };
+
+ if (len > XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE)
+ return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+ gnttab_batch_copy(©_op, 1);
+
+ if (copy_op.status != GNTST_okay)
+ return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+ /* Clear any remaining key octets */
+ if (len < XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE)
+ memset(key + len, 0, XEN_NETBK_MAX_TOEPLITZ_KEY_SIZE - len);
+
+ return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+static u32 xenvif_set_toeplitz_mapping_order(struct xenvif *vif,
+ u32 order)
+{
+ if (order > XEN_NETBK_MAX_TOEPLITZ_MAPPING_ORDER)
+ return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+ vif->toeplitz.order = order;
+ memset(vif->toeplitz.mapping, 0, sizeof(u32) << order);
+
+ return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+static u32 xenvif_set_toeplitz_mapping(struct xenvif *vif, u32 gref,
+ u32 len, u32 off)
+{
+ u32 *mapping = &vif->toeplitz.mapping[off];
+ struct gnttab_copy copy_op = {
+ .source.u.ref = gref,
+ .source.domid = vif->domid,
+ .dest.u.gmfn = virt_to_gfn(mapping),
+ .dest.domid = DOMID_SELF,
+ .dest.offset = offset_in_page(mapping) & ~XEN_PAGE_MASK,
+ .len = len * sizeof(u32),
+ .flags = GNTCOPY_source_gref
+ };
+
+ if ((off + len > (1u << vif->toeplitz.order)) ||
+ copy_op.len > XEN_PAGE_SIZE)
+ return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+ while (len-- != 0)
+ if (mapping[off++] >= vif->num_queues)
+ return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+ gnttab_batch_copy(©_op, 1);
+
+ if (copy_op.status != GNTST_okay)
+ return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+ return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
static void make_ctrl_response(struct xenvif *vif,
const struct xen_netif_ctrl_request *req,
u32 status, u32 data)
static void process_ctrl_request(struct xenvif *vif,
const struct xen_netif_ctrl_request *req)
{
- /* There is no support for control requests yet. */
- make_ctrl_response(vif, req,
- XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED, 0);
+ u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
+ u32 data = 0;
+
+ switch (req->type) {
+ case XEN_NETIF_CTRL_TYPE_GET_TOEPLITZ_FLAGS:
+ status = XEN_NETIF_CTRL_STATUS_SUCCESS;
+ data = XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4 |
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV4_TCP |
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6 |
+ XEN_NETIF_CTRL_TOEPLITZ_HASH_IPV6_TCP;
+ break;
+
+ case XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_FLAGS:
+ status = xenvif_set_toeplitz_flags(vif, req->data[0]);
+ break;
+
+ case XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_KEY:
+ status = xenvif_set_toeplitz_key(vif, req->data[0],
+ req->data[1]);
+ break;
+
+ case XEN_NETIF_CTRL_TYPE_GET_TOEPLITZ_MAPPING_ORDER:
+ status = XEN_NETIF_CTRL_STATUS_SUCCESS;
+ data = XEN_NETBK_MAX_TOEPLITZ_MAPPING_ORDER;
+ break;
+
+ case XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_MAPPING_ORDER:
+ status = xenvif_set_toeplitz_mapping_order(vif,
+ req->data[0]);
+ break;
+
+ case XEN_NETIF_CTRL_TYPE_SET_TOEPLITZ_MAPPING:
+ status = xenvif_set_toeplitz_mapping(vif, req->data[0],
+ req->data[1],
+ req->data[2]);
+ break;
+
+ default:
+ break;
+ }
+
+ make_ctrl_response(vif, req, status, data);
push_ctrl_response(vif);
}