xenbits.xensource.com Git - people/julieng/freebsd.git/commitdiff
Raw import of ThunderX VNIC networking driver components
author    zbb <zbb@FreeBSD.org>
Sun, 18 Oct 2015 21:39:15 +0000 (21:39 +0000)
committer zbb <zbb@FreeBSD.org>
Sun, 18 Oct 2015 21:39:15 +0000 (21:39 +0000)
This import brings the following components of the Linux driver:
- Thunder BGX (programmable MAC)
- Physical Function driver
- Virtual Function driver
- Headers

Revision:            1.0
Obtained from:       Cavium
License information: Cavium provided these files under the BSD license

sys/dev/vnic/nic.h [new file with mode: 0644]
sys/dev/vnic/nic_main.c [new file with mode: 0644]
sys/dev/vnic/nic_reg.h [new file with mode: 0644]
sys/dev/vnic/nicvf_main.c [new file with mode: 0644]
sys/dev/vnic/nicvf_queues.c [new file with mode: 0644]
sys/dev/vnic/nicvf_queues.h [new file with mode: 0644]
sys/dev/vnic/q_struct.h [new file with mode: 0644]
sys/dev/vnic/thunder_bgx.c [new file with mode: 0644]
sys/dev/vnic/thunder_bgx.h [new file with mode: 0644]

diff --git a/sys/dev/vnic/nic.h b/sys/dev/vnic/nic.h
new file mode 100644 (file)
index 0000000..3023dea
--- /dev/null
@@ -0,0 +1,539 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef NIC_H
+#define        NIC_H
+
+#include <linux/netdevice.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include "thunder_bgx.h"
+
+/* PCI device IDs */
+#define        PCI_DEVICE_ID_THUNDER_NIC_PF            0xA01E
+#define        PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF      0x0011
+#define        PCI_DEVICE_ID_THUNDER_NIC_VF            0xA034
+#define        PCI_DEVICE_ID_THUNDER_BGX               0xA026
+
+/* PCI BAR nos */
+#define        PCI_CFG_REG_BAR_NUM             0
+#define        PCI_MSIX_REG_BAR_NUM            4
+
+/* NIC SRIOV VF count */
+#define        MAX_NUM_VFS_SUPPORTED           128
+#define        DEFAULT_NUM_VF_ENABLED          8
+
+#define        NIC_TNS_BYPASS_MODE             0
+#define        NIC_TNS_MODE                    1
+
+/* NIC priv flags */
+#define        NIC_SRIOV_ENABLED               BIT(0)
+#define        NIC_TNS_ENABLED                 BIT(1)
+
+/* VNIC HW optimization features */
+#define VNIC_RSS_SUPPORT
+#define VNIC_MULTI_QSET_SUPPORT
+
+/* Min/Max packet size */
+#define        NIC_HW_MIN_FRS                  64
+#define        NIC_HW_MAX_FRS                  9200 /* 9216 max packet including FCS */
+
+/* Max pkinds */
+#define        NIC_MAX_PKIND                   16
+
+/* Rx Channels */
+/* Receive channel configuration in TNS bypass mode
+ * is as follows:
+ * BGX0-LMAC0-CHAN0 - VNIC CHAN0
+ * BGX0-LMAC1-CHAN0 - VNIC CHAN16
+ * ...
+ * BGX1-LMAC0-CHAN0 - VNIC CHAN128
+ * ...
+ * BGX1-LMAC3-CHAN0 - VNIC CHAN174
+ */
+#define        NIC_INTF_COUNT                  2  /* Interfaces btw VNIC and TNS/BGX */
+#define        NIC_CHANS_PER_INF               128
+#define        NIC_MAX_CHANS                   (NIC_INTF_COUNT * NIC_CHANS_PER_INF)
+#define        NIC_CPI_COUNT                   2048 /* No of channel parse indices */
+
+/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */
+#define NIC_MAX_BGX                    MAX_BGX_PER_CN88XX
+#define        NIC_CPI_PER_BGX                 (NIC_CPI_COUNT / NIC_MAX_BGX)
+#define        NIC_MAX_CPI_PER_LMAC            64 /* Max when CPI_ALG is IP diffserv */
+#define        NIC_RSSI_PER_BGX                (NIC_RSSI_COUNT / NIC_MAX_BGX)
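The channel numbers in the mapping comment above fall out of the same arithmetic nic_config_cpi() uses in nic_main.c below: chan = lmac * MAX_BGX_CHANS_PER_LMAC + bgx * NIC_CHANS_PER_INF, where the per-LMAC stride MAX_BGX_CHANS_PER_LMAC comes from thunder_bgx.h (part of this import, not shown in this excerpt) and is 16, as the CHAN16 row implies. Two rows worked out, illustrative only:

/* Not part of the imported sources -- worked examples of the mapping:
 *   BGX0-LMAC1:  chan = 1 * 16 + 0 * 128 =  16   (VNIC CHAN16 above)
 *   BGX1-LMAC0:  chan = 0 * 16 + 1 * 128 = 128   (VNIC CHAN128 above)
 */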
+
+/* Tx scheduling */
+#define        NIC_MAX_TL4                     1024
+#define        NIC_MAX_TL4_SHAPERS             256 /* 1 shaper for 4 TL4s */
+#define        NIC_MAX_TL3                     256
+#define        NIC_MAX_TL3_SHAPERS             64  /* 1 shaper for 4 TL3s */
+#define        NIC_MAX_TL2                     64
+#define        NIC_MAX_TL2_SHAPERS             2  /* 1 shaper for 32 TL2s */
+#define        NIC_MAX_TL1                     2
+
+/* TNS bypass mode */
+#define        NIC_TL2_PER_BGX                 32
+#define        NIC_TL4_PER_BGX                 (NIC_MAX_TL4 / NIC_MAX_BGX)
+#define        NIC_TL4_PER_LMAC                (NIC_MAX_TL4 / NIC_CHANS_PER_INF)
+
+/* NIC VF Interrupts */
+#define        NICVF_INTR_CQ                   0
+#define        NICVF_INTR_SQ                   1
+#define        NICVF_INTR_RBDR                 2
+#define        NICVF_INTR_PKT_DROP             3
+#define        NICVF_INTR_TCP_TIMER            4
+#define        NICVF_INTR_MBOX                 5
+#define        NICVF_INTR_QS_ERR               6
+
+#define        NICVF_INTR_CQ_SHIFT             0
+#define        NICVF_INTR_SQ_SHIFT             8
+#define        NICVF_INTR_RBDR_SHIFT           16
+#define        NICVF_INTR_PKT_DROP_SHIFT       20
+#define        NICVF_INTR_TCP_TIMER_SHIFT      21
+#define        NICVF_INTR_MBOX_SHIFT           22
+#define        NICVF_INTR_QS_ERR_SHIFT         23
+
+#define        NICVF_INTR_CQ_MASK              (0xFF << NICVF_INTR_CQ_SHIFT)
+#define        NICVF_INTR_SQ_MASK              (0xFF << NICVF_INTR_SQ_SHIFT)
+#define        NICVF_INTR_RBDR_MASK            (0x03 << NICVF_INTR_RBDR_SHIFT)
+#define        NICVF_INTR_PKT_DROP_MASK        BIT(NICVF_INTR_PKT_DROP_SHIFT)
+#define        NICVF_INTR_TCP_TIMER_MASK       BIT(NICVF_INTR_TCP_TIMER_SHIFT)
+#define        NICVF_INTR_MBOX_MASK            BIT(NICVF_INTR_MBOX_SHIFT)
+#define        NICVF_INTR_QS_ERR_MASK          BIT(NICVF_INTR_QS_ERR_SHIFT)
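CQ, SQ and RBDR interrupts get one bit per queue starting at their shift (hence the 0xFF and 0x03 masks), while the remaining sources are single bits. A minimal sketch of how a per-queue bit could be composed from these macros; the helper name is illustrative and not part of the import:

/* Illustrative only: build the enable/clear bit for (type, queue index). */
static inline u64
nicvf_intr_bit_example(int intr_type, int q_idx)
{
	switch (intr_type) {
	case NICVF_INTR_CQ:
		return (1ULL << (NICVF_INTR_CQ_SHIFT + q_idx));   /* q_idx < 8 */
	case NICVF_INTR_SQ:
		return (1ULL << (NICVF_INTR_SQ_SHIFT + q_idx));   /* q_idx < 8 */
	case NICVF_INTR_RBDR:
		return (1ULL << (NICVF_INTR_RBDR_SHIFT + q_idx)); /* q_idx < 2 */
	case NICVF_INTR_MBOX:
		return (1ULL << NICVF_INTR_MBOX_SHIFT);
	default:
		return (0);
	}
}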
+
+/* MSI-X interrupts */
+#define        NIC_PF_MSIX_VECTORS             10
+#define        NIC_VF_MSIX_VECTORS             20
+
+#define NIC_PF_INTR_ID_ECC0_SBE                0
+#define NIC_PF_INTR_ID_ECC0_DBE                1
+#define NIC_PF_INTR_ID_ECC1_SBE                2
+#define NIC_PF_INTR_ID_ECC1_DBE                3
+#define NIC_PF_INTR_ID_ECC2_SBE                4
+#define NIC_PF_INTR_ID_ECC2_DBE                5
+#define NIC_PF_INTR_ID_ECC3_SBE                6
+#define NIC_PF_INTR_ID_ECC3_DBE                7
+#define NIC_PF_INTR_ID_MBOX0           8
+#define NIC_PF_INTR_ID_MBOX1           9
+
+/* Global timer for CQ timer thresh interrupts
+ * Calculated for SCLK of 700Mhz
+ * value written should be a 1/16th of what is expected
+ *
+ * 1 tick per 0.05usec = value of 2.2
+ * This 10% would be covered in CQ timer thresh value
+ */
+#define NICPF_CLK_PER_INT_TICK         2
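The value 2 follows from the arithmetic the comment hints at; worked out (illustrative only):

/* Not part of the imported sources:
 *   clocks per 0.05 us tick = 0.05 us * 700 MHz = 35
 *   value written (1/16th)  = 35 / 16          ~= 2.2 -> programmed as 2
 */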
+
+/* Time to wait before we decide that a SQ is stuck.
+ *
+ * Since both pkt rx and tx notifications are done with same CQ,
+ * when packets are being received at very high rate (eg: L2 forwarding)
+ * then freeing transmitted skbs will be delayed and watchdog
+ * will kick in, resetting interface. Hence keeping this value high.
+ */
+#define        NICVF_TX_TIMEOUT                (50 * HZ)
+
+struct nicvf_cq_poll {
+       struct  nicvf *nicvf;
+       u8      cq_idx;         /* Completion queue index */
+       struct  napi_struct napi;
+};
+
+#define        NIC_RSSI_COUNT                  4096 /* Total no of RSS indices */
+#define NIC_MAX_RSS_HASH_BITS          8
+#define NIC_MAX_RSS_IDR_TBL_SIZE       (1 << NIC_MAX_RSS_HASH_BITS)
+#define RSS_HASH_KEY_SIZE              5 /* 320 bit key */
+
+#ifdef VNIC_RSS_SUPPORT
+struct nicvf_rss_info {
+       bool enable;
+#define        RSS_L2_EXTENDED_HASH_ENA        BIT(0)
+#define        RSS_IP_HASH_ENA                 BIT(1)
+#define        RSS_TCP_HASH_ENA                BIT(2)
+#define        RSS_TCP_SYN_DIS                 BIT(3)
+#define        RSS_UDP_HASH_ENA                BIT(4)
+#define RSS_L4_EXTENDED_HASH_ENA       BIT(5)
+#define        RSS_ROCE_ENA                    BIT(6)
+#define        RSS_L3_BI_DIRECTION_ENA         BIT(7)
+#define        RSS_L4_BI_DIRECTION_ENA         BIT(8)
+       u64 cfg;
+       u8  hash_bits;
+       u16 rss_size;
+       u8  ind_tbl[NIC_MAX_RSS_IDR_TBL_SIZE];
+       u64 key[RSS_HASH_KEY_SIZE];
+} ____cacheline_aligned_in_smp;
+#endif
+
+enum rx_stats_reg_offset {
+       RX_OCTS = 0x0,
+       RX_UCAST = 0x1,
+       RX_BCAST = 0x2,
+       RX_MCAST = 0x3,
+       RX_RED = 0x4,
+       RX_RED_OCTS = 0x5,
+       RX_ORUN = 0x6,
+       RX_ORUN_OCTS = 0x7,
+       RX_FCS = 0x8,
+       RX_L2ERR = 0x9,
+       RX_DRP_BCAST = 0xa,
+       RX_DRP_MCAST = 0xb,
+       RX_DRP_L3BCAST = 0xc,
+       RX_DRP_L3MCAST = 0xd,
+       RX_STATS_ENUM_LAST,
+};
+
+enum tx_stats_reg_offset {
+       TX_OCTS = 0x0,
+       TX_UCAST = 0x1,
+       TX_BCAST = 0x2,
+       TX_MCAST = 0x3,
+       TX_DROP = 0x4,
+       TX_STATS_ENUM_LAST,
+};
+
+struct nicvf_hw_stats {
+       u64 rx_bytes;
+       u64 rx_ucast_frames;
+       u64 rx_bcast_frames;
+       u64 rx_mcast_frames;
+       u64 rx_fcs_errors;
+       u64 rx_l2_errors;
+       u64 rx_drop_red;
+       u64 rx_drop_red_bytes;
+       u64 rx_drop_overrun;
+       u64 rx_drop_overrun_bytes;
+       u64 rx_drop_bcast;
+       u64 rx_drop_mcast;
+       u64 rx_drop_l3_bcast;
+       u64 rx_drop_l3_mcast;
+       u64 rx_bgx_truncated_pkts;
+       u64 rx_jabber_errs;
+       u64 rx_fcs_errs;
+       u64 rx_bgx_errs;
+       u64 rx_prel2_errs;
+       u64 rx_l2_hdr_malformed;
+       u64 rx_oversize;
+       u64 rx_undersize;
+       u64 rx_l2_len_mismatch;
+       u64 rx_l2_pclp;
+       u64 rx_ip_ver_errs;
+       u64 rx_ip_csum_errs;
+       u64 rx_ip_hdr_malformed;
+       u64 rx_ip_payload_malformed;
+       u64 rx_ip_ttl_errs;
+       u64 rx_l3_pclp;
+       u64 rx_l4_malformed;
+       u64 rx_l4_csum_errs;
+       u64 rx_udp_len_errs;
+       u64 rx_l4_port_errs;
+       u64 rx_tcp_flag_errs;
+       u64 rx_tcp_offset_errs;
+       u64 rx_l4_pclp;
+       u64 rx_truncated_pkts;
+
+       u64 tx_bytes_ok;
+       u64 tx_ucast_frames_ok;
+       u64 tx_bcast_frames_ok;
+       u64 tx_mcast_frames_ok;
+       u64 tx_drops;
+};
+
+struct nicvf_drv_stats {
+       /* Rx */
+       u64 rx_frames_ok;
+       u64 rx_frames_64;
+       u64 rx_frames_127;
+       u64 rx_frames_255;
+       u64 rx_frames_511;
+       u64 rx_frames_1023;
+       u64 rx_frames_1518;
+       u64 rx_frames_jumbo;
+       u64 rx_drops;
+
+       /* Tx */
+       u64 tx_frames_ok;
+       u64 tx_drops;
+       u64 tx_tso;
+       u64 txq_stop;
+       u64 txq_wake;
+};
+
+struct nicvf {
+       struct nicvf            *pnicvf;
+       struct net_device       *netdev;
+       struct pci_dev          *pdev;
+       u8                      vf_id;
+       u8                      node;
+       bool                    tns_mode:1;
+       bool                    sqs_mode:1;
+       bool                    loopback_supported:1;
+       u16                     mtu;
+       struct queue_set        *qs;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+#define        MAX_SQS_PER_VF_SINGLE_NODE              5
+#define        MAX_SQS_PER_VF                          11
+       u8                      sqs_id;
+       u8                      sqs_count; /* Secondary Qset count */
+       struct nicvf            *snicvf[MAX_SQS_PER_VF];
+#endif
+       u8                      rx_queues;
+       u8                      tx_queues;
+       u8                      max_queues;
+       void __iomem            *reg_base;
+       bool                    link_up;
+       u8                      duplex;
+       u32                     speed;
+       struct page             *rb_page;
+       u32                     rb_page_offset;
+       bool                    rb_alloc_fail;
+       bool                    rb_work_scheduled;
+       struct delayed_work     rbdr_work;
+       struct tasklet_struct   rbdr_task;
+       struct tasklet_struct   qs_err_task;
+       struct tasklet_struct   cq_task;
+       struct nicvf_cq_poll    *napi[8];
+#ifdef VNIC_RSS_SUPPORT
+       struct nicvf_rss_info   rss_info;
+#endif
+       u8                      cpi_alg;
+       /* Interrupt coalescing settings */
+       u32                     cq_coalesce_usecs;
+
+       u32                     msg_enable;
+       struct nicvf_hw_stats   hw_stats;
+       struct nicvf_drv_stats  drv_stats;
+       struct bgx_stats        bgx_stats;
+       struct work_struct      reset_task;
+
+       /* MSI-X  */
+       bool                    msix_enabled;
+       u8                      num_vec;
+       struct msix_entry       msix_entries[NIC_VF_MSIX_VECTORS];
+       char                    irq_name[NIC_VF_MSIX_VECTORS][20];
+       bool                    irq_allocated[NIC_VF_MSIX_VECTORS];
+
+       /* VF <-> PF mailbox communication */
+       bool                    pf_acked;
+       bool                    pf_nacked;
+} ____cacheline_aligned_in_smp;
+
+/* PF <--> VF Mailbox communication
+ * Eight 64bit registers are shared between PF and VF.
+ * Separate set for each VF.
+ * Writing '1' into last register mbx7 means end of message.
+ */
+
+/* PF <--> VF mailbox communication */
+#define        NIC_PF_VF_MAILBOX_SIZE          2
+#define        NIC_MBOX_MSG_TIMEOUT            2000 /* ms */
+
+/* Mailbox message types */
+#define        NIC_MBOX_MSG_READY              0x01    /* Is PF ready to rcv msgs */
+#define        NIC_MBOX_MSG_ACK                0x02    /* ACK the message received */
+#define        NIC_MBOX_MSG_NACK               0x03    /* NACK the message received */
+#define        NIC_MBOX_MSG_QS_CFG             0x04    /* Configure Qset */
+#define        NIC_MBOX_MSG_RQ_CFG             0x05    /* Configure receive queue */
+#define        NIC_MBOX_MSG_SQ_CFG             0x06    /* Configure Send queue */
+#define        NIC_MBOX_MSG_RQ_DROP_CFG        0x07    /* Configure receive queue */
+#define        NIC_MBOX_MSG_SET_MAC            0x08    /* Add MAC ID to DMAC filter */
+#define        NIC_MBOX_MSG_SET_MAX_FRS        0x09    /* Set max frame size */
+#define        NIC_MBOX_MSG_CPI_CFG            0x0A    /* Config CPI, RSSI */
+#define        NIC_MBOX_MSG_RSS_SIZE           0x0B    /* Get RSS indir_tbl size */
+#define        NIC_MBOX_MSG_RSS_CFG            0x0C    /* Config RSS table */
+#define        NIC_MBOX_MSG_RSS_CFG_CONT       0x0D    /* RSS config continuation */
+#define        NIC_MBOX_MSG_RQ_BP_CFG          0x0E    /* RQ backpressure config */
+#define        NIC_MBOX_MSG_RQ_SW_SYNC         0x0F    /* Flush inflight pkts to RQ */
+#define        NIC_MBOX_MSG_BGX_STATS          0x10    /* Get stats from BGX */
+#define        NIC_MBOX_MSG_BGX_LINK_CHANGE    0x11    /* BGX:LMAC link status */
+#define        NIC_MBOX_MSG_ALLOC_SQS          0x12    /* Allocate secondary Qset */
+#define        NIC_MBOX_MSG_NICVF_PTR          0x13    /* Send nicvf ptr to PF */
+#define        NIC_MBOX_MSG_PNICVF_PTR         0x14    /* Get primary qset nicvf ptr */
+#define        NIC_MBOX_MSG_SNICVF_PTR         0x15    /* Send SQS's nicvf ptr to primary VF */
+#define        NIC_MBOX_MSG_LOOPBACK           0x16    /* Set interface in loopback */
+#define        NIC_MBOX_MSG_CFG_DONE           0xF0    /* VF configuration done */
+#define        NIC_MBOX_MSG_SHUTDOWN           0xF1    /* VF is being shutdown */
+
+struct nic_cfg_msg {
+       u8    msg;
+       u8    vf_id;
+       u8    node_id;
+       bool  tns_mode:1;
+       bool  sqs_mode:1;
+       bool  loopback_supported:1;
+       u8    mac_addr[ETH_ALEN];
+};
+
+/* Qset configuration */
+struct qs_cfg_msg {
+       u8    msg;
+       u8    num;
+       u8    sqs_count;
+       u64   cfg;
+};
+
+/* Receive queue configuration */
+struct rq_cfg_msg {
+       u8    msg;
+       u8    qs_num;
+       u8    rq_num;
+       u64   cfg;
+};
+
+/* Send queue configuration */
+struct sq_cfg_msg {
+       u8    msg;
+       u8    qs_num;
+       u8    sq_num;
+       bool  sqs_mode;
+       u64   cfg;
+};
+
+/* Set VF's MAC address */
+struct set_mac_msg {
+       u8    msg;
+       u8    vf_id;
+       u8    mac_addr[ETH_ALEN];
+};
+
+/* Set Maximum frame size */
+struct set_frs_msg {
+       u8    msg;
+       u8    vf_id;
+       u16   max_frs;
+};
+
+/* Set CPI algorithm type */
+struct cpi_cfg_msg {
+       u8    msg;
+       u8    vf_id;
+       u8    rq_cnt;
+       u8    cpi_alg;
+};
+
+/* Get RSS table size */
+struct rss_sz_msg {
+       u8    msg;
+       u8    vf_id;
+       u16   ind_tbl_size;
+};
+
+/* Set RSS configuration */
+struct rss_cfg_msg {
+       u8    msg;
+       u8    vf_id;
+       u8    hash_bits;
+       u8    tbl_len;
+       u8    tbl_offset;
+#define RSS_IND_TBL_LEN_PER_MBX_MSG    8
+       u8    ind_tbl[RSS_IND_TBL_LEN_PER_MBX_MSG];
+};
+
+struct bgx_stats_msg {
+       u8    msg;
+       u8    vf_id;
+       u8    rx;
+       u8    idx;
+       u64   stats;
+};
+
+/* Physical interface link status */
+struct bgx_link_status {
+       u8    msg;
+       u8    link_up;
+       u8    duplex;
+       u32   speed;
+};
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+/* Get Extra Qset IDs */
+struct sqs_alloc {
+       u8    msg;
+       u8    vf_id;
+       u8    qs_count;
+};
+
+struct nicvf_ptr {
+       u8    msg;
+       u8    vf_id;
+       bool  sqs_mode;
+       u8    sqs_id;
+       u64   nicvf;
+};
+#endif
+
+/* Set interface in loopback mode */
+struct set_loopback {
+       u8    msg;
+       u8    vf_id;
+       bool  enable;
+};
+
+/* 128 bit shared memory between PF and each VF */
+union nic_mbx {
+       struct { u8 msg; }      msg;
+       struct nic_cfg_msg      nic_cfg;
+       struct qs_cfg_msg       qs;
+       struct rq_cfg_msg       rq;
+       struct sq_cfg_msg       sq;
+       struct set_mac_msg      mac;
+       struct set_frs_msg      frs;
+       struct cpi_cfg_msg      cpi_cfg;
+       struct rss_sz_msg       rss_size;
+       struct rss_cfg_msg      rss_cfg;
+       struct bgx_stats_msg    bgx_stats;
+       struct bgx_link_status  link_status;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       struct sqs_alloc        sqs_alloc;
+       struct nicvf_ptr        nicvf;
+#endif
+       struct set_loopback     lbk;
+};
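Every message type fits in this 16-byte union (nic_probe() in nic_main.c below guards against it growing past 16 bytes with a BUILD_BUG_ON), so a request is composed by filling one member and handing the union to the mailbox send routine, which pushes it out as two 64-bit register writes. A minimal sketch of a VF asking the PF to change its MAC; the _example helper and its arguments are placeholders, not part of the import:

/* Illustrative only. */
static int
nicvf_set_mac_example(struct nicvf *nic, u8 vf_id, const u8 *new_mac)
{
	union nic_mbx mbx = {};

	mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
	mbx.mac.vf_id = vf_id;
	ether_addr_copy(mbx.mac.mac_addr, new_mac);

	/* Both 64-bit words of the union go out over the VF->PF mailbox. */
	return nicvf_send_msg_to_pf(nic, &mbx);
}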
+
+#define NIC_NODE_ID_MASK       0x03
+#define NIC_NODE_ID_SHIFT      44
+
+static inline int nic_get_node_id(struct pci_dev *pdev)
+{
+       u64 addr = pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM);
+       return ((addr >> NIC_NODE_ID_SHIFT) & NIC_NODE_ID_MASK);
+}
+
+int nicvf_set_real_num_queues(struct net_device *netdev,
+                             int tx_queues, int rx_queues);
+int nicvf_open(struct net_device *netdev);
+int nicvf_stop(struct net_device *netdev);
+int nicvf_send_msg_to_pf(struct nicvf *vf, union nic_mbx *mbx);
+void nicvf_config_rss(struct nicvf *nic);
+void nicvf_set_rss_key(struct nicvf *nic);
+void nicvf_set_ethtool_ops(struct net_device *netdev);
+void nicvf_update_stats(struct nicvf *nic);
+void nicvf_update_lmac_stats(struct nicvf *nic);
+
+#endif /* NIC_H */
diff --git a/sys/dev/vnic/nic_main.c b/sys/dev/vnic/nic_main.c
new file mode 100644 (file)
index 0000000..de2f22c
--- /dev/null
@@ -0,0 +1,1192 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+#include <linux/of.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "q_struct.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME       "thunder-nic"
+#define DRV_VERSION    "1.0"
+
+struct nicpf {
+       struct pci_dev          *pdev;
+       u8                      rev_id;
+       u8                      node;
+       unsigned int            flags;
+       u8                      num_vf_en;      /* No of VF enabled */
+       bool                    vf_enabled[MAX_NUM_VFS_SUPPORTED];
+       void __iomem            *reg_base;       /* Register start address */
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       u8                      num_sqs_en;     /* Secondary qsets enabled */
+       u64                     nicvf[MAX_NUM_VFS_SUPPORTED];
+       u8                      vf_sqs[MAX_NUM_VFS_SUPPORTED][MAX_SQS_PER_VF];
+       u8                      pqs_vf[MAX_NUM_VFS_SUPPORTED];
+       bool                    sqs_used[MAX_NUM_VFS_SUPPORTED];
+#endif
+       struct pkind_cfg        pkind;
+#define        NIC_SET_VF_LMAC_MAP(bgx, lmac)  (((bgx & 0xF) << 4) | (lmac & 0xF))
+#define        NIC_GET_BGX_FROM_VF_LMAC_MAP(map)       ((map >> 4) & 0xF)
+#define        NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)      (map & 0xF)
+       u8                      vf_lmac_map[MAX_LMAC];
+       struct delayed_work     dwork;
+       struct workqueue_struct *check_link;
+       u8                      link[MAX_LMAC];
+       u8                      duplex[MAX_LMAC];
+       u32                     speed[MAX_LMAC];
+       u16                     cpi_base[MAX_NUM_VFS_SUPPORTED];
+       u16                     rss_ind_tbl_size;
+       bool                    mbx_lock[MAX_NUM_VFS_SUPPORTED];
+
+       /* MSI-X */
+       bool                    msix_enabled;
+       u8                      num_vec;
+       struct msix_entry       msix_entries[NIC_PF_MSIX_VECTORS];
+       bool                    irq_allocated[NIC_PF_MSIX_VECTORS];
+};
+
+/* Supported devices */
+static const struct pci_device_id nic_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) },
+       { 0, }  /* end of table */
+};
+
+MODULE_AUTHOR("Sunil Goutham");
+MODULE_DESCRIPTION("Cavium Thunder NIC Physical Function Driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, nic_id_table);
+
+/* The Cavium ThunderX network controller can *only* be found in SoCs
+ * containing the ThunderX ARM64 CPU implementation.  All accesses to the device
+ * registers on this platform are implicitly strongly ordered with respect
+ * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
+ * with no memory barriers in this driver.  The readq()/writeq() functions add
+ * explicit ordering operations, which in this case are redundant and only
+ * add overhead.
+ */
+
+/* Register read/write APIs */
+static void nic_reg_write(struct nicpf *nic, u64 offset, u64 val)
+{
+       writeq_relaxed(val, nic->reg_base + offset);
+}
+
+static u64 nic_reg_read(struct nicpf *nic, u64 offset)
+{
+       return readq_relaxed(nic->reg_base + offset);
+}
+
+/* PF -> VF mailbox communication APIs */
+static void nic_enable_mbx_intr(struct nicpf *nic)
+{
+       /* Enable mailbox interrupt for all 128 VFs */
+       nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull);
+       nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull);
+}
+
+static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg)
+{
+       nic_reg_write(nic, NIC_PF_MAILBOX_INT + (mbx_reg << 3), BIT_ULL(vf));
+}
+
+static u64 nic_get_mbx_addr(int vf)
+{
+       return NIC_PF_VF_0_127_MAILBOX_0_1 + (vf << NIC_VF_NUM_SHIFT);
+}
+
+/* Send a mailbox message to VF
+ * @vf: vf to which this message to be sent
+ * @mbx: Message to be sent
+ */
+static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx)
+{
+       void __iomem *mbx_addr = nic->reg_base + nic_get_mbx_addr(vf);
+       u64 *msg = (u64 *)mbx;
+
+       /* In first revision HW, mbox interrupt is triggered
+        * when PF writes to MBOX(1), in next revisions when
+        * PF writes to MBOX(0)
+        */
+       if (nic->rev_id == 0) {
+               /* see the comment for nic_reg_write()/nic_reg_read()
+                * functions above
+                */
+               writeq_relaxed(msg[0], mbx_addr);
+               writeq_relaxed(msg[1], mbx_addr + 8);
+       } else {
+               writeq_relaxed(msg[1], mbx_addr + 8);
+               writeq_relaxed(msg[0], mbx_addr);
+       }
+}
+
+/* Responds to VF's READY message with VF's
+ * ID, node, MAC address, etc.
+ * @vf: VF which sent READY message
+ */
+static void nic_mbx_send_ready(struct nicpf *nic, int vf)
+{
+       union nic_mbx mbx = {};
+       int bgx_idx, lmac;
+       const char *mac;
+
+       mbx.nic_cfg.msg = NIC_MBOX_MSG_READY;
+       mbx.nic_cfg.vf_id = vf;
+
+       if (nic->flags & NIC_TNS_ENABLED)
+               mbx.nic_cfg.tns_mode = NIC_TNS_MODE;
+       else
+               mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;
+
+       if (vf < MAX_LMAC) {
+               bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+               lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+               mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac);
+               if (mac)
+                       ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac);
+       }
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false;
+#endif
+       mbx.nic_cfg.node_id = nic->node;
+
+       mbx.nic_cfg.loopback_supported = vf < MAX_LMAC;
+
+       nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* ACKs VF's mailbox message
+ * @vf: VF to which ACK to be sent
+ */
+static void nic_mbx_send_ack(struct nicpf *nic, int vf)
+{
+       union nic_mbx mbx = {};
+
+       mbx.msg.msg = NIC_MBOX_MSG_ACK;
+       nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* NACKs VF's mailbox message to indicate that the PF is not able to
+ * complete the requested action
+ * @vf: VF to which NACK is to be sent
+ */
+static void nic_mbx_send_nack(struct nicpf *nic, int vf)
+{
+       union nic_mbx mbx = {};
+
+       mbx.msg.msg = NIC_MBOX_MSG_NACK;
+       nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* Flush all in flight receive packets to memory and
+ * bring down an active RQ
+ */
+static int nic_rcv_queue_sw_sync(struct nicpf *nic)
+{
+       u16 timeout = ~0x00;
+
+       nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x01);
+       /* Wait till sync cycle is finished */
+       while (timeout) {
+               if (nic_reg_read(nic, NIC_PF_SW_SYNC_RX_DONE) & 0x1)
+                       break;
+               timeout--;
+       }
+       nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x00);
+       if (!timeout) {
+               dev_err(&nic->pdev->dev, "Receive queue software sync failed");
+               return 1;
+       }
+       return 0;
+}
+
+/* Get BGX Rx/Tx stats and respond to VF's request */
+static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx)
+{
+       int bgx_idx, lmac;
+       union nic_mbx mbx = {};
+
+       bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]);
+       lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]);
+
+       mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
+       mbx.bgx_stats.vf_id = bgx->vf_id;
+       mbx.bgx_stats.rx = bgx->rx;
+       mbx.bgx_stats.idx = bgx->idx;
+       if (bgx->rx)
+               mbx.bgx_stats.stats = bgx_get_rx_stats(nic->node, bgx_idx,
+                                                           lmac, bgx->idx);
+       else
+               mbx.bgx_stats.stats = bgx_get_tx_stats(nic->node, bgx_idx,
+                                                           lmac, bgx->idx);
+       nic_send_msg_to_vf(nic, bgx->vf_id, &mbx);
+}
+
+/* Update hardware min/max frame size */
+static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf)
+{
+       if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) {
+               dev_err(&nic->pdev->dev,
+                       "Invalid MTU setting from VF%d rejected, should be between %d and %d\n",
+                          vf, NIC_HW_MIN_FRS, NIC_HW_MAX_FRS);
+               return 1;
+       }
+       new_frs += ETH_HLEN;
+       if (new_frs <= nic->pkind.maxlen)
+               return 0;
+
+       nic->pkind.maxlen = new_frs;
+       nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG, *(u64 *)&nic->pkind);
+       return 0;
+}
+
+/* Set minimum transmit packet size */
+static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
+{
+       int lmac;
+       u64 lmac_cfg;
+
+       /* Max value that can be set is 60 */
+       if (size > 60)
+               size = 60;
+
+       for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) {
+               lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3));
+               lmac_cfg &= ~(0xF << 2);
+               lmac_cfg |= ((size / 4) << 2);
+               nic_reg_write(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3), lmac_cfg);
+       }
+}
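The 60-byte cap follows from the register layout used above; worked out (illustrative only):

/* Not part of the imported sources:
 *   the pad size is stored as size / 4 in the 4-bit field at bits 5:2,
 *   so the largest encodable pad is 0xF * 4 = 60 bytes.
 *   nic_init_hw() below passes NIC_HW_MIN_FRS (64), which is therefore
 *   clamped to 60 here.
 */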
+
+/* Function to check number of LMACs present and set VF::LMAC mapping.
+ * Mapping will be used while initializing channels.
+ */
+static void nic_set_lmac_vf_mapping(struct nicpf *nic)
+{
+       unsigned bgx_map = bgx_get_map(nic->node);
+       int bgx, next_bgx_lmac = 0;
+       int lmac, lmac_cnt = 0;
+       u64 lmac_credit;
+
+       nic->num_vf_en = 0;
+       if (nic->flags & NIC_TNS_ENABLED) {
+               nic->num_vf_en = DEFAULT_NUM_VF_ENABLED;
+               return;
+       }
+
+       for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) {
+               if (!(bgx_map & (1 << bgx)))
+                       continue;
+               lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
+               for (lmac = 0; lmac < lmac_cnt; lmac++)
+                       nic->vf_lmac_map[next_bgx_lmac++] =
+                                               NIC_SET_VF_LMAC_MAP(bgx, lmac);
+               nic->num_vf_en += lmac_cnt;
+
+               /* Program LMAC credits */
+               lmac_credit = (1ull << 1); /* channel credit enable */
+               lmac_credit |= (0x1ff << 2); /* Max outstanding pkt count */
+               /* 48KB BGX Tx buffer size, each unit is of size 16bytes */
+               lmac_credit |= (((((48 * 1024) / lmac_cnt) -
+                               NIC_HW_MAX_FRS) / 16) << 12);
+               lmac = bgx * MAX_LMAC_PER_BGX;
+               for (; lmac < lmac_cnt + (bgx * MAX_LMAC_PER_BGX); lmac++)
+                       nic_reg_write(nic,
+                                     NIC_PF_LMAC_0_7_CREDIT + (lmac * 8),
+                                     lmac_credit);
+       }
+}
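For a fully populated BGX the credit value programmed above decomposes as follows (illustrative only, assuming lmac_cnt == 4):

/* Not part of the imported sources:
 *   bit 1          : channel credit enable
 *   bits 10:2      : 0x1ff, max outstanding packet count
 *   bits 12 and up : ((48 KB / 4) - NIC_HW_MAX_FRS) / 16
 *                  = (12288 - 9200) / 16 = 193 sixteen-byte units
 */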
+
+#define TNS_PORT0_BLOCK 6
+#define TNS_PORT1_BLOCK 7
+#define BGX0_BLOCK 8
+#define BGX1_BLOCK 9
+
+static void nic_init_hw(struct nicpf *nic)
+{
+       int i;
+
+       /* Reset NIC, in case the driver is repeatedly inserted and removed */
+       nic_reg_write(nic, NIC_PF_SOFT_RESET, 1);
+
+       /* Enable NIC HW block */
+       nic_reg_write(nic, NIC_PF_CFG, 0x3);
+
+       /* Enable backpressure */
+       nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03);
+
+       if (nic->flags & NIC_TNS_ENABLED) {
+               nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
+                             (NIC_TNS_MODE << 7) | TNS_PORT0_BLOCK);
+               nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
+                             (NIC_TNS_MODE << 7) | TNS_PORT1_BLOCK);
+               nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
+                             (1ULL << 63) | TNS_PORT0_BLOCK);
+               nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
+                             (1ULL << 63) | TNS_PORT1_BLOCK);
+
+       } else {
+               /* Disable TNS mode on both interfaces */
+               nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
+                             (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
+               nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
+                             (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
+               nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
+                             (1ULL << 63) | BGX0_BLOCK);
+               nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
+                             (1ULL << 63) | BGX1_BLOCK);
+       }
+
+       /* PKIND configuration */
+       nic->pkind.minlen = 0;
+       nic->pkind.maxlen = NIC_HW_MAX_FRS + ETH_HLEN;
+       nic->pkind.lenerr_en = 1;
+       nic->pkind.rx_hdr = 0;
+       nic->pkind.hdr_sl = 0;
+
+       for (i = 0; i < NIC_MAX_PKIND; i++)
+               nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (i << 3),
+                             *(u64 *)&nic->pkind);
+
+       nic_set_tx_pkt_pad(nic, NIC_HW_MIN_FRS);
+
+       /* Timer config */
+       nic_reg_write(nic, NIC_PF_INTR_TIMER_CFG, NICPF_CLK_PER_INT_TICK);
+
+       /* Enable VLAN ethertype matching and stripping */
+       nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7,
+                     (2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q);
+}
+
+/* Channel parse index configuration */
+static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
+{
+       u32 vnic, bgx, lmac, chan;
+       u32 padd, cpi_count = 0;
+       u64 cpi_base, cpi, rssi_base, rssi;
+       u8  qset, rq_idx = 0;
+
+       vnic = cfg->vf_id;
+       bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+       lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
+
+       chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
+       cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX);
+       rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX);
+
+       /* Rx channel configuration */
+       nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3),
+                     (1ull << 63) | (vnic << 0));
+       nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_CFG | (chan << 3),
+                     ((u64)cfg->cpi_alg << 62) | (cpi_base << 48));
+
+       if (cfg->cpi_alg == CPI_ALG_NONE)
+               cpi_count = 1;
+       else if (cfg->cpi_alg == CPI_ALG_VLAN) /* 3 bits of PCP */
+               cpi_count = 8;
+       else if (cfg->cpi_alg == CPI_ALG_VLAN16) /* 3 bits PCP + DEI */
+               cpi_count = 16;
+       else if (cfg->cpi_alg == CPI_ALG_DIFF) /* 6bits DSCP */
+               cpi_count = NIC_MAX_CPI_PER_LMAC;
+
+       /* RSS Qset, Qidx mapping */
+       qset = cfg->vf_id;
+       rssi = rssi_base;
+       for (; rssi < (rssi_base + cfg->rq_cnt); rssi++) {
+               nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
+                             (qset << 3) | rq_idx);
+               rq_idx++;
+       }
+
+       rssi = 0;
+       cpi = cpi_base;
+       for (; cpi < (cpi_base + cpi_count); cpi++) {
+               /* Determine port to channel adder */
+               if (cfg->cpi_alg != CPI_ALG_DIFF)
+                       padd = cpi % cpi_count;
+               else
+                       padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */
+
+               /* Leave RSS_SIZE as '0' to disable RSS */
+               nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3),
+                             (vnic << 24) | (padd << 16) | (rssi_base + rssi));
+
+               if ((rssi + 1) >= cfg->rq_cnt)
+                       continue;
+
+               if (cfg->cpi_alg == CPI_ALG_VLAN)
+                       rssi++;
+               else if (cfg->cpi_alg == CPI_ALG_VLAN16)
+                       rssi = ((cpi - cpi_base) & 0xe) >> 1;
+               else if (cfg->cpi_alg == CPI_ALG_DIFF)
+                       rssi = ((cpi - cpi_base) & 0x38) >> 3;
+       }
+       nic->cpi_base[cfg->vf_id] = cpi_base;
+}
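One concrete case worked through (illustrative only): a VF mapped to BGX0-LMAC2, with rss_ind_tbl_size = 256 as set in nic_probe() and MAX_BGX_CHANS_PER_LMAC = 16 from thunder_bgx.h:

/* Not part of the imported sources:
 *   chan      = 2 * 16  + 0 * 128  (NIC_CHANS_PER_INF) =  32
 *   cpi_base  = 2 * 64  + 0 * 1024 (NIC_CPI_PER_BGX)   = 128
 *   rssi_base = 2 * 256 + 0 * 2048 (NIC_RSSI_PER_BGX)  = 512
 * i.e. this VF's CPIs start at 128 and its RSS indices at 512.
 */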
+
+#ifdef VNIC_RSS_SUPPORT
+/* Responds to VF with its RSS indirection table size */
+static void nic_send_rss_size(struct nicpf *nic, int vf)
+{
+       union nic_mbx mbx = {};
+       u64  *msg;
+
+       msg = (u64 *)&mbx;
+
+       mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
+       mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size;
+       nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
+/* Receive side scaling configuration
+ * configures:
+ * - RSS index
+ * - indirection table, i.e. the hash::RQ mapping
+ * - number of hash bits to consider
+ */
+static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg)
+{
+       u8  qset, idx = 0;
+       u64 cpi_cfg, cpi_base, rssi_base, rssi;
+
+       cpi_base = nic->cpi_base[cfg->vf_id];
+       cpi_cfg = nic_reg_read(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3));
+       rssi_base = (cpi_cfg & 0x0FFF) + cfg->tbl_offset;
+
+       rssi = rssi_base;
+       qset = cfg->vf_id;
+
+       for (; rssi < (rssi_base + cfg->tbl_len); rssi++) {
+#ifdef VNIC_MULTI_QSET_SUPPORT
+               u8 svf = cfg->ind_tbl[idx] >> 3;
+
+               if (svf)
+                       qset = nic->vf_sqs[cfg->vf_id][svf - 1];
+               else
+                       qset = cfg->vf_id;
+#endif
+               nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3),
+                             (qset << 3) | (cfg->ind_tbl[idx] & 0x7));
+               idx++;
+       }
+
+       cpi_cfg &= ~(0xFULL << 20);
+       cpi_cfg |= (cfg->hash_bits << 20);
+       nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3), cpi_cfg);
+}
+#endif
+
+/* 4 level transmit side scheduler configuration
+ * for TNS bypass mode
+ *
+ * Sample configuration for SQ0
+ * VNIC0-SQ0 -> TL4(0)   -> TL3[0]   -> TL2[0]  -> TL1[0] -> BGX0
+ * VNIC1-SQ0 -> TL4(8)   -> TL3[2]   -> TL2[0]  -> TL1[0] -> BGX0
+ * VNIC2-SQ0 -> TL4(16)  -> TL3[4]   -> TL2[1]  -> TL1[0] -> BGX0
+ * VNIC3-SQ0 -> TL4(24)  -> TL3[6]   -> TL2[1]  -> TL1[0] -> BGX0
+ * VNIC4-SQ0 -> TL4(512) -> TL3[128] -> TL2[32] -> TL1[1] -> BGX1
+ * VNIC5-SQ0 -> TL4(520) -> TL3[130] -> TL2[32] -> TL1[1] -> BGX1
+ * VNIC6-SQ0 -> TL4(528) -> TL3[132] -> TL2[33] -> TL1[1] -> BGX1
+ * VNIC7-SQ0 -> TL4(536) -> TL3[134] -> TL2[33] -> TL1[1] -> BGX1
+ */
+static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
+                              struct sq_cfg_msg *sq)
+{
+       u32 bgx, lmac, chan;
+       u32 tl2, tl3, tl4;
+       u32 rr_quantum;
+       u8 sq_idx = sq->sq_num;
+       u8 pqs_vnic;
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       if (sq->sqs_mode)
+               pqs_vnic = nic->pqs_vf[vnic];
+       else
+#endif
+               pqs_vnic = vnic;
+
+       bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);
+       lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[pqs_vnic]);
+
+       /* 24 bytes for FCS, IPG and preamble */
+       rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
+
+       tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
+       tl4 += sq_idx;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       if (sq->sqs_mode)
+               tl4 += vnic * 8;
+#endif
+
+       tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
+       nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
+                     ((u64)vnic << NIC_QS_ID_SHIFT) |
+                     ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4);
+       nic_reg_write(nic, NIC_PF_TL4_0_1023_CFG | (tl4 << 3),
+                     ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum);
+
+       nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum);
+       chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
+       nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
+       /* Enable backpressure on the channel */
+       nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1);
+
+       tl2 = tl3 >> 2;
+       nic_reg_write(nic, NIC_PF_TL3A_0_63_CFG | (tl2 << 3), tl2);
+       nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum);
+       /* No priorities as of now */
+       nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00);
+}
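The sample mapping in the comment above follows from these formulas, with NIC_TL4_PER_BGX = 1024 / 2 = 512 and NIC_TL4_PER_LMAC = 1024 / 128 = 8. Two rows worked out (illustrative only):

/* Not part of the imported sources:
 *   VNIC1-SQ0 (bgx 0, lmac 1): tl4 = 1*8 + 0*512 =   8; tl3 =   8/4 =   2; tl2 =   2>>2 =  0
 *   VNIC4-SQ0 (bgx 1, lmac 0): tl4 = 0*8 + 1*512 = 512; tl3 = 512/4 = 128; tl2 = 128>>2 = 32
 * TL1 is simply the BGX interface, 0 or 1.
 */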
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+/* Send primary nicvf pointer to secondary QS's VF */
+static void nic_send_pnicvf(struct nicpf *nic, int sqs)
+{
+       union nic_mbx mbx = {};
+
+       mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
+       mbx.nicvf.nicvf = nic->nicvf[nic->pqs_vf[sqs]];
+       nic_send_msg_to_vf(nic, sqs, &mbx);
+}
+
+/* Send SQS's nicvf pointer to primary QS's VF */
+static void nic_send_snicvf(struct nicpf *nic, struct nicvf_ptr *nicvf)
+{
+       union nic_mbx mbx = {};
+       int sqs_id = nic->vf_sqs[nicvf->vf_id][nicvf->sqs_id];
+
+       mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
+       mbx.nicvf.sqs_id = nicvf->sqs_id;
+       mbx.nicvf.nicvf = nic->nicvf[sqs_id];
+       nic_send_msg_to_vf(nic, nicvf->vf_id, &mbx);
+}
+
+/* Find next available Qset that can be assigned as a
+ * secondary Qset to a VF.
+ */
+static int nic_nxt_avail_sqs(struct nicpf *nic)
+{
+       int sqs;
+
+       for (sqs = 0; sqs < nic->num_sqs_en; sqs++) {
+               if (!nic->sqs_used[sqs])
+                       nic->sqs_used[sqs] = true;
+               else
+                       continue;
+               return sqs + nic->num_vf_en;
+       }
+       return -1;
+}
+
+/* Allocate additional Qsets for requested VF */
+static void nic_alloc_sqs(struct nicpf *nic, struct sqs_alloc *sqs)
+{
+       union nic_mbx mbx = {};
+       int idx, alloc_qs = 0;
+       int sqs_id;
+
+       if (!nic->num_sqs_en)
+               goto send_mbox;
+
+       for (idx = 0; idx < sqs->qs_count; idx++) {
+               sqs_id = nic_nxt_avail_sqs(nic);
+               if (sqs_id < 0)
+                       break;
+               nic->vf_sqs[sqs->vf_id][idx] = sqs_id;
+               nic->pqs_vf[sqs_id] = sqs->vf_id;
+               alloc_qs++;
+       }
+
+send_mbox:
+       mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
+       mbx.sqs_alloc.vf_id = sqs->vf_id;
+       mbx.sqs_alloc.qs_count = alloc_qs;
+       nic_send_msg_to_vf(nic, sqs->vf_id, &mbx);
+}
+#endif
+
+static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
+{
+       int bgx_idx, lmac_idx;
+
+       if (lbk->vf_id >= MAX_LMAC)
+               return -1;
+
+       bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
+       lmac_idx = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
+
+       bgx_lmac_internal_loopback(nic->node, bgx_idx, lmac_idx, lbk->enable);
+
+       return 0;
+}
+
+/* Interrupt handler to handle mailbox messages from VFs */
+static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
+{
+       union nic_mbx mbx = {};
+       u64 *mbx_data;
+       u64 mbx_addr;
+       u64 reg_addr;
+       u64 cfg;
+       int bgx, lmac;
+       int i;
+       int ret = 0;
+
+       nic->mbx_lock[vf] = true;
+
+       mbx_addr = nic_get_mbx_addr(vf);
+       mbx_data = (u64 *)&mbx;
+
+       for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
+               *mbx_data = nic_reg_read(nic, mbx_addr);
+               mbx_data++;
+               mbx_addr += sizeof(u64);
+       }
+
+       dev_dbg(&nic->pdev->dev, "%s: Mailbox msg %d from VF%d\n",
+               __func__, mbx.msg.msg, vf);
+       switch (mbx.msg.msg) {
+       case NIC_MBOX_MSG_READY:
+               nic_mbx_send_ready(nic, vf);
+               if (vf < MAX_LMAC) {
+                       nic->link[vf] = 0;
+                       nic->duplex[vf] = 0;
+                       nic->speed[vf] = 0;
+               }
+               ret = 1;
+               break;
+       case NIC_MBOX_MSG_QS_CFG:
+               reg_addr = NIC_PF_QSET_0_127_CFG |
+                          (mbx.qs.num << NIC_QS_ID_SHIFT);
+               cfg = mbx.qs.cfg;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+               /* Check if it's a secondary Qset */
+               if (vf >= nic->num_vf_en) {
+                       cfg = cfg & (~0x7FULL);
+                       /* Assign this Qset to primary Qset's VF */
+                       cfg |= nic->pqs_vf[vf];
+               }
+#endif
+               nic_reg_write(nic, reg_addr, cfg);
+               break;
+       case NIC_MBOX_MSG_RQ_CFG:
+               reg_addr = NIC_PF_QSET_0_127_RQ_0_7_CFG |
+                          (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
+                          (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
+               nic_reg_write(nic, reg_addr, mbx.rq.cfg);
+               break;
+       case NIC_MBOX_MSG_RQ_BP_CFG:
+               reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG |
+                          (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
+                          (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
+               nic_reg_write(nic, reg_addr, mbx.rq.cfg);
+               break;
+       case NIC_MBOX_MSG_RQ_SW_SYNC:
+               ret = nic_rcv_queue_sw_sync(nic);
+               break;
+       case NIC_MBOX_MSG_RQ_DROP_CFG:
+               reg_addr = NIC_PF_QSET_0_127_RQ_0_7_DROP_CFG |
+                          (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
+                          (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
+               nic_reg_write(nic, reg_addr, mbx.rq.cfg);
+               break;
+       case NIC_MBOX_MSG_SQ_CFG:
+               reg_addr = NIC_PF_QSET_0_127_SQ_0_7_CFG |
+                          (mbx.sq.qs_num << NIC_QS_ID_SHIFT) |
+                          (mbx.sq.sq_num << NIC_Q_NUM_SHIFT);
+               nic_reg_write(nic, reg_addr, mbx.sq.cfg);
+               nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq);
+               break;
+       case NIC_MBOX_MSG_SET_MAC:
+#ifdef VNIC_MULTI_QSET_SUPPORT
+               if (vf >= nic->num_vf_en)
+                       break;
+#endif
+               lmac = mbx.mac.vf_id;
+               bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
+               lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
+               bgx_set_lmac_mac(nic->node, bgx, lmac, mbx.mac.mac_addr);
+               break;
+       case NIC_MBOX_MSG_SET_MAX_FRS:
+               ret = nic_update_hw_frs(nic, mbx.frs.max_frs,
+                                       mbx.frs.vf_id);
+               break;
+       case NIC_MBOX_MSG_CPI_CFG:
+               nic_config_cpi(nic, &mbx.cpi_cfg);
+               break;
+#ifdef VNIC_RSS_SUPPORT
+       case NIC_MBOX_MSG_RSS_SIZE:
+               nic_send_rss_size(nic, vf);
+               goto unlock;
+       case NIC_MBOX_MSG_RSS_CFG:
+       case NIC_MBOX_MSG_RSS_CFG_CONT:
+               nic_config_rss(nic, &mbx.rss_cfg);
+               break;
+#endif
+       case NIC_MBOX_MSG_CFG_DONE:
+               /* Last message of VF config msg sequence */
+               nic->vf_enabled[vf] = true;
+               goto unlock;
+       case NIC_MBOX_MSG_SHUTDOWN:
+               /* First msg in VF teardown sequence */
+               nic->vf_enabled[vf] = false;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+               if (vf >= nic->num_vf_en)
+                       nic->sqs_used[vf - nic->num_vf_en] = false;
+               nic->pqs_vf[vf] = 0;
+#endif
+               break;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       case NIC_MBOX_MSG_ALLOC_SQS:
+               nic_alloc_sqs(nic, &mbx.sqs_alloc);
+               goto unlock;
+       case NIC_MBOX_MSG_NICVF_PTR:
+               nic->nicvf[vf] = mbx.nicvf.nicvf;
+               break;
+       case NIC_MBOX_MSG_PNICVF_PTR:
+               nic_send_pnicvf(nic, vf);
+               goto unlock;
+       case NIC_MBOX_MSG_SNICVF_PTR:
+               nic_send_snicvf(nic, &mbx.nicvf);
+               goto unlock;
+#endif
+       case NIC_MBOX_MSG_BGX_STATS:
+               nic_get_bgx_stats(nic, &mbx.bgx_stats);
+               goto unlock;
+       case NIC_MBOX_MSG_LOOPBACK:
+               ret = nic_config_loopback(nic, &mbx.lbk);
+               break;
+       default:
+               dev_err(&nic->pdev->dev,
+                       "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
+               break;
+       }
+
+       if (!ret)
+               nic_mbx_send_ack(nic, vf);
+       else if (mbx.msg.msg != NIC_MBOX_MSG_READY)
+               nic_mbx_send_nack(nic, vf);
+unlock:
+       nic->mbx_lock[vf] = false;
+}
+
+static void nic_mbx_intr_handler (struct nicpf *nic, int mbx)
+{
+       u64 intr;
+       u8  vf, vf_per_mbx_reg = 64;
+
+       intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
+       dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
+       for (vf = 0; vf < vf_per_mbx_reg; vf++) {
+               if (intr & (1ULL << vf)) {
+                       dev_dbg(&nic->pdev->dev, "Intr from VF %d\n",
+                               vf + (mbx * vf_per_mbx_reg));
+
+                       nic_handle_mbx_intr(nic, vf + (mbx * vf_per_mbx_reg));
+                       nic_clear_mbx_intr(nic, vf, mbx);
+               }
+       }
+}
+
+static irqreturn_t nic_mbx0_intr_handler (int irq, void *nic_irq)
+{
+       struct nicpf *nic = (struct nicpf *)nic_irq;
+
+       nic_mbx_intr_handler(nic, 0);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t nic_mbx1_intr_handler (int irq, void *nic_irq)
+{
+       struct nicpf *nic = (struct nicpf *)nic_irq;
+
+       nic_mbx_intr_handler(nic, 1);
+
+       return IRQ_HANDLED;
+}
+
+static int nic_enable_msix(struct nicpf *nic)
+{
+       int i, ret;
+
+       nic->num_vec = NIC_PF_MSIX_VECTORS;
+
+       for (i = 0; i < nic->num_vec; i++)
+               nic->msix_entries[i].entry = i;
+
+       ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec);
+       if (ret) {
+               dev_err(&nic->pdev->dev,
+                       "Request for #%d msix vectors failed\n",
+                          nic->num_vec);
+               return ret;
+       }
+
+       nic->msix_enabled = 1;
+       return 0;
+}
+
+static void nic_disable_msix(struct nicpf *nic)
+{
+       if (nic->msix_enabled) {
+               pci_disable_msix(nic->pdev);
+               nic->msix_enabled = 0;
+               nic->num_vec = 0;
+       }
+}
+
+static void nic_free_all_interrupts(struct nicpf *nic)
+{
+       int irq;
+
+       for (irq = 0; irq < nic->num_vec; irq++) {
+               if (nic->irq_allocated[irq])
+                       free_irq(nic->msix_entries[irq].vector, nic);
+               nic->irq_allocated[irq] = false;
+       }
+}
+
+static int nic_register_interrupts(struct nicpf *nic)
+{
+       int ret;
+
+       /* Enable MSI-X */
+       ret = nic_enable_msix(nic);
+       if (ret)
+               return ret;
+
+       /* Register mailbox interrupt handlers */
+       ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector,
+                         nic_mbx0_intr_handler, 0, "NIC Mbox0", nic);
+       if (ret)
+               goto fail;
+
+       nic->irq_allocated[NIC_PF_INTR_ID_MBOX0] = true;
+
+       ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX1].vector,
+                         nic_mbx1_intr_handler, 0, "NIC Mbox1", nic);
+       if (ret)
+               goto fail;
+
+       nic->irq_allocated[NIC_PF_INTR_ID_MBOX1] = true;
+
+       /* Enable mailbox interrupt */
+       nic_enable_mbx_intr(nic);
+       return 0;
+
+fail:
+       dev_err(&nic->pdev->dev, "Request irq failed\n");
+       nic_free_all_interrupts(nic);
+       return ret;
+}
+
+static void nic_unregister_interrupts(struct nicpf *nic)
+{
+       nic_free_all_interrupts(nic);
+       nic_disable_msix(nic);
+}
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+int nic_num_sqs_en(struct nicpf *nic, int vf_en)
+{
+       int pos = 0, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;
+       u16 total_vf;
+
+       /* Check if it's a multi-node environment */
+       if (nr_node_ids > 1)
+               sqs_per_vf = MAX_SQS_PER_VF;
+
+       pos = pci_find_ext_capability(nic->pdev, PCI_EXT_CAP_ID_SRIOV);
+       pci_read_config_word(nic->pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf);
+       return min(total_vf - vf_en, vf_en * sqs_per_vf);
+}
+#endif
+
+int nic_sriov_configure(struct pci_dev *pdev, int num_vfs_requested)
+{
+       struct nicpf *nic = pci_get_drvdata(pdev);
+       int vf_en;
+       int err;
+
+       if (nic->num_vf_en == num_vfs_requested)
+               return num_vfs_requested;
+
+       if (nic->flags & NIC_SRIOV_ENABLED) {
+               nic->flags &= ~NIC_SRIOV_ENABLED;
+       }
+
+       nic->num_vf_en = 0;
+       if (num_vfs_requested > MAX_NUM_VFS_SUPPORTED ||
+           num_vfs_requested < 0)
+               return -EINVAL;
+
+       if (num_vfs_requested) {
+               vf_en = num_vfs_requested;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+               nic->num_sqs_en = nic_num_sqs_en(nic, num_vfs_requested);
+               vf_en += nic->num_sqs_en;
+#endif
+               nic->num_vf_en = num_vfs_requested;
+               nic->flags |= NIC_SRIOV_ENABLED;
+       }
+
+       return num_vfs_requested;
+}
+
+static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
+{
+       int pos = 0;
+       int vf_en;
+       int err;
+       u16 total_vf_cnt;
+
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+       if (!pos) {
+               dev_err(&pdev->dev, "SRIOV capability is not found in PCIe config space\n");
+               return -ENODEV;
+       }
+
+       pci_read_config_word(pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf_cnt);
+       if (total_vf_cnt < nic->num_vf_en)
+               nic->num_vf_en = total_vf_cnt;
+
+       if (!total_vf_cnt)
+               return 0;
+
+       vf_en = nic->num_vf_en;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       nic->num_sqs_en = nic_num_sqs_en(nic, nic->num_vf_en);
+       vf_en += nic->num_sqs_en;
+#endif
+
+       err = pci_enable_sriov(pdev, vf_en);
+       if (err) {
+               dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n",
+                       vf_en);
+               nic->num_vf_en = 0;
+               return err;
+       }
+
+       dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n",
+                vf_en);
+
+       nic->flags |= NIC_SRIOV_ENABLED;
+       return 0;
+}
+
+/* Poll for BGX LMAC link status and update the corresponding VF
+ * if there is a change. Valid only if the internal L2 switch
+ * is not present; otherwise the VF link is always treated as up.
+ */
+static void nic_poll_for_link(struct work_struct *work)
+{
+       union nic_mbx mbx = {};
+       struct nicpf *nic;
+       struct bgx_link_status link;
+       u8 vf, bgx, lmac;
+
+       nic = container_of(work, struct nicpf, dwork.work);
+
+       mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+
+       for (vf = 0; vf < nic->num_vf_en; vf++) {
+               /* Poll only if VF is UP */
+               if (!nic->vf_enabled[vf])
+                       continue;
+
+               /* Get BGX, LMAC indices for the VF */
+               bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+               lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+               /* Get interface link status */
+               bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
+
+               /* Inform VF only if link status changed */
+               if (nic->link[vf] == link.link_up)
+                       continue;
+
+               if (!nic->mbx_lock[vf]) {
+                       nic->link[vf] = link.link_up;
+                       nic->duplex[vf] = link.duplex;
+                       nic->speed[vf] = link.speed;
+
+                       /* Send a mbox message to VF with current link status */
+                       mbx.link_status.link_up = link.link_up;
+                       mbx.link_status.duplex = link.duplex;
+                       mbx.link_status.speed = link.speed;
+                       nic_send_msg_to_vf(nic, vf, &mbx);
+               }
+       }
+       queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2);
+}
+
+static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+       struct device *dev = &pdev->dev;
+       struct nicpf *nic;
+       int    err;
+
+       BUILD_BUG_ON(sizeof(union nic_mbx) > 16);
+
+       nic = devm_kzalloc(dev, sizeof(*nic), GFP_KERNEL);
+       if (!nic)
+               return -ENOMEM;
+
+       pci_set_drvdata(pdev, nic);
+
+       nic->pdev = pdev;
+
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(dev, "Failed to enable PCI device\n");
+               pci_set_drvdata(pdev, NULL);
+               return err;
+       }
+
+       err = pci_request_regions(pdev, DRV_NAME);
+       if (err) {
+               dev_err(dev, "PCI request regions failed 0x%x\n", err);
+               goto err_disable_device;
+       }
+
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+       if (err) {
+               dev_err(dev, "Unable to get usable DMA configuration\n");
+               goto err_release_regions;
+       }
+
+       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+       if (err) {
+               dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
+               goto err_release_regions;
+       }
+
+       /* MAP PF's configuration registers */
+       nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+       if (!nic->reg_base) {
+               dev_err(dev, "Cannot map config register space, aborting\n");
+               err = -ENOMEM;
+               goto err_release_regions;
+       }
+
+       pci_read_config_byte(pdev, PCI_REVISION_ID, &nic->rev_id);
+
+       nic->node = nic_get_node_id(pdev);
+
+       /* By default set NIC in TNS bypass mode */
+       nic->flags &= ~NIC_TNS_ENABLED;
+       nic_set_lmac_vf_mapping(nic);
+
+       /* Initialize hardware */
+       nic_init_hw(nic);
+
+       /* Set RSS TBL size for each VF */
+       nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
+
+       /* Register interrupts */
+       err = nic_register_interrupts(nic);
+       if (err)
+               goto err_release_regions;
+
+       /* Configure SRIOV */
+       err = nic_sriov_init(pdev, nic);
+       if (err)
+               goto err_unregister_interrupts;
+
+       if (nic->flags & NIC_TNS_ENABLED)
+               return 0;
+
+       /* Register a physical link status poll fn() */
+       nic->check_link = alloc_workqueue("check_link_status",
+                                         WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+       if (!nic->check_link) {
+               err = -ENOMEM;
+               goto err_disable_sriov;
+       }
+
+       INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link);
+       queue_delayed_work(nic->check_link, &nic->dwork, 0);
+
+       return 0;
+
+err_disable_sriov:
+       if (nic->flags & NIC_SRIOV_ENABLED)
+               pci_disable_sriov(pdev);
+err_unregister_interrupts:
+       nic_unregister_interrupts(nic);
+err_release_regions:
+       pci_release_regions(pdev);
+err_disable_device:
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+       return err;
+}
+
+static void nic_remove(struct pci_dev *pdev)
+{
+       struct nicpf *nic = pci_get_drvdata(pdev);
+
+       if (nic->check_link) {
+               /* Destroy work Queue */
+               cancel_delayed_work(&nic->dwork);
+               flush_workqueue(nic->check_link);
+               destroy_workqueue(nic->check_link);
+       }
+
+       nic_unregister_interrupts(nic);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+}
+
+static struct pci_driver nic_driver = {
+       .name = DRV_NAME,
+       .id_table = nic_id_table,
+       .probe = nic_probe,
+       .remove = nic_remove,
+       .sriov_configure = nic_sriov_configure,
+};
+
+static int __init nic_init_module(void)
+{
+       pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+
+       return pci_register_driver(&nic_driver);
+}
+
+static void __exit nic_cleanup_module(void)
+{
+       pci_unregister_driver(&nic_driver);
+}
+
+module_init(nic_init_module);
+module_exit(nic_cleanup_module);
diff --git a/sys/dev/vnic/nic_reg.h b/sys/dev/vnic/nic_reg.h
new file mode 100644 (file)
index 0000000..b66f7e3
--- /dev/null
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef NIC_REG_H
+#define NIC_REG_H
+
+#define   NIC_PF_REG_COUNT                     29573
+#define   NIC_VF_REG_COUNT                     249
+
+/* Physical function register offsets */
+#define   NIC_PF_CFG                           (0x0000)
+#define   NIC_PF_STATUS                                (0x0010)
+#define   NIC_PF_INTR_TIMER_CFG                        (0x0030)
+#define   NIC_PF_BIST_STATUS                   (0x0040)
+#define   NIC_PF_SOFT_RESET                    (0x0050)
+#define   NIC_PF_TCP_TIMER                     (0x0060)
+#define   NIC_PF_BP_CFG                                (0x0080)
+#define   NIC_PF_RRM_CFG                       (0x0088)
+#define   NIC_PF_CQM_CF                                (0x00A0)
+#define   NIC_PF_CNM_CF                                (0x00A8)
+#define   NIC_PF_CNM_STATUS                    (0x00B0)
+#define   NIC_PF_CQ_AVG_CFG                    (0x00C0)
+#define   NIC_PF_RRM_AVG_CFG                   (0x00C8)
+#define   NIC_PF_INTF_0_1_SEND_CFG             (0x0200)
+#define   NIC_PF_INTF_0_1_BP_CFG               (0x0208)
+#define   NIC_PF_INTF_0_1_BP_DIS_0_1           (0x0210)
+#define   NIC_PF_INTF_0_1_BP_SW_0_1            (0x0220)
+#define   NIC_PF_RBDR_BP_STATE_0_3             (0x0240)
+#define   NIC_PF_MAILBOX_INT                   (0x0410)
+#define   NIC_PF_MAILBOX_INT_W1S               (0x0430)
+#define   NIC_PF_MAILBOX_ENA_W1C               (0x0450)
+#define   NIC_PF_MAILBOX_ENA_W1S               (0x0470)
+#define   NIC_PF_RX_ETYPE_0_7                  (0x0500)
+#define   NIC_PF_PKIND_0_15_CFG                        (0x0600)
+#define   NIC_PF_ECC0_FLIP0                    (0x1000)
+#define   NIC_PF_ECC1_FLIP0                    (0x1008)
+#define   NIC_PF_ECC2_FLIP0                    (0x1010)
+#define   NIC_PF_ECC3_FLIP0                    (0x1018)
+#define   NIC_PF_ECC0_FLIP1                    (0x1080)
+#define   NIC_PF_ECC1_FLIP1                    (0x1088)
+#define   NIC_PF_ECC2_FLIP1                    (0x1090)
+#define   NIC_PF_ECC3_FLIP1                    (0x1098)
+#define   NIC_PF_ECC0_CDIS                     (0x1100)
+#define   NIC_PF_ECC1_CDIS                     (0x1108)
+#define   NIC_PF_ECC2_CDIS                     (0x1110)
+#define   NIC_PF_ECC3_CDIS                     (0x1118)
+#define   NIC_PF_BIST0_STATUS                  (0x1280)
+#define   NIC_PF_BIST1_STATUS                  (0x1288)
+#define   NIC_PF_BIST2_STATUS                  (0x1290)
+#define   NIC_PF_BIST3_STATUS                  (0x1298)
+#define   NIC_PF_ECC0_SBE_INT                  (0x2000)
+#define   NIC_PF_ECC0_SBE_INT_W1S              (0x2008)
+#define   NIC_PF_ECC0_SBE_ENA_W1C              (0x2010)
+#define   NIC_PF_ECC0_SBE_ENA_W1S              (0x2018)
+#define   NIC_PF_ECC0_DBE_INT                  (0x2100)
+#define   NIC_PF_ECC0_DBE_INT_W1S              (0x2108)
+#define   NIC_PF_ECC0_DBE_ENA_W1C              (0x2110)
+#define   NIC_PF_ECC0_DBE_ENA_W1S              (0x2118)
+#define   NIC_PF_ECC1_SBE_INT                  (0x2200)
+#define   NIC_PF_ECC1_SBE_INT_W1S              (0x2208)
+#define   NIC_PF_ECC1_SBE_ENA_W1C              (0x2210)
+#define   NIC_PF_ECC1_SBE_ENA_W1S              (0x2218)
+#define   NIC_PF_ECC1_DBE_INT                  (0x2300)
+#define   NIC_PF_ECC1_DBE_INT_W1S              (0x2308)
+#define   NIC_PF_ECC1_DBE_ENA_W1C              (0x2310)
+#define   NIC_PF_ECC1_DBE_ENA_W1S              (0x2318)
+#define   NIC_PF_ECC2_SBE_INT                  (0x2400)
+#define   NIC_PF_ECC2_SBE_INT_W1S              (0x2408)
+#define   NIC_PF_ECC2_SBE_ENA_W1C              (0x2410)
+#define   NIC_PF_ECC2_SBE_ENA_W1S              (0x2418)
+#define   NIC_PF_ECC2_DBE_INT                  (0x2500)
+#define   NIC_PF_ECC2_DBE_INT_W1S              (0x2508)
+#define   NIC_PF_ECC2_DBE_ENA_W1C              (0x2510)
+#define   NIC_PF_ECC2_DBE_ENA_W1S              (0x2518)
+#define   NIC_PF_ECC3_SBE_INT                  (0x2600)
+#define   NIC_PF_ECC3_SBE_INT_W1S              (0x2608)
+#define   NIC_PF_ECC3_SBE_ENA_W1C              (0x2610)
+#define   NIC_PF_ECC3_SBE_ENA_W1S              (0x2618)
+#define   NIC_PF_ECC3_DBE_INT                  (0x2700)
+#define   NIC_PF_ECC3_DBE_INT_W1S              (0x2708)
+#define   NIC_PF_ECC3_DBE_ENA_W1C              (0x2710)
+#define   NIC_PF_ECC3_DBE_ENA_W1S              (0x2718)
+#define   NIC_PF_CPI_0_2047_CFG                        (0x200000)
+#define   NIC_PF_RSSI_0_4097_RQ                        (0x220000)
+#define   NIC_PF_LMAC_0_7_CFG                  (0x240000)
+#define   NIC_PF_LMAC_0_7_SW_XOFF              (0x242000)
+#define   NIC_PF_LMAC_0_7_CREDIT               (0x244000)
+#define   NIC_PF_CHAN_0_255_TX_CFG             (0x400000)
+#define   NIC_PF_CHAN_0_255_RX_CFG             (0x420000)
+#define   NIC_PF_CHAN_0_255_SW_XOFF            (0x440000)
+#define   NIC_PF_CHAN_0_255_CREDIT             (0x460000)
+#define   NIC_PF_CHAN_0_255_RX_BP_CFG          (0x480000)
+#define   NIC_PF_SW_SYNC_RX                    (0x490000)
+#define   NIC_PF_SW_SYNC_RX_DONE               (0x490008)
+#define   NIC_PF_TL2_0_63_CFG                  (0x500000)
+#define   NIC_PF_TL2_0_63_PRI                  (0x520000)
+#define   NIC_PF_TL2_0_63_SH_STATUS            (0x580000)
+#define   NIC_PF_TL3A_0_63_CFG                 (0x5F0000)
+#define   NIC_PF_TL3_0_255_CFG                 (0x600000)
+#define   NIC_PF_TL3_0_255_CHAN                        (0x620000)
+#define   NIC_PF_TL3_0_255_PIR                 (0x640000)
+#define   NIC_PF_TL3_0_255_SW_XOFF             (0x660000)
+#define   NIC_PF_TL3_0_255_CNM_RATE            (0x680000)
+#define   NIC_PF_TL3_0_255_SH_STATUS           (0x6A0000)
+#define   NIC_PF_TL4A_0_255_CFG                        (0x6F0000)
+#define   NIC_PF_TL4_0_1023_CFG                        (0x800000)
+#define   NIC_PF_TL4_0_1023_SW_XOFF            (0x820000)
+#define   NIC_PF_TL4_0_1023_SH_STATUS          (0x840000)
+#define   NIC_PF_TL4A_0_1023_CNM_RATE          (0x880000)
+#define   NIC_PF_TL4A_0_1023_CNM_STATUS                (0x8A0000)
+#define   NIC_PF_VF_0_127_MAILBOX_0_1          (0x20002030)
+#define   NIC_PF_VNIC_0_127_TX_STAT_0_4                (0x20004000)
+#define   NIC_PF_VNIC_0_127_RX_STAT_0_13       (0x20004100)
+#define   NIC_PF_QSET_0_127_LOCK_0_15          (0x20006000)
+#define   NIC_PF_QSET_0_127_CFG                        (0x20010000)
+#define   NIC_PF_QSET_0_127_RQ_0_7_CFG         (0x20010400)
+#define   NIC_PF_QSET_0_127_RQ_0_7_DROP_CFG    (0x20010420)
+#define   NIC_PF_QSET_0_127_RQ_0_7_BP_CFG      (0x20010500)
+#define   NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1    (0x20010600)
+#define   NIC_PF_QSET_0_127_SQ_0_7_CFG         (0x20010C00)
+#define   NIC_PF_QSET_0_127_SQ_0_7_CFG2                (0x20010C08)
+#define   NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1    (0x20010D00)
+
+#define   NIC_PF_MSIX_VEC_0_18_ADDR            (0x000000)
+#define   NIC_PF_MSIX_VEC_0_CTL                        (0x000008)
+#define   NIC_PF_MSIX_PBA_0                    (0x0F0000)
+
+/* Virtual function register offsets */
+#define   NIC_VNIC_CFG                         (0x000020)
+#define   NIC_VF_PF_MAILBOX_0_1                        (0x000130)
+#define   NIC_VF_INT                           (0x000200)
+#define   NIC_VF_INT_W1S                       (0x000220)
+#define   NIC_VF_ENA_W1C                       (0x000240)
+#define   NIC_VF_ENA_W1S                       (0x000260)
+
+#define   NIC_VNIC_RSS_CFG                     (0x0020E0)
+#define   NIC_VNIC_RSS_KEY_0_4                 (0x002200)
+#define   NIC_VNIC_TX_STAT_0_4                 (0x004000)
+#define   NIC_VNIC_RX_STAT_0_13                        (0x004100)
+#define   NIC_QSET_RQ_GEN_CFG                  (0x010010)
+
+#define   NIC_QSET_CQ_0_7_CFG                  (0x010400)
+#define   NIC_QSET_CQ_0_7_CFG2                 (0x010408)
+#define   NIC_QSET_CQ_0_7_THRESH               (0x010410)
+#define   NIC_QSET_CQ_0_7_BASE                 (0x010420)
+#define   NIC_QSET_CQ_0_7_HEAD                 (0x010428)
+#define   NIC_QSET_CQ_0_7_TAIL                 (0x010430)
+#define   NIC_QSET_CQ_0_7_DOOR                 (0x010438)
+#define   NIC_QSET_CQ_0_7_STATUS               (0x010440)
+#define   NIC_QSET_CQ_0_7_STATUS2              (0x010448)
+#define   NIC_QSET_CQ_0_7_DEBUG                        (0x010450)
+
+#define   NIC_QSET_RQ_0_7_CFG                  (0x010600)
+#define   NIC_QSET_RQ_0_7_STAT_0_1             (0x010700)
+
+#define   NIC_QSET_SQ_0_7_CFG                  (0x010800)
+#define   NIC_QSET_SQ_0_7_THRESH               (0x010810)
+#define   NIC_QSET_SQ_0_7_BASE                 (0x010820)
+#define   NIC_QSET_SQ_0_7_HEAD                 (0x010828)
+#define   NIC_QSET_SQ_0_7_TAIL                 (0x010830)
+#define   NIC_QSET_SQ_0_7_DOOR                 (0x010838)
+#define   NIC_QSET_SQ_0_7_STATUS               (0x010840)
+#define   NIC_QSET_SQ_0_7_DEBUG                        (0x010848)
+#define   NIC_QSET_SQ_0_7_CNM_CHG              (0x010860)
+#define   NIC_QSET_SQ_0_7_STAT_0_1             (0x010900)
+
+#define   NIC_QSET_RBDR_0_1_CFG                        (0x010C00)
+#define   NIC_QSET_RBDR_0_1_THRESH             (0x010C10)
+#define   NIC_QSET_RBDR_0_1_BASE               (0x010C20)
+#define   NIC_QSET_RBDR_0_1_HEAD               (0x010C28)
+#define   NIC_QSET_RBDR_0_1_TAIL               (0x010C30)
+#define   NIC_QSET_RBDR_0_1_DOOR               (0x010C38)
+#define   NIC_QSET_RBDR_0_1_STATUS0            (0x010C40)
+#define   NIC_QSET_RBDR_0_1_STATUS1            (0x010C48)
+#define   NIC_QSET_RBDR_0_1_PREFETCH_STATUS    (0x010C50)
+
+#define   NIC_VF_MSIX_VECTOR_0_19_ADDR         (0x000000)
+#define   NIC_VF_MSIX_VECTOR_0_19_CTL          (0x000008)
+#define   NIC_VF_MSIX_PBA                      (0x0F0000)
+
+/* Offsets within registers */
+#define   NIC_MSIX_VEC_SHIFT                   4
+#define   NIC_Q_NUM_SHIFT                      18
+#define   NIC_QS_ID_SHIFT                      21
+#define   NIC_VF_NUM_SHIFT                     21
+
+/* Port kind configuration register */
+struct pkind_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64 reserved_42_63:22;
+       u64 hdr_sl:5;   /* Header skip length */
+       u64 rx_hdr:3;   /* TNS Receive header present */
+       u64 lenerr_en:1;/* L2 length error check enable */
+       u64 reserved_32_32:1;
+       u64 maxlen:16;  /* Max frame size */
+       u64 minlen:16;  /* Min frame size */
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64 minlen:16;
+       u64 maxlen:16;
+       u64 reserved_32_32:1;
+       u64 lenerr_en:1;
+       u64 rx_hdr:3;
+       u64 hdr_sl:5;
+       u64 reserved_42_63:22;
+#endif
+};
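As an aid to decoding raw NIC_PF_PKIND_0_15_CFG values, the illustrative macros below (not part of the imported header) restate the bit-field layout above as plain shifts and masks; the positions follow directly from the little-endian field widths (minlen in bits 15:0, maxlen in 31:16, lenerr_en in bit 33, rx_hdr in 36:34, hdr_sl in 41:37). The macro names are made up for this sketch.

/* Illustrative only: fixed-position view of struct pkind_cfg. */
#define EXAMPLE_PKIND_MINLEN(cfg)    (((cfg) >>  0) & 0xFFFF) /* Min frame size   */
#define EXAMPLE_PKIND_MAXLEN(cfg)    (((cfg) >> 16) & 0xFFFF) /* Max frame size   */
#define EXAMPLE_PKIND_LENERR_EN(cfg) (((cfg) >> 33) & 0x1)    /* L2 len err check */
#define EXAMPLE_PKIND_RX_HDR(cfg)    (((cfg) >> 34) & 0x7)    /* TNS Rx header    */
#define EXAMPLE_PKIND_HDR_SL(cfg)    (((cfg) >> 37) & 0x1F)   /* Header skip len  */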
+
+#endif /* NIC_REG_H */
diff --git a/sys/dev/vnic/nicvf_main.c b/sys/dev/vnic/nicvf_main.c
new file mode 100644 (file)
index 0000000..9e9e0d5
--- /dev/null
@@ -0,0 +1,1667 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/log2.h>
+#include <linux/prefetch.h>
+#include <linux/irq.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "nicvf_queues.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME       "thunder-nicvf"
+#define DRV_VERSION    "1.0"
+
+/* Supported devices */
+static const struct pci_device_id nicvf_id_table[] = {
+       { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+                        PCI_DEVICE_ID_THUNDER_NIC_VF,
+                        PCI_VENDOR_ID_CAVIUM, 0xA11E) },
+       { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+                        PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
+                        PCI_VENDOR_ID_CAVIUM, 0xA11E) },
+       { 0, }  /* end of table */
+};
+
+MODULE_AUTHOR("Sunil Goutham");
+MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, nicvf_id_table);
+
+static int debug = 0x00;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "Debug message level bitmap");
+
+static int cpi_alg = CPI_ALG_NONE;
+module_param(cpi_alg, int, S_IRUGO);
+MODULE_PARM_DESC(cpi_alg,
+                "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
+
+static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
+{
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       if (nic->sqs_mode)
+               return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
+       else
+#endif
+               return qidx;
+}
+
+static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic,
+                                         struct sk_buff *skb)
+{
+       if (skb->len <= 64)
+               nic->drv_stats.rx_frames_64++;
+       else if (skb->len <= 127)
+               nic->drv_stats.rx_frames_127++;
+       else if (skb->len <= 255)
+               nic->drv_stats.rx_frames_255++;
+       else if (skb->len <= 511)
+               nic->drv_stats.rx_frames_511++;
+       else if (skb->len <= 1023)
+               nic->drv_stats.rx_frames_1023++;
+       else if (skb->len <= 1518)
+               nic->drv_stats.rx_frames_1518++;
+       else
+               nic->drv_stats.rx_frames_jumbo++;
+}
+
+/* The Cavium ThunderX network controller can *only* be found in SoCs
+ * containing the ThunderX ARM64 CPU implementation.  All accesses to the device
+ * registers on this platform are implicitly strongly ordered with respect
+ * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
+ * with no memory barriers in this driver.  The readq()/writeq() functions add
+ * explicit ordering operations which in this case are redundant, and only
+ * add overhead.
+ */
+
+/* Register read/write APIs */
+void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val)
+{
+       writeq_relaxed(val, nic->reg_base + offset);
+}
+
+u64 nicvf_reg_read(struct nicvf *nic, u64 offset)
+{
+       return readq_relaxed(nic->reg_base + offset);
+}
+
+void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
+                          u64 qidx, u64 val)
+{
+       void __iomem *addr = nic->reg_base + offset;
+
+       writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT));
+}
+
+u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
+{
+       void __iomem *addr = nic->reg_base + offset;
+
+       return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT));
+}
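A minimal usage sketch of the accessors above, not part of the Cavium import: per-queue registers are addressed by folding the queue index into the base offset with NIC_Q_NUM_SHIFT, so polling a completion queue reduces to one relaxed load plus a mask. The helper name below is hypothetical; CQ_CQE_COUNT is the driver's own mask for the valid-entry field, as used by nicvf_cq_intr_handler() later in this file.

/* Illustrative only: count pending CQEs on completion queue 'cq_idx',
 * mirroring what nicvf_cq_intr_handler() does with the same register.
 */
static u64 example_cq_pending(struct nicvf *nic, u64 cq_idx)
{
        /* nicvf_queue_reg_read() adds (cq_idx << NIC_Q_NUM_SHIFT) to the
         * NIC_QSET_CQ_0_7_STATUS offset before issuing readq_relaxed().
         */
        return nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx) &
               CQ_CQE_COUNT;
}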
+
+/* VF -> PF mailbox communication */
+static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
+{
+       u64 *msg = (u64 *)mbx;
+
+       nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
+       nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
+}
+
+int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
+{
+       int timeout = NIC_MBOX_MSG_TIMEOUT;
+       int sleep = 10;
+
+       nic->pf_acked = false;
+       nic->pf_nacked = false;
+
+       nicvf_write_to_mbx(nic, mbx);
+
+       /* Wait for previous message to be acked, timeout 2sec */
+       while (!nic->pf_acked) {
+               if (nic->pf_nacked)
+                       return -EINVAL;
+               msleep(sleep);
+               if (nic->pf_acked)
+                       break;
+               timeout -= sleep;
+               if (!timeout) {
+                       netdev_err(nic->netdev,
+                                  "PF didn't ack to mbox msg %d from VF%d\n",
+                                  (mbx->msg.msg & 0xFF), nic->vf_id);
+                       return -EBUSY;
+               }
+       }
+       return 0;
+}
+
+/* Checks if the VF is able to communicate with the PF
+ * and also gets the VNIC number this VF is associated to.
+ */
+static int nicvf_check_pf_ready(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+
+       mbx.msg.msg = NIC_MBOX_MSG_READY;
+       if (nicvf_send_msg_to_pf(nic, &mbx)) {
+               netdev_err(nic->netdev,
+                          "PF didn't respond to READY msg\n");
+               return 0;
+       }
+
+       return 1;
+}
+
+static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
+{
+       if (bgx->rx)
+               nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats;
+       else
+               nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats;
+}
+
+static void  nicvf_handle_mbx_intr(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+       u64 *mbx_data;
+       u64 mbx_addr;
+       int i;
+
+       mbx_addr = NIC_VF_PF_MAILBOX_0_1;
+       mbx_data = (u64 *)&mbx;
+
+       for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
+               *mbx_data = nicvf_reg_read(nic, mbx_addr);
+               mbx_data++;
+               mbx_addr += sizeof(u64);
+       }
+
+       netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg);
+       switch (mbx.msg.msg) {
+       case NIC_MBOX_MSG_READY:
+               nic->pf_acked = true;
+               nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
+               nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
+               nic->node = mbx.nic_cfg.node_id;
+               ether_addr_copy(nic->netdev->dev_addr, mbx.nic_cfg.mac_addr);
+#ifdef VNIC_MULTI_QSET_SUPPORT
+               nic->sqs_mode = mbx.nic_cfg.sqs_mode;
+#endif
+               nic->loopback_supported = mbx.nic_cfg.loopback_supported;
+               nic->link_up = false;
+               nic->duplex = 0;
+               nic->speed = 0;
+               break;
+       case NIC_MBOX_MSG_ACK:
+               nic->pf_acked = true;
+               break;
+       case NIC_MBOX_MSG_NACK:
+               nic->pf_nacked = true;
+               break;
+#ifdef VNIC_RSS_SUPPORT
+       case NIC_MBOX_MSG_RSS_SIZE:
+               nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size;
+               nic->pf_acked = true;
+               break;
+#endif
+       case NIC_MBOX_MSG_BGX_STATS:
+               nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
+               nic->pf_acked = true;
+               break;
+       case NIC_MBOX_MSG_BGX_LINK_CHANGE:
+               nic->pf_acked = true;
+               nic->link_up = mbx.link_status.link_up;
+               nic->duplex = mbx.link_status.duplex;
+               nic->speed = mbx.link_status.speed;
+               if (nic->link_up) {
+                       netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n",
+                                   nic->netdev->name, nic->speed,
+                                   nic->duplex == DUPLEX_FULL ?
+                               "Full duplex" : "Half duplex");
+                       netif_carrier_on(nic->netdev);
+                       netif_tx_start_all_queues(nic->netdev);
+               } else {
+                       netdev_info(nic->netdev, "%s: Link is Down\n",
+                                   nic->netdev->name);
+                       netif_carrier_off(nic->netdev);
+                       netif_tx_stop_all_queues(nic->netdev);
+               }
+               break;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       case NIC_MBOX_MSG_ALLOC_SQS:
+               nic->sqs_count = mbx.sqs_alloc.qs_count;
+               nic->pf_acked = true;
+               break;
+       case NIC_MBOX_MSG_SNICVF_PTR:
+               /* Primary VF: make note of the secondary VF's pointer
+                * to be used during packet transmission.
+                */
+               nic->snicvf[mbx.nicvf.sqs_id] =
+                       (struct nicvf *)mbx.nicvf.nicvf;
+               nic->pf_acked = true;
+               break;
+       case NIC_MBOX_MSG_PNICVF_PTR:
+               /* Secondary VF/Qset: make note of the primary VF's pointer
+                * to be used during packet reception, to hand the packet
+                * over to the primary VF's netdev.
+                */
+               nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
+               nic->pf_acked = true;
+               break;
+#endif
+       default:
+               netdev_err(nic->netdev,
+                          "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
+               break;
+       }
+       nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0);
+}
+
+static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev)
+{
+       union nic_mbx mbx = {};
+
+       mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
+       mbx.mac.vf_id = nic->vf_id;
+       ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr);
+
+       return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_config_cpi(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+
+       mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG;
+       mbx.cpi_cfg.vf_id = nic->vf_id;
+       mbx.cpi_cfg.cpi_alg = nic->cpi_alg;
+       mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt;
+
+       nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+#ifdef VNIC_RSS_SUPPORT
+static void nicvf_get_rss_size(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+
+       mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
+       mbx.rss_size.vf_id = nic->vf_id;
+       nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+void nicvf_config_rss(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+       struct nicvf_rss_info *rss = &nic->rss_info;
+       int ind_tbl_len = rss->rss_size;
+       int i, nextq = 0;
+
+       mbx.rss_cfg.vf_id = nic->vf_id;
+       mbx.rss_cfg.hash_bits = rss->hash_bits;
+       while (ind_tbl_len) {
+               mbx.rss_cfg.tbl_offset = nextq;
+               mbx.rss_cfg.tbl_len = min(ind_tbl_len,
+                                              RSS_IND_TBL_LEN_PER_MBX_MSG);
+               mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ?
+                         NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG;
+
+               for (i = 0; i < mbx.rss_cfg.tbl_len; i++)
+                       mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++];
+
+               nicvf_send_msg_to_pf(nic, &mbx);
+
+               ind_tbl_len -= mbx.rss_cfg.tbl_len;
+       }
+}
+
+void nicvf_set_rss_key(struct nicvf *nic)
+{
+       struct nicvf_rss_info *rss = &nic->rss_info;
+       u64 key_addr = NIC_VNIC_RSS_KEY_0_4;
+       int idx;
+
+       for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) {
+               nicvf_reg_write(nic, key_addr, rss->key[idx]);
+               key_addr += sizeof(u64);
+       }
+}
+
+static int nicvf_rss_init(struct nicvf *nic)
+{
+       struct nicvf_rss_info *rss = &nic->rss_info;
+       int idx;
+
+       nicvf_get_rss_size(nic);
+
+       if (cpi_alg != CPI_ALG_NONE) {
+               rss->enable = false;
+               rss->hash_bits = 0;
+               return 0;
+       }
+
+       rss->enable = true;
+
+       /* Using the HW reset value for now */
+       rss->key[0] = 0xFEED0BADFEED0BADULL;
+       rss->key[1] = 0xFEED0BADFEED0BADULL;
+       rss->key[2] = 0xFEED0BADFEED0BADULL;
+       rss->key[3] = 0xFEED0BADFEED0BADULL;
+       rss->key[4] = 0xFEED0BADFEED0BADULL;
+
+       nicvf_set_rss_key(nic);
+
+       rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
+       nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg);
+
+       rss->hash_bits =  ilog2(rounddown_pow_of_two(rss->rss_size));
+
+       for (idx = 0; idx < rss->rss_size; idx++)
+               rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx,
+                                                              nic->rx_queues);
+       nicvf_config_rss(nic);
+       return 1;
+}
+#endif
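For clarity on how the default indirection above spreads traffic, here is a small sketch that is not part of the import: in mainline Linux, ethtool_rxfh_indir_default(idx, n_rx_rings) simply returns idx % n_rx_rings, so nicvf_rss_init() ends up steering indirection-table entry i to receive queue (i % rx_queues), with hash_bits set to log2 of the table size rounded down to a power of two. The function name and u8 table type below are assumptions made for the illustration.

/* Illustrative only: equivalent of the default indirection filled in by
 * nicvf_rss_init() above, without the ethtool helper.
 */
static void example_default_rss_indir(u8 *ind_tbl, int rss_size, int rx_queues)
{
        int idx;

        for (idx = 0; idx < rss_size; idx++)
                ind_tbl[idx] = idx % rx_queues; /* ethtool_rxfh_indir_default() */
}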
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+/* Request PF to allocate additional Qsets */
+static void nicvf_request_sqs(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+       int sqs;
+       int sqs_count = nic->sqs_count;
+       int rx_queues = 0, tx_queues = 0;
+
+       /* Only primary VF should request */
+       if (nic->sqs_mode ||  !nic->sqs_count)
+               return;
+
+       mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
+       mbx.sqs_alloc.vf_id = nic->vf_id;
+       mbx.sqs_alloc.qs_count = nic->sqs_count;
+       if (nicvf_send_msg_to_pf(nic, &mbx)) {
+               /* No response from PF */
+               nic->sqs_count = 0;
+               return;
+       }
+
+       /* Return if no Secondary Qsets available */
+       if (!nic->sqs_count)
+               return;
+
+       if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS)
+               rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS;
+       if (nic->tx_queues > MAX_SND_QUEUES_PER_QS)
+               tx_queues = nic->tx_queues - MAX_SND_QUEUES_PER_QS;
+
+       /* Set no of Rx/Tx queues in each of the SQsets */
+       for (sqs = 0; sqs < nic->sqs_count; sqs++) {
+               mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
+               mbx.nicvf.vf_id = nic->vf_id;
+               mbx.nicvf.sqs_id = sqs;
+               nicvf_send_msg_to_pf(nic, &mbx);
+
+               nic->snicvf[sqs]->sqs_id = sqs;
+               if (rx_queues > MAX_RCV_QUEUES_PER_QS) {
+                       nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS;
+                       rx_queues -= MAX_RCV_QUEUES_PER_QS;
+               } else {
+                       nic->snicvf[sqs]->qs->rq_cnt = rx_queues;
+                       rx_queues = 0;
+               }
+
+               if (tx_queues > MAX_SND_QUEUES_PER_QS) {
+                       nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS;
+                       tx_queues -= MAX_SND_QUEUES_PER_QS;
+               } else {
+                       nic->snicvf[sqs]->qs->sq_cnt = tx_queues;
+                       tx_queues = 0;
+               }
+
+               nic->snicvf[sqs]->qs->cq_cnt =
+               max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt);
+
+               /* Initialize secondary Qset's queues and its interrupts */
+               nicvf_open(nic->snicvf[sqs]->netdev);
+       }
+
+       /* Update stack with actual Rx/Tx queue count allocated */
+       if (sqs_count != nic->sqs_count)
+               nicvf_set_real_num_queues(nic->netdev,
+                                         nic->tx_queues, nic->rx_queues);
+}
+
+/* Send this Qset's nicvf pointer to PF.
+ * The PF in turn sends the primary VF's nicvf struct to secondary Qsets/VFs
+ * so that packets received by these Qsets can use the primary VF's netdev.
+ */
+static void nicvf_send_vf_struct(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+
+       mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR;
+       mbx.nicvf.sqs_mode = nic->sqs_mode;
+       mbx.nicvf.nicvf = (u64)nic;
+       nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_get_primary_vf_struct(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+
+       mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
+       nicvf_send_msg_to_pf(nic, &mbx);
+}
+#endif
+
+int nicvf_set_real_num_queues(struct net_device *netdev,
+                             int tx_queues, int rx_queues)
+{
+       int err = 0;
+
+       err = netif_set_real_num_tx_queues(netdev, tx_queues);
+       if (err) {
+               netdev_err(netdev,
+                          "Failed to set no of Tx queues: %d\n", tx_queues);
+               return err;
+       }
+
+       err = netif_set_real_num_rx_queues(netdev, rx_queues);
+       if (err)
+               netdev_err(netdev,
+                          "Failed to set no of Rx queues: %d\n", rx_queues);
+       return err;
+}
+
+static int nicvf_init_resources(struct nicvf *nic)
+{
+       int err;
+       union nic_mbx mbx = {};
+
+       mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
+
+       /* Enable Qset */
+       nicvf_qset_config(nic, true);
+
+       /* Initialize queues and HW for data transfer */
+       err = nicvf_config_data_transfer(nic, true);
+       if (err) {
+               netdev_err(nic->netdev,
+                          "Failed to alloc/config VF's QSet resources\n");
+               return err;
+       }
+
+       /* Send VF config done msg to PF */
+       nicvf_write_to_mbx(nic, &mbx);
+
+       return 0;
+}
+
+static void nicvf_snd_pkt_handler(struct net_device *netdev,
+                                 struct cmp_queue *cq,
+                                 struct cqe_send_t *cqe_tx, int cqe_type)
+{
+       struct sk_buff *skb = NULL;
+       struct nicvf *nic = netdev_priv(netdev);
+       struct snd_queue *sq;
+       struct sq_hdr_subdesc *hdr;
+
+       sq = &nic->qs->sq[cqe_tx->sq_idx];
+
+       hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
+       if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER)
+               return;
+
+       netdev_dbg(nic->netdev,
+                  "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n",
+                  __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
+                  cqe_tx->sqe_ptr, hdr->subdesc_cnt);
+
+       nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
+       nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
+       skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
+       /* For TSO offloaded packets only one head SKB needs to be freed */
+       if (skb) {
+               prefetch(skb);
+               dev_consume_skb_any(skb);
+               sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
+       }
+}
+
+static inline void nicvf_set_rxhash(struct net_device *netdev,
+                                   struct cqe_rx_t *cqe_rx,
+                                   struct sk_buff *skb)
+{
+       u8 hash_type;
+       u32 hash;
+
+       if (!(netdev->features & NETIF_F_RXHASH))
+               return;
+
+       switch (cqe_rx->rss_alg) {
+       case RSS_ALG_TCP_IP:
+       case RSS_ALG_UDP_IP:
+               hash_type = PKT_HASH_TYPE_L4;
+               hash = cqe_rx->rss_tag;
+               break;
+       case RSS_ALG_IP:
+               hash_type = PKT_HASH_TYPE_L3;
+               hash = cqe_rx->rss_tag;
+               break;
+       default:
+               hash_type = PKT_HASH_TYPE_NONE;
+               hash = 0;
+       }
+
+       skb_set_hash(skb, hash, hash_type);
+}
+
+static void nicvf_rcv_pkt_handler(struct net_device *netdev,
+                                 struct napi_struct *napi,
+                                 struct cmp_queue *cq,
+                                 struct cqe_rx_t *cqe_rx, int cqe_type)
+{
+       struct sk_buff *skb;
+       struct nicvf *nic = netdev_priv(netdev);
+       int err = 0;
+       int rq_idx;
+
+       rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       if (nic->sqs_mode) {
+               /* Use primary VF's 'nicvf' struct */
+               nic = nic->pnicvf;
+               netdev = nic->netdev;
+       }
+#endif
+       /* Check for errors */
+       err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx);
+       if (err && !cqe_rx->rb_cnt)
+               return;
+
+       skb = nicvf_get_rcv_skb(nic, cqe_rx);
+       if (!skb) {
+               netdev_dbg(nic->netdev, "Packet not received\n");
+               return;
+       }
+
+       if (netif_msg_pktdata(nic)) {
+               netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name,
+                           skb, skb->len);
+               print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
+                              skb->data, skb->len, true);
+       }
+
+       /* If error packet, drop it here */
+       if (err) {
+               dev_kfree_skb_any(skb);
+               return;
+       }
+
+       nicvf_set_rx_frame_cnt(nic, skb);
+
+       nicvf_set_rxhash(netdev, cqe_rx, skb);
+
+       skb_record_rx_queue(skb, rq_idx);
+       if (netdev->hw_features & NETIF_F_RXCSUM) {
+               /* HW by default verifies TCP/UDP/SCTP checksums */
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+       } else {
+               skb_checksum_none_assert(skb);
+       }
+
+       skb->protocol = eth_type_trans(skb, netdev);
+
+       /* Check for stripped VLAN */
+       if (cqe_rx->vlan_found && cqe_rx->vlan_stripped)
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                      ntohs(cqe_rx->vlan_tci));
+
+       if (napi && (netdev->features & NETIF_F_GRO))
+               napi_gro_receive(napi, skb);
+       else
+               netif_receive_skb(skb);
+}
+
+static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
+                                struct napi_struct *napi, int budget)
+{
+       int processed_cqe, work_done = 0, tx_done = 0;
+       int cqe_count, cqe_head;
+       struct nicvf *nic = netdev_priv(netdev);
+       struct queue_set *qs = nic->qs;
+       struct cmp_queue *cq = &qs->cq[cq_idx];
+       struct cqe_rx_t *cq_desc;
+       struct netdev_queue *txq;
+
+       spin_lock_bh(&cq->lock);
+loop:
+       processed_cqe = 0;
+       /* Get no of valid CQ entries to process */
+       cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
+       cqe_count &= CQ_CQE_COUNT;
+       if (!cqe_count)
+               goto done;
+
+       /* Get head of the valid CQ entries */
+       cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
+       cqe_head &= 0xFFFF;
+
+       netdev_dbg(nic->netdev, "%s CQ%d cqe_count %d cqe_head %d\n",
+                  __func__, cq_idx, cqe_count, cqe_head);
+       while (processed_cqe < cqe_count) {
+               /* Get the CQ descriptor */
+               cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
+               cqe_head++;
+               cqe_head &= (cq->dmem.q_len - 1);
+               /* Initiate prefetch for next descriptor */
+               prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));
+
+               if ((work_done >= budget) && napi &&
+                   (cq_desc->cqe_type != CQE_TYPE_SEND)) {
+                       break;
+               }
+
+               netdev_dbg(nic->netdev, "CQ%d cq_desc->cqe_type %d\n",
+                          cq_idx, cq_desc->cqe_type);
+               switch (cq_desc->cqe_type) {
+               case CQE_TYPE_RX:
+                       nicvf_rcv_pkt_handler(netdev, napi, cq,
+                                             cq_desc, CQE_TYPE_RX);
+                       work_done++;
+               break;
+               case CQE_TYPE_SEND:
+                       nicvf_snd_pkt_handler(netdev, cq,
+                                             (void *)cq_desc, CQE_TYPE_SEND);
+                       tx_done++;
+               break;
+               case CQE_TYPE_INVALID:
+               case CQE_TYPE_RX_SPLIT:
+               case CQE_TYPE_RX_TCP:
+               case CQE_TYPE_SEND_PTP:
+                       /* Ignore for now */
+               break;
+               }
+               processed_cqe++;
+       }
+       netdev_dbg(nic->netdev,
+                  "%s CQ%d processed_cqe %d work_done %d budget %d\n",
+                  __func__, cq_idx, processed_cqe, work_done, budget);
+
+       /* Ring doorbell to inform H/W to reuse processed CQEs */
+       nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR,
+                             cq_idx, processed_cqe);
+
+       if ((work_done < budget) && napi)
+               goto loop;
+
+done:
+       /* Wake up the TXQ if it was stopped earlier due to the SQ being full */
+       if (tx_done) {
+#ifdef VNIC_MULTI_QSET_SUPPORT
+               netdev = nic->pnicvf->netdev;
+#endif
+               txq = netdev_get_tx_queue(netdev,
+                                         nicvf_netdev_qidx(nic, cq_idx));
+#ifdef VNIC_MULTI_QSET_SUPPORT
+               nic = nic->pnicvf;
+#endif
+               if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
+                       netif_tx_start_queue(txq);
+                       nic->drv_stats.txq_wake++;
+                       if (netif_msg_tx_err(nic))
+                               netdev_warn(netdev,
+                                           "%s: Transmit queue wakeup SQ%d\n",
+                                           netdev->name, cq_idx);
+               }
+       }
+
+       spin_unlock_bh(&cq->lock);
+       return work_done;
+}
+
+static int nicvf_poll(struct napi_struct *napi, int budget)
+{
+       u64  cq_head;
+       int  work_done = 0;
+       struct net_device *netdev = napi->dev;
+       struct nicvf *nic = netdev_priv(netdev);
+       struct nicvf_cq_poll *cq;
+
+       cq = container_of(napi, struct nicvf_cq_poll, napi);
+       work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget);
+
+       if (work_done < budget) {
+               /* Slow packet rate, exit polling */
+               napi_complete(napi);
+               /* Re-enable interrupts */
+               cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD,
+                                              cq->cq_idx);
+               nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
+               nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD,
+                                     cq->cq_idx, cq_head);
+               nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
+       }
+       return work_done;
+}
+
+/* Qset error interrupt handler
+ *
+ * As of now only CQ errors are handled
+ */
+static void nicvf_handle_qs_err(unsigned long data)
+{
+       struct nicvf *nic = (struct nicvf *)data;
+       struct queue_set *qs = nic->qs;
+       int qidx;
+       u64 status;
+
+       netif_tx_disable(nic->netdev);
+
+       /* Check if it is CQ err */
+       for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
+               status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
+                                             qidx);
+               if (!(status & CQ_ERR_MASK))
+                       continue;
+               /* Process already queued CQEs and reconfig CQ */
+               nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
+               nicvf_sq_disable(nic, qidx);
+               nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0);
+               nicvf_cmp_queue_config(nic, qs, qidx, true);
+               nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx);
+               nicvf_sq_enable(nic, &qs->sq[qidx], qidx);
+
+               nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
+       }
+
+       netif_tx_start_all_queues(nic->netdev);
+       /* Re-enable Qset error interrupt */
+       nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
+}
+
+static inline void nicvf_dump_intr_status(struct nicvf *nic)
+{
+       if (netif_msg_intr(nic))
+               netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
+                           nic->netdev->name, nicvf_reg_read(nic, NIC_VF_INT));
+}
+
+static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
+{
+       struct nicvf *nic = (struct nicvf *)nicvf_irq;
+       u64 intr;
+
+       nicvf_dump_intr_status(nic);
+
+       intr = nicvf_reg_read(nic, NIC_VF_INT);
+       /* Check for spurious interrupt */
+       if (!(intr & NICVF_INTR_MBOX_MASK))
+               return IRQ_HANDLED;
+
+       nicvf_handle_mbx_intr(nic);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
+{
+       struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
+       struct nicvf *nic = cq_poll->nicvf;
+       int qidx = cq_poll->cq_idx;
+
+       nicvf_dump_intr_status(nic);
+
+       /* Disable interrupts */
+       nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
+
+       /* Schedule NAPI */
+       napi_schedule(&cq_poll->napi);
+
+       /* Clear interrupt */
+       nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq)
+{
+       struct nicvf *nic = (struct nicvf *)nicvf_irq;
+       u8 qidx;
+
+       nicvf_dump_intr_status(nic);
+
+       /* Disable RBDR interrupt and schedule softirq */
+       for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) {
+               if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
+                       continue;
+               nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
+               tasklet_hi_schedule(&nic->rbdr_task);
+               /* Clear interrupt */
+               nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq)
+{
+       struct nicvf *nic = (struct nicvf *)nicvf_irq;
+
+       nicvf_dump_intr_status(nic);
+
+       /* Disable Qset err interrupt and schedule softirq */
+       nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
+       tasklet_hi_schedule(&nic->qs_err_task);
+       nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
+
+       return IRQ_HANDLED;
+}
+
+static int nicvf_enable_msix(struct nicvf *nic)
+{
+       int ret, vec;
+
+       nic->num_vec = NIC_VF_MSIX_VECTORS;
+
+       for (vec = 0; vec < nic->num_vec; vec++)
+               nic->msix_entries[vec].entry = vec;
+
+       ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec);
+       if (ret) {
+               netdev_err(nic->netdev,
+                          "Req for #%d msix vectors failed\n", nic->num_vec);
+               return 0;
+       }
+       nic->msix_enabled = 1;
+       return 1;
+}
+
+static void nicvf_disable_msix(struct nicvf *nic)
+{
+       if (nic->msix_enabled) {
+               pci_disable_msix(nic->pdev);
+               nic->msix_enabled = 0;
+               nic->num_vec = 0;
+       }
+}
+
+static int nicvf_register_interrupts(struct nicvf *nic)
+{
+       int irq, ret = 0;
+       int vector;
+
+       for_each_cq_irq(irq)
+               sprintf(nic->irq_name[irq], "NICVF%d CQ%d",
+                       nic->vf_id, irq);
+
+       for_each_sq_irq(irq)
+               sprintf(nic->irq_name[irq], "NICVF%d SQ%d",
+                       nic->vf_id, irq - NICVF_INTR_ID_SQ);
+
+       for_each_rbdr_irq(irq)
+               sprintf(nic->irq_name[irq], "NICVF%d RBDR%d",
+                       nic->vf_id, irq - NICVF_INTR_ID_RBDR);
+
+       /* Register CQ interrupts */
+       for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
+               vector = nic->msix_entries[irq].vector;
+               ret = request_irq(vector, nicvf_intr_handler,
+                                 0, nic->irq_name[irq], nic->napi[irq]);
+               if (ret)
+                       goto err;
+               nic->irq_allocated[irq] = true;
+       }
+
+       /* Register RBDR interrupt */
+       for (irq = NICVF_INTR_ID_RBDR;
+            irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) {
+               vector = nic->msix_entries[irq].vector;
+               ret = request_irq(vector, nicvf_rbdr_intr_handler,
+                                 0, nic->irq_name[irq], nic);
+               if (ret)
+                       goto err;
+               nic->irq_allocated[irq] = true;
+       }
+
+       /* Register QS error interrupt */
+       sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR],
+               "NICVF%d Qset error", nic->vf_id);
+       irq = NICVF_INTR_ID_QS_ERR;
+       ret = request_irq(nic->msix_entries[irq].vector,
+                         nicvf_qs_err_intr_handler,
+                         0, nic->irq_name[irq], nic);
+       if (!ret)
+               nic->irq_allocated[irq] = true;
+
+err:
+       if (ret)
+               netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq);
+
+       return ret;
+}
+
+static void nicvf_unregister_interrupts(struct nicvf *nic)
+{
+       int irq;
+
+       /* Free registered interrupts */
+       for (irq = 0; irq < nic->num_vec; irq++) {
+               if (!nic->irq_allocated[irq])
+                       continue;
+
+               if (irq < NICVF_INTR_ID_SQ)
+                       free_irq(nic->msix_entries[irq].vector, nic->napi[irq]);
+               else
+                       free_irq(nic->msix_entries[irq].vector, nic);
+
+               nic->irq_allocated[irq] = false;
+       }
+
+       /* Disable MSI-X */
+       nicvf_disable_msix(nic);
+}
+
+/* Initialize MSIX vectors and register MISC interrupt.
+ * Send a READY message to the PF to check if it is alive.
+ */
+static int nicvf_register_misc_interrupt(struct nicvf *nic)
+{
+       int ret = 0;
+       int irq = NICVF_INTR_ID_MISC;
+
+       /* Return if mailbox interrupt is already registered */
+       if (nic->msix_enabled)
+               return 0;
+
+       /* Enable MSI-X */
+       if (!nicvf_enable_msix(nic))
+               return 1;
+
+       sprintf(nic->irq_name[irq], "%s Mbox", "NICVF");
+       /* Register Misc interrupt */
+       ret = request_irq(nic->msix_entries[irq].vector,
+                         nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic);
+
+       if (ret)
+               return ret;
+       nic->irq_allocated[irq] = true;
+
+       /* Enable mailbox interrupt */
+       nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);
+
+       /* Check if VF is able to communicate with PF */
+       if (!nicvf_check_pf_ready(nic)) {
+               nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
+               nicvf_unregister_interrupts(nic);
+               return 1;
+       }
+
+       return 0;
+}
+
+static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct nicvf *nic = netdev_priv(netdev);
+       int qid = skb_get_queue_mapping(skb);
+       struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);
+
+       /* Check for minimum packet length */
+       if (skb->len <= ETH_HLEN) {
+               dev_kfree_skb(skb);
+               return NETDEV_TX_OK;
+       }
+
+       if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) {
+               netif_tx_stop_queue(txq);
+               nic->drv_stats.txq_stop++;
+               if (netif_msg_tx_err(nic))
+                       netdev_warn(netdev,
+                                   "%s: Transmit ring full, stopping SQ%d\n",
+                                   netdev->name, qid);
+               return NETDEV_TX_BUSY;
+       }
+
+       return NETDEV_TX_OK;
+}
+
+static inline void nicvf_free_cq_poll(struct nicvf *nic)
+{
+       struct nicvf_cq_poll *cq_poll = NULL;
+       int qidx;
+
+       for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
+               cq_poll = nic->napi[qidx];
+               if (!cq_poll)
+                       continue;
+               nic->napi[qidx] = NULL;
+               kfree(cq_poll);
+       }
+}
+
+int nicvf_stop(struct net_device *netdev)
+{
+       int irq, qidx;
+       struct nicvf *nic = netdev_priv(netdev);
+       struct queue_set *qs = nic->qs;
+       struct nicvf_cq_poll *cq_poll = NULL;
+       union nic_mbx mbx = {};
+
+       mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
+       nicvf_send_msg_to_pf(nic, &mbx);
+
+       netif_carrier_off(netdev);
+       netif_tx_stop_all_queues(nic->netdev);
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       /* Teardown secondary qsets first */
+       if (!nic->sqs_mode) {
+               for (qidx = 0; qidx < nic->sqs_count; qidx++) {
+                       if (!nic->snicvf[qidx])
+                               continue;
+                       nicvf_stop(nic->snicvf[qidx]->netdev);
+                       nic->snicvf[qidx] = NULL;
+               }
+       }
+#endif
+
+       /* Disable RBDR & QS error interrupts */
+       for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
+               nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
+               nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
+       }
+       nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
+       nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
+
+       /* Wait for pending IRQ handlers to finish */
+       for (irq = 0; irq < nic->num_vec; irq++)
+               synchronize_irq(nic->msix_entries[irq].vector);
+
+       tasklet_kill(&nic->rbdr_task);
+       tasklet_kill(&nic->qs_err_task);
+       if (nic->rb_work_scheduled)
+               cancel_delayed_work_sync(&nic->rbdr_work);
+
+       for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
+               cq_poll = nic->napi[qidx];
+               if (!cq_poll)
+                       continue;
+               napi_synchronize(&cq_poll->napi);
+               /* The CQ interrupt is re-enabled when NAPI polling
+                * completes, so disable it now.
+                */
+               nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
+               nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
+               napi_disable(&cq_poll->napi);
+               netif_napi_del(&cq_poll->napi);
+       }
+
+       netif_tx_disable(netdev);
+
+       /* Free resources */
+       nicvf_config_data_transfer(nic, false);
+
+       /* Disable HW Qset */
+       nicvf_qset_config(nic, false);
+
+       /* disable mailbox interrupt */
+       nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
+
+       nicvf_unregister_interrupts(nic);
+
+       nicvf_free_cq_poll(nic);
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       /* Clear multiqset info */
+       nic->pnicvf = nic;
+       nic->sqs_count = 0;
+#endif
+
+       return 0;
+}
+
+int nicvf_open(struct net_device *netdev)
+{
+       int err, qidx;
+       struct nicvf *nic = netdev_priv(netdev);
+       struct queue_set *qs = nic->qs;
+       struct nicvf_cq_poll *cq_poll = NULL;
+
+       nic->mtu = netdev->mtu;
+
+       netif_carrier_off(netdev);
+
+       err = nicvf_register_misc_interrupt(nic);
+       if (err)
+               return err;
+
+       /* Register NAPI handler for processing CQEs */
+       for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
+               cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL);
+               if (!cq_poll) {
+                       err = -ENOMEM;
+                       goto napi_del;
+               }
+               cq_poll->cq_idx = qidx;
+               cq_poll->nicvf = nic;
+               netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
+                              NAPI_POLL_WEIGHT);
+               napi_enable(&cq_poll->napi);
+               nic->napi[qidx] = cq_poll;
+       }
+
+       /* Check if we got a MAC address from the PF, else generate a random MAC */
+       if (is_zero_ether_addr(netdev->dev_addr)) {
+               eth_hw_addr_random(netdev);
+               nicvf_hw_set_mac_addr(nic, netdev);
+       }
+
+       /* Init tasklet for handling Qset err interrupt */
+       tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
+                    (unsigned long)nic);
+
+       /* Init RBDR tasklet which will refill RBDR */
+       tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
+                    (unsigned long)nic);
+       INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);
+
+       /* Configure CPI algorithm */
+       nic->cpi_alg = cpi_alg;
+       if (!nic->sqs_mode)
+               nicvf_config_cpi(nic);
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       nicvf_request_sqs(nic);
+       if (nic->sqs_mode)
+               nicvf_get_primary_vf_struct(nic);
+#endif
+
+#ifdef VNIC_RSS_SUPPORT
+       /* Configure receive side scaling */
+       if (!nic->sqs_mode)
+               nicvf_rss_init(nic);
+#endif
+
+       err = nicvf_register_interrupts(nic);
+       if (err)
+               goto cleanup;
+
+       /* Initialize the queues */
+       err = nicvf_init_resources(nic);
+       if (err)
+               goto cleanup;
+
+       /* Ensure queue initialization writes are ordered before enabling interrupts */
+       wmb();
+
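+       /* NIC_VF_INT is write-1-to-clear; writing all ones clears any pending interrupts */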
+       nicvf_reg_write(nic, NIC_VF_INT, -1);
+       /* Enable Qset err interrupt */
+       nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
+
+       /* Enable completion queue interrupt */
+       for (qidx = 0; qidx < qs->cq_cnt; qidx++)
+               nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
+
+       /* Enable RBDR threshold interrupt */
+       for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
+               nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
+
+       nic->drv_stats.txq_stop = 0;
+       nic->drv_stats.txq_wake = 0;
+
+       netif_carrier_on(netdev);
+       netif_tx_start_all_queues(netdev);
+
+       return 0;
+cleanup:
+       nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
+       nicvf_unregister_interrupts(nic);
+       tasklet_kill(&nic->qs_err_task);
+       tasklet_kill(&nic->rbdr_task);
+napi_del:
+       for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
+               cq_poll = nic->napi[qidx];
+               if (!cq_poll)
+                       continue;
+               napi_disable(&cq_poll->napi);
+               netif_napi_del(&cq_poll->napi);
+       }
+       nicvf_free_cq_poll(nic);
+       return err;
+}
+
+static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
+{
+       union nic_mbx mbx = {};
+
+       mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
+       mbx.frs.max_frs = mtu;
+       mbx.frs.vf_id = nic->vf_id;
+
+       return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       struct nicvf *nic = netdev_priv(netdev);
+
+       if (new_mtu > NIC_HW_MAX_FRS)
+               return -EINVAL;
+
+       if (new_mtu < NIC_HW_MIN_FRS)
+               return -EINVAL;
+
+       if (nicvf_update_hw_max_frs(nic, new_mtu))
+               return -EINVAL;
+       netdev->mtu = new_mtu;
+       nic->mtu = new_mtu;
+
+       return 0;
+}
+
+static int nicvf_set_mac_address(struct net_device *netdev, void *p)
+{
+       struct sockaddr *addr = p;
+       struct nicvf *nic = netdev_priv(netdev);
+
+       if (!is_valid_ether_addr(addr->sa_data))
+               return -EADDRNOTAVAIL;
+
+       memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+
+       if (nic->msix_enabled)
+               if (nicvf_hw_set_mac_addr(nic, netdev))
+                       return -EBUSY;
+
+       return 0;
+}
+
+void nicvf_update_lmac_stats(struct nicvf *nic)
+{
+       int stat = 0;
+       union nic_mbx mbx = {};
+
+       if (!netif_running(nic->netdev))
+               return;
+
+       mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
+       mbx.bgx_stats.vf_id = nic->vf_id;
+       /* Rx stats */
+       mbx.bgx_stats.rx = 1;
+       while (stat < BGX_RX_STATS_COUNT) {
+               mbx.bgx_stats.idx = stat;
+               if (nicvf_send_msg_to_pf(nic, &mbx))
+                       return;
+               stat++;
+       }
+
+       stat = 0;
+
+       /* Tx stats */
+       mbx.bgx_stats.rx = 0;
+       while (stat < BGX_TX_STATS_COUNT) {
+               mbx.bgx_stats.idx = stat;
+               if (nicvf_send_msg_to_pf(nic, &mbx))
+                       return;
+               stat++;
+       }
+}
+
+void nicvf_update_stats(struct nicvf *nic)
+{
+       int qidx;
+       struct nicvf_hw_stats *stats = &nic->hw_stats;
+       struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
+       struct queue_set *qs = nic->qs;
+
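+/* Each statistics register is 8 bytes apart, hence the (reg << 3) offset */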
+#define GET_RX_STATS(reg) \
+       nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3))
+#define GET_TX_STATS(reg) \
+       nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3))
+
+       stats->rx_bytes = GET_RX_STATS(RX_OCTS);
+       stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
+       stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
+       stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
+       stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
+       stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
+       stats->rx_drop_red = GET_RX_STATS(RX_RED);
+       stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
+       stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
+       stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
+       stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
+       stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
+       stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
+       stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);
+
+       stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS);
+       stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST);
+       stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST);
+       stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST);
+       stats->tx_drops = GET_TX_STATS(TX_DROP);
+
+       drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
+                                 stats->tx_bcast_frames_ok +
+                                 stats->tx_mcast_frames_ok;
+       drv_stats->rx_drops = stats->rx_drop_red +
+                             stats->rx_drop_overrun;
+       drv_stats->tx_drops = stats->tx_drops;
+
+       /* Update RQ and SQ stats */
+       for (qidx = 0; qidx < qs->rq_cnt; qidx++)
+               nicvf_update_rq_stats(nic, qidx);
+       for (qidx = 0; qidx < qs->sq_cnt; qidx++)
+               nicvf_update_sq_stats(nic, qidx);
+}
+
+static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev,
+                                           struct rtnl_link_stats64 *stats)
+{
+       struct nicvf *nic = netdev_priv(netdev);
+       struct nicvf_hw_stats *hw_stats = &nic->hw_stats;
+       struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
+
+       nicvf_update_stats(nic);
+
+       stats->rx_bytes = hw_stats->rx_bytes;
+       stats->rx_packets = drv_stats->rx_frames_ok;
+       stats->rx_dropped = drv_stats->rx_drops;
+       stats->multicast = hw_stats->rx_mcast_frames;
+
+       stats->tx_bytes = hw_stats->tx_bytes_ok;
+       stats->tx_packets = drv_stats->tx_frames_ok;
+       stats->tx_dropped = drv_stats->tx_drops;
+
+       return stats;
+}
+
+static void nicvf_tx_timeout(struct net_device *dev)
+{
+       struct nicvf *nic = netdev_priv(dev);
+
+       if (netif_msg_tx_err(nic))
+               netdev_warn(dev, "%s: Transmit timed out, resetting\n",
+                           dev->name);
+
+       schedule_work(&nic->reset_task);
+}
+
+static void nicvf_reset_task(struct work_struct *work)
+{
+       struct nicvf *nic;
+
+       nic = container_of(work, struct nicvf, reset_task);
+
+       if (!netif_running(nic->netdev))
+               return;
+
+       nicvf_stop(nic->netdev);
+       nicvf_open(nic->netdev);
+       nic->netdev->trans_start = jiffies;
+}
+
+static int nicvf_config_loopback(struct nicvf *nic,
+                                netdev_features_t features)
+{
+       union nic_mbx mbx = {};
+
+       mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK;
+       mbx.lbk.vf_id = nic->vf_id;
+       mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0;
+
+       return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static netdev_features_t nicvf_fix_features(struct net_device *netdev,
+                                           netdev_features_t features)
+{
+       struct nicvf *nic = netdev_priv(netdev);
+
+       if ((features & NETIF_F_LOOPBACK) &&
+           netif_running(netdev) && !nic->loopback_supported)
+               features &= ~NETIF_F_LOOPBACK;
+
+       return features;
+}
+
+static int nicvf_set_features(struct net_device *netdev,
+                             netdev_features_t features)
+{
+       struct nicvf *nic = netdev_priv(netdev);
+       netdev_features_t changed = features ^ netdev->features;
+
+       if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+               nicvf_config_vlan_stripping(nic, features);
+
+       if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
+               return nicvf_config_loopback(nic, features);
+
+       return 0;
+}
+
+static const struct net_device_ops nicvf_netdev_ops = {
+       .ndo_open               = nicvf_open,
+       .ndo_stop               = nicvf_stop,
+       .ndo_start_xmit         = nicvf_xmit,
+       .ndo_change_mtu         = nicvf_change_mtu,
+       .ndo_set_mac_address    = nicvf_set_mac_address,
+       .ndo_get_stats64        = nicvf_get_stats64,
+       .ndo_tx_timeout         = nicvf_tx_timeout,
+       .ndo_fix_features       = nicvf_fix_features,
+       .ndo_set_features       = nicvf_set_features,
+};
+
+static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+       struct device *dev = &pdev->dev;
+       struct net_device *netdev;
+       struct nicvf *nic;
+       int    err, qcount;
+
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(dev, "Failed to enable PCI device\n");
+               return err;
+       }
+
+       err = pci_request_regions(pdev, DRV_NAME);
+       if (err) {
+               dev_err(dev, "PCI request regions failed 0x%x\n", err);
+               goto err_disable_device;
+       }
+
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+       if (err) {
+               dev_err(dev, "Unable to get usable DMA configuration\n");
+               goto err_release_regions;
+       }
+
+       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
+       if (err) {
+               dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
+               goto err_release_regions;
+       }
+
+       qcount = MAX_CMP_QUEUES_PER_QS;
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       /* Restrict multiqset support to host-bound VFs only */
+       if (pdev->is_virtfn) {
+               /* Set max number of queues per VF */
+               qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS);
+               qcount = min(qcount,
+                            (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
+       }
+#endif
+
+       netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
+       if (!netdev) {
+               err = -ENOMEM;
+               goto err_release_regions;
+       }
+
+       pci_set_drvdata(pdev, netdev);
+
+       SET_NETDEV_DEV(netdev, &pdev->dev);
+
+       nic = netdev_priv(netdev);
+       nic->netdev = netdev;
+       nic->pdev = pdev;
+       nic->pnicvf = nic;
+       nic->max_queues = qcount;
+
+       /* Map the VF's configuration registers */
+       nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+       if (!nic->reg_base) {
+               dev_err(dev, "Cannot map config register space, aborting\n");
+               err = -ENOMEM;
+               goto err_free_netdev;
+       }
+
+       err = nicvf_set_qset_resources(nic);
+       if (err)
+               goto err_free_netdev;
+
+       /* Check if PF is alive and get MAC address for this VF */
+       err = nicvf_register_misc_interrupt(nic);
+       if (err)
+               goto err_free_netdev;
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       nicvf_send_vf_struct(nic);
+
+       /* Check if this VF is in QS only mode */
+       if (nic->sqs_mode)
+               return 0;
+#endif
+
+       err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues);
+       if (err)
+               goto err_unregister_interrupts;
+
+       netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+                              NETIF_F_GRO |
+                              NETIF_F_HW_VLAN_CTAG_RX);
+#ifdef VNIC_RSS_SUPPORT
+       netdev->hw_features |= NETIF_F_RXHASH;
+#endif
+
+       netdev->features |= netdev->hw_features;
+       netdev->hw_features |= NETIF_F_LOOPBACK;
+
+       netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM;
+
+       netdev->netdev_ops = &nicvf_netdev_ops;
+       netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
+
+       INIT_WORK(&nic->reset_task, nicvf_reset_task);
+
+       err = register_netdev(netdev);
+       if (err) {
+               dev_err(dev, "Failed to register netdevice\n");
+               goto err_unregister_interrupts;
+       }
+
+       nic->msg_enable = debug;
+
+       nicvf_set_ethtool_ops(netdev);
+
+       return 0;
+
+err_unregister_interrupts:
+       nicvf_unregister_interrupts(nic);
+err_free_netdev:
+       pci_set_drvdata(pdev, NULL);
+       free_netdev(netdev);
+err_release_regions:
+       pci_release_regions(pdev);
+err_disable_device:
+       pci_disable_device(pdev);
+       return err;
+}
+
+static void nicvf_remove(struct pci_dev *pdev)
+{
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct nicvf *nic = netdev_priv(netdev);
+       struct net_device *pnetdev = nic->pnicvf->netdev;
+
+       /* Check if this Qset is assigned to a different VF.
+        * If so, clean up the primary and all secondary Qsets.
+        */
+       if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
+               unregister_netdev(pnetdev);
+       nicvf_unregister_interrupts(nic);
+       pci_set_drvdata(pdev, NULL);
+       free_netdev(netdev);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+}
+
+static void nicvf_shutdown(struct pci_dev *pdev)
+{
+       nicvf_remove(pdev);
+}
+
+static struct pci_driver nicvf_driver = {
+       .name = DRV_NAME,
+       .id_table = nicvf_id_table,
+       .probe = nicvf_probe,
+       .remove = nicvf_remove,
+       .shutdown = nicvf_shutdown,
+};
+
+static int __init nicvf_init_module(void)
+{
+       pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+
+       return pci_register_driver(&nicvf_driver);
+}
+
+static void __exit nicvf_cleanup_module(void)
+{
+       pci_unregister_driver(&nicvf_driver);
+}
+
+module_init(nicvf_init_module);
+module_exit(nicvf_cleanup_module);
diff --git a/sys/dev/vnic/nicvf_queues.c b/sys/dev/vnic/nicvf_queues.c
new file mode 100644 (file)
index 0000000..9fa9024
--- /dev/null
@@ -0,0 +1,1467 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
+#include <net/tso.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "q_struct.h"
+#include "nicvf_queues.h"
+
+struct rbuf_info {
+       struct page *page;
+       void    *data;
+       u64     offset;
+};
+
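+/* rbuf_info is stored just before the aligned buffer address handed to HW */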
+#define GET_RBUF_INFO(x) ((struct rbuf_info *)(x - NICVF_RCV_BUF_ALIGN_BYTES))
+
+/* Poll a register for a specific value */
+static int nicvf_poll_reg(struct nicvf *nic, int qidx,
+                         u64 reg, int bit_pos, int bits, int val)
+{
+       u64 bit_mask;
+       u64 reg_val;
+       int timeout = 10;
+
+       bit_mask = (1ULL << bits) - 1;
+       bit_mask = (bit_mask << bit_pos);
+
+       while (timeout) {
+               reg_val = nicvf_queue_reg_read(nic, reg, qidx);
+               if (((reg_val & bit_mask) >> bit_pos) == val)
+                       return 0;
+               usleep_range(1000, 2000);
+               timeout--;
+       }
+       netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg);
+       return 1;
+}
+
+/* Allocate memory for a queue's descriptors */
+static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
+                                 int q_len, int desc_size, int align_bytes)
+{
+       dmem->q_len = q_len;
+       dmem->size = (desc_size * q_len) + align_bytes;
+       /* Save address, need it while freeing */
+       dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size,
+                                               &dmem->dma, GFP_KERNEL);
+       if (!dmem->unalign_base)
+               return -ENOMEM;
+
+       /* Align memory address for 'align_bytes' */
+       dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes);
+       dmem->base = dmem->unalign_base + (dmem->phys_base - dmem->dma);
+       return 0;
+}
+
+/* Free queue's descriptor memory */
+static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
+{
+       if (!dmem)
+               return;
+
+       dma_free_coherent(&nic->pdev->dev, dmem->size,
+                         dmem->unalign_base, dmem->dma);
+       dmem->unalign_base = NULL;
+       dmem->base = NULL;
+}
+
+/* Allocate a buffer for packet reception.
+ * HW returns the memory address where the packet is DMA'ed, not a pointer
+ * into the RBDR ring, so save the buffer address at the start of the
+ * fragment and align the start address to a cache-aligned address.
+ */
+static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
+                                        u32 buf_len, u64 **rbuf)
+{
+       u64 data;
+       struct rbuf_info *rinfo;
+       int order = get_order(buf_len);
+
+       /* Check if the request can be accommodated in the previously allocated page */
+       if (nic->rb_page) {
+               if ((nic->rb_page_offset + buf_len + buf_len) >
+                   (PAGE_SIZE << order)) {
+                       nic->rb_page = NULL;
+               } else {
+                       nic->rb_page_offset += buf_len;
+                       get_page(nic->rb_page);
+               }
+       }
+
+       /* Allocate a new page */
+       if (!nic->rb_page) {
+               nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
+                                          order);
+               if (!nic->rb_page) {
+                       netdev_err(nic->netdev,
+                                  "Failed to allocate new rcv buffer\n");
+                       return -ENOMEM;
+               }
+               nic->rb_page_offset = 0;
+       }
+
+       data = (u64)page_address(nic->rb_page) + nic->rb_page_offset;
+
+       /* Align buffer address to a cache line, i.e. 128 bytes */
+       rinfo = (struct rbuf_info *)(data + NICVF_RCV_BUF_ALIGN_LEN(data));
+       /* Save page address for refcount updates */
+       rinfo->page = nic->rb_page;
+       /* Store start address for later retrieval */
+       rinfo->data = (void *)data;
+       /* Store alignment offset */
+       rinfo->offset = NICVF_RCV_BUF_ALIGN_LEN(data);
+
+       data += rinfo->offset;
+
+       /* Give next aligned address to hw for DMA */
+       *rbuf = (u64 *)(data + NICVF_RCV_BUF_ALIGN_BYTES);
+       return 0;
+}
+
+/* Retrieve actual buffer start address and build skb for received packet */
+static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic,
+                                          u64 rb_ptr, int len)
+{
+       struct sk_buff *skb;
+       struct rbuf_info *rinfo;
+
+       rb_ptr = (u64)phys_to_virt(rb_ptr);
+       /* Get buffer start address and alignment offset */
+       rinfo = GET_RBUF_INFO(rb_ptr);
+
+       /* Now build an skb to give to stack */
+       skb = build_skb(rinfo->data, RCV_FRAG_LEN);
+       if (!skb) {
+               put_page(rinfo->page);
+               return NULL;
+       }
+
+       /* Set correct skb->data */
+       skb_reserve(skb, rinfo->offset + NICVF_RCV_BUF_ALIGN_BYTES);
+
+       prefetch((void *)rb_ptr);
+       return skb;
+}
+
+/* Allocate RBDR ring and populate receive buffers */
+static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
+                           int ring_len, int buf_size)
+{
+       int idx;
+       u64 *rbuf;
+       struct rbdr_entry_t *desc;
+       int err;
+
+       err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
+                                    sizeof(struct rbdr_entry_t),
+                                    NICVF_RCV_BUF_ALIGN_BYTES);
+       if (err)
+               return err;
+
+       rbdr->desc = rbdr->dmem.base;
+       /* Buffer size has to be in multiples of 128 bytes */
+       rbdr->dma_size = buf_size;
+       rbdr->enable = true;
+       rbdr->thresh = RBDR_THRESH;
+
+       nic->rb_page = NULL;
+       for (idx = 0; idx < ring_len; idx++) {
+               err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN,
+                                            &rbuf);
+               if (err)
+                       return err;
+
+               desc = GET_RBDR_DESC(rbdr, idx);
+               desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+       }
+       return 0;
+}
+
+/* Free RBDR ring and its receive buffers */
+static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
+{
+       int head, tail;
+       u64 buf_addr;
+       struct rbdr_entry_t *desc;
+       struct rbuf_info *rinfo;
+
+       if (!rbdr)
+               return;
+
+       rbdr->enable = false;
+       if (!rbdr->dmem.base)
+               return;
+
+       head = rbdr->head;
+       tail = rbdr->tail;
+
+       /* Free SKBs */
+       while (head != tail) {
+               desc = GET_RBDR_DESC(rbdr, head);
+               buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
+               rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr));
+               put_page(rinfo->page);
+               head++;
+               head &= (rbdr->dmem.q_len - 1);
+       }
+       /* Free SKB of tail desc */
+       desc = GET_RBDR_DESC(rbdr, tail);
+       buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
+       rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr));
+       put_page(rinfo->page);
+
+       /* Free RBDR ring */
+       nicvf_free_q_desc_mem(nic, &rbdr->dmem);
+}
+
+/* Refill receive buffer descriptors with new buffers.
+ */
+static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp)
+{
+       struct queue_set *qs = nic->qs;
+       int rbdr_idx = qs->rbdr_cnt;
+       int tail, qcount;
+       int refill_rb_cnt;
+       struct rbdr *rbdr;
+       struct rbdr_entry_t *desc;
+       u64 *rbuf;
+       int new_rb = 0;
+
+refill:
+       if (!rbdr_idx)
+               return;
+       rbdr_idx--;
+       rbdr = &qs->rbdr[rbdr_idx];
+       /* Check if it's enabled */
+       if (!rbdr->enable)
+               goto next_rbdr;
+
+       /* Get the number of descriptors to be refilled */
+       qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
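+       /* The low 19 bits of STATUS0 hold the current buffer count */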
+       qcount &= 0x7FFFF;
+       /* Doorbell can be rung with at most ring size minus 1 */
+       if (qcount >= (qs->rbdr_len - 1))
+               goto next_rbdr;
+       else
+               refill_rb_cnt = qs->rbdr_len - qcount - 1;
+
+       /* Start filling descs from tail */
+       tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
+       while (refill_rb_cnt) {
+               tail++;
+               tail &= (rbdr->dmem.q_len - 1);
+
+               if (nicvf_alloc_rcv_buffer(nic, gfp, RCV_FRAG_LEN, &rbuf))
+                       break;
+
+               desc = GET_RBDR_DESC(rbdr, tail);
+               desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+               refill_rb_cnt--;
+               new_rb++;
+       }
+
+       /* make sure all memory stores are done before ringing doorbell */
+       smp_wmb();
+
+       /* Check if buffer allocation failed */
+       if (refill_rb_cnt)
+               nic->rb_alloc_fail = true;
+       else
+               nic->rb_alloc_fail = false;
+
+       /* Notify HW */
+       nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
+                             rbdr_idx, new_rb);
+next_rbdr:
+       /* Re-enable RBDR interrupts only if buffer allocation succeeded */
+       if (!nic->rb_alloc_fail && rbdr->enable)
+               nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);
+
+       if (rbdr_idx)
+               goto refill;
+}
+
+/* Allocate receive buffers in non-atomic mode for a better chance of success */
+void nicvf_rbdr_work(struct work_struct *work)
+{
+       struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work);
+
+       nicvf_refill_rbdr(nic, GFP_KERNEL);
+       if (nic->rb_alloc_fail)
+               schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
+       else
+               nic->rb_work_scheduled = false;
+}
+
+/* In softirq context, allocate receive buffers in atomic mode */
+void nicvf_rbdr_task(unsigned long data)
+{
+       struct nicvf *nic = (struct nicvf *)data;
+
+       nicvf_refill_rbdr(nic, GFP_ATOMIC);
+       if (nic->rb_alloc_fail) {
+               nic->rb_work_scheduled = true;
+               schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
+       }
+}
+
+/* Initialize completion queue */
+static int nicvf_init_cmp_queue(struct nicvf *nic,
+                               struct cmp_queue *cq, int q_len)
+{
+       int err;
+
+       err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
+                                    NICVF_CQ_BASE_ALIGN_BYTES);
+       if (err)
+               return err;
+
+       cq->desc = cq->dmem.base;
+       cq->thresh = CMP_QUEUE_CQE_THRESH;
+       nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;
+
+       return 0;
+}
+
+static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
+{
+       if (!cq)
+               return;
+       if (!cq->dmem.base)
+               return;
+
+       nicvf_free_q_desc_mem(nic, &cq->dmem);
+}
+
+/* Initialize transmit queue */
+static int nicvf_init_snd_queue(struct nicvf *nic,
+                               struct snd_queue *sq, int q_len)
+{
+       int err;
+
+       err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
+                                    NICVF_SQ_BASE_ALIGN_BYTES);
+       if (err)
+               return err;
+
+       sq->desc = sq->dmem.base;
+       sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
+       if (!sq->skbuff)
+               return -ENOMEM;
+       sq->head = 0;
+       sq->tail = 0;
+       atomic_set(&sq->free_cnt, q_len - 1);
+       sq->thresh = SND_QUEUE_THRESH;
+
+       /* Preallocate memory for TSO segment's header */
+       sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev,
+                                         q_len * TSO_HEADER_SIZE,
+                                         &sq->tso_hdrs_phys, GFP_KERNEL);
+       if (!sq->tso_hdrs)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
+{
+       if (!sq)
+               return;
+       if (!sq->dmem.base)
+               return;
+
+       if (sq->tso_hdrs)
+               dma_free_coherent(&nic->pdev->dev,
+                                 sq->dmem.q_len * TSO_HEADER_SIZE,
+                                 sq->tso_hdrs, sq->tso_hdrs_phys);
+
+       kfree(sq->skbuff);
+       nicvf_free_q_desc_mem(nic, &sq->dmem);
+}
+
+static void nicvf_reclaim_snd_queue(struct nicvf *nic,
+                                   struct queue_set *qs, int qidx)
+{
+       /* Disable send queue */
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
+       /* Check if SQ is stopped */
+       if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
+               return;
+       /* Reset send queue */
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
+}
+
+static void nicvf_reclaim_rcv_queue(struct nicvf *nic,
+                                   struct queue_set *qs, int qidx)
+{
+       union nic_mbx mbx = {};
+
+       /* Make sure all packets in the pipeline are written back into mem */
+       mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
+       nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_reclaim_cmp_queue(struct nicvf *nic,
+                                   struct queue_set *qs, int qidx)
+{
+       /* Disable timer threshold (doesn't get reset upon CQ reset) */
+       nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
+       /* Disable completion queue */
+       nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
+       /* Reset completion queue */
+       nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
+}
+
+static void nicvf_reclaim_rbdr(struct nicvf *nic,
+                              struct rbdr *rbdr, int qidx)
+{
+       u64 tmp, fifo_state;
+       int timeout = 10;
+
+       /* Save head and tail pointers for freeing up buffers */
+       rbdr->head = nicvf_queue_reg_read(nic,
+                                         NIC_QSET_RBDR_0_1_HEAD,
+                                         qidx) >> 3;
+       rbdr->tail = nicvf_queue_reg_read(nic,
+                                         NIC_QSET_RBDR_0_1_TAIL,
+                                         qidx) >> 3;
+
+       /* If RBDR FIFO is in 'FAIL' state then do a reset first
+        * before reclaiming.
+        */
+       fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
+       if (((fifo_state >> 62) & 0x03) == 0x3)
+               nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
+                                     qidx, NICVF_RBDR_RESET);
+
+       /* Disable RBDR */
+       nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
+       if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
+               return;
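+       /* Wait until both 32-bit halves of PREFETCH_STATUS match,
+        * i.e. no buffer prefetches remain outstanding.
+        */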
+       while (1) {
+               tmp = nicvf_queue_reg_read(nic,
+                                          NIC_QSET_RBDR_0_1_PREFETCH_STATUS,
+                                          qidx);
+               if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
+                       break;
+               usleep_range(1000, 2000);
+               timeout--;
+               if (!timeout) {
+                       netdev_err(nic->netdev,
+                                  "Failed polling on prefetch status\n");
+                       return;
+               }
+       }
+       nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
+                             qidx, NICVF_RBDR_RESET);
+
+       if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
+               return;
+       nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
+       if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
+               return;
+}
+
+void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
+{
+       u64 rq_cfg;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       int sqs = 0;
+#endif
+
+       rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);
+
+       /* Enable or disable stripping of the first (outer) VLAN tag */
+       if (features & NETIF_F_HW_VLAN_CTAG_RX)
+               rq_cfg |= (1ULL << 25);
+       else
+               rq_cfg &= ~(1ULL << 25);
+       nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
+
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       /* Configure Secondary Qsets, if any */
+       for (sqs = 0; sqs < nic->sqs_count; sqs++)
+               if (nic->snicvf[sqs])
+                       nicvf_queue_reg_write(nic->snicvf[sqs],
+                                             NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
+#endif
+}
+
+/* Configures receive queue */
+static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
+                                  int qidx, bool enable)
+{
+       union nic_mbx mbx = {};
+       struct rcv_queue *rq;
+       struct rq_cfg rq_cfg;
+
+       rq = &qs->rq[qidx];
+       rq->enable = enable;
+
+       /* Disable receive queue */
+       nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);
+
+       if (!rq->enable) {
+               nicvf_reclaim_rcv_queue(nic, qs, qidx);
+               return;
+       }
+
+       rq->cq_qs = qs->vnic_id;
+       rq->cq_idx = qidx;
+       rq->start_rbdr_qs = qs->vnic_id;
+       rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
+       rq->cont_rbdr_qs = qs->vnic_id;
+       rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
+       /* All writes of RBDR data are to be loaded into the L2 cache as well */
+       rq->caching = 1;
+
+       /* Send a mailbox msg to PF to config RQ */
+       mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
+       mbx.rq.qs_num = qs->vnic_id;
+       mbx.rq.rq_num = qidx;
+       mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
+                         (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
+                         (rq->cont_qs_rbdr_idx << 8) |
+                         (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx);
+       nicvf_send_msg_to_pf(nic, &mbx);
+
+       mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
+       mbx.rq.cfg = (1ULL << 63) | (1ULL << 62) | (qs->vnic_id << 0);
+       nicvf_send_msg_to_pf(nic, &mbx);
+
+       /* RQ drop config
+        * Enable CQ drop to reserve sufficient CQEs for all tx packets
+        */
+       mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
+       mbx.rq.cfg = (1ULL << 62) | (RQ_CQ_DROP << 8);
+       nicvf_send_msg_to_pf(nic, &mbx);
+
+       nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00);
+       if (!nic->sqs_mode)
+               nicvf_config_vlan_stripping(nic, nic->netdev->features);
+
+       /* Enable Receive queue */
+       rq_cfg.ena = 1;
+       rq_cfg.tcp_ena = 0;
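+       /* The bit-field struct mirrors the register layout; write it as a raw u64 */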
+       nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
+}
+
+/* Configures completion queue */
+void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
+                           int qidx, bool enable)
+{
+       struct cmp_queue *cq;
+       struct cq_cfg cq_cfg;
+
+       cq = &qs->cq[qidx];
+       cq->enable = enable;
+
+       if (!cq->enable) {
+               nicvf_reclaim_cmp_queue(nic, qs, qidx);
+               return;
+       }
+
+       /* Reset completion queue */
+       nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
+
+       if (!cq->enable)
+               return;
+
+       spin_lock_init(&cq->lock);
+       /* Set completion queue base address */
+       nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE,
+                             qidx, (u64)(cq->dmem.phys_base));
+
+       /* Enable Completion queue */
+       cq_cfg.ena = 1;
+       cq_cfg.reset = 0;
+       cq_cfg.caching = 0;
+       cq_cfg.qsize = CMP_QSIZE;
+       cq_cfg.avg_con = 0;
+       nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg);
+
+       /* Set threshold value for interrupt generation */
+       nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
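+       /* CFG2 holds the timer threshold used for interrupt coalescing */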
+       nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
+                             qidx, nic->cq_coalesce_usecs);
+}
+
+/* Configures transmit queue */
+static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
+                                  int qidx, bool enable)
+{
+       union nic_mbx mbx = {};
+       struct snd_queue *sq;
+       struct sq_cfg sq_cfg;
+
+       sq = &qs->sq[qidx];
+       sq->enable = enable;
+
+       if (!sq->enable) {
+               nicvf_reclaim_snd_queue(nic, qs, qidx);
+               return;
+       }
+
+       /* Reset send queue */
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
+
+       sq->cq_qs = qs->vnic_id;
+       sq->cq_idx = qidx;
+
+       /* Send a mailbox msg to PF to config SQ */
+       mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
+       mbx.sq.qs_num = qs->vnic_id;
+       mbx.sq.sq_num = qidx;
+       mbx.sq.sqs_mode = nic->sqs_mode;
+       mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
+       nicvf_send_msg_to_pf(nic, &mbx);
+
+       /* Set queue base address */
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE,
+                             qidx, (u64)(sq->dmem.phys_base));
+
+       /* Enable send queue & set queue size */
+       sq_cfg.ena = 1;
+       sq_cfg.reset = 0;
+       sq_cfg.ldwb = 0;
+       sq_cfg.qsize = SND_QSIZE;
+       sq_cfg.tstmp_bgx_intf = 0;
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);
+
+       /* Set threshold value for interrupt generation */
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);
+}
+
+/* Configures receive buffer descriptor ring */
+static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs,
+                             int qidx, bool enable)
+{
+       struct rbdr *rbdr;
+       struct rbdr_cfg rbdr_cfg;
+
+       rbdr = &qs->rbdr[qidx];
+       nicvf_reclaim_rbdr(nic, rbdr, qidx);
+       if (!enable)
+               return;
+
+       /* Set descriptor base address */
+       nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE,
+                             qidx, (u64)(rbdr->dmem.phys_base));
+
+       /* Enable RBDR & set queue size */
+       /* Buffer size should be in multiples of 128 bytes */
+       rbdr_cfg.ena = 1;
+       rbdr_cfg.reset = 0;
+       rbdr_cfg.ldwb = 0;
+       rbdr_cfg.qsize = RBDR_SIZE;
+       rbdr_cfg.avg_con = 0;
+       rbdr_cfg.lines = rbdr->dma_size / 128;
+       nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
+                             qidx, *(u64 *)&rbdr_cfg);
+
+       /* Notify HW */
+       nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
+                             qidx, qs->rbdr_len - 1);
+
+       /* Set threshold value for interrupt generation */
+       nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH,
+                             qidx, rbdr->thresh - 1);
+}
+
+/* Requests PF to assign and enable Qset */
+void nicvf_qset_config(struct nicvf *nic, bool enable)
+{
+       union nic_mbx mbx = {};
+       struct queue_set *qs = nic->qs;
+       struct qs_cfg *qs_cfg;
+
+       if (!qs) {
+               netdev_warn(nic->netdev,
+                           "Qset is still not allocated, don't init queues\n");
+               return;
+       }
+
+       qs->enable = enable;
+       qs->vnic_id = nic->vf_id;
+
+       /* Send a mailbox msg to PF to config Qset */
+       mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
+       mbx.qs.num = qs->vnic_id;
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       mbx.qs.sqs_count = nic->sqs_count;
+#endif
+
+       mbx.qs.cfg = 0;
+       qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
+       if (qs->enable) {
+               qs_cfg->ena = 1;
+#ifdef __BIG_ENDIAN
+               qs_cfg->be = 1;
+#endif
+               qs_cfg->vnic = qs->vnic_id;
+       }
+       nicvf_send_msg_to_pf(nic, &mbx);
+}
+
+static void nicvf_free_resources(struct nicvf *nic)
+{
+       int qidx;
+       struct queue_set *qs = nic->qs;
+
+       /* Free receive buffer descriptor ring */
+       for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
+               nicvf_free_rbdr(nic, &qs->rbdr[qidx]);
+
+       /* Free completion queue */
+       for (qidx = 0; qidx < qs->cq_cnt; qidx++)
+               nicvf_free_cmp_queue(nic, &qs->cq[qidx]);
+
+       /* Free send queue */
+       for (qidx = 0; qidx < qs->sq_cnt; qidx++)
+               nicvf_free_snd_queue(nic, &qs->sq[qidx]);
+}
+
+static int nicvf_alloc_resources(struct nicvf *nic)
+{
+       int qidx;
+       struct queue_set *qs = nic->qs;
+
+       /* Alloc receive buffer descriptor ring */
+       for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
+               if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
+                                   DMA_BUFFER_LEN))
+                       goto alloc_fail;
+       }
+
+       /* Alloc send queue */
+       for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
+               if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len))
+                       goto alloc_fail;
+       }
+
+       /* Alloc completion queue */
+       for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
+               if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len))
+                       goto alloc_fail;
+       }
+
+       return 0;
+alloc_fail:
+       nicvf_free_resources(nic);
+       return -ENOMEM;
+}
+
+int nicvf_set_qset_resources(struct nicvf *nic)
+{
+       struct queue_set *qs;
+
+       qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL);
+       if (!qs)
+               return -ENOMEM;
+       nic->qs = qs;
+
+       /* Set count of each queue */
+       qs->rbdr_cnt = RBDR_CNT;
+#ifdef VNIC_RSS_SUPPORT
+       qs->rq_cnt = RCV_QUEUE_CNT;
+#else
+       qs->rq_cnt = 1;
+#endif
+       qs->sq_cnt = SND_QUEUE_CNT;
+       qs->cq_cnt = CMP_QUEUE_CNT;
+
+       /* Set queue lengths */
+       qs->rbdr_len = RCV_BUF_COUNT;
+       qs->sq_len = SND_QUEUE_LEN;
+       qs->cq_len = CMP_QUEUE_LEN;
+
+       nic->rx_queues = qs->rq_cnt;
+       nic->tx_queues = qs->sq_cnt;
+
+       return 0;
+}
+
+int nicvf_config_data_transfer(struct nicvf *nic, bool enable)
+{
+       bool disable = false;
+       struct queue_set *qs = nic->qs;
+       int qidx;
+
+       if (!qs)
+               return 0;
+
+       if (enable) {
+               if (nicvf_alloc_resources(nic))
+                       return -ENOMEM;
+
+               for (qidx = 0; qidx < qs->sq_cnt; qidx++)
+                       nicvf_snd_queue_config(nic, qs, qidx, enable);
+               for (qidx = 0; qidx < qs->cq_cnt; qidx++)
+                       nicvf_cmp_queue_config(nic, qs, qidx, enable);
+               for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
+                       nicvf_rbdr_config(nic, qs, qidx, enable);
+               for (qidx = 0; qidx < qs->rq_cnt; qidx++)
+                       nicvf_rcv_queue_config(nic, qs, qidx, enable);
+       } else {
+               for (qidx = 0; qidx < qs->rq_cnt; qidx++)
+                       nicvf_rcv_queue_config(nic, qs, qidx, disable);
+               for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
+                       nicvf_rbdr_config(nic, qs, qidx, disable);
+               for (qidx = 0; qidx < qs->sq_cnt; qidx++)
+                       nicvf_snd_queue_config(nic, qs, qidx, disable);
+               for (qidx = 0; qidx < qs->cq_cnt; qidx++)
+                       nicvf_cmp_queue_config(nic, qs, qidx, disable);
+
+               nicvf_free_resources(nic);
+       }
+
+       return 0;
+}
+
+/* Get free descriptors from the SQ
+ * and return the index of the first one.
+ */
+static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
+{
+       int qentry;
+
+       qentry = sq->tail;
+       atomic_sub(desc_cnt, &sq->free_cnt);
+       sq->tail += desc_cnt;
+       sq->tail &= (sq->dmem.q_len - 1);
+
+       return qentry;
+}
+
+/* Free descriptor back to SQ for future use */
+void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
+{
+       atomic_add(desc_cnt, &sq->free_cnt);
+       sq->head += desc_cnt;
+       sq->head &= (sq->dmem.q_len - 1);
+}
+
+static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
+{
+       qentry++;
+       qentry &= (sq->dmem.q_len - 1);
+       return qentry;
+}
+
+void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
+{
+       u64 sq_cfg;
+
+       sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
+       sq_cfg |= NICVF_SQ_EN;
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
+       /* Ring doorbell so that H/W restarts processing SQEs */
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
+}
+
+void nicvf_sq_disable(struct nicvf *nic, int qidx)
+{
+       u64 sq_cfg;
+
+       sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
+       sq_cfg &= ~NICVF_SQ_EN;
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
+}
+
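+/* Reclaim SQ descriptors already processed by HW and free their skbs */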
+void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq,
+                             int qidx)
+{
+       u64 head, tail;
+       struct sk_buff *skb;
+       struct nicvf *nic = netdev_priv(netdev);
+       struct sq_hdr_subdesc *hdr;
+
+       head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
+       tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
+       while (sq->head != head) {
+               hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
+               if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
+                       nicvf_put_sq_desc(sq, 1);
+                       continue;
+               }
+               skb = (struct sk_buff *)sq->skbuff[sq->head];
+               if (skb)
+                       dev_kfree_skb_any(skb);
+               atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets);
+               atomic64_add(hdr->tot_len,
+                            (atomic64_t *)&netdev->stats.tx_bytes);
+               nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
+       }
+}
+
+/* Get the number of SQ descriptors needed to xmit this skb */
+static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
+{
+       int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
+
+       if (skb_shinfo(skb)->gso_size) {
+               subdesc_cnt = nicvf_tso_count_subdescs(skb);
+               return subdesc_cnt;
+       }
+
+       if (skb_shinfo(skb)->nr_frags)
+               subdesc_cnt += skb_shinfo(skb)->nr_frags;
+
+       return subdesc_cnt;
+}
+
+/* Add SQ HEADER subdescriptor.
+ * First subdescriptor for every send descriptor.
+ */
+static inline void
+nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
+                        int subdesc_cnt, struct sk_buff *skb, int len)
+{
+       int proto;
+       struct sq_hdr_subdesc *hdr;
+
+       hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
+       sq->skbuff[qentry] = (u64)skb;
+
+       memset(hdr, 0, SND_QUEUE_DESC_SIZE);
+       hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
+       /* Enable notification via CQE after processing SQE */
+       hdr->post_cqe = 1;
+       /* Number of subdescriptors following this one */
+       hdr->subdesc_cnt = subdesc_cnt;
+       hdr->tot_len = len;
+
+       /* Offload checksum calculation to HW */
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+               hdr->csum_l3 = 1; /* Enable IP csum calculation */
+               hdr->l3_offset = skb_network_offset(skb);
+               hdr->l4_offset = skb_transport_offset(skb);
+
+               proto = ip_hdr(skb)->protocol;
+               switch (proto) {
+               case IPPROTO_TCP:
+                       hdr->csum_l4 = SEND_L4_CSUM_TCP;
+                       break;
+               case IPPROTO_UDP:
+                       hdr->csum_l4 = SEND_L4_CSUM_UDP;
+                       break;
+               case IPPROTO_SCTP:
+                       hdr->csum_l4 = SEND_L4_CSUM_SCTP;
+                       break;
+               }
+       }
+}
+
+/* SQ GATHER subdescriptor
+ * Must follow HDR descriptor
+ */
+static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
+                                              int size, u64 data)
+{
+       struct sq_gather_subdesc *gather;
+
+       qentry &= (sq->dmem.q_len - 1);
+       gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);
+
+       memset(gather, 0, SND_QUEUE_DESC_SIZE);
+       gather->subdesc_type = SQ_DESC_TYPE_GATHER;
+       gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
+       gather->size = size;
+       gather->addr = data;
+}
+
+/* Append an skb to a SQ for packet transfer. */
+int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
+{
+       int i, size;
+       int subdesc_cnt;
+       int sq_num, qentry;
+       struct queue_set *qs;
+       struct snd_queue *sq;
+
+       sq_num = skb_get_queue_mapping(skb);
+#ifdef VNIC_MULTI_QSET_SUPPORT
+       if (sq_num >= MAX_SND_QUEUES_PER_QS) {
+               /* Get secondary Qset's SQ structure */
+               i = sq_num / MAX_SND_QUEUES_PER_QS;
+               if (!nic->snicvf[i - 1]) {
+                       netdev_warn(nic->netdev,
+                                   "Secondary Qset#%d's ptr not initialized\n",
+                                   i - 1);
+                       return 1;
+               }
+               nic = (struct nicvf *)nic->snicvf[i - 1];
+               sq_num = sq_num % MAX_SND_QUEUES_PER_QS;
+       }
+#endif
+
+       qs = nic->qs;
+       sq = &qs->sq[sq_num];
+
+       subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
+       if (subdesc_cnt > atomic_read(&sq->free_cnt))
+               goto append_fail;
+
+       qentry = nicvf_get_sq_desc(sq, subdesc_cnt);
+
+       /* Add SQ header subdesc */
+       nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, skb, skb->len);
+
+       /* Add SQ gather subdescs */
+       qentry = nicvf_get_nxt_sqentry(sq, qentry);
+       size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
+       nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data));
+
+       /* Check for scattered buffer */
+       if (!skb_is_nonlinear(skb))
+               goto doorbell;
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               const struct skb_frag_struct *frag;
+
+               frag = &skb_shinfo(skb)->frags[i];
+
+               qentry = nicvf_get_nxt_sqentry(sq, qentry);
+               size = skb_frag_size(frag);
+               nicvf_sq_add_gather_subdesc(sq, qentry, size,
+                                           virt_to_phys(
+                                           skb_frag_address(frag)));
+       }
+
+doorbell:
+       /* make sure all memory stores are done before ringing doorbell */
+       smp_wmb();
+
+       /* Inform HW to xmit new packet */
+       nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
+                             sq_num, subdesc_cnt);
+       return 1;
+
+append_fail:
+       /* Use original PCI dev for debug log */
+       nic = nic->pnicvf;
+       netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n");
+       return 0;
+}
+
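+/* On big-endian hosts the 16-bit receive buffer lengths are swapped within
+ * each 64-bit word of the CQE, so remap the fragment index accordingly.
+ */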
+static inline unsigned frag_num(unsigned i)
+{
+#ifdef __BIG_ENDIAN
+       return (i & ~3) + 3 - (i & 3);
+#else
+       return i;
+#endif
+}
+
+/* Returns SKB for a received packet */
+struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
+{
+       int frag;
+       int payload_len = 0;
+       struct sk_buff *skb = NULL;
+       struct sk_buff *skb_frag = NULL;
+       struct sk_buff *prev_frag = NULL;
+       u16 *rb_lens = NULL;
+       u64 *rb_ptrs = NULL;
+
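+       /* Buffer lengths start at word 3 of the CQE; buffer pointers at word 6 */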
+       rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
+       rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
+
+       netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
+                  __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
+
+       for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
+               payload_len = rb_lens[frag_num(frag)];
+               if (!frag) {
+                       /* First fragment */
+                       skb = nicvf_rb_ptr_to_skb(nic,
+                                                 *rb_ptrs - cqe_rx->align_pad,
+                                                 payload_len);
+                       if (!skb)
+                               return NULL;
+                       skb_reserve(skb, cqe_rx->align_pad);
+                       skb_put(skb, payload_len);
+               } else {
+                       /* Add fragments */
+                       skb_frag = nicvf_rb_ptr_to_skb(nic, *rb_ptrs,
+                                                      payload_len);
+                       if (!skb_frag) {
+                               dev_kfree_skb(skb);
+                               return NULL;
+                       }
+
+                       if (!skb_shinfo(skb)->frag_list)
+                               skb_shinfo(skb)->frag_list = skb_frag;
+                       else
+                               prev_frag->next = skb_frag;
+
+                       prev_frag = skb_frag;
+                       skb->len += payload_len;
+                       skb->data_len += payload_len;
+                       skb_frag->len = payload_len;
+               }
+               /* Next buffer pointer */
+               rb_ptrs++;
+       }
+       return skb;
+}
+
+/* Enable interrupt */
+void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
+{
+       u64 reg_val;
+
+       reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
+
+       switch (int_type) {
+       case NICVF_INTR_CQ:
+               reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
+               break;
+       case NICVF_INTR_SQ:
+               reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
+               break;
+       case NICVF_INTR_RBDR:
+               reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
+               break;
+       case NICVF_INTR_PKT_DROP:
+               reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
+               break;
+       case NICVF_INTR_TCP_TIMER:
+               reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
+               break;
+       case NICVF_INTR_MBOX:
+               reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT);
+               break;
+       case NICVF_INTR_QS_ERR:
+               reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT);
+               break;
+       default:
+               netdev_err(nic->netdev,
+                          "Failed to enable interrupt: unknown type\n");
+               break;
+       }
+
+       nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val);
+}
+
+/* Disable interrupt */
+void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
+{
+       u64 reg_val = 0;
+
+       switch (int_type) {
+       case NICVF_INTR_CQ:
+               reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
+               break;
+       case NICVF_INTR_SQ:
+               reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
+               break;
+       case NICVF_INTR_RBDR:
+               reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
+               break;
+       case NICVF_INTR_PKT_DROP:
+               reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
+               break;
+       case NICVF_INTR_TCP_TIMER:
+               reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
+               break;
+       case NICVF_INTR_MBOX:
+               reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT);
+               break;
+       case NICVF_INTR_QS_ERR:
+               reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT);
+               break;
+       default:
+               netdev_err(nic->netdev,
+                          "Failed to disable interrupt: unknown type\n");
+               break;
+       }
+
+       nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val);
+}
+
+/* Clear interrupt */
+void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
+{
+       u64 reg_val = 0;
+
+       switch (int_type) {
+       case NICVF_INTR_CQ:
+               reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
+               break;
+       case NICVF_INTR_SQ:
+               reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
+               break;
+       case NICVF_INTR_RBDR:
+               reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
+               break;
+       case NICVF_INTR_PKT_DROP:
+               reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
+               break;
+       case NICVF_INTR_TCP_TIMER:
+               reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
+               break;
+       case NICVF_INTR_MBOX:
+               reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT);
+               break;
+       case NICVF_INTR_QS_ERR:
+               reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT);
+               break;
+       default:
+               netdev_err(nic->netdev,
+                          "Failed to clear interrupt: unknown type\n");
+               break;
+       }
+
+       nicvf_reg_write(nic, NIC_VF_INT, reg_val);
+}
+
+/* Check if interrupt is enabled */
+int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
+{
+       u64 reg_val;
+       u64 mask = 0xff;
+
+       reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
+
+       switch (int_type) {
+       case NICVF_INTR_CQ:
+               mask = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
+               break;
+       case NICVF_INTR_SQ:
+               mask = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
+               break;
+       case NICVF_INTR_RBDR:
+               mask = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
+               break;
+       case NICVF_INTR_PKT_DROP:
+               mask = NICVF_INTR_PKT_DROP_MASK;
+               break;
+       case NICVF_INTR_TCP_TIMER:
+               mask = NICVF_INTR_TCP_TIMER_MASK;
+               break;
+       case NICVF_INTR_MBOX:
+               mask = NICVF_INTR_MBOX_MASK;
+               break;
+       case NICVF_INTR_QS_ERR:
+               mask = NICVF_INTR_QS_ERR_MASK;
+               break;
+       default:
+               netdev_err(nic->netdev,
+                          "Failed to check interrupt enable: unknown type\n");
+               break;
+       }
+
+       return (reg_val & mask);
+}
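+
+/*
+ * Illustrative note (a sketch, not authoritative): the helpers above build
+ * one bit per interrupt source -- per-queue bits for CQ/SQ/RBDR, single bits
+ * for the rest -- and apply it through the ENA_W1S (write-1-to-set) /
+ * ENA_W1C (write-1-to-clear) register pair, so toggling one source leaves
+ * the other enable bits untouched.  Assuming the shift values from
+ * nic_reg.h, enabling the CQ interrupt of queue 3 would be:
+ *
+ *     nicvf_enable_intr(nic, NICVF_INTR_CQ, 3);
+ *             // sets bit ((1ULL << 3) << NICVF_INTR_CQ_SHIFT) via NIC_VF_ENA_W1S
+ *     nicvf_disable_intr(nic, NICVF_INTR_CQ, 3);
+ *             // clears the same bit via NIC_VF_ENA_W1C
+ */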
+
+void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
+{
+       struct rcv_queue *rq;
+
+#define GET_RQ_STATS(reg) \
+       nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
+                           (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
+
+       rq = &nic->qs->rq[rq_idx];
+       rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
+       rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
+}
+
+void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
+{
+       struct snd_queue *sq;
+
+#define GET_SQ_STATS(reg) \
+       nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
+                           (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
+
+       sq = &nic->qs->sq[sq_idx];
+       sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
+       sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
+}
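+
+/*
+ * Illustrative note (a sketch based on the macros above): each queue's
+ * statistics registers are laid out at
+ *     base | (queue_index << NIC_Q_NUM_SHIFT) | (stat_index << 3)
+ * so, e.g., reading the packet counter of RQ 2 resolves to
+ *     nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |
+ *                         (2 << NIC_Q_NUM_SHIFT) | (RQ_SQ_STATS_PKTS << 3));
+ * with NIC_Q_NUM_SHIFT taken from nic_reg.h.
+ */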
+
+/* Check for errors in the receive completion queue (CQ) entry */
+int nicvf_check_cqe_rx_errs(struct nicvf *nic,
+                           struct cmp_queue *cq, struct cqe_rx_t *cqe_rx)
+{
+       struct nicvf_hw_stats *stats = &nic->hw_stats;
+       struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
+
+       if (!cqe_rx->err_level && !cqe_rx->err_opcode) {
+               drv_stats->rx_frames_ok++;
+               return 0;
+       }
+
+       if (netif_msg_rx_err(nic))
+               netdev_err(nic->netdev,
+                          "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n",
+                          nic->netdev->name,
+                          cqe_rx->err_level, cqe_rx->err_opcode);
+
+       switch (cqe_rx->err_opcode) {
+       case CQ_RX_ERROP_RE_PARTIAL:
+               stats->rx_bgx_truncated_pkts++;
+               break;
+       case CQ_RX_ERROP_RE_JABBER:
+               stats->rx_jabber_errs++;
+               break;
+       case CQ_RX_ERROP_RE_FCS:
+               stats->rx_fcs_errs++;
+               break;
+       case CQ_RX_ERROP_RE_RX_CTL:
+               stats->rx_bgx_errs++;
+               break;
+       case CQ_RX_ERROP_PREL2_ERR:
+               stats->rx_prel2_errs++;
+               break;
+       case CQ_RX_ERROP_L2_MAL:
+               stats->rx_l2_hdr_malformed++;
+               break;
+       case CQ_RX_ERROP_L2_OVERSIZE:
+               stats->rx_oversize++;
+               break;
+       case CQ_RX_ERROP_L2_UNDERSIZE:
+               stats->rx_undersize++;
+               break;
+       case CQ_RX_ERROP_L2_LENMISM:
+               stats->rx_l2_len_mismatch++;
+               break;
+       case CQ_RX_ERROP_L2_PCLP:
+               stats->rx_l2_pclp++;
+               break;
+       case CQ_RX_ERROP_IP_NOT:
+               stats->rx_ip_ver_errs++;
+               break;
+       case CQ_RX_ERROP_IP_CSUM_ERR:
+               stats->rx_ip_csum_errs++;
+               break;
+       case CQ_RX_ERROP_IP_MAL:
+               stats->rx_ip_hdr_malformed++;
+               break;
+       case CQ_RX_ERROP_IP_MALD:
+               stats->rx_ip_payload_malformed++;
+               break;
+       case CQ_RX_ERROP_IP_HOP:
+               stats->rx_ip_ttl_errs++;
+               break;
+       case CQ_RX_ERROP_L3_PCLP:
+               stats->rx_l3_pclp++;
+               break;
+       case CQ_RX_ERROP_L4_MAL:
+               stats->rx_l4_malformed++;
+               break;
+       case CQ_RX_ERROP_L4_CHK:
+               stats->rx_l4_csum_errs++;
+               break;
+       case CQ_RX_ERROP_UDP_LEN:
+               stats->rx_udp_len_errs++;
+               break;
+       case CQ_RX_ERROP_L4_PORT:
+               stats->rx_l4_port_errs++;
+               break;
+       case CQ_RX_ERROP_TCP_FLAG:
+               stats->rx_tcp_flag_errs++;
+               break;
+       case CQ_RX_ERROP_TCP_OFFSET:
+               stats->rx_tcp_offset_errs++;
+               break;
+       case CQ_RX_ERROP_L4_PCLP:
+               stats->rx_l4_pclp++;
+               break;
+       case CQ_RX_ERROP_RBDR_TRUNC:
+               stats->rx_truncated_pkts++;
+               break;
+       }
+
+       return 1;
+}
+
+/* Check for errors in the send completion queue (CQ) entry */
+int nicvf_check_cqe_tx_errs(struct nicvf *nic,
+                           struct cmp_queue *cq, struct cqe_send_t *cqe_tx)
+{
+       struct cmp_queue_stats *stats = &cq->stats;
+
+       switch (cqe_tx->send_status) {
+       case CQ_TX_ERROP_GOOD:
+               stats->tx.good++;
+               return 0;
+       case CQ_TX_ERROP_DESC_FAULT:
+               stats->tx.desc_fault++;
+               break;
+       case CQ_TX_ERROP_HDR_CONS_ERR:
+               stats->tx.hdr_cons_err++;
+               break;
+       case CQ_TX_ERROP_SUBDC_ERR:
+               stats->tx.subdesc_err++;
+               break;
+       case CQ_TX_ERROP_IMM_SIZE_OFLOW:
+               stats->tx.imm_size_oflow++;
+               break;
+       case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
+               stats->tx.data_seq_err++;
+               break;
+       case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
+               stats->tx.mem_seq_err++;
+               break;
+       case CQ_TX_ERROP_LOCK_VIOL:
+               stats->tx.lock_viol++;
+               break;
+       case CQ_TX_ERROP_DATA_FAULT:
+               stats->tx.data_fault++;
+               break;
+       case CQ_TX_ERROP_TSTMP_CONFLICT:
+               stats->tx.tstmp_conflict++;
+               break;
+       case CQ_TX_ERROP_TSTMP_TIMEOUT:
+               stats->tx.tstmp_timeout++;
+               break;
+       case CQ_TX_ERROP_MEM_FAULT:
+               stats->tx.mem_fault++;
+               break;
+       case CQ_TX_ERROP_CK_OVERLAP:
+               stats->tx.csum_overlap++;
+               break;
+       case CQ_TX_ERROP_CK_OFLOW:
+               stats->tx.csum_overflow++;
+               break;
+       }
+
+       return 1;
+}
diff --git a/sys/dev/vnic/nicvf_queues.h b/sys/dev/vnic/nicvf_queues.h
new file mode 100644 (file)
index 0000000..d5842e9
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef NICVF_QUEUES_H
+#define NICVF_QUEUES_H
+
+#include <linux/netdevice.h>
+#include "q_struct.h"
+
+#define MAX_QUEUE_SET                  128
+#define MAX_RCV_QUEUES_PER_QS          8
+#define MAX_RCV_BUF_DESC_RINGS_PER_QS  2
+#define MAX_SND_QUEUES_PER_QS          8
+#define MAX_CMP_QUEUES_PER_QS          8
+
+/* VF's queue interrupt ranges */
+#define        NICVF_INTR_ID_CQ                0
+#define        NICVF_INTR_ID_SQ                8
+#define        NICVF_INTR_ID_RBDR              16
+#define        NICVF_INTR_ID_MISC              18
+#define        NICVF_INTR_ID_QS_ERR            19
+
+#define        for_each_cq_irq(irq)    \
+       for (irq = NICVF_INTR_ID_CQ; irq < NICVF_INTR_ID_SQ; irq++)
+#define        for_each_sq_irq(irq)    \
+       for (irq = NICVF_INTR_ID_SQ; irq < NICVF_INTR_ID_RBDR; irq++)
+#define        for_each_rbdr_irq(irq)  \
+       for (irq = NICVF_INTR_ID_RBDR; irq < NICVF_INTR_ID_MISC; irq++)
+
+#define RBDR_SIZE0             0ULL /* 8K entries */
+#define RBDR_SIZE1             1ULL /* 16K entries */
+#define RBDR_SIZE2             2ULL /* 32K entries */
+#define RBDR_SIZE3             3ULL /* 64K entries */
+#define RBDR_SIZE4             4ULL /* 128K entries */
+#define RBDR_SIZE5             5ULL /* 256K entries */
+#define RBDR_SIZE6             6ULL /* 512K entries */
+
+#define SND_QUEUE_SIZE0                0ULL /* 1K entries */
+#define SND_QUEUE_SIZE1                1ULL /* 2K entries */
+#define SND_QUEUE_SIZE2                2ULL /* 4K entries */
+#define SND_QUEUE_SIZE3                3ULL /* 8K entries */
+#define SND_QUEUE_SIZE4                4ULL /* 16K entries */
+#define SND_QUEUE_SIZE5                5ULL /* 32K entries */
+#define SND_QUEUE_SIZE6                6ULL /* 64K entries */
+
+#define CMP_QUEUE_SIZE0                0ULL /* 1K entries */
+#define CMP_QUEUE_SIZE1                1ULL /* 2K entries */
+#define CMP_QUEUE_SIZE2                2ULL /* 4K entries */
+#define CMP_QUEUE_SIZE3                3ULL /* 8K entries */
+#define CMP_QUEUE_SIZE4                4ULL /* 16K entries */
+#define CMP_QUEUE_SIZE5                5ULL /* 32K entries */
+#define CMP_QUEUE_SIZE6                6ULL /* 64K entries */
+
+/* Default queue counts per QS, their lengths and threshold values */
+#define RBDR_CNT               1
+#define RCV_QUEUE_CNT          8
+#define SND_QUEUE_CNT          8
+#define CMP_QUEUE_CNT          8 /* Max of RCV and SND qcount */
+
+#define SND_QSIZE              SND_QUEUE_SIZE2
+#define SND_QUEUE_LEN          (1ULL << (SND_QSIZE + 10))
+#define MAX_SND_QUEUE_LEN      (1ULL << (SND_QUEUE_SIZE6 + 10))
+#define SND_QUEUE_THRESH       2ULL
+#define MIN_SQ_DESC_PER_PKT_XMIT       2
+/* 1 because timestamping is not enabled; would be 2 otherwise */
+#define MAX_CQE_PER_PKT_XMIT           1
+
+/* Keep CQ and SQ sizes the same; if timestamping
+ * is enabled this relation will change.
+ */
+#define CMP_QSIZE              CMP_QUEUE_SIZE2
+#define CMP_QUEUE_LEN          (1ULL << (CMP_QSIZE + 10))
+#define CMP_QUEUE_CQE_THRESH   0
+#define CMP_QUEUE_TIMER_THRESH 220 /* 10 usec */
+
+#define RBDR_SIZE              RBDR_SIZE0
+#define RCV_BUF_COUNT          (1ULL << (RBDR_SIZE + 13))
+#define MAX_RCV_BUF_COUNT      (1ULL << (RBDR_SIZE6 + 13))
+#define RBDR_THRESH            (RCV_BUF_COUNT / 2)
+#define DMA_BUFFER_LEN         2048 /* In multiples of 128 bytes */
+#define RCV_FRAG_LEN   (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
+                        SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \
+                        (NICVF_RCV_BUF_ALIGN_BYTES * 2))
+#define RCV_DATA_OFFSET                NICVF_RCV_BUF_ALIGN_BYTES
+
+#define MAX_CQES_FOR_TX                ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
+                                MAX_CQE_PER_PKT_XMIT)
+/* Calculate the number of CQEs to reserve for all SQEs.
+ * It is expressed as a 1/256th level of the CQ size;
+ * '+ 1' accounts for pipelining.
+ */
+#define RQ_CQ_DROP             ((256 / (CMP_QUEUE_LEN / \
+                                (CMP_QUEUE_LEN - MAX_CQES_FOR_TX))) + 1)
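+
+/*
+ * Worked example (illustrative only) with the defaults above:
+ *     SND_QUEUE_LEN = 1 << (SND_QUEUE_SIZE2 + 10) = 4096 entries
+ *     CMP_QUEUE_LEN = 1 << (CMP_QUEUE_SIZE2 + 10) = 4096 entries
+ *     MAX_CQES_FOR_TX = (4096 / 2) * 1 = 2048
+ *     RQ_CQ_DROP = (256 / (4096 / (4096 - 2048))) + 1 = 129
+ * i.e. with these defaults about half of the CQ is reserved for transmit
+ * completions and the RX drop level works out to 129/256 of the CQ size.
+ */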
+
+/* Descriptor size in bytes */
+#define SND_QUEUE_DESC_SIZE    16
+#define CMP_QUEUE_DESC_SIZE    512
+
+/* Buffer / descriptor alignments */
+#define NICVF_RCV_BUF_ALIGN            7
+#define NICVF_RCV_BUF_ALIGN_BYTES      (1ULL << NICVF_RCV_BUF_ALIGN)
+#define NICVF_CQ_BASE_ALIGN_BYTES      512  /* 9 bits */
+#define NICVF_SQ_BASE_ALIGN_BYTES      128  /* 7 bits */
+
+#define NICVF_ALIGNED_ADDR(ADDR, ALIGN_BYTES)  ALIGN(ADDR, ALIGN_BYTES)
+#define NICVF_ADDR_ALIGN_LEN(ADDR, BYTES)\
+       (NICVF_ALIGNED_ADDR(ADDR, BYTES) - BYTES)
+#define NICVF_RCV_BUF_ALIGN_LEN(X)\
+       (NICVF_ALIGNED_ADDR(X, NICVF_RCV_BUF_ALIGN_BYTES) - X)
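+
+/*
+ * Illustrative note: with NICVF_RCV_BUF_ALIGN_BYTES = 128,
+ * NICVF_RCV_BUF_ALIGN_LEN(X) is simply the padding needed to bring X up to
+ * the next 128-byte boundary (0 if X is already aligned), e.g.
+ *     NICVF_RCV_BUF_ALIGN_LEN(0x1010) = 0x1080 - 0x1010 = 0x70 bytes
+ */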
+
+/* Queue enable/disable */
+#define NICVF_SQ_EN            BIT_ULL(19)
+
+/* Queue reset */
+#define NICVF_CQ_RESET         BIT_ULL(41)
+#define NICVF_SQ_RESET         BIT_ULL(17)
+#define NICVF_RBDR_RESET       BIT_ULL(43)
+
+enum CQ_RX_ERRLVL_E {
+       CQ_ERRLVL_MAC,
+       CQ_ERRLVL_L2,
+       CQ_ERRLVL_L3,
+       CQ_ERRLVL_L4,
+};
+
+enum CQ_RX_ERROP_E {
+       CQ_RX_ERROP_RE_NONE = 0x0,
+       CQ_RX_ERROP_RE_PARTIAL = 0x1,
+       CQ_RX_ERROP_RE_JABBER = 0x2,
+       CQ_RX_ERROP_RE_FCS = 0x7,
+       CQ_RX_ERROP_RE_TERMINATE = 0x9,
+       CQ_RX_ERROP_RE_RX_CTL = 0xb,
+       CQ_RX_ERROP_PREL2_ERR = 0x1f,
+       CQ_RX_ERROP_L2_FRAGMENT = 0x20,
+       CQ_RX_ERROP_L2_OVERRUN = 0x21,
+       CQ_RX_ERROP_L2_PFCS = 0x22,
+       CQ_RX_ERROP_L2_PUNY = 0x23,
+       CQ_RX_ERROP_L2_MAL = 0x24,
+       CQ_RX_ERROP_L2_OVERSIZE = 0x25,
+       CQ_RX_ERROP_L2_UNDERSIZE = 0x26,
+       CQ_RX_ERROP_L2_LENMISM = 0x27,
+       CQ_RX_ERROP_L2_PCLP = 0x28,
+       CQ_RX_ERROP_IP_NOT = 0x41,
+       CQ_RX_ERROP_IP_CSUM_ERR = 0x42,
+       CQ_RX_ERROP_IP_MAL = 0x43,
+       CQ_RX_ERROP_IP_MALD = 0x44,
+       CQ_RX_ERROP_IP_HOP = 0x45,
+       CQ_RX_ERROP_L3_ICRC = 0x46,
+       CQ_RX_ERROP_L3_PCLP = 0x47,
+       CQ_RX_ERROP_L4_MAL = 0x61,
+       CQ_RX_ERROP_L4_CHK = 0x62,
+       CQ_RX_ERROP_UDP_LEN = 0x63,
+       CQ_RX_ERROP_L4_PORT = 0x64,
+       CQ_RX_ERROP_TCP_FLAG = 0x65,
+       CQ_RX_ERROP_TCP_OFFSET = 0x66,
+       CQ_RX_ERROP_L4_PCLP = 0x67,
+       CQ_RX_ERROP_RBDR_TRUNC = 0x70,
+};
+
+enum CQ_TX_ERROP_E {
+       CQ_TX_ERROP_GOOD = 0x0,
+       CQ_TX_ERROP_DESC_FAULT = 0x10,
+       CQ_TX_ERROP_HDR_CONS_ERR = 0x11,
+       CQ_TX_ERROP_SUBDC_ERR = 0x12,
+       CQ_TX_ERROP_IMM_SIZE_OFLOW = 0x80,
+       CQ_TX_ERROP_DATA_SEQUENCE_ERR = 0x81,
+       CQ_TX_ERROP_MEM_SEQUENCE_ERR = 0x82,
+       CQ_TX_ERROP_LOCK_VIOL = 0x83,
+       CQ_TX_ERROP_DATA_FAULT = 0x84,
+       CQ_TX_ERROP_TSTMP_CONFLICT = 0x85,
+       CQ_TX_ERROP_TSTMP_TIMEOUT = 0x86,
+       CQ_TX_ERROP_MEM_FAULT = 0x87,
+       CQ_TX_ERROP_CK_OVERLAP = 0x88,
+       CQ_TX_ERROP_CK_OFLOW = 0x89,
+       CQ_TX_ERROP_ENUM_LAST = 0x8a,
+};
+
+struct cmp_queue_stats {
+       struct tx_stats {
+               u64 good;
+               u64 desc_fault;
+               u64 hdr_cons_err;
+               u64 subdesc_err;
+               u64 imm_size_oflow;
+               u64 data_seq_err;
+               u64 mem_seq_err;
+               u64 lock_viol;
+               u64 data_fault;
+               u64 tstmp_conflict;
+               u64 tstmp_timeout;
+               u64 mem_fault;
+               u64 csum_overlap;
+               u64 csum_overflow;
+       } tx;
+} ____cacheline_aligned_in_smp;
+
+enum RQ_SQ_STATS {
+       RQ_SQ_STATS_OCTS,
+       RQ_SQ_STATS_PKTS,
+};
+
+struct rx_tx_queue_stats {
+       u64     bytes;
+       u64     pkts;
+} ____cacheline_aligned_in_smp;
+
+struct q_desc_mem {
+       dma_addr_t      dma;
+       u64             size;
+       u16             q_len;
+       dma_addr_t      phys_base;
+       void            *base;
+       void            *unalign_base;
+};
+
+struct rbdr {
+       bool            enable;
+       u32             dma_size;
+       u32             frag_len;
+       u32             thresh;         /* Threshold level for interrupt */
+       void            *desc;
+       u32             head;
+       u32             tail;
+       struct q_desc_mem   dmem;
+} ____cacheline_aligned_in_smp;
+
+struct rcv_queue {
+       bool            enable;
+       struct  rbdr    *rbdr_start;
+       struct  rbdr    *rbdr_cont;
+       bool            en_tcp_reassembly;
+       u8              cq_qs;  /* CQ's QS to which this RQ is assigned */
+       u8              cq_idx; /* CQ index (0 to 7) in the QS */
+       u8              cont_rbdr_qs;      /* Continue buffer ptrs - QS num */
+       u8              cont_qs_rbdr_idx;  /* RBDR idx in the cont QS */
+       u8              start_rbdr_qs;     /* First buffer ptrs - QS num */
+       u8              start_qs_rbdr_idx; /* RBDR idx in the above QS */
+       u8              caching;
+       struct          rx_tx_queue_stats stats;
+} ____cacheline_aligned_in_smp;
+
+struct cmp_queue {
+       bool            enable;
+       u16             thresh;
+       spinlock_t      lock;  /* lock to serialize processing CQEs */
+       void            *desc;
+       struct q_desc_mem   dmem;
+       struct cmp_queue_stats  stats;
+       int             irq;
+} ____cacheline_aligned_in_smp;
+
+struct snd_queue {
+       bool            enable;
+       u8              cq_qs;  /* CQ's QS to which this SQ is pointing */
+       u8              cq_idx; /* CQ index (0 to 7) in the above QS */
+       u16             thresh;
+       atomic_t        free_cnt;
+       u32             head;
+       u32             tail;
+       u64             *skbuff;
+       void            *desc;
+
+       struct q_desc_mem   dmem;
+       struct rx_tx_queue_stats stats;
+} ____cacheline_aligned_in_smp;
+
+struct queue_set {
+       bool            enable;
+       bool            be_en;
+       u8              vnic_id;
+       u8              rq_cnt;
+       u8              cq_cnt;
+       u64             cq_len;
+       u8              sq_cnt;
+       u64             sq_len;
+       u8              rbdr_cnt;
+       u64             rbdr_len;
+       struct  rcv_queue       rq[MAX_RCV_QUEUES_PER_QS];
+       struct  cmp_queue       cq[MAX_CMP_QUEUES_PER_QS];
+       struct  snd_queue       sq[MAX_SND_QUEUES_PER_QS];
+       struct  rbdr            rbdr[MAX_RCV_BUF_DESC_RINGS_PER_QS];
+} ____cacheline_aligned_in_smp;
+
+#define GET_RBDR_DESC(RING, idx)\
+               (&(((struct rbdr_entry_t *)((RING)->desc))[idx]))
+#define GET_SQ_DESC(RING, idx)\
+               (&(((struct sq_hdr_subdesc *)((RING)->desc))[idx]))
+#define GET_CQ_DESC(RING, idx)\
+               (&(((union cq_desc_t *)((RING)->desc))[idx]))
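+
+/*
+ * Illustrative usage (a sketch): the GET_*_DESC() helpers simply index the
+ * queue's descriptor array by casting the untyped ring base, e.g. the CQ
+ * entry at the caller-maintained head index could be fetched as
+ *     union cq_desc_t *desc = GET_CQ_DESC(cq, head);
+ * where 'cq' is a struct cmp_queue and 'head' a hypothetical ring index.
+ */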
+
+/* CQ status bits */
+#define        CQ_WR_FULL      BIT(26)
+#define        CQ_WR_DISABLE   BIT(25)
+#define        CQ_WR_FAULT     BIT(24)
+#define        CQ_CQE_COUNT    (0xFFFF << 0)
+
+#define        CQ_ERR_MASK     (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT)
+
+void nicvf_config_vlan_stripping(struct nicvf *nic,
+                                netdev_features_t features);
+int nicvf_set_qset_resources(struct nicvf *nic);
+int nicvf_config_data_transfer(struct nicvf *nic, bool enable);
+void nicvf_qset_config(struct nicvf *nic, bool enable);
+void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
+                           int qidx, bool enable);
+
+void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx);
+void nicvf_sq_disable(struct nicvf *nic, int qidx);
+void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt);
+void nicvf_sq_free_used_descs(struct net_device *netdev,
+                             struct snd_queue *sq, int qidx);
+int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb);
+
+struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
+void nicvf_rbdr_task(unsigned long data);
+void nicvf_rbdr_work(struct work_struct *work);
+
+void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx);
+void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx);
+void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx);
+int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx);
+
+/* Register access APIs */
+void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val);
+u64  nicvf_reg_read(struct nicvf *nic, u64 offset);
+void nicvf_qset_reg_write(struct nicvf *nic, u64 offset, u64 val);
+u64 nicvf_qset_reg_read(struct nicvf *nic, u64 offset);
+void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
+                          u64 qidx, u64 val);
+u64  nicvf_queue_reg_read(struct nicvf *nic,
+                         u64 offset, u64 qidx);
+
+/* Stats */
+void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx);
+void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx);
+int nicvf_check_cqe_rx_errs(struct nicvf *nic,
+                           struct cmp_queue *cq, struct cqe_rx_t *cqe_rx);
+int nicvf_check_cqe_tx_errs(struct nicvf *nic,
+                           struct cmp_queue *cq, struct cqe_send_t *cqe_tx);
+#endif /* NICVF_QUEUES_H */
diff --git a/sys/dev/vnic/q_struct.h b/sys/dev/vnic/q_struct.h
new file mode 100644 (file)
index 0000000..6d33c7b
--- /dev/null
@@ -0,0 +1,719 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef Q_STRUCT_H
+#define Q_STRUCT_H
+
+/* Load transaction types for reading segment bytes specified by
+ * NIC_SEND_GATHER_S[LD_TYPE].
+ */
+enum nic_send_ld_type_e {
+       NIC_SEND_LD_TYPE_E_LDD = 0x0,
+       NIC_SEND_LD_TYPE_E_LDT = 0x1,
+       NIC_SEND_LD_TYPE_E_LDWB = 0x2,
+       NIC_SEND_LD_TYPE_E_ENUM_LAST = 0x3,
+};
+
+enum ether_type_algorithm {
+       ETYPE_ALG_NONE = 0x0,
+       ETYPE_ALG_SKIP = 0x1,
+       ETYPE_ALG_ENDPARSE = 0x2,
+       ETYPE_ALG_VLAN = 0x3,
+       ETYPE_ALG_VLAN_STRIP = 0x4,
+};
+
+enum layer3_type {
+       L3TYPE_NONE = 0x00,
+       L3TYPE_GRH = 0x01,
+       L3TYPE_IPV4 = 0x04,
+       L3TYPE_IPV4_OPTIONS = 0x05,
+       L3TYPE_IPV6 = 0x06,
+       L3TYPE_IPV6_OPTIONS = 0x07,
+       L3TYPE_ET_STOP = 0x0D,
+       L3TYPE_OTHER = 0x0E,
+};
+
+enum layer4_type {
+       L4TYPE_NONE = 0x00,
+       L4TYPE_IPSEC_ESP = 0x01,
+       L4TYPE_IPFRAG = 0x02,
+       L4TYPE_IPCOMP = 0x03,
+       L4TYPE_TCP = 0x04,
+       L4TYPE_UDP = 0x05,
+       L4TYPE_SCTP = 0x06,
+       L4TYPE_GRE = 0x07,
+       L4TYPE_ROCE_BTH = 0x08,
+       L4TYPE_OTHER = 0x0E,
+};
+
+/* CPI and RSSI configuration */
+enum cpi_algorithm_type {
+       CPI_ALG_NONE = 0x0,
+       CPI_ALG_VLAN = 0x1,
+       CPI_ALG_VLAN16 = 0x2,
+       CPI_ALG_DIFF = 0x3,
+};
+
+enum rss_algorithm_type {
+       RSS_ALG_NONE = 0x00,
+       RSS_ALG_PORT = 0x01,
+       RSS_ALG_IP = 0x02,
+       RSS_ALG_TCP_IP = 0x03,
+       RSS_ALG_UDP_IP = 0x04,
+       RSS_ALG_SCTP_IP = 0x05,
+       RSS_ALG_GRE_IP = 0x06,
+       RSS_ALG_ROCE = 0x07,
+};
+
+enum rss_hash_cfg {
+       RSS_HASH_L2ETC = 0x00,
+       RSS_HASH_IP = 0x01,
+       RSS_HASH_TCP = 0x02,
+       RSS_HASH_TCP_SYN_DIS = 0x03,
+       RSS_HASH_UDP = 0x04,
+       RSS_HASH_L4ETC = 0x05,
+       RSS_HASH_ROCE = 0x06,
+       RSS_L3_BIDI = 0x07,
+       RSS_L4_BIDI = 0x08,
+};
+
+/* Completion queue entry types */
+enum cqe_type {
+       CQE_TYPE_INVALID = 0x0,
+       CQE_TYPE_RX = 0x2,
+       CQE_TYPE_RX_SPLIT = 0x3,
+       CQE_TYPE_RX_TCP = 0x4,
+       CQE_TYPE_SEND = 0x8,
+       CQE_TYPE_SEND_PTP = 0x9,
+};
+
+enum cqe_rx_tcp_status {
+       CQE_RX_STATUS_VALID_TCP_CNXT = 0x00,
+       CQE_RX_STATUS_INVALID_TCP_CNXT = 0x0F,
+};
+
+enum cqe_send_status {
+       CQE_SEND_STATUS_GOOD = 0x00,
+       CQE_SEND_STATUS_DESC_FAULT = 0x01,
+       CQE_SEND_STATUS_HDR_CONS_ERR = 0x11,
+       CQE_SEND_STATUS_SUBDESC_ERR = 0x12,
+       CQE_SEND_STATUS_IMM_SIZE_OFLOW = 0x80,
+       CQE_SEND_STATUS_CRC_SEQ_ERR = 0x81,
+       CQE_SEND_STATUS_DATA_SEQ_ERR = 0x82,
+       CQE_SEND_STATUS_MEM_SEQ_ERR = 0x83,
+       CQE_SEND_STATUS_LOCK_VIOL = 0x84,
+       CQE_SEND_STATUS_LOCK_UFLOW = 0x85,
+       CQE_SEND_STATUS_DATA_FAULT = 0x86,
+       CQE_SEND_STATUS_TSTMP_CONFLICT = 0x87,
+       CQE_SEND_STATUS_TSTMP_TIMEOUT = 0x88,
+       CQE_SEND_STATUS_MEM_FAULT = 0x89,
+       CQE_SEND_STATUS_CSUM_OVERLAP = 0x8A,
+       CQE_SEND_STATUS_CSUM_OVERFLOW = 0x8B,
+};
+
+enum cqe_rx_tcp_end_reason {
+       CQE_RX_TCP_END_FIN_FLAG_DET = 0,
+       CQE_RX_TCP_END_INVALID_FLAG = 1,
+       CQE_RX_TCP_END_TIMEOUT = 2,
+       CQE_RX_TCP_END_OUT_OF_SEQ = 3,
+       CQE_RX_TCP_END_PKT_ERR = 4,
+       CQE_RX_TCP_END_QS_DISABLED = 0x0F,
+};
+
+/* Packet protocol level error enumeration */
+enum cqe_rx_err_level {
+       CQE_RX_ERRLVL_RE = 0x0,
+       CQE_RX_ERRLVL_L2 = 0x1,
+       CQE_RX_ERRLVL_L3 = 0x2,
+       CQE_RX_ERRLVL_L4 = 0x3,
+};
+
+/* Packet protocol level error type enumeration */
+enum cqe_rx_err_opcode {
+       CQE_RX_ERR_RE_NONE = 0x0,
+       CQE_RX_ERR_RE_PARTIAL = 0x1,
+       CQE_RX_ERR_RE_JABBER = 0x2,
+       CQE_RX_ERR_RE_FCS = 0x7,
+       CQE_RX_ERR_RE_TERMINATE = 0x9,
+       CQE_RX_ERR_RE_RX_CTL = 0xb,
+       CQE_RX_ERR_PREL2_ERR = 0x1f,
+       CQE_RX_ERR_L2_FRAGMENT = 0x20,
+       CQE_RX_ERR_L2_OVERRUN = 0x21,
+       CQE_RX_ERR_L2_PFCS = 0x22,
+       CQE_RX_ERR_L2_PUNY = 0x23,
+       CQE_RX_ERR_L2_MAL = 0x24,
+       CQE_RX_ERR_L2_OVERSIZE = 0x25,
+       CQE_RX_ERR_L2_UNDERSIZE = 0x26,
+       CQE_RX_ERR_L2_LENMISM = 0x27,
+       CQE_RX_ERR_L2_PCLP = 0x28,
+       CQE_RX_ERR_IP_NOT = 0x41,
+       CQE_RX_ERR_IP_CHK = 0x42,
+       CQE_RX_ERR_IP_MAL = 0x43,
+       CQE_RX_ERR_IP_MALD = 0x44,
+       CQE_RX_ERR_IP_HOP = 0x45,
+       CQE_RX_ERR_L3_ICRC = 0x46,
+       CQE_RX_ERR_L3_PCLP = 0x47,
+       CQE_RX_ERR_L4_MAL = 0x61,
+       CQE_RX_ERR_L4_CHK = 0x62,
+       CQE_RX_ERR_UDP_LEN = 0x63,
+       CQE_RX_ERR_L4_PORT = 0x64,
+       CQE_RX_ERR_TCP_FLAG = 0x65,
+       CQE_RX_ERR_TCP_OFFSET = 0x66,
+       CQE_RX_ERR_L4_PCLP = 0x67,
+       CQE_RX_ERR_RBDR_TRUNC = 0x70,
+};
+
+struct cqe_rx_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64   cqe_type:4; /* W0 */
+       u64   stdn_fault:1;
+       u64   rsvd0:1;
+       u64   rq_qs:7;
+       u64   rq_idx:3;
+       u64   rsvd1:12;
+       u64   rss_alg:4;
+       u64   rsvd2:4;
+       u64   rb_cnt:4;
+       u64   vlan_found:1;
+       u64   vlan_stripped:1;
+       u64   vlan2_found:1;
+       u64   vlan2_stripped:1;
+       u64   l4_type:4;
+       u64   l3_type:4;
+       u64   l2_present:1;
+       u64   err_level:3;
+       u64   err_opcode:8;
+
+       u64   pkt_len:16; /* W1 */
+       u64   l2_ptr:8;
+       u64   l3_ptr:8;
+       u64   l4_ptr:8;
+       u64   cq_pkt_len:8;
+       u64   align_pad:3;
+       u64   rsvd3:1;
+       u64   chan:12;
+
+       u64   rss_tag:32; /* W2 */
+       u64   vlan_tci:16;
+       u64   vlan_ptr:8;
+       u64   vlan2_ptr:8;
+
+       u64   rb3_sz:16; /* W3 */
+       u64   rb2_sz:16;
+       u64   rb1_sz:16;
+       u64   rb0_sz:16;
+
+       u64   rb7_sz:16; /* W4 */
+       u64   rb6_sz:16;
+       u64   rb5_sz:16;
+       u64   rb4_sz:16;
+
+       u64   rb11_sz:16; /* W5 */
+       u64   rb10_sz:16;
+       u64   rb9_sz:16;
+       u64   rb8_sz:16;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64   err_opcode:8;
+       u64   err_level:3;
+       u64   l2_present:1;
+       u64   l3_type:4;
+       u64   l4_type:4;
+       u64   vlan2_stripped:1;
+       u64   vlan2_found:1;
+       u64   vlan_stripped:1;
+       u64   vlan_found:1;
+       u64   rb_cnt:4;
+       u64   rsvd2:4;
+       u64   rss_alg:4;
+       u64   rsvd1:12;
+       u64   rq_idx:3;
+       u64   rq_qs:7;
+       u64   rsvd0:1;
+       u64   stdn_fault:1;
+       u64   cqe_type:4; /* W0 */
+       u64   chan:12;
+       u64   rsvd3:1;
+       u64   align_pad:3;
+       u64   cq_pkt_len:8;
+       u64   l4_ptr:8;
+       u64   l3_ptr:8;
+       u64   l2_ptr:8;
+       u64   pkt_len:16; /* W1 */
+       u64   vlan2_ptr:8;
+       u64   vlan_ptr:8;
+       u64   vlan_tci:16;
+       u64   rss_tag:32; /* W2 */
+       u64   rb0_sz:16;
+       u64   rb1_sz:16;
+       u64   rb2_sz:16;
+       u64   rb3_sz:16; /* W3 */
+       u64   rb4_sz:16;
+       u64   rb5_sz:16;
+       u64   rb6_sz:16;
+       u64   rb7_sz:16; /* W4 */
+       u64   rb8_sz:16;
+       u64   rb9_sz:16;
+       u64   rb10_sz:16;
+       u64   rb11_sz:16; /* W5 */
+#endif
+       u64   rb0_ptr:64;
+       u64   rb1_ptr:64;
+       u64   rb2_ptr:64;
+       u64   rb3_ptr:64;
+       u64   rb4_ptr:64;
+       u64   rb5_ptr:64;
+       u64   rb6_ptr:64;
+       u64   rb7_ptr:64;
+       u64   rb8_ptr:64;
+       u64   rb9_ptr:64;
+       u64   rb10_ptr:64;
+       u64   rb11_ptr:64;
+};
+
+struct cqe_rx_tcp_err_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64   cqe_type:4; /* W0 */
+       u64   rsvd0:60;
+
+       u64   rsvd1:4; /* W1 */
+       u64   partial_first:1;
+       u64   rsvd2:27;
+       u64   rbdr_bytes:8;
+       u64   rsvd3:24;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64   rsvd0:60;
+       u64   cqe_type:4;
+
+       u64   rsvd3:24;
+       u64   rbdr_bytes:8;
+       u64   rsvd2:27;
+       u64   partial_first:1;
+       u64   rsvd1:4;
+#endif
+};
+
+struct cqe_rx_tcp_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64   cqe_type:4; /* W0 */
+       u64   rsvd0:52;
+       u64   cq_tcp_status:8;
+
+       u64   rsvd1:32; /* W1 */
+       u64   tcp_cntx_bytes:8;
+       u64   rsvd2:8;
+       u64   tcp_err_bytes:16;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64   cq_tcp_status:8;
+       u64   rsvd0:52;
+       u64   cqe_type:4; /* W0 */
+
+       u64   tcp_err_bytes:16;
+       u64   rsvd2:8;
+       u64   tcp_cntx_bytes:8;
+       u64   rsvd1:32; /* W1 */
+#endif
+};
+
+struct cqe_send_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64   cqe_type:4; /* W0 */
+       u64   rsvd0:4;
+       u64   sqe_ptr:16;
+       u64   rsvd1:4;
+       u64   rsvd2:10;
+       u64   sq_qs:7;
+       u64   sq_idx:3;
+       u64   rsvd3:8;
+       u64   send_status:8;
+
+       u64   ptp_timestamp:64; /* W1 */
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64   send_status:8;
+       u64   rsvd3:8;
+       u64   sq_idx:3;
+       u64   sq_qs:7;
+       u64   rsvd2:10;
+       u64   rsvd1:4;
+       u64   sqe_ptr:16;
+       u64   rsvd0:4;
+       u64   cqe_type:4; /* W0 */
+
+       u64   ptp_timestamp:64; /* W1 */
+#endif
+};
+
+union cq_desc_t {
+       u64    u[64];
+       struct cqe_send_t snd_hdr;
+       struct cqe_rx_t rx_hdr;
+       struct cqe_rx_tcp_t rx_tcp_hdr;
+       struct cqe_rx_tcp_err_t rx_tcp_err_hdr;
+};
+
+struct rbdr_entry_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64   rsvd0:15;
+       u64   buf_addr:42;
+       u64   cache_align:7;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64   cache_align:7;
+       u64   buf_addr:42;
+       u64   rsvd0:15;
+#endif
+};
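+
+/*
+ * Illustrative note (an assumption based on the field layout above): the
+ * low 7 bits of an RBDR entry are the cache-line alignment, so a receive
+ * buffer must start on a 128-byte boundary and its physical address would
+ * be stored shifted right by NICVF_RCV_BUF_ALIGN, e.g.
+ *     desc->buf_addr = buf_phys_addr >> NICVF_RCV_BUF_ALIGN;
+ */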
+
+/* TCP reassembly context */
+struct rbe_tcp_cnxt_t {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64   tcp_pkt_cnt:12;
+       u64   rsvd1:4;
+       u64   align_hdr_bytes:4;
+       u64   align_ptr_bytes:4;
+       u64   ptr_bytes:16;
+       u64   rsvd2:24;
+       u64   cqe_type:4;
+       u64   rsvd0:54;
+       u64   tcp_end_reason:2;
+       u64   tcp_status:4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64   tcp_status:4;
+       u64   tcp_end_reason:2;
+       u64   rsvd0:54;
+       u64   cqe_type:4;
+       u64   rsvd2:24;
+       u64   ptr_bytes:16;
+       u64   align_ptr_bytes:4;
+       u64   align_hdr_bytes:4;
+       u64   rsvd1:4;
+       u64   tcp_pkt_cnt:12;
+#endif
+};
+
+/* Always Big endian */
+struct rx_hdr_t {
+       u64   opaque:32;
+       u64   rss_flow:8;
+       u64   skip_length:6;
+       u64   disable_rss:1;
+       u64   disable_tcp_reassembly:1;
+       u64   nodrop:1;
+       u64   dest_alg:2;
+       u64   rsvd0:2;
+       u64   dest_rq:11;
+};
+
+enum send_l4_csum_type {
+       SEND_L4_CSUM_DISABLE = 0x00,
+       SEND_L4_CSUM_UDP = 0x01,
+       SEND_L4_CSUM_TCP = 0x02,
+       SEND_L4_CSUM_SCTP = 0x03,
+};
+
+enum send_crc_alg {
+       SEND_CRCALG_CRC32 = 0x00,
+       SEND_CRCALG_CRC32C = 0x01,
+       SEND_CRCALG_ICRC = 0x02,
+};
+
+enum send_load_type {
+       SEND_LD_TYPE_LDD = 0x00,
+       SEND_LD_TYPE_LDT = 0x01,
+       SEND_LD_TYPE_LDWB = 0x02,
+};
+
+enum send_mem_alg_type {
+       SEND_MEMALG_SET = 0x00,
+       SEND_MEMALG_ADD = 0x08,
+       SEND_MEMALG_SUB = 0x09,
+       SEND_MEMALG_ADDLEN = 0x0A,
+       SEND_MEMALG_SUBLEN = 0x0B,
+};
+
+enum send_mem_dsz_type {
+       SEND_MEMDSZ_B64 = 0x00,
+       SEND_MEMDSZ_B32 = 0x01,
+       SEND_MEMDSZ_B8 = 0x03,
+};
+
+enum sq_subdesc_type {
+       SQ_DESC_TYPE_INVALID = 0x00,
+       SQ_DESC_TYPE_HEADER = 0x01,
+       SQ_DESC_TYPE_CRC = 0x02,
+       SQ_DESC_TYPE_IMMEDIATE = 0x03,
+       SQ_DESC_TYPE_GATHER = 0x04,
+       SQ_DESC_TYPE_MEMORY = 0x05,
+};
+
+struct sq_crc_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64    rsvd1:32;
+       u64    crc_ival:32;
+       u64    subdesc_type:4;
+       u64    crc_alg:2;
+       u64    rsvd0:10;
+       u64    crc_insert_pos:16;
+       u64    hdr_start:16;
+       u64    crc_len:16;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64    crc_len:16;
+       u64    hdr_start:16;
+       u64    crc_insert_pos:16;
+       u64    rsvd0:10;
+       u64    crc_alg:2;
+       u64    subdesc_type:4;
+       u64    crc_ival:32;
+       u64    rsvd1:32;
+#endif
+};
+
+struct sq_gather_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64    subdesc_type:4; /* W0 */
+       u64    ld_type:2;
+       u64    rsvd0:42;
+       u64    size:16;
+
+       u64    rsvd1:15; /* W1 */
+       u64    addr:49;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64    size:16;
+       u64    rsvd0:42;
+       u64    ld_type:2;
+       u64    subdesc_type:4; /* W0 */
+
+       u64    addr:49;
+       u64    rsvd1:15; /* W1 */
+#endif
+};
+
+/* SQ immediate subdescriptor */
+struct sq_imm_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64    subdesc_type:4; /* W0 */
+       u64    rsvd0:46;
+       u64    len:14;
+
+       u64    data:64; /* W1 */
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64    len:14;
+       u64    rsvd0:46;
+       u64    subdesc_type:4; /* W0 */
+
+       u64    data:64; /* W1 */
+#endif
+};
+
+struct sq_mem_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64    subdesc_type:4; /* W0 */
+       u64    mem_alg:4;
+       u64    mem_dsz:2;
+       u64    wmem:1;
+       u64    rsvd0:21;
+       u64    offset:32;
+
+       u64    rsvd1:15; /* W1 */
+       u64    addr:49;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64    offset:32;
+       u64    rsvd0:21;
+       u64    wmem:1;
+       u64    mem_dsz:2;
+       u64    mem_alg:4;
+       u64    subdesc_type:4; /* W0 */
+
+       u64    addr:49;
+       u64    rsvd1:15; /* W1 */
+#endif
+};
+
+struct sq_hdr_subdesc {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64    subdesc_type:4;
+       u64    tso:1;
+       u64    post_cqe:1; /* Post CQE on no error also */
+       u64    dont_send:1;
+       u64    tstmp:1;
+       u64    subdesc_cnt:8;
+       u64    csum_l4:2;
+       u64    csum_l3:1;
+       u64    rsvd0:5;
+       u64    l4_offset:8;
+       u64    l3_offset:8;
+       u64    rsvd1:4;
+       u64    tot_len:20; /* W0 */
+
+       u64    tso_sdc_cont:8;
+       u64    tso_sdc_first:8;
+       u64    tso_l4_offset:8;
+       u64    tso_flags_last:12;
+       u64    tso_flags_first:12;
+       u64    rsvd2:2;
+       u64    tso_max_paysize:14; /* W1 */
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64    tot_len:20;
+       u64    rsvd1:4;
+       u64    l3_offset:8;
+       u64    l4_offset:8;
+       u64    rsvd0:5;
+       u64    csum_l3:1;
+       u64    csum_l4:2;
+       u64    subdesc_cnt:8;
+       u64    tstmp:1;
+       u64    dont_send:1;
+       u64    post_cqe:1; /* Post CQE on no error also */
+       u64    tso:1;
+       u64    subdesc_type:4; /* W0 */
+
+       u64    tso_max_paysize:14;
+       u64    rsvd2:2;
+       u64    tso_flags_first:12;
+       u64    tso_flags_last:12;
+       u64    tso_l4_offset:8;
+       u64    tso_sdc_first:8;
+       u64    tso_sdc_cont:8; /* W1 */
+#endif
+};
+
+/* Queue config register formats */
+struct rq_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64 reserved_2_63:62;
+       u64 ena:1;
+       u64 tcp_ena:1;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64 tcp_ena:1;
+       u64 ena:1;
+       u64 reserved_2_63:62;
+#endif
+};
+
+struct cq_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64 reserved_43_63:21;
+       u64 ena:1;
+       u64 reset:1;
+       u64 caching:1;
+       u64 reserved_35_39:5;
+       u64 qsize:3;
+       u64 reserved_25_31:7;
+       u64 avg_con:9;
+       u64 reserved_0_15:16;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64 reserved_0_15:16;
+       u64 avg_con:9;
+       u64 reserved_25_31:7;
+       u64 qsize:3;
+       u64 reserved_35_39:5;
+       u64 caching:1;
+       u64 reset:1;
+       u64 ena:1;
+       u64 reserved_43_63:21;
+#endif
+};
+
+struct sq_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64 reserved_20_63:44;
+       u64 ena:1;
+       u64 reserved_18_18:1;
+       u64 reset:1;
+       u64 ldwb:1;
+       u64 reserved_11_15:5;
+       u64 qsize:3;
+       u64 reserved_3_7:5;
+       u64 tstmp_bgx_intf:3;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64 tstmp_bgx_intf:3;
+       u64 reserved_3_7:5;
+       u64 qsize:3;
+       u64 reserved_11_15:5;
+       u64 ldwb:1;
+       u64 reset:1;
+       u64 reserved_18_18:1;
+       u64 ena:1;
+       u64 reserved_20_63:44;
+#endif
+};
+
+struct rbdr_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64 reserved_45_63:19;
+       u64 ena:1;
+       u64 reset:1;
+       u64 ldwb:1;
+       u64 reserved_36_41:6;
+       u64 qsize:4;
+       u64 reserved_25_31:7;
+       u64 avg_con:9;
+       u64 reserved_12_15:4;
+       u64 lines:12;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64 lines:12;
+       u64 reserved_12_15:4;
+       u64 avg_con:9;
+       u64 reserved_25_31:7;
+       u64 qsize:4;
+       u64 reserved_36_41:6;
+       u64 ldwb:1;
+       u64 reset:1;
+       u64 ena:1;
+       u64 reserved_45_63:19;
+#endif
+};
+
+struct qs_cfg {
+#if defined(__BIG_ENDIAN_BITFIELD)
+       u64 reserved_32_63:32;
+       u64 ena:1;
+       u64 reserved_27_30:4;
+       u64 sq_ins_ena:1;
+       u64 sq_ins_pos:6;
+       u64 lock_ena:1;
+       u64 lock_viol_cqe_ena:1;
+       u64 send_tstmp_ena:1;
+       u64 be:1;
+       u64 reserved_7_15:9;
+       u64 vnic:7;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       u64 vnic:7;
+       u64 reserved_7_15:9;
+       u64 be:1;
+       u64 send_tstmp_ena:1;
+       u64 lock_viol_cqe_ena:1;
+       u64 lock_ena:1;
+       u64 sq_ins_pos:6;
+       u64 sq_ins_ena:1;
+       u64 reserved_27_30:4;
+       u64 ena:1;
+       u64 reserved_32_63:32;
+#endif
+};
+
+#endif /* Q_STRUCT_H */
diff --git a/sys/dev/vnic/thunder_bgx.c b/sys/dev/vnic/thunder_bgx.c
new file mode 100644 (file)
index 0000000..3dabd59
--- /dev/null
@@ -0,0 +1,1212 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+
+#include "nic_reg.h"
+#include "nic.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME       "thunder-BGX"
+#define DRV_VERSION    "1.0"
+
+struct lmac {
+       struct bgx              *bgx;
+       int                     dmac;
+       u8                      mac[ETH_ALEN];
+       bool                    link_up;
+       int                     lmacid; /* ID within BGX */
+       int                     lmacid_bd; /* ID on board */
+       struct net_device       netdev;
+       struct phy_device       *phydev;
+       unsigned int            last_duplex;
+       unsigned int            last_link;
+       unsigned int            last_speed;
+       bool                    is_sgmii;
+       struct delayed_work     dwork;
+       struct workqueue_struct *check_link;
+};
+
+struct bgx {
+       u8                      bgx_id;
+       u8                      qlm_mode;
+       struct  lmac            lmac[MAX_LMAC_PER_BGX];
+       int                     lmac_count;
+       int                     lmac_type;
+       int                     lane_to_sds;
+       int                     use_training;
+       void __iomem            *reg_base;
+       struct pci_dev          *pdev;
+};
+
+static struct bgx *bgx_vnic[MAX_BGX_THUNDER];
+static int lmac_count; /* Total number of LMACs in the system */
+
+static int bgx_xaui_check_link(struct lmac *lmac);
+
+/* Supported devices */
+static const struct pci_device_id bgx_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_BGX) },
+       { 0, }  /* end of table */
+};
+
+MODULE_AUTHOR("Cavium Inc");
+MODULE_DESCRIPTION("Cavium Thunder BGX/MAC Driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, bgx_id_table);
+
+/* The Cavium ThunderX network controller can *only* be found in SoCs
+ * containing the ThunderX ARM64 CPU implementation.  All accesses to the device
+ * registers on this platform are implicitly strongly ordered with respect
+ * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
+ * with no memory barriers in this driver.  The readq()/writeq() functions add
+ * explicit ordering operations, which are redundant in this case and only
+ * add overhead.
+ */
+
+/* Register read/write APIs */
+static u64 bgx_reg_read(struct bgx *bgx, u8 lmac, u64 offset)
+{
+       void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset;
+
+       return readq_relaxed(addr);
+}
+
+static void bgx_reg_write(struct bgx *bgx, u8 lmac, u64 offset, u64 val)
+{
+       void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset;
+
+       writeq_relaxed(val, addr);
+}
+
+static void bgx_reg_modify(struct bgx *bgx, u8 lmac, u64 offset, u64 val)
+{
+       void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset;
+
+       writeq_relaxed(val | readq_relaxed(addr), addr);
+}
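+
+/*
+ * Illustrative note (a sketch of the address math above): each LMAC owns a
+ * 1 MiB window inside the BGX register BAR, so a per-LMAC register access
+ * resolves to
+ *     reg_base + (lmac << 20) + offset
+ * e.g. reading BGX_CMRX_CFG of LMAC 2 touches reg_base + 0x200000 + BGX_CMRX_CFG.
+ */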
+
+static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero)
+{
+       int timeout = 100;
+       u64 reg_val;
+
+       while (timeout) {
+               reg_val = bgx_reg_read(bgx, lmac, reg);
+               if (zero && !(reg_val & mask))
+                       return 0;
+               if (!zero && (reg_val & mask))
+                       return 0;
+               usleep_range(1000, 2000);
+               timeout--;
+       }
+       return 1;
+}
+
+/* Return a bitmap of the BGX instances present in HW */
+unsigned bgx_get_map(int node)
+{
+       int i;
+       unsigned map = 0;
+
+       for (i = 0; i < MAX_BGX_PER_CN88XX; i++) {
+               if (bgx_vnic[(node * MAX_BGX_PER_CN88XX) + i])
+                       map |= (1 << i);
+       }
+
+       return map;
+}
+EXPORT_SYMBOL(bgx_get_map);
+
+/* Return the number of LMACs configured for this BGX */
+int bgx_get_lmac_count(int node, int bgx_idx)
+{
+       struct bgx *bgx;
+
+       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       if (bgx)
+               return bgx->lmac_count;
+
+       return 0;
+}
+EXPORT_SYMBOL(bgx_get_lmac_count);
+
+/* Returns the current link status of LMAC */
+void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
+{
+       struct bgx_link_status *link = (struct bgx_link_status *)status;
+       struct bgx *bgx;
+       struct lmac *lmac;
+
+       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       if (!bgx)
+               return;
+
+       lmac = &bgx->lmac[lmacid];
+       link->link_up = lmac->link_up;
+       link->duplex = lmac->last_duplex;
+       link->speed = lmac->last_speed;
+}
+EXPORT_SYMBOL(bgx_get_lmac_link_state);
+
+const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
+{
+       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+
+       if (bgx)
+               return bgx->lmac[lmacid].mac;
+
+       return NULL;
+}
+EXPORT_SYMBOL(bgx_get_lmac_mac);
+
+void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
+{
+       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+
+       if (!bgx)
+               return;
+
+       ether_addr_copy(bgx->lmac[lmacid].mac, mac);
+}
+EXPORT_SYMBOL(bgx_set_lmac_mac);
+
+static void bgx_sgmii_change_link_state(struct lmac *lmac)
+{
+       struct bgx *bgx = lmac->bgx;
+       u64 cmr_cfg;
+       u64 port_cfg = 0;
+       u64 misc_ctl = 0;
+
+       cmr_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG);
+       cmr_cfg &= ~CMR_EN;
+       bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg);
+
+       port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG);
+       misc_ctl = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL);
+
+       if (lmac->link_up) {
+               misc_ctl &= ~PCS_MISC_CTL_GMX_ENO;
+               port_cfg &= ~GMI_PORT_CFG_DUPLEX;
+               port_cfg |=  (lmac->last_duplex << 2);
+       } else {
+               misc_ctl |= PCS_MISC_CTL_GMX_ENO;
+       }
+
+       switch (lmac->last_speed) {
+       case 10:
+               port_cfg &= ~GMI_PORT_CFG_SPEED; /* speed 0 */
+               port_cfg |= GMI_PORT_CFG_SPEED_MSB;  /* speed_msb 1 */
+               port_cfg &= ~GMI_PORT_CFG_SLOT_TIME; /* slottime 0 */
+               misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK;
+               misc_ctl |= 50; /* samp_pt */
+               bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 64);
+               bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_BURST, 0);
+               break;
+       case 100:
+               port_cfg &= ~GMI_PORT_CFG_SPEED; /* speed 0 */
+               port_cfg &= ~GMI_PORT_CFG_SPEED_MSB; /* speed_msb 0 */
+               port_cfg &= ~GMI_PORT_CFG_SLOT_TIME; /* slottime 0 */
+               misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK;
+               misc_ctl |= 5; /* samp_pt */
+               bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 64);
+               bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_BURST, 0);
+               break;
+       case 1000:
+               port_cfg |= GMI_PORT_CFG_SPEED; /* speed 1 */
+               port_cfg &= ~GMI_PORT_CFG_SPEED_MSB; /* speed_msb 0 */
+               port_cfg |= GMI_PORT_CFG_SLOT_TIME; /* slottime 1 */
+               misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK;
+               misc_ctl |= 1; /* samp_pt */
+               bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 512);
+               if (lmac->last_duplex)
+                       bgx_reg_write(bgx, lmac->lmacid,
+                                     BGX_GMP_GMI_TXX_BURST, 0);
+               else
+                       bgx_reg_write(bgx, lmac->lmacid,
+                                     BGX_GMP_GMI_TXX_BURST, 8192);
+               break;
+       default:
+               break;
+       }
+       bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL, misc_ctl);
+       bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG, port_cfg);
+
+       port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG);
+
+       /* Re-enable the LMAC */
+       cmr_cfg |= CMR_EN;
+       bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg);
+}
+
+static void bgx_lmac_handler(struct net_device *netdev)
+{
+       struct lmac *lmac = container_of(netdev, struct lmac, netdev);
+       struct phy_device *phydev;
+       int link_changed = 0;
+
+       if (!lmac)
+               return;
+
+       phydev = lmac->phydev;
+
+       if (!phydev->link && lmac->last_link)
+               link_changed = -1;
+
+       if (phydev->link &&
+           (lmac->last_duplex != phydev->duplex ||
+            lmac->last_link != phydev->link ||
+            lmac->last_speed != phydev->speed)) {
+               link_changed = 1;
+       }
+
+       lmac->last_link = phydev->link;
+       lmac->last_speed = phydev->speed;
+       lmac->last_duplex = phydev->duplex;
+
+       if (!link_changed)
+               return;
+
+       if (link_changed > 0)
+               lmac->link_up = true;
+       else
+               lmac->link_up = false;
+
+       if (lmac->is_sgmii)
+               bgx_sgmii_change_link_state(lmac);
+       else
+               bgx_xaui_check_link(lmac);
+}
+
+u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx)
+{
+       struct bgx *bgx;
+
+       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       if (!bgx)
+               return 0;
+
+       if (idx > 8)
+               lmac = 0;
+       return bgx_reg_read(bgx, lmac, BGX_CMRX_RX_STAT0 + (idx * 8));
+}
+EXPORT_SYMBOL(bgx_get_rx_stats);
+
+u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
+{
+       struct bgx *bgx;
+
+       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       if (!bgx)
+               return 0;
+
+       return bgx_reg_read(bgx, lmac, BGX_CMRX_TX_STAT0 + (idx * 8));
+}
+EXPORT_SYMBOL(bgx_get_tx_stats);
+
+static void bgx_flush_dmac_addrs(struct bgx *bgx, int lmac)
+{
+       u64 offset;
+
+       while (bgx->lmac[lmac].dmac > 0) {
+               offset = ((bgx->lmac[lmac].dmac - 1) * sizeof(u64)) +
+                       (lmac * MAX_DMAC_PER_LMAC * sizeof(u64));
+               bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + offset, 0);
+               bgx->lmac[lmac].dmac--;
+       }
+}
+
+void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac)
+{
+       u64 offset;
+       struct bgx *bgx;
+
+#ifdef BGX_IN_PROMISCUOUS_MODE
+       return;
+#endif
+
+       bgx_idx += node * MAX_BGX_PER_CN88XX;
+       bgx = bgx_vnic[bgx_idx];
+
+       if (!bgx) {
+               pr_err("BGX%d not yet initialized, ignoring DMAC addition\n",
+                      bgx_idx);
+               return;
+       }
+
+       dmac = dmac | (1ULL << 48) | ((u64)lmac << 49); /* Enable DMAC */
+       if (bgx->lmac[lmac].dmac == MAX_DMAC_PER_LMAC) {
+               dev_err(&bgx->pdev->dev,
+                       "Max DMAC filters for LMAC%d reached, ignoring\n",
+                       lmac);
+               return;
+       }
+
+       if (bgx->lmac[lmac].dmac == MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE)
+               bgx->lmac[lmac].dmac = 1;
+
+       offset = (bgx->lmac[lmac].dmac * sizeof(u64)) +
+               (lmac * MAX_DMAC_PER_LMAC * sizeof(u64));
+       bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + offset, dmac);
+       bgx->lmac[lmac].dmac++;
+
+       bgx_reg_write(bgx, lmac, BGX_CMRX_RX_DMAC_CTL,
+                     (CAM_ACCEPT << 3) | (MCAST_MODE_CAM_FILTER << 1)
+                     | (BCAST_ACCEPT << 0));
+}
+EXPORT_SYMBOL(bgx_add_dmac_addr);
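+
+/*
+ * Illustrative note (a sketch of the encoding used above): a CAM entry is
+ * the 48-bit MAC address with bit 48 as the enable bit and the owning LMAC
+ * id starting at bit 49; each LMAC owns MAX_DMAC_PER_LMAC consecutive
+ * 8-byte CAM slots, so entry 'n' of LMAC 'l' would live at
+ *     BGX_CMR_RX_DMACX_CAM + ((l * MAX_DMAC_PER_LMAC) + n) * sizeof(u64)
+ */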
+
+/* Configure BGX LMAC in internal loopback mode */
+void bgx_lmac_internal_loopback(int node, int bgx_idx,
+                               int lmac_idx, bool enable)
+{
+       struct bgx *bgx;
+       struct lmac *lmac;
+       u64    cfg;
+
+       bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       if (!bgx)
+               return;
+
+       lmac = &bgx->lmac[lmac_idx];
+       if (lmac->is_sgmii) {
+               cfg = bgx_reg_read(bgx, lmac_idx, BGX_GMP_PCS_MRX_CTL);
+               if (enable)
+                       cfg |= PCS_MRX_CTL_LOOPBACK1;
+               else
+                       cfg &= ~PCS_MRX_CTL_LOOPBACK1;
+               bgx_reg_write(bgx, lmac_idx, BGX_GMP_PCS_MRX_CTL, cfg);
+       } else {
+               cfg = bgx_reg_read(bgx, lmac_idx, BGX_SPUX_CONTROL1);
+               if (enable)
+                       cfg |= SPU_CTL_LOOPBACK;
+               else
+                       cfg &= ~SPU_CTL_LOOPBACK;
+               bgx_reg_write(bgx, lmac_idx, BGX_SPUX_CONTROL1, cfg);
+       }
+}
+EXPORT_SYMBOL(bgx_lmac_internal_loopback);
+
+static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid)
+{
+       u64 cfg;
+
+       bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_THRESH, 0x30);
+       /* max packet size */
+       bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_RXX_JABBER, MAX_FRAME_SIZE);
+
+       /* Disable frame alignment if using preamble */
+       cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND);
+       if (cfg & 1)
+               bgx_reg_write(bgx, lmacid, BGX_GMP_GMI_TXX_SGMII_CTL, 0);
+
+       /* Enable lmac */
+       bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
+
+       /* PCS reset */
+       bgx_reg_modify(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_RESET);
+       if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_CTL,
+                        PCS_MRX_CTL_RESET, true)) {
+               dev_err(&bgx->pdev->dev, "BGX PCS reset not completed\n");
+               return -1;
+       }
+
+       /* Clear power down, reset autoneg and enable autoneg */
+       cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MRX_CTL);
+       cfg &= ~PCS_MRX_CTL_PWR_DN;
+       cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN);
+       bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg);
+
+       if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS,
+                        PCS_MRX_STATUS_AN_CPT, false)) {
+               dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type)
+{
+       u64 cfg;
+
+       /* Reset SPU */
+       bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET);
+       if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET, true)) {
+               dev_err(&bgx->pdev->dev, "BGX SPU reset not completed\n");
+               return -1;
+       }
+
+       /* Disable LMAC */
+       cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+       cfg &= ~CMR_EN;
+       bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+       bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER);
+       /* Set interleaved running disparity for RXAUI */
+       if (bgx->lmac_type != BGX_MODE_RXAUI)
+               bgx_reg_modify(bgx, lmacid,
+                              BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS);
+       else
+               bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL,
+                              SPU_MISC_CTL_RX_DIS | SPU_MISC_CTL_INTLV_RDISP);
+
+       /* clear all interrupts */
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_INT);
+       bgx_reg_write(bgx, lmacid, BGX_SMUX_RX_INT, cfg);
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_INT);
+       bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_INT, cfg);
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
+       bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg);
+
+       if (bgx->use_training) {
+               bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LP_CUP, 0x00);
+               bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_CUP, 0x00);
+               bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_REP, 0x00);
+               /* training enable */
+               bgx_reg_modify(bgx, lmacid,
+                              BGX_SPUX_BR_PMD_CRTL, SPU_PMD_CRTL_TRAIN_EN);
+       }
+
+       /* Append FCS to each packet */
+       bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_APPEND, SMU_TX_APPEND_FCS_D);
+
+       /* Disable forward error correction */
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_FEC_CONTROL);
+       cfg &= ~SPU_FEC_CTL_FEC_EN;
+       bgx_reg_write(bgx, lmacid, BGX_SPUX_FEC_CONTROL, cfg);
+
+       /* Disable autoneg */
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_CONTROL);
+       cfg = cfg & ~(SPU_AN_CTL_AN_EN | SPU_AN_CTL_XNP_EN);
+       bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_CONTROL, cfg);
+
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_ADV);
+       if (bgx->lmac_type == BGX_MODE_10G_KR)
+               cfg |= (1 << 23);
+       else if (bgx->lmac_type == BGX_MODE_40G_KR)
+               cfg |= (1 << 24);
+       else
+               cfg &= ~((1 << 23) | (1 << 24));
+       cfg = cfg & (~((1ULL << 25) | (1ULL << 22) | (1ULL << 12)));
+       bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_ADV, cfg);
+
+       cfg = bgx_reg_read(bgx, 0, BGX_SPU_DBG_CONTROL);
+       cfg &= ~SPU_DBG_CTL_AN_ARB_LINK_CHK_EN;
+       bgx_reg_write(bgx, 0, BGX_SPU_DBG_CONTROL, cfg);
+
+       /* Enable lmac */
+       bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
+
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_CONTROL1);
+       cfg &= ~SPU_CTL_LOW_POWER;
+       bgx_reg_write(bgx, lmacid, BGX_SPUX_CONTROL1, cfg);
+
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_CTL);
+       cfg &= ~SMU_TX_CTL_UNI_EN;
+       cfg |= SMU_TX_CTL_DIC_EN;
+       bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_CTL, cfg);
+
+       /* take lmac_count into account when setting the TX threshold */
+       bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_THRESH, (0x100 - 1));
+       /* max packet size */
+       bgx_reg_modify(bgx, lmacid, BGX_SMUX_RX_JABBER, MAX_FRAME_SIZE);
+
+       return 0;
+}
+
+static int bgx_xaui_check_link(struct lmac *lmac)
+{
+       struct bgx *bgx = lmac->bgx;
+       int lmacid = lmac->lmacid;
+       int lmac_type = bgx->lmac_type;
+       u64 cfg;
+
+       bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS);
+       if (bgx->use_training) {
+               cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
+               if (!(cfg & (1ull << 13))) {
+                       cfg = (1ull << 13) | (1ull << 14);
+                       bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg);
+                       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL);
+                       cfg |= (1ull << 0);
+                       bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL, cfg);
+                       return -1;
+               }
+       }
+
+       /* wait for PCS to come out of reset */
+       if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET, true)) {
+               dev_err(&bgx->pdev->dev, "BGX SPU reset not completed\n");
+               return -1;
+       }
+
+       if ((lmac_type == BGX_MODE_10G_KR) || (lmac_type == BGX_MODE_XFI) ||
+           (lmac_type == BGX_MODE_40G_KR) || (lmac_type == BGX_MODE_XLAUI)) {
+               if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_BR_STATUS1,
+                                SPU_BR_STATUS_BLK_LOCK, false)) {
+                       dev_err(&bgx->pdev->dev,
+                               "SPU_BR_STATUS_BLK_LOCK not completed\n");
+                       return -1;
+               }
+       } else {
+               if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_BX_STATUS,
+                                SPU_BX_STATUS_RX_ALIGN, false)) {
+                       dev_err(&bgx->pdev->dev,
+                               "SPU_BX_STATUS_RX_ALIGN not completed\n");
+                       return -1;
+               }
+       }
+
+       /* Clear rcvflt bit (latching high) and read it back */
+       bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
+       if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
+               dev_err(&bgx->pdev->dev, "Receive fault, retry training\n");
+               if (bgx->use_training) {
+                       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
+                       if (!(cfg & (1ull << 13))) {
+                               cfg = (1ull << 13) | (1ull << 14);
+                               bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg);
+                               cfg = bgx_reg_read(bgx, lmacid,
+                                                  BGX_SPUX_BR_PMD_CRTL);
+                               cfg |= (1ull << 0);
+                               bgx_reg_write(bgx, lmacid,
+                                             BGX_SPUX_BR_PMD_CRTL, cfg);
+                               return -1;
+                       }
+               }
+               return -1;
+       }
+
+       /* Wait for MAC RX to be ready */
+       if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_RX_CTL,
+                        SMU_RX_CTL_STATUS, true)) {
+               dev_err(&bgx->pdev->dev, "SMU RX link not okay\n");
+               return -1;
+       }
+
+       /* Wait for BGX RX to be idle */
+       if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_RX_IDLE, false)) {
+               dev_err(&bgx->pdev->dev, "SMU RX not idle\n");
+               return -1;
+       }
+
+       /* Wait for BGX TX to be idle */
+       if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_TX_IDLE, false)) {
+               dev_err(&bgx->pdev->dev, "SMU TX not idle\n");
+               return -1;
+       }
+
+       if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
+               dev_err(&bgx->pdev->dev, "Receive fault\n");
+               return -1;
+       }
+
+       /* Receive link is latching low. Force it high and verify it */
+       bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK);
+       if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_STATUS1,
+                        SPU_STATUS1_RCV_LNK, false)) {
+               dev_err(&bgx->pdev->dev, "SPU receive link down\n");
+               return -1;
+       }
+
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
+       cfg &= ~SPU_MISC_CTL_RX_DIS;
+       bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
+       return 0;
+}
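+
+/*
+ * Note on the error paths above: SPU_MISC_CTL_RX_DIS is set on entry and
+ * only cleared on a fully successful pass, so the receiver stays disabled
+ * until a later poll completes the whole sequence (SPU reset, block
+ * lock/alignment, fault check, SMU RX/TX idle, receive link up).
+ */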
+
+static void bgx_poll_for_link(struct work_struct *work)
+{
+       struct lmac *lmac;
+       u64 link;
+
+       lmac = container_of(work, struct lmac, dwork.work);
+
+       /* Receive link is latching low. Force it high and verify it */
+       bgx_reg_modify(lmac->bgx, lmac->lmacid,
+                      BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK);
+       bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1,
+                    SPU_STATUS1_RCV_LNK, false);
+
+       link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1);
+       if (link & SPU_STATUS1_RCV_LNK) {
+               lmac->link_up = 1;
+               if (lmac->bgx->lmac_type == BGX_MODE_XLAUI)
+                       lmac->last_speed = 40000;
+               else
+                       lmac->last_speed = 10000;
+               lmac->last_duplex = 1;
+       } else {
+               lmac->link_up = 0;
+       }
+
+       if (lmac->last_link != lmac->link_up) {
+               lmac->last_link = lmac->link_up;
+               if (lmac->link_up)
+                       bgx_xaui_check_link(lmac);
+       }
+
+       queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2);
+}
+
+static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
+{
+       u64 dmac_bcast = (1ULL << 48) - 1;
+       struct lmac *lmac;
+       u64 cfg;
+
+       lmac = &bgx->lmac[lmacid];
+       lmac->bgx = bgx;
+
+       if (bgx->lmac_type == BGX_MODE_SGMII) {
+               lmac->is_sgmii = 1;
+               if (bgx_lmac_sgmii_init(bgx, lmacid))
+                       return -1;
+       } else {
+               lmac->is_sgmii = 0;
+               if (bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type))
+                       return -1;
+       }
+
+       if (lmac->is_sgmii) {
+               cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND);
+               cfg |= ((1ull << 2) | (1ull << 1)); /* FCS and PAD */
+               bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND, cfg);
+               bgx_reg_write(bgx, lmacid, BGX_GMP_GMI_TXX_MIN_PKT, 60 - 1);
+       } else {
+               cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_APPEND);
+               cfg |= ((1ull << 2) | (1ull << 1)); /* FCS and PAD */
+               bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_APPEND, cfg);
+               bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_MIN_PKT, 60 + 4);
+       }
+
+       /* Enable lmac */
+       bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG,
+                      CMR_EN | CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+
+       /* Restore default cfg, in case low-level firmware changed it */
+       bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03);
+
+       /* Add broadcast MAC into all LMAC's DMAC filters */
+       bgx_add_dmac_addr(dmac_bcast, 0, bgx->bgx_id, lmacid);
+
+       if ((bgx->lmac_type != BGX_MODE_XFI) &&
+           (bgx->lmac_type != BGX_MODE_XLAUI) &&
+           (bgx->lmac_type != BGX_MODE_40G_KR) &&
+           (bgx->lmac_type != BGX_MODE_10G_KR)) {
+               if (!lmac->phydev)
+                       return -ENODEV;
+
+               lmac->phydev->dev_flags = 0;
+
+               if (phy_connect_direct(&lmac->netdev, lmac->phydev,
+                                      bgx_lmac_handler,
+                                      PHY_INTERFACE_MODE_SGMII))
+                       return -ENODEV;
+
+               phy_start_aneg(lmac->phydev);
+       } else {
+               lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND |
+                                                  WQ_MEM_RECLAIM, 1);
+               if (!lmac->check_link)
+                       return -ENOMEM;
+               INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link);
+               queue_delayed_work(lmac->check_link, &lmac->dwork, 0);
+       }
+
+       return 0;
+}
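+
+/*
+ * Summary of the two bring-up paths above: SGMII/XAUI/RXAUI LMACs attach
+ * to an MDIO PHY and start autonegotiation through phylib, while the
+ * XFI/XLAUI/10G-KR/40G-KR types have no PHY attached and instead poll
+ * link state from the per-LMAC "check_link" workqueue via
+ * bgx_poll_for_link().
+ */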
+
+static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
+{
+       struct lmac *lmac;
+       u64 cmrx_cfg;
+
+       lmac = &bgx->lmac[lmacid];
+       if (lmac->check_link) {
+               /* Destroy work queue */
+               cancel_delayed_work(&lmac->dwork);
+               flush_workqueue(lmac->check_link);
+               destroy_workqueue(lmac->check_link);
+       }
+
+       cmrx_cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+       cmrx_cfg &= ~(1 << 15);
+       bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg);
+       bgx_flush_dmac_addrs(bgx, lmacid);
+
+       if ((bgx->lmac_type != BGX_MODE_XFI) &&
+           (bgx->lmac_type != BGX_MODE_XLAUI) &&
+           (bgx->lmac_type != BGX_MODE_40G_KR) &&
+           (bgx->lmac_type != BGX_MODE_10G_KR)) {
+               if (lmac->phydev)
+                       phy_disconnect(lmac->phydev);
+       }
+       lmac->phydev = NULL;
+}
+
+static void bgx_set_num_ports(struct bgx *bgx)
+{
+       u64 lmac_count;
+
+       switch (bgx->qlm_mode) {
+       case QLM_MODE_SGMII:
+               bgx->lmac_count = 4;
+               bgx->lmac_type = BGX_MODE_SGMII;
+               bgx->lane_to_sds = 0;
+               break;
+       case QLM_MODE_XAUI_1X4:
+               bgx->lmac_count = 1;
+               bgx->lmac_type = BGX_MODE_XAUI;
+               bgx->lane_to_sds = 0xE4;
+               break;
+       case QLM_MODE_RXAUI_2X2:
+               bgx->lmac_count = 2;
+               bgx->lmac_type = BGX_MODE_RXAUI;
+               bgx->lane_to_sds = 0xE4;
+               break;
+       case QLM_MODE_XFI_4X1:
+               bgx->lmac_count = 4;
+               bgx->lmac_type = BGX_MODE_XFI;
+               bgx->lane_to_sds = 0;
+               break;
+       case QLM_MODE_XLAUI_1X4:
+               bgx->lmac_count = 1;
+               bgx->lmac_type = BGX_MODE_XLAUI;
+               bgx->lane_to_sds = 0xE4;
+               break;
+       case QLM_MODE_10G_KR_4X1:
+               bgx->lmac_count = 4;
+               bgx->lmac_type = BGX_MODE_10G_KR;
+               bgx->lane_to_sds = 0;
+               bgx->use_training = 1;
+               break;
+       case QLM_MODE_40G_KR4_1X4:
+               bgx->lmac_count = 1;
+               bgx->lmac_type = BGX_MODE_40G_KR;
+               bgx->lane_to_sds = 0xE4;
+               bgx->use_training = 1;
+               break;
+       default:
+               bgx->lmac_count = 0;
+               break;
+       }
+
+       /* Check whether low-level firmware has programmed the LMAC count
+        * based on board type; if so, use it, otherwise keep the default
+        * static values.
+        */
+       lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7;
+       if (lmac_count != 4)
+               bgx->lmac_count = lmac_count;
+}
+
+static void bgx_init_hw(struct bgx *bgx)
+{
+       int i;
+
+       bgx_set_num_ports(bgx);
+
+       bgx_reg_modify(bgx, 0, BGX_CMR_GLOBAL_CFG, CMR_GLOBAL_CFG_FCS_STRIP);
+       if (bgx_reg_read(bgx, 0, BGX_CMR_BIST_STATUS))
+               dev_err(&bgx->pdev->dev, "BGX%d BIST failed\n", bgx->bgx_id);
+
+       /* Set lmac type and lane2serdes mapping */
+       for (i = 0; i < bgx->lmac_count; i++) {
+               if (bgx->lmac_type == BGX_MODE_RXAUI) {
+                       if (i)
+                               bgx->lane_to_sds = 0x0e;
+                       else
+                               bgx->lane_to_sds = 0x04;
+                       bgx_reg_write(bgx, i, BGX_CMRX_CFG,
+                                     (bgx->lmac_type << 8) | bgx->lane_to_sds);
+                       continue;
+               }
+               bgx_reg_write(bgx, i, BGX_CMRX_CFG,
+                             (bgx->lmac_type << 8) | (bgx->lane_to_sds + i));
+               bgx->lmac[i].lmacid_bd = lmac_count;
+               lmac_count++;
+       }
+
+       bgx_reg_write(bgx, 0, BGX_CMR_TX_LMACS, bgx->lmac_count);
+       bgx_reg_write(bgx, 0, BGX_CMR_RX_LMACS, bgx->lmac_count);
+
+       /* Set the backpressure AND mask */
+       for (i = 0; i < bgx->lmac_count; i++)
+               bgx_reg_modify(bgx, 0, BGX_CMR_CHAN_MSK_AND,
+                              ((1ULL << MAX_BGX_CHANS_PER_LMAC) - 1) <<
+                              (i * MAX_BGX_CHANS_PER_LMAC));
+
+       /* Disable all MAC filtering */
+       for (i = 0; i < RX_DMAC_COUNT; i++)
+               bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + (i * 8), 0x00);
+
+       /* Disable MAC steering (NCSI traffic) */
+       for (i = 0; i < RX_TRAFFIC_STEER_RULE_COUNT; i++)
+               bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00);
+}
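+
+/*
+ * For illustration, the BGX_CMRX_CFG value programmed above is
+ * (lmac_type << 8) | lane_to_sds: RXAUI (type 2) yields 0x204 for LMAC0
+ * and 0x20e for LMAC1, while XLAUI (type 4, single LMAC) yields
+ * (4 << 8) | 0xE4 = 0x4E4.
+ */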
+
+static void bgx_get_qlm_mode(struct bgx *bgx)
+{
+       struct device *dev = &bgx->pdev->dev;
+       int lmac_type;
+       int train_en;
+
+       /* Read LMAC0 type to figure out QLM mode
+        * This is configured by low level firmware
+        */
+       lmac_type = bgx_reg_read(bgx, 0, BGX_CMRX_CFG);
+       lmac_type = (lmac_type >> 8) & 0x07;
+
+       train_en = bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) &
+                               SPU_PMD_CRTL_TRAIN_EN;
+
+       switch (lmac_type) {
+       case BGX_MODE_SGMII:
+               bgx->qlm_mode = QLM_MODE_SGMII;
+               dev_info(dev, "BGX%d QLM mode: SGMII\n", bgx->bgx_id);
+               break;
+       case BGX_MODE_XAUI:
+               bgx->qlm_mode = QLM_MODE_XAUI_1X4;
+               dev_info(dev, "BGX%d QLM mode: XAUI\n", bgx->bgx_id);
+               break;
+       case BGX_MODE_RXAUI:
+               bgx->qlm_mode = QLM_MODE_RXAUI_2X2;
+               dev_info(dev, "BGX%d QLM mode: RXAUI\n", bgx->bgx_id);
+               break;
+       case BGX_MODE_XFI:
+               if (!train_en) {
+                       bgx->qlm_mode = QLM_MODE_XFI_4X1;
+                       dev_info(dev, "BGX%d QLM mode: XFI\n", bgx->bgx_id);
+               } else {
+                       bgx->qlm_mode = QLM_MODE_10G_KR_4X1;
+                       dev_info(dev, "BGX%d QLM mode: 10G_KR\n", bgx->bgx_id);
+               }
+               break;
+       case BGX_MODE_XLAUI:
+               if (!train_en) {
+                       bgx->qlm_mode = QLM_MODE_XLAUI_1X4;
+                       dev_info(dev, "BGX%d QLM mode: XLAUI\n", bgx->bgx_id);
+               } else {
+                       bgx->qlm_mode = QLM_MODE_40G_KR4_1X4;
+                       dev_info(dev, "BGX%d QLM mode: 40G_KR4\n", bgx->bgx_id);
+               }
+               break;
+       default:
+               bgx->qlm_mode = QLM_MODE_SGMII;
+               dev_info(dev, "BGX%d QLM default mode: SGMII\n", bgx->bgx_id);
+       }
+}
+
+#ifdef CONFIG_ACPI
+
+static int bgx_match_phy_id(struct device *dev, void *data)
+{
+       struct phy_device *phydev = to_phy_device(dev);
+       u32 *phy_id = data;
+
+       if (phydev->addr == *phy_id)
+               return 1;
+
+       return 0;
+}
+
+static const char *addr_propnames[] = {
+       "mac-address",
+       "local-mac-address",
+       "address",
+};
+
+static int acpi_get_mac_address(struct acpi_device *adev, u8 *dst)
+{
+       u64 mac;
+       int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(addr_propnames); i++) {
+               ret = acpi_dev_prop_read_single(adev, addr_propnames[i],
+                                               DEV_PROP_U64, &mac);
+               if (ret)
+                       continue;
+
+               if (mac & (~0ULL << 48))
+                       continue;       /* more than 6 bytes */
+
+               if (!is_valid_ether_addr((u8 *)&mac))
+                       continue;
+
+               ether_addr_copy(dst, (u8 *)&mac);
+
+               return 0;
+       }
+
+       return ret ? ret : -EINVAL;
+}
+
+static acpi_status bgx_acpi_register_phy(acpi_handle handle,
+                                       u32 lvl, void *context, void **rv)
+{
+       struct acpi_reference_args args;
+       struct bgx *bgx = context;
+       struct acpi_device *adev;
+       struct device *phy_dev;
+       u32 phy_id;
+
+       if (acpi_bus_get_device(handle, &adev))
+               return AE_OK;
+
+       if (acpi_dev_get_property_reference(adev, "phy-handle", 0, &args))
+               return AE_OK;
+
+       if (acpi_dev_prop_read_single(args.adev, "phy-channel", DEV_PROP_U32,
+                                       &phy_id))
+               return AE_OK;
+
+       phy_dev = bus_find_device(&mdio_bus_type, NULL, (void *)&phy_id,
+                                 bgx_match_phy_id);
+       if (!phy_dev)
+               return AE_OK;
+
+       SET_NETDEV_DEV(&bgx->lmac[bgx->lmac_count].netdev, &bgx->pdev->dev);
+       bgx->lmac[bgx->lmac_count].phydev = to_phy_device(phy_dev);
+
+       acpi_get_mac_address(adev, bgx->lmac[bgx->lmac_count].mac);
+
+       bgx->lmac[bgx->lmac_count].lmacid = bgx->lmac_count;
+       bgx->lmac_count++;
+
+       return AE_OK;
+}
+
+static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl,
+                               void *context, void **ret_val)
+{
+       struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
+       struct bgx *bgx = context;
+       char bgx_sel[5];
+
+       snprintf(bgx_sel, 5, "BGX%d", bgx->bgx_id);
+       if (ACPI_FAILURE(acpi_get_name(handle, ACPI_SINGLE_NAME, &string))) {
+               pr_warn("Invalid link device\n");
+               return AE_OK;
+       }
+
+       if (strncmp(string.pointer, bgx_sel, 4))
+               return AE_OK;
+
+       acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+                           bgx_acpi_register_phy, NULL, bgx, NULL);
+
+       kfree(string.pointer);
+       return AE_CTRL_TERMINATE;
+}
+
+static int bgx_init_acpi_phy(struct bgx *bgx)
+{
+       acpi_get_devices(NULL, bgx_acpi_match_id, bgx, (void **)NULL);
+       return 0;
+}
+
+#else
+
+static int bgx_init_acpi_phy(struct bgx *bgx)
+{
+       return -ENODEV;
+}
+
+#endif /* CONFIG_ACPI */
+
+#if IS_ENABLED(CONFIG_OF_MDIO)
+
+static int bgx_init_of_phy(struct bgx *bgx)
+{
+       struct device_node *np;
+       struct device_node *np_child;
+       u8 lmac = 0;
+       char bgx_sel[5];
+       const char *mac;
+
+       /* Get BGX node from DT */
+       snprintf(bgx_sel, 5, "bgx%d", bgx->bgx_id);
+       np = of_find_node_by_name(NULL, bgx_sel);
+       if (!np)
+               return -ENODEV;
+
+       for_each_child_of_node(np, np_child) {
+               struct device_node *phy_np = of_parse_phandle(np_child,
+                                                             "phy-handle", 0);
+               if (!phy_np)
+                       continue;
+               bgx->lmac[lmac].phydev = of_phy_find_device(phy_np);
+
+               mac = of_get_mac_address(np_child);
+               if (mac)
+                       ether_addr_copy(bgx->lmac[lmac].mac, mac);
+
+               SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev);
+               bgx->lmac[lmac].lmacid = lmac;
+               lmac++;
+               if (lmac == MAX_LMAC_PER_BGX)
+                       break;
+       }
+       return 0;
+}
+
+#else
+
+static int bgx_init_of_phy(struct bgx *bgx)
+{
+       return -ENODEV;
+}
+
+#endif /* CONFIG_OF_MDIO */
+
+static int bgx_init_phy(struct bgx *bgx)
+{
+       int err = bgx_init_of_phy(bgx);
+
+       if (err != -ENODEV)
+               return err;
+
+       return bgx_init_acpi_phy(bgx);
+}
+
+static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+       int err;
+       struct device *dev = &pdev->dev;
+       struct bgx *bgx = NULL;
+       u8 lmac;
+
+       bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL);
+       if (!bgx)
+               return -ENOMEM;
+       bgx->pdev = pdev;
+
+       pci_set_drvdata(pdev, bgx);
+
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(dev, "Failed to enable PCI device\n");
+               pci_set_drvdata(pdev, NULL);
+               return err;
+       }
+
+       err = pci_request_regions(pdev, DRV_NAME);
+       if (err) {
+               dev_err(dev, "PCI request regions failed 0x%x\n", err);
+               goto err_disable_device;
+       }
+
+       /* MAP configuration registers */
+       bgx->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+       if (!bgx->reg_base) {
+               dev_err(dev, "BGX: Cannot map CSR memory space, aborting\n");
+               err = -ENOMEM;
+               goto err_release_regions;
+       }
+       bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1;
+       bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_CN88XX;
+
+       bgx_vnic[bgx->bgx_id] = bgx;
+       bgx_get_qlm_mode(bgx);
+
+       err = bgx_init_phy(bgx);
+       if (err)
+               goto err_enable;
+
+       bgx_init_hw(bgx);
+
+       /* Enable all LMACs */
+       for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
+               err = bgx_lmac_enable(bgx, lmac);
+               if (err) {
+                       dev_err(dev, "BGX%d failed to enable lmac%d\n",
+                               bgx->bgx_id, lmac);
+                       goto err_enable;
+               }
+       }
+
+       return 0;
+
+err_enable:
+       bgx_vnic[bgx->bgx_id] = NULL;
+err_release_regions:
+       pci_release_regions(pdev);
+err_disable_device:
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+       return err;
+}
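+
+/*
+ * The BGX index derived in probe above is bit 24 of the BAR0 base address
+ * (selecting one of the two BGX blocks in a node) plus
+ * node_id * MAX_BGX_PER_CN88XX, which keeps bgx_vnic[] indices within
+ * MAX_BGX_THUNDER (8: up to 4 nodes, 2 BGX blocks per node).
+ */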
+
+static void bgx_remove(struct pci_dev *pdev)
+{
+       struct bgx *bgx = pci_get_drvdata(pdev);
+       u8 lmac;
+
+       /* Disable all LMACs */
+       for (lmac = 0; lmac < bgx->lmac_count; lmac++)
+               bgx_lmac_disable(bgx, lmac);
+
+       bgx_vnic[bgx->bgx_id] = NULL;
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+}
+
+static struct pci_driver bgx_driver = {
+       .name = DRV_NAME,
+       .id_table = bgx_id_table,
+       .probe = bgx_probe,
+       .remove = bgx_remove,
+};
+
+static int __init bgx_init_module(void)
+{
+       pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+
+       return pci_register_driver(&bgx_driver);
+}
+
+static void __exit bgx_cleanup_module(void)
+{
+       pci_unregister_driver(&bgx_driver);
+}
+
+module_init(bgx_init_module);
+module_exit(bgx_cleanup_module);
diff --git a/sys/dev/vnic/thunder_bgx.h b/sys/dev/vnic/thunder_bgx.h
new file mode 100644 (file)
index 0000000..b1c7618
--- /dev/null
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2015 Cavium Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef THUNDER_BGX_H
+#define THUNDER_BGX_H
+
+#define    MAX_BGX_THUNDER                     8 /* Max 4 nodes, 2 per node */
+#define    MAX_BGX_PER_CN88XX                  2
+#define    MAX_LMAC_PER_BGX                    4
+#define    MAX_BGX_CHANS_PER_LMAC              16
+#define    MAX_DMAC_PER_LMAC                   8
+#define    MAX_FRAME_SIZE                      9216
+
+#define    MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE   2
+
+#define    MAX_LMAC    (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX)
+
+/* Registers */
+#define BGX_CMRX_CFG                   0x00
+#define  CMR_PKT_TX_EN                         BIT_ULL(13)
+#define  CMR_PKT_RX_EN                         BIT_ULL(14)
+#define  CMR_EN                                        BIT_ULL(15)
+#define BGX_CMR_GLOBAL_CFG             0x08
+#define  CMR_GLOBAL_CFG_FCS_STRIP              BIT_ULL(6)
+#define BGX_CMRX_RX_ID_MAP             0x60
+#define BGX_CMRX_RX_STAT0              0x70
+#define BGX_CMRX_RX_STAT1              0x78
+#define BGX_CMRX_RX_STAT2              0x80
+#define BGX_CMRX_RX_STAT3              0x88
+#define BGX_CMRX_RX_STAT4              0x90
+#define BGX_CMRX_RX_STAT5              0x98
+#define BGX_CMRX_RX_STAT6              0xA0
+#define BGX_CMRX_RX_STAT7              0xA8
+#define BGX_CMRX_RX_STAT8              0xB0
+#define BGX_CMRX_RX_STAT9              0xB8
+#define BGX_CMRX_RX_STAT10             0xC0
+#define BGX_CMRX_RX_BP_DROP            0xC8
+#define BGX_CMRX_RX_DMAC_CTL           0x0E8
+#define BGX_CMR_RX_DMACX_CAM           0x200
+#define  RX_DMACX_CAM_EN                       BIT_ULL(48)
+#define  RX_DMACX_CAM_LMACID(x)                        (x << 49)
+#define  RX_DMAC_COUNT                         32
+#define BGX_CMR_RX_STREERING           0x300
+#define  RX_TRAFFIC_STEER_RULE_COUNT           8
+#define BGX_CMR_CHAN_MSK_AND           0x450
+#define BGX_CMR_BIST_STATUS            0x460
+#define BGX_CMR_RX_LMACS               0x468
+#define BGX_CMRX_TX_STAT0              0x600
+#define BGX_CMRX_TX_STAT1              0x608
+#define BGX_CMRX_TX_STAT2              0x610
+#define BGX_CMRX_TX_STAT3              0x618
+#define BGX_CMRX_TX_STAT4              0x620
+#define BGX_CMRX_TX_STAT5              0x628
+#define BGX_CMRX_TX_STAT6              0x630
+#define BGX_CMRX_TX_STAT7              0x638
+#define BGX_CMRX_TX_STAT8              0x640
+#define BGX_CMRX_TX_STAT9              0x648
+#define BGX_CMRX_TX_STAT10             0x650
+#define BGX_CMRX_TX_STAT11             0x658
+#define BGX_CMRX_TX_STAT12             0x660
+#define BGX_CMRX_TX_STAT13             0x668
+#define BGX_CMRX_TX_STAT14             0x670
+#define BGX_CMRX_TX_STAT15             0x678
+#define BGX_CMRX_TX_STAT16             0x680
+#define BGX_CMRX_TX_STAT17             0x688
+#define BGX_CMR_TX_LMACS               0x1000
+
+#define BGX_SPUX_CONTROL1              0x10000
+#define  SPU_CTL_LOW_POWER                     BIT_ULL(11)
+#define  SPU_CTL_LOOPBACK                      BIT_ULL(14)
+#define  SPU_CTL_RESET                         BIT_ULL(15)
+#define BGX_SPUX_STATUS1               0x10008
+#define  SPU_STATUS1_RCV_LNK                   BIT_ULL(2)
+#define BGX_SPUX_STATUS2               0x10020
+#define  SPU_STATUS2_RCVFLT                    BIT_ULL(10)
+#define BGX_SPUX_BX_STATUS             0x10028
+#define  SPU_BX_STATUS_RX_ALIGN                        BIT_ULL(12)
+#define BGX_SPUX_BR_STATUS1            0x10030
+#define  SPU_BR_STATUS_BLK_LOCK                        BIT_ULL(0)
+#define  SPU_BR_STATUS_RCV_LNK                 BIT_ULL(12)
+#define BGX_SPUX_BR_PMD_CRTL           0x10068
+#define  SPU_PMD_CRTL_TRAIN_EN                 BIT_ULL(1)
+#define BGX_SPUX_BR_PMD_LP_CUP         0x10078
+#define BGX_SPUX_BR_PMD_LD_CUP         0x10088
+#define BGX_SPUX_BR_PMD_LD_REP         0x10090
+#define BGX_SPUX_FEC_CONTROL           0x100A0
+#define  SPU_FEC_CTL_FEC_EN                    BIT_ULL(0)
+#define  SPU_FEC_CTL_ERR_EN                    BIT_ULL(1)
+#define BGX_SPUX_AN_CONTROL            0x100C8
+#define  SPU_AN_CTL_AN_EN                      BIT_ULL(12)
+#define  SPU_AN_CTL_XNP_EN                     BIT_ULL(13)
+#define BGX_SPUX_AN_ADV                        0x100D8
+#define BGX_SPUX_MISC_CONTROL          0x10218
+#define  SPU_MISC_CTL_INTLV_RDISP              BIT_ULL(10)
+#define  SPU_MISC_CTL_RX_DIS                   BIT_ULL(12)
+#define BGX_SPUX_INT                   0x10220 /* +(0..3) << 20 */
+#define BGX_SPUX_INT_W1S               0x10228
+#define BGX_SPUX_INT_ENA_W1C           0x10230
+#define BGX_SPUX_INT_ENA_W1S           0x10238
+#define BGX_SPU_DBG_CONTROL            0x10300
+#define  SPU_DBG_CTL_AN_ARB_LINK_CHK_EN                BIT_ULL(18)
+#define  SPU_DBG_CTL_AN_NONCE_MCT_DIS          BIT_ULL(29)
+
+#define BGX_SMUX_RX_INT                        0x20000
+#define BGX_SMUX_RX_JABBER             0x20030
+#define BGX_SMUX_RX_CTL                        0x20048
+#define  SMU_RX_CTL_STATUS                     (3ull << 0)
+#define BGX_SMUX_TX_APPEND             0x20100
+#define  SMU_TX_APPEND_FCS_D                   BIT_ULL(2)
+#define BGX_SMUX_TX_MIN_PKT            0x20118
+#define BGX_SMUX_TX_INT                        0x20140
+#define BGX_SMUX_TX_CTL                        0x20178
+#define  SMU_TX_CTL_DIC_EN                     BIT_ULL(0)
+#define  SMU_TX_CTL_UNI_EN                     BIT_ULL(1)
+#define  SMU_TX_CTL_LNK_STATUS                 (3ull << 4)
+#define BGX_SMUX_TX_THRESH             0x20180
+#define BGX_SMUX_CTL                   0x20200
+#define  SMU_CTL_RX_IDLE                       BIT_ULL(0)
+#define  SMU_CTL_TX_IDLE                       BIT_ULL(1)
+
+#define BGX_GMP_PCS_MRX_CTL            0x30000
+#define         PCS_MRX_CTL_RST_AN                     BIT_ULL(9)
+#define         PCS_MRX_CTL_PWR_DN                     BIT_ULL(11)
+#define         PCS_MRX_CTL_AN_EN                      BIT_ULL(12)
+#define         PCS_MRX_CTL_LOOPBACK1                  BIT_ULL(14)
+#define         PCS_MRX_CTL_RESET                      BIT_ULL(15)
+#define BGX_GMP_PCS_MRX_STATUS         0x30008
+#define         PCS_MRX_STATUS_AN_CPT                  BIT_ULL(5)
+#define BGX_GMP_PCS_ANX_AN_RESULTS     0x30020
+#define BGX_GMP_PCS_SGM_AN_ADV         0x30068
+#define BGX_GMP_PCS_MISCX_CTL          0x30078
+#define  PCS_MISC_CTL_GMX_ENO                  BIT_ULL(11)
+#define  PCS_MISC_CTL_SAMP_PT_MASK     0x7Full
+#define BGX_GMP_GMI_PRTX_CFG           0x38020
+#define  GMI_PORT_CFG_SPEED                    BIT_ULL(1)
+#define  GMI_PORT_CFG_DUPLEX                   BIT_ULL(2)
+#define  GMI_PORT_CFG_SLOT_TIME                        BIT_ULL(3)
+#define  GMI_PORT_CFG_SPEED_MSB                        BIT_ULL(8)
+#define BGX_GMP_GMI_RXX_JABBER         0x38038
+#define BGX_GMP_GMI_TXX_THRESH         0x38210
+#define BGX_GMP_GMI_TXX_APPEND         0x38218
+#define BGX_GMP_GMI_TXX_SLOT           0x38220
+#define BGX_GMP_GMI_TXX_BURST          0x38228
+#define BGX_GMP_GMI_TXX_MIN_PKT                0x38240
+#define BGX_GMP_GMI_TXX_SGMII_CTL      0x38300
+
+#define BGX_MSIX_VEC_0_29_ADDR         0x400000 /* +(0..29) << 4 */
+#define BGX_MSIX_VEC_0_29_CTL          0x400008
+#define BGX_MSIX_PBA_0                 0x4F0000
+
+/* MSI-X interrupts */
+#define BGX_MSIX_VECTORS       30
+#define BGX_LMAC_VEC_OFFSET    7
+#define BGX_MSIX_VEC_SHIFT     4
+
+#define CMRX_INT               0
+#define SPUX_INT               1
+#define SMUX_RX_INT            2
+#define SMUX_TX_INT            3
+#define GMPX_PCS_INT           4
+#define GMPX_GMI_RX_INT                5
+#define GMPX_GMI_TX_INT                6
+#define CMR_MEM_INT            28
+#define SPU_MEM_INT            29
+
+#define LMAC_INTR_LINK_UP      BIT(0)
+#define LMAC_INTR_LINK_DOWN    BIT(1)
+
+/* RX_DMAC_CTL configuration */
+enum MCAST_MODE {
+       MCAST_MODE_REJECT,
+       MCAST_MODE_ACCEPT,
+       MCAST_MODE_CAM_FILTER,
+       RSVD
+};
+
+#define BCAST_ACCEPT   1
+#define CAM_ACCEPT     1
+
+void octeon_mdiobus_force_mod_depencency(void);
+void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
+unsigned bgx_get_map(int node);
+int bgx_get_lmac_count(int node, int bgx);
+const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid);
+void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac);
+void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status);
+void bgx_lmac_internal_loopback(int node, int bgx_idx,
+                               int lmac_idx, bool enable);
+u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx);
+u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx);
+#define BGX_RX_STATS_COUNT 11
+#define BGX_TX_STATS_COUNT 18
+
+struct bgx_stats {
+       u64 rx_stats[BGX_RX_STATS_COUNT];
+       u64 tx_stats[BGX_TX_STATS_COUNT];
+};
+
+#define BGX_IN_PROMISCUOUS_MODE 1
+
+enum LMAC_TYPE {
+       BGX_MODE_SGMII = 0, /* 1 lane, 1.250 Gbaud */
+       BGX_MODE_XAUI = 1,  /* 4 lanes, 3.125 Gbaud */
+       BGX_MODE_DXAUI = 1, /* 4 lanes, 6.250 Gbaud */
+       BGX_MODE_RXAUI = 2, /* 2 lanes, 6.250 Gbaud */
+       BGX_MODE_XFI = 3,   /* 1 lane, 10.3125 Gbaud */
+       BGX_MODE_XLAUI = 4, /* 4 lanes, 10.3125 Gbaud */
+       BGX_MODE_10G_KR = 3,/* 1 lane, 10.3125 Gbaud */
+       BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */
+};
+
+enum qlm_mode {
+       QLM_MODE_SGMII,         /* SGMII, each lane independent */
+       QLM_MODE_XAUI_1X4,      /* 1 XAUI or DXAUI, 4 lanes */
+       QLM_MODE_RXAUI_2X2,     /* 2 RXAUI, 2 lanes each */
+       QLM_MODE_XFI_4X1,       /* 4 XFI, 1 lane each */
+       QLM_MODE_XLAUI_1X4,     /* 1 XLAUI, 4 lanes each */
+       QLM_MODE_10G_KR_4X1,    /* 4 10GBASE-KR, 1 lane each */
+       QLM_MODE_40G_KR4_1X4,   /* 1 40GBASE-KR4, 4 lanes each */
+};
+
+#endif /* THUNDER_BGX_H */