win-pvdrivers

changeset 875:8d7560d67376

xennet rx performance increases. Not well tested yet, but measurably faster and more scalable under SMP.
author James Harper <james.harper@bendigoit.com.au>
date Sun Mar 13 10:13:35 2011 +1100 (2011-03-13)
parents f5a973a8a10b
children 7ec9138621d6
files xennet/xennet.h xennet/xennet_rx.c
line diff
     1.1 --- a/xennet/xennet.h	Fri Mar 11 12:38:07 2011 +1100
     1.2 +++ b/xennet/xennet.h	Sun Mar 13 10:13:35 2011 +1100
     1.3 @@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fi
     1.4  #include <ndis.h>
     1.5  #define NTSTRSAFE_LIB
     1.6  #include <ntstrsafe.h>
     1.7 +#include <liblfds.h>
     1.8  
     1.9  #define VENDOR_DRIVER_VERSION_MAJOR 0
    1.10  #define VENDOR_DRIVER_VERSION_MINOR 10
    1.11 @@ -177,16 +178,21 @@ SET_NET_ULONG(PVOID ptr, ULONG data)
    1.12  
    1.13  #define LINUX_MAX_SG_ELEMENTS 19
    1.14  
    1.15 -typedef struct
    1.16 +struct _shared_buffer_t;
    1.17 +
    1.18 +typedef struct _shared_buffer_t shared_buffer_t;
    1.19 +
    1.20 +struct _shared_buffer_t
    1.21  {
    1.22 -  PVOID next;
    1.23 +  struct netif_rx_response rsp;
    1.24 +  shared_buffer_t *next;
    1.25    grant_ref_t gref;
    1.26    USHORT offset;
    1.27    PVOID virtual;
    1.28    PNDIS_BUFFER buffer;
    1.29 -  USHORT id;
    1.30 -  USHORT ref_count;
    1.31 -} shared_buffer_t;
    1.32 +  //USHORT id;
    1.33 +  volatile LONG ref_count;
    1.34 +};
    1.35  
    1.36  typedef struct
    1.37  {
    1.38 @@ -221,8 +227,6 @@ typedef struct {
    1.39    USHORT tcp_length;
    1.40    USHORT tcp_remaining;
    1.41    ULONG tcp_seq;
    1.42 -  BOOLEAN extra_info;
    1.43 -  BOOLEAN more_frags;
    1.44    /* anything past here doesn't get cleared automatically by the ClearPacketInfo */
    1.45    UCHAR header_data[MAX_LOOKAHEAD_LENGTH + MAX_ETH_HEADER_LENGTH];
    1.46  } packet_info_t;
    1.47 @@ -284,10 +288,6 @@ struct xennet_info
    1.48    ULONG tx_outstanding;
    1.49    ULONG tx_id_free;
    1.50    USHORT tx_id_list[NET_TX_RING_SIZE];
    1.51 -  //ULONG tx_cb_free;
    1.52 -  //ULONG tx_cb_list[TX_COALESCE_BUFFERS];
    1.53 -  //ULONG tx_cb_size;
    1.54 -  //shared_buffer_t tx_cbs[TX_COALESCE_BUFFERS];
    1.55    KDPC tx_dpc;
    1.56    NPAGED_LOOKASIDE_LIST tx_lookaside_list;
    1.57  
    1.58 @@ -295,9 +295,7 @@ struct xennet_info
    1.59    KSPIN_LOCK rx_lock;
    1.60    struct netif_rx_front_ring rx;
    1.61    ULONG rx_id_free;
    1.62 -  packet_info_t rxpi;
    1.63 -  PNDIS_PACKET rx_packet_list[NET_RX_RING_SIZE * 2];
    1.64 -  ULONG rx_packet_free;
    1.65 +  packet_info_t *rxpi;
    1.66    KEVENT packet_returned_event;
    1.67    //NDIS_MINIPORT_TIMER rx_timer;
    1.68    KDPC rx_dpc;
    1.69 @@ -305,16 +303,17 @@ struct xennet_info
    1.70    KDPC rx_timer_dpc;
    1.71    NDIS_HANDLE rx_packet_pool;
    1.72    NDIS_HANDLE rx_buffer_pool;
    1.73 -  ULONG rx_pb_free;
    1.74 -#define RX_PAGE_BUFFERS (NET_RX_RING_SIZE * 2)
    1.75 -  ULONG rx_pb_list[RX_PAGE_BUFFERS];
    1.76 -  shared_buffer_t rx_pbs[RX_PAGE_BUFFERS];
    1.77 -  USHORT rx_ring_pbs[NET_RX_RING_SIZE];
    1.78 +  volatile LONG rx_pb_free;
    1.79 +  struct stack_state *rx_pb_stack;
    1.80 +  shared_buffer_t *rx_ring_pbs[NET_RX_RING_SIZE];
    1.81    NPAGED_LOOKASIDE_LIST rx_lookaside_list;
    1.82    /* Receive-ring batched refills. */
    1.83    ULONG rx_target;
    1.84    ULONG rx_max_target;
    1.85    ULONG rx_min_target;
    1.86 +  shared_buffer_t *rx_partial_buf;
    1.87 +  BOOLEAN rx_partial_extra_info_flag ;
    1.88 +  BOOLEAN rx_partial_more_data_flag;
    1.89  
    1.90    /* how many packets are in the net stack atm */
    1.91    ULONG rx_outstanding;
     2.1 --- a/xennet/xennet_rx.c	Fri Mar 11 12:38:07 2011 +1100
     2.2 +++ b/xennet/xennet_rx.c	Sun Mar 13 10:13:35 2011 +1100
     2.3 @@ -25,20 +25,55 @@ Foundation, Inc., 51 Franklin Street, Fi
     2.4  static KDEFERRED_ROUTINE XenNet_RxBufferCheck;
     2.5  #endif
     2.6  
     2.7 +LONG rx_pb_outstanding = 0;
     2.8 +
     2.9  static __inline shared_buffer_t *
    2.10  get_pb_from_freelist(struct xennet_info *xi)
    2.11  {
    2.12 +  NDIS_STATUS status;
    2.13    shared_buffer_t *pb;
    2.14 -  
    2.15 -  if (xi->rx_pb_free == 0)
    2.16 +  PVOID ptr_ref;
    2.17 +
    2.18 +  if (stack_pop(xi->rx_pb_stack, &ptr_ref))
    2.19    {
    2.20 -    //KdPrint((__DRIVER_NAME "     Out of pb's\n"));    
    2.21 +    pb = ptr_ref;
    2.22 +    pb->ref_count = 1;
    2.23 +    InterlockedDecrement(&xi->rx_pb_free);
    2.24 +    InterlockedIncrement(&rx_pb_outstanding);
    2.25 +    return pb;
    2.26 +  }
    2.27 +
    2.28 +  status = NdisAllocateMemoryWithTag(&pb, sizeof(shared_buffer_t), XENNET_POOL_TAG);
    2.29 +  if (status != STATUS_SUCCESS)
    2.30 +  {
    2.31      return NULL;
    2.32    }
    2.33 -  xi->rx_pb_free--;
    2.34 -
    2.35 -  pb = &xi->rx_pbs[xi->rx_pb_list[xi->rx_pb_free]];
    2.36 -  pb->ref_count++;
    2.37 +  status = NdisAllocateMemoryWithTag(&pb->virtual, PAGE_SIZE, XENNET_POOL_TAG);
    2.38 +  if (status != STATUS_SUCCESS)
    2.39 +  {
    2.40 +    NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
    2.41 +    return NULL;
    2.42 +  }
    2.43 +  pb->gref = (grant_ref_t)xi->vectors.GntTbl_GrantAccess(xi->vectors.context, 0,
    2.44 +            (ULONG)(MmGetPhysicalAddress(pb->virtual).QuadPart >> PAGE_SHIFT), FALSE, INVALID_GRANT_REF, (ULONG)'XNRX');
    2.45 +  if (pb->gref == INVALID_GRANT_REF)
    2.46 +  {
    2.47 +    NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
    2.48 +    NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
    2.49 +    return NULL;
    2.50 +  }
    2.51 +  pb->offset = (USHORT)(ULONG_PTR)pb->virtual & (PAGE_SIZE - 1);
    2.52 +  NdisAllocateBuffer(&status, &pb->buffer, xi->rx_buffer_pool, (PUCHAR)pb->virtual, PAGE_SIZE);
    2.53 +  if (status != STATUS_SUCCESS)
    2.54 +  {
    2.55 +    xi->vectors.GntTbl_EndAccess(xi->vectors.context,
    2.56 +        pb->gref, FALSE, (ULONG)'XNRX');
    2.57 +    NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
    2.58 +    NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
    2.59 +    return NULL;
    2.60 +  }
    2.61 +  InterlockedIncrement(&rx_pb_outstanding);
    2.62 +  pb->ref_count = 1;
    2.63    return pb;
    2.64  }
    2.65  
    2.66 @@ -46,21 +81,20 @@ static __inline VOID
    2.67  ref_pb(struct xennet_info *xi, shared_buffer_t *pb)
    2.68  {
    2.69    UNREFERENCED_PARAMETER(xi);
    2.70 -  pb->ref_count++;
    2.71 -  //KdPrint((__DRIVER_NAME "     incremented pb %p ref to %d\n", pb, pb->ref_count));
    2.72 +  InterlockedIncrement(&pb->ref_count);
    2.73  }
    2.74  
    2.75  static __inline VOID
    2.76  put_pb_on_freelist(struct xennet_info *xi, shared_buffer_t *pb)
    2.77  {
    2.78 -  pb->ref_count--;
    2.79 -  if (pb->ref_count == 0)
    2.80 +  if (InterlockedDecrement(&pb->ref_count) == 0)
    2.81    {
    2.82      NdisAdjustBufferLength(pb->buffer, PAGE_SIZE);
    2.83      NDIS_BUFFER_LINKAGE(pb->buffer) = NULL;
    2.84      pb->next = NULL;
    2.85 -    xi->rx_pb_list[xi->rx_pb_free] = pb->id;
    2.86 -    xi->rx_pb_free++;
    2.87 +    stack_push(xi->rx_pb_stack, pb);
    2.88 +    InterlockedIncrement(&xi->rx_pb_free);
    2.89 +    InterlockedDecrement(&rx_pb_outstanding);
    2.90    }
    2.91  }
    2.92  
    2.93 @@ -90,15 +124,15 @@ XenNet_FillRing(struct xennet_info *xi)
    2.94      page_buf = get_pb_from_freelist(xi);
    2.95      if (!page_buf)
    2.96      {
    2.97 -      //KdPrint((__DRIVER_NAME "     Added %d out of %d buffers to rx ring (no free pages)\n", i, batch_target));
    2.98 +      KdPrint((__DRIVER_NAME "     Added %d out of %d buffers to rx ring (no free pages)\n", i, batch_target));
    2.99        break;
   2.100      }
   2.101      xi->rx_id_free--;
   2.102  
   2.103      /* Give to netback */
   2.104      id = (USHORT)((req_prod + i) & (NET_RX_RING_SIZE - 1));
   2.105 -    ASSERT(xi->rx_ring_pbs[id] == (USHORT)0xFFFF);
   2.106 -    xi->rx_ring_pbs[id] = page_buf->id;
   2.107 +    ASSERT(xi->rx_ring_pbs[id] == NULL);
   2.108 +    xi->rx_ring_pbs[id] = page_buf;
   2.109      req = RING_GET_REQUEST(&xi->rx, req_prod + i);
   2.110      req->id = id;
   2.111      req->gref = page_buf->gref;
   2.112 @@ -117,72 +151,52 @@ XenNet_FillRing(struct xennet_info *xi)
   2.113    return NDIS_STATUS_SUCCESS;
   2.114  }
   2.115  
   2.116 +LONG total_allocated_packets = 0;
   2.117 +LARGE_INTEGER last_print_time;
   2.118 +
   2.119 +/* lock free */
   2.120  static PNDIS_PACKET
   2.121  get_packet_from_freelist(struct xennet_info *xi)
   2.122  {
   2.123    NDIS_STATUS status;
   2.124    PNDIS_PACKET packet;
   2.125  
   2.126 -  //ASSERT(!KeTestSpinLock(&xi->rx_lock));
   2.127 -
   2.128 -  if (!xi->rx_packet_free)
   2.129 +  NdisAllocatePacket(&status, &packet, xi->rx_packet_pool);
   2.130 +  if (status != NDIS_STATUS_SUCCESS)
   2.131    {
   2.132 -    NdisAllocatePacket(&status, &packet, xi->rx_packet_pool);
   2.133 -    if (status != NDIS_STATUS_SUCCESS)
   2.134 -    {
   2.135 -      KdPrint((__DRIVER_NAME "     cannot allocate packet\n"));
   2.136 -      return NULL;
   2.137 -    }
   2.138 -    NDIS_SET_PACKET_HEADER_SIZE(packet, XN_HDR_SIZE);
   2.139 -    NdisZeroMemory(packet->MiniportReservedEx, sizeof(packet->MiniportReservedEx));
   2.140 +    KdPrint((__DRIVER_NAME "     cannot allocate packet\n"));
   2.141 +    return NULL;
   2.142    }
   2.143 -  else
   2.144 -  {
   2.145 -    xi->rx_packet_free--;
   2.146 -    packet = xi->rx_packet_list[xi->rx_packet_free];
   2.147 -  }
   2.148 +  NDIS_SET_PACKET_HEADER_SIZE(packet, XN_HDR_SIZE);
   2.149 +  NdisZeroMemory(packet->MiniportReservedEx, sizeof(packet->MiniportReservedEx));
   2.150 +  InterlockedIncrement(&total_allocated_packets);
   2.151    return packet;
   2.152  }
   2.153  
   2.154 +/* lock free */
   2.155  static VOID
   2.156  put_packet_on_freelist(struct xennet_info *xi, PNDIS_PACKET packet)
   2.157  {
   2.158 -  PNDIS_TCP_IP_CHECKSUM_PACKET_INFO csum_info;
   2.159 -  //ASSERT(!KeTestSpinLock(&xi->rx_lock));
   2.160 +  LARGE_INTEGER current_time;
   2.161  
   2.162 -  if (xi->rx_packet_free == NET_RX_RING_SIZE * 2)
   2.163 +  InterlockedDecrement(&total_allocated_packets);
   2.164 +  NdisFreePacket(packet);
   2.165 +  KeQuerySystemTime(&current_time);
   2.166 +  if ((int)total_allocated_packets < 0 || (current_time.QuadPart - last_print_time.QuadPart) / 10000 > 1000)
   2.167    {
   2.168 -    //KdPrint((__DRIVER_NAME "     packet free list full - releasing packet\n"));
   2.169 -    NdisFreePacket(packet);
   2.170 -    return;
   2.171 -  }
   2.172 -  csum_info = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&NDIS_PER_PACKET_INFO_FROM_PACKET(
   2.173 -    packet, TcpIpChecksumPacketInfo);
   2.174 -  csum_info->Value = 0;
   2.175 -  NdisZeroMemory(packet->MiniportReservedEx, sizeof(packet->MiniportReservedEx));
   2.176 -  xi->rx_packet_list[xi->rx_packet_free] = packet;
   2.177 -  xi->rx_packet_free++;
   2.178 -}
   2.179 -
   2.180 -static VOID
   2.181 -packet_freelist_dispose(struct xennet_info *xi)
   2.182 -{
   2.183 -  while(xi->rx_packet_free != 0)
   2.184 -  {
   2.185 -    xi->rx_packet_free--;
   2.186 -    NdisFreePacket(xi->rx_packet_list[xi->rx_packet_free]);
   2.187 +    last_print_time.QuadPart = current_time.QuadPart;
   2.188 +    KdPrint(("total_allocated_packets = %d, rx_pb_outstanding = %d, rx_pb_free = %d\n", total_allocated_packets, rx_pb_outstanding, xi->rx_pb_free));
   2.189    }
   2.190  }
   2.191  
   2.192  static PNDIS_PACKET
   2.193 -XenNet_MakePacket(struct xennet_info *xi)
   2.194 +XenNet_MakePacket(struct xennet_info *xi, packet_info_t *pi)
   2.195  {
   2.196    NDIS_STATUS status;
   2.197    PNDIS_PACKET packet;
   2.198    PNDIS_BUFFER out_buffer;
   2.199    USHORT new_ip4_length;
   2.200    PUCHAR header_va;
   2.201 -  packet_info_t *pi = &xi->rxpi;
   2.202    ULONG out_remaining;
   2.203    ULONG tcp_length;
   2.204    ULONG header_extra;
   2.205 @@ -270,7 +284,7 @@ XenNet_MakePacket(struct xennet_info *xi
   2.206      NdisQueryBufferOffset(pi->curr_buffer, &in_buffer_offset, &in_buffer_length);
   2.207      out_length = min(out_remaining, in_buffer_length - pi->curr_mdl_offset);
   2.208      NdisCopyBuffer(&status, &out_buffer, xi->rx_buffer_pool, pi->curr_buffer, pi->curr_mdl_offset, out_length);
   2.209 -    //TODO: check status
   2.210 +    ASSERT(status == STATUS_SUCCESS); //TODO: properly handle error
   2.211      NdisChainBufferAtBack(packet, out_buffer);
   2.212      ref_pb(xi, pi->curr_pb);
   2.213      pi->curr_mdl_offset = (USHORT)(pi->curr_mdl_offset + out_length);
   2.214 @@ -290,6 +304,7 @@ XenNet_MakePacket(struct xennet_info *xi
   2.215    if (header_extra > 0)
   2.216      pi->header_length -= header_extra;
   2.217    xi->rx_outstanding++;
   2.218 +  ASSERT(*(shared_buffer_t **)&packet->MiniportReservedEx[0]);
   2.219    //FUNCTION_EXIT();
   2.220    return packet;
   2.221  }
   2.222 @@ -431,7 +446,8 @@ XenNet_SumPacketData(
   2.223  static ULONG
   2.224  XenNet_MakePackets(
   2.225    struct xennet_info *xi,
   2.226 -  PLIST_ENTRY rx_packet_list
   2.227 +  PLIST_ENTRY rx_packet_list,
   2.228 +  packet_info_t *pi
   2.229  )
   2.230  {
   2.231    ULONG packet_count = 0;
   2.232 @@ -440,7 +456,6 @@ XenNet_MakePackets(
   2.233    UCHAR psh;
   2.234    PNDIS_TCP_IP_CHECKSUM_PACKET_INFO csum_info;
   2.235    ULONG parse_result;  
   2.236 -  packet_info_t *pi = &xi->rxpi;
   2.237    //PNDIS_BUFFER buffer;
   2.238    shared_buffer_t *page_buf;
   2.239  
   2.240 @@ -460,7 +475,7 @@ XenNet_MakePackets(
   2.241        break;
   2.242      // fallthrough
   2.243    case 17:  // UDP
   2.244 -    packet = XenNet_MakePacket(xi);
   2.245 +    packet = XenNet_MakePacket(xi, pi);
   2.246      if (packet == NULL)
   2.247      {
   2.248        KdPrint((__DRIVER_NAME "     Ran out of packets\n"));
   2.249 @@ -519,7 +534,7 @@ XenNet_MakePackets(
   2.250      packet_count = 1;
   2.251      goto done;
   2.252    default:
   2.253 -    packet = XenNet_MakePacket(xi);
   2.254 +    packet = XenNet_MakePacket(xi, pi);
   2.255      if (packet == NULL)
   2.256      {
   2.257        KdPrint((__DRIVER_NAME "     Ran out of packets\n"));
   2.258 @@ -543,7 +558,7 @@ XenNet_MakePackets(
   2.259      PMDL mdl;
   2.260      UINT total_length;
   2.261      UINT buffer_length;
   2.262 -    packet = XenNet_MakePacket(xi);
   2.263 +    packet = XenNet_MakePacket(xi, pi);
   2.264      if (!packet)
   2.265      {
   2.266        KdPrint((__DRIVER_NAME "     Ran out of packets\n"));
   2.267 @@ -596,11 +611,11 @@ XenNet_RxQueueDpcSynchronized(PVOID cont
   2.268  }
   2.269  
   2.270  #define MAXIMUM_PACKETS_PER_INDICATE 32
   2.271 -/*
   2.272 -We limit the number of packets per interrupt so that acks get a chance
   2.273 +
   2.274 +/* We limit the number of packets per interrupt so that acks get a chance
   2.275  under high rx load. The DPC is immediately re-scheduled */
   2.276 -
   2.277 -#define MAX_PACKETS_PER_INTERRUPT 64
   2.278 +/* this isn't actually done right now */
   2.279 +#define MAX_BUFFERS_PER_INTERRUPT 256
   2.280  
   2.281  // Called at DISPATCH_LEVEL
   2.282  static VOID
   2.283 @@ -612,13 +627,18 @@ XenNet_RxBufferCheck(PKDPC dpc, PVOID co
   2.284    PLIST_ENTRY entry;
   2.285    PNDIS_PACKET packets[MAXIMUM_PACKETS_PER_INDICATE];
   2.286    ULONG packet_count = 0;
   2.287 -  struct netif_rx_response *rxrsp = NULL;
   2.288 +  ULONG buffer_count = 0;
   2.289    struct netif_extra_info *ei;
   2.290    USHORT id;
   2.291    int more_to_do = FALSE;
   2.292 -  packet_info_t *pi = &xi->rxpi;
   2.293 +  packet_info_t *pi = &xi->rxpi[KeGetCurrentProcessorNumber() & 0xff];
   2.294    //NDIS_STATUS status;
   2.295    shared_buffer_t *page_buf;
   2.296 +  shared_buffer_t *head_buf = NULL;
   2.297 +  shared_buffer_t *tail_buf = NULL;
   2.298 +  shared_buffer_t *last_buf = NULL;
   2.299 +  BOOLEAN extra_info_flag = FALSE;
   2.300 +  BOOLEAN more_data_flag = FALSE;
   2.301    PNDIS_BUFFER buffer;
   2.302  
   2.303    UNREFERENCED_PARAMETER(dpc);
   2.304 @@ -630,6 +650,10 @@ XenNet_RxBufferCheck(PKDPC dpc, PVOID co
   2.305    if (!xi->connected)
   2.306      return; /* a delayed DPC could let this come through... just do nothing */
   2.307  
   2.308 +  InitializeListHead(&rx_packet_list);
   2.309 +
   2.310 +  /* get all the buffers off the ring as quickly as possible so the lock is held for a minimum amount of time */
   2.311 +
   2.312    KeAcquireSpinLockAtDpcLevel(&xi->rx_lock);
   2.313    
   2.314    if (xi->rx_shutting_down)
   2.315 @@ -638,107 +662,74 @@ XenNet_RxBufferCheck(PKDPC dpc, PVOID co
   2.316      KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);
   2.317      return;
   2.318    }
   2.319 -  InitializeListHead(&rx_packet_list);
   2.320 +
   2.321 +  if (xi->rx_partial_buf)
   2.322 +  {
   2.323 +    head_buf = xi->rx_partial_buf;
   2.324 +    tail_buf = xi->rx_partial_buf;
   2.325 +    while (tail_buf->next)
   2.326 +      tail_buf = tail_buf->next;
   2.327 +    more_data_flag = xi->rx_partial_more_data_flag;
   2.328 +    extra_info_flag = xi->rx_partial_extra_info_flag;
   2.329 +    xi->rx_partial_buf = NULL;
   2.330 +  }
   2.331  
   2.332    do {
   2.333      prod = xi->rx.sring->rsp_prod;
   2.334 -//KdPrint((__DRIVER_NAME "     prod - cons = %d\n", prod - xi->rx.rsp_cons));    
   2.335      KeMemoryBarrier(); /* Ensure we see responses up to 'prod'. */
   2.336  
   2.337 -    for (cons = xi->rx.rsp_cons; cons != prod && packet_count < MAX_PACKETS_PER_INTERRUPT; cons++)
   2.338 +    for (cons = xi->rx.rsp_cons; cons != prod; cons++)
   2.339      {
   2.340        id = (USHORT)(cons & (NET_RX_RING_SIZE - 1));
   2.341 -      ASSERT(xi->rx_ring_pbs[id] != (USHORT)0xFFFF);
   2.342 -      page_buf = &xi->rx_pbs[xi->rx_ring_pbs[id]];
   2.343 -      xi->rx_ring_pbs[id] = 0xFFFF;
   2.344 +      page_buf = xi->rx_ring_pbs[id];
   2.345 +      ASSERT(page_buf);
   2.346 +      xi->rx_ring_pbs[id] = NULL;
   2.347        xi->rx_id_free++;
   2.348 -      //KdPrint((__DRIVER_NAME "     got page_buf %p with id %d from ring at id %d\n", page_buf, page_buf->id, id));
   2.349 -      if (pi->extra_info)
   2.350 +      memcpy(&page_buf->rsp, RING_GET_RESPONSE(&xi->rx, cons), max(sizeof(struct netif_rx_response), sizeof(struct netif_extra_info)));
   2.351 +      if (!extra_info_flag)
   2.352        {
   2.353 -        //KdPrint((__DRIVER_NAME "     processing extra info\n"));
   2.354 -        put_pb_on_freelist(xi, page_buf);
   2.355 -        ei = (struct netif_extra_info *)RING_GET_RESPONSE(&xi->rx, cons);
   2.356 -        pi->extra_info = (BOOLEAN)!!(ei->flags & XEN_NETIF_EXTRA_FLAG_MORE);
   2.357 -        switch (ei->type)
   2.358 +        if (page_buf->rsp.status <= 0
   2.359 +          || page_buf->rsp.offset + page_buf->rsp.status > PAGE_SIZE)
   2.360          {
   2.361 -        case XEN_NETIF_EXTRA_TYPE_GSO:
   2.362 -          switch (ei->u.gso.type)
   2.363 -          {
   2.364 -          case XEN_NETIF_GSO_TYPE_TCPV4:
   2.365 -            pi->mss = ei->u.gso.size;
   2.366 -            //KdPrint((__DRIVER_NAME "     mss = %d\n", pi->mss));
   2.367 -            // TODO - put this assertion somewhere ASSERT(header_len + pi->mss <= PAGE_SIZE); // this limits MTU to PAGE_SIZE - XN_HEADER_LEN
   2.368 -            break;
   2.369 -          default:
   2.370 -            KdPrint((__DRIVER_NAME "     Unknown GSO type (%d) detected\n", ei->u.gso.type));
   2.371 -            break;
   2.372 -          }
   2.373 -          break;
   2.374 -        default:
   2.375 -          KdPrint((__DRIVER_NAME "     Unknown extra info type (%d) detected\n", ei->type));
   2.376 -          break;
   2.377 +          KdPrint((__DRIVER_NAME "     Error: rsp offset %d, size %d\n",
   2.378 +            page_buf->rsp.offset, page_buf->rsp.status));
   2.379 +          ASSERT(!extra_info_flag);
   2.380 +          put_pb_on_freelist(xi, page_buf);
   2.381 +          continue;
   2.382          }
   2.383        }
   2.384 +      
   2.385 +      if (!head_buf)
   2.386 +      {
   2.387 +        head_buf = page_buf;
   2.388 +        tail_buf = page_buf;
   2.389 +      }
   2.390        else
   2.391        {
   2.392 -        rxrsp = RING_GET_RESPONSE(&xi->rx, cons);
   2.393 -        if (rxrsp->status <= 0
   2.394 -          || rxrsp->offset + rxrsp->status > PAGE_SIZE)
   2.395 -        {
   2.396 -          KdPrint((__DRIVER_NAME "     Error: rxrsp offset %d, size %d\n",
   2.397 -            rxrsp->offset, rxrsp->status));
   2.398 -          ASSERT(!pi->extra_info);
   2.399 -          put_pb_on_freelist(xi, page_buf);
   2.400 -          continue;
   2.401 -        }
   2.402 -        ASSERT(!rxrsp->offset);
   2.403 -        ASSERT(rxrsp->id == id);
   2.404 -        if (!pi->more_frags) // handling the packet's 1st buffer
   2.405 -        {
   2.406 -          if (rxrsp->flags & NETRXF_csum_blank)
   2.407 -            pi->csum_blank = TRUE;
   2.408 -          if (rxrsp->flags & NETRXF_data_validated)
   2.409 -            pi->data_validated = TRUE;
   2.410 -        }
   2.411 -        //NdisAllocateBuffer(&status, &buffer, xi->rx_buffer_pool, (PUCHAR)page_buf->virtual + rxrsp->offset, rxrsp->status);
   2.412 -        //KdPrint((__DRIVER_NAME "     buffer = %p, offset = %d, len = %d\n", buffer, rxrsp->offset, rxrsp->status));
   2.413 -        //ASSERT(status == NDIS_STATUS_SUCCESS); // lazy
   2.414 -        buffer = page_buf->buffer;
   2.415 -        NdisAdjustBufferLength(buffer, rxrsp->status);
   2.416 -        //KdPrint((__DRIVER_NAME "     buffer = %p, pb = %p\n", buffer, page_buf));
   2.417 -        if (pi->first_pb)
   2.418 -        {
   2.419 -          //KdPrint((__DRIVER_NAME "     additional buffer\n"));
   2.420 -          pi->curr_pb->next = page_buf;
   2.421 -          pi->curr_pb = page_buf;
   2.422 -          NDIS_BUFFER_LINKAGE(pi->curr_buffer) = buffer;
   2.423 -          pi->curr_buffer = buffer;
   2.424 -        }
   2.425 -        else
   2.426 -        {
   2.427 -          pi->first_pb = page_buf;
   2.428 -          pi->curr_pb = page_buf;
   2.429 -          pi->first_buffer = buffer;
   2.430 -          pi->curr_buffer = buffer;
   2.431 -        }
   2.432 -        pi->mdl_count++;
   2.433 -        pi->extra_info = (BOOLEAN)!!(rxrsp->flags & NETRXF_extra_info);
   2.434 -        pi->more_frags = (BOOLEAN)!!(rxrsp->flags & NETRXF_more_data);
   2.435 -        pi->total_length = pi->total_length + rxrsp->status;
   2.436 +        tail_buf->next = page_buf;
   2.437 +        tail_buf = page_buf;
   2.438        }
   2.439 +      page_buf->next = NULL;
   2.440  
   2.441 -      /* Packet done, add it to the list */
   2.442 -      if (!pi->more_frags && !pi->extra_info)
   2.443 +      if (extra_info_flag)
   2.444        {
   2.445 -        pi->curr_pb = pi->first_pb;
   2.446 -        pi->curr_buffer = pi->first_buffer;
   2.447 -        packet_count += XenNet_MakePackets(xi, &rx_packet_list);
   2.448 +        ei = (struct netif_extra_info *)&page_buf->rsp;
   2.449 +        extra_info_flag = ei->flags & XEN_NETIF_EXTRA_FLAG_MORE;
   2.450        }
   2.451 +      else
   2.452 +      {
   2.453 +        more_data_flag = page_buf->rsp.flags & NETRXF_more_data;
   2.454 +        extra_info_flag = page_buf->rsp.flags & NETRXF_extra_info;
   2.455 +      }
   2.456 +      
   2.457 +      if (!extra_info_flag && !more_data_flag)
   2.458 +        last_buf = page_buf;
   2.459 +      buffer_count++;
   2.460      }
   2.461      xi->rx.rsp_cons = cons;
   2.462  
   2.463 -    if (packet_count >= MAX_PACKETS_PER_INTERRUPT)
   2.464 -      break;
   2.465 +    /* Give netback more buffers */
   2.466 +    XenNet_FillRing(xi);
   2.467  
   2.468      more_to_do = RING_HAS_UNCONSUMED_RESPONSES(&xi->rx);
   2.469      if (!more_to_do)
   2.470 @@ -748,34 +739,127 @@ XenNet_RxBufferCheck(PKDPC dpc, PVOID co
   2.471        more_to_do = RING_HAS_UNCONSUMED_RESPONSES(&xi->rx);
   2.472      }
   2.473    } while (more_to_do);
   2.474 -
   2.475 -  if (pi->more_frags || pi->extra_info)
   2.476 -    KdPrint((__DRIVER_NAME "     Partial receive (more_frags = %d, extra_info = %d, total_length = %d, mdl_count = %d)\n", pi->more_frags, pi->extra_info, pi->total_length, pi->mdl_count));
   2.477 -
   2.478 -  /* Give netback more buffers */
   2.479 -  XenNet_FillRing(xi);
   2.480 -
   2.481 -  if (packet_count >= MAX_PACKETS_PER_INTERRUPT)
   2.482 +  
   2.483 +  /* anything past last_buf belongs to an incomplete packet... */
   2.484 +  if (last_buf && last_buf->next)
   2.485    {
   2.486 -    /* fire again immediately */
   2.487 -    xi->vectors.EvtChn_Sync(xi->vectors.context, XenNet_RxQueueDpcSynchronized, xi);
   2.488 +    KdPrint((__DRIVER_NAME "     Partial receive\n"));
   2.489 +    xi->rx_partial_buf = last_buf->next;
   2.490 +    xi->rx_partial_more_data_flag = more_data_flag;
   2.491 +    xi->rx_partial_extra_info_flag = extra_info_flag;
   2.492 +    last_buf->next = NULL;
   2.493    }
   2.494  
   2.495 -  //KdPrint((__DRIVER_NAME "     packet_count = %d, page_count = %d, avg_page_count = %d, event = %d\n", packet_count, page_count, xi->avg_page_count / 128, event));
   2.496 -  xi->stat_rx_ok += packet_count;
   2.497 -
   2.498    KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);
   2.499  
   2.500 +#if 0
   2.501 +do this on a timer or something during packet manufacture
   2.502 +  if (buffer_count >= MAX_BUFFERS_PER_INTERRUPT)
   2.503 +  {
   2.504 +    /* fire again immediately */
   2.505 +    KdPrint((__DRIVER_NAME "     Dpc Duration Exceeded\n"));
   2.506 +    KeInsertQueueDpc(&xi->rx_dpc, NULL, NULL);
   2.507 +    //xi->vectors.EvtChn_Sync(xi->vectors.context, XenNet_RxQueueDpcSynchronized, xi);
   2.508 +  }
   2.509 +#endif
   2.510 +
   2.511 +  /* make packets out of the buffers */
   2.512 +  page_buf = head_buf;
   2.513 +  extra_info_flag = FALSE;
   2.514 +  more_data_flag = FALSE;
   2.515 +  while (page_buf)
   2.516 +  {
   2.517 +    shared_buffer_t *next_buf = page_buf->next;
   2.518 +
   2.519 +    page_buf->next = NULL;
   2.520 +    if (extra_info_flag)
   2.521 +    {
   2.522 +      //KdPrint((__DRIVER_NAME "     processing extra info\n"));
   2.523 +      ei = (struct netif_extra_info *)&page_buf->rsp;
   2.524 +      extra_info_flag = ei->flags & XEN_NETIF_EXTRA_FLAG_MORE;
   2.525 +      switch (ei->type)
   2.526 +      {
   2.527 +      case XEN_NETIF_EXTRA_TYPE_GSO:
   2.528 +        switch (ei->u.gso.type)
   2.529 +        {
   2.530 +        case XEN_NETIF_GSO_TYPE_TCPV4:
   2.531 +          pi->mss = ei->u.gso.size;
   2.532 +          //KdPrint((__DRIVER_NAME "     mss = %d\n", pi->mss));
   2.533 +          // TODO - put this assertion somewhere ASSERT(header_len + pi->mss <= PAGE_SIZE); // this limits MTU to PAGE_SIZE - XN_HEADER_LEN
   2.534 +          break;
   2.535 +        default:
   2.536 +          KdPrint((__DRIVER_NAME "     Unknown GSO type (%d) detected\n", ei->u.gso.type));
   2.537 +          break;
   2.538 +        }
   2.539 +        break;
   2.540 +      default:
   2.541 +        KdPrint((__DRIVER_NAME "     Unknown extra info type (%d) detected\n", ei->type));
   2.542 +        break;
   2.543 +      }
   2.544 +      put_pb_on_freelist(xi, page_buf);
   2.545 +    }
   2.546 +    else
   2.547 +    {
   2.548 +      ASSERT(!page_buf->rsp.offset);
   2.549 +      if (!more_data_flag) // handling the packet's 1st buffer
   2.550 +      {
   2.551 +        if (page_buf->rsp.flags & NETRXF_csum_blank)
   2.552 +          pi->csum_blank = TRUE;
   2.553 +        if (page_buf->rsp.flags & NETRXF_data_validated)
   2.554 +          pi->data_validated = TRUE;
   2.555 +      }
   2.556 +      buffer = page_buf->buffer;
   2.557 +      NdisAdjustBufferLength(buffer, page_buf->rsp.status);
   2.558 +      //KdPrint((__DRIVER_NAME "     buffer = %p, pb = %p\n", buffer, page_buf));
   2.559 +      if (pi->first_pb)
   2.560 +      {
   2.561 +        ASSERT(pi->curr_pb);
   2.562 +        //KdPrint((__DRIVER_NAME "     additional buffer\n"));
   2.563 +        pi->curr_pb->next = page_buf;
   2.564 +        pi->curr_pb = page_buf;
   2.565 +        ASSERT(pi->curr_buffer);
   2.566 +        NDIS_BUFFER_LINKAGE(pi->curr_buffer) = buffer;
   2.567 +        pi->curr_buffer = buffer;
   2.568 +      }
   2.569 +      else
   2.570 +      {
   2.571 +        pi->first_pb = page_buf;
   2.572 +        pi->curr_pb = page_buf;
   2.573 +        pi->first_buffer = buffer;
   2.574 +        pi->curr_buffer = buffer;
   2.575 +      }
   2.576 +      pi->mdl_count++;
   2.577 +      extra_info_flag = page_buf->rsp.flags & NETRXF_extra_info;
   2.578 +      more_data_flag = page_buf->rsp.flags & NETRXF_more_data;
   2.579 +      pi->total_length = pi->total_length + page_buf->rsp.status;
   2.580 +    }
   2.581 +
   2.582 +    /* Packet done, add it to the list */
   2.583 +    if (!more_data_flag && !extra_info_flag)
   2.584 +    {
   2.585 +      pi->curr_pb = pi->first_pb;
   2.586 +      pi->curr_buffer = pi->first_buffer;
   2.587 +      XenNet_MakePackets(xi, &rx_packet_list, pi);
   2.588 +    }
   2.589 +
   2.590 +    page_buf = next_buf;
   2.591 +  }
   2.592 +  ASSERT(!more_data_flag && !extra_info_flag);
   2.593 +      
   2.594 +  xi->stat_rx_ok += packet_count;
   2.595 +
   2.596 +  /* indicate packets to NDIS */
   2.597    entry = RemoveHeadList(&rx_packet_list);
   2.598    packet_count = 0;
   2.599    while (entry != &rx_packet_list)
   2.600    {
   2.601      PNDIS_PACKET packet = CONTAINING_RECORD(entry, NDIS_PACKET, MiniportReservedEx[sizeof(PVOID)]);
   2.602 +    ASSERT(*(shared_buffer_t **)&packet->MiniportReservedEx[0]);
   2.603 +
   2.604      packets[packet_count++] = packet;
   2.605      entry = RemoveHeadList(&rx_packet_list);
   2.606      if (packet_count == MAXIMUM_PACKETS_PER_INDICATE || entry == &rx_packet_list)
   2.607      {
   2.608 -      //KdPrint((__DRIVER_NAME "     Indicating\n"));
   2.609        NdisMIndicateReceivePacket(xi->adapter_handle, packets, packet_count);
   2.610        packet_count = 0;
   2.611      }
   2.612 @@ -785,7 +869,7 @@ XenNet_RxBufferCheck(PKDPC dpc, PVOID co
   2.613  
   2.614  /* called at DISPATCH_LEVEL */
   2.615  /* it's okay for return packet to be called while resume_state != RUNNING as the packet will simply be added back to the freelist, the grants will be fixed later */
   2.616 -VOID DDKAPI
   2.617 +VOID
   2.618  XenNet_ReturnPacket(
   2.619    IN NDIS_HANDLE MiniportAdapterContext,
   2.620    IN PNDIS_PACKET Packet
   2.621 @@ -799,9 +883,8 @@ XenNet_ReturnPacket(
   2.622  
   2.623    //KdPrint((__DRIVER_NAME "     page_buf = %p\n", page_buf));
   2.624  
   2.625 -  KeAcquireSpinLockAtDpcLevel(&xi->rx_lock);
   2.626 -
   2.627    NdisUnchainBufferAtFront(Packet, &buffer);
   2.628 +  
   2.629    while (buffer)
   2.630    {
   2.631      shared_buffer_t *next_buf;
   2.632 @@ -825,7 +908,7 @@ XenNet_ReturnPacket(
   2.633      }
   2.634      NdisUnchainBufferAtFront(Packet, &buffer);
   2.635      page_buf = next_buf;
   2.636 -  }  
   2.637 +  }
   2.638  
   2.639    put_packet_on_freelist(xi, Packet);
   2.640    xi->rx_outstanding--;
   2.641 @@ -833,6 +916,8 @@ XenNet_ReturnPacket(
   2.642    if (!xi->rx_outstanding && xi->rx_shutting_down)
   2.643      KeSetEvent(&xi->packet_returned_event, IO_NO_INCREMENT, FALSE);
   2.644  
   2.645 +  KeAcquireSpinLockAtDpcLevel(&xi->rx_lock);
   2.646 +
   2.647    XenNet_FillRing(xi);
   2.648  
   2.649    KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);
   2.650 @@ -851,10 +936,10 @@ XenNet_PurgeRing(struct xennet_info *xi)
   2.651    int i;
   2.652    for (i = 0; i < NET_RX_RING_SIZE; i++)
   2.653    {
   2.654 -    if (xi->rx_ring_pbs[i] != 0xFFFF)
   2.655 +    if (xi->rx_ring_pbs[i] != NULL)
   2.656      {
   2.657 -      put_pb_on_freelist(xi, &xi->rx_pbs[xi->rx_ring_pbs[i]]);
   2.658 -      xi->rx_ring_pbs[i] = 0xFFFF;
   2.659 +      put_pb_on_freelist(xi, xi->rx_ring_pbs[i]);
   2.660 +      xi->rx_ring_pbs[i] = NULL;
   2.661      }
   2.662    }
   2.663  }
   2.664 @@ -892,7 +977,7 @@ XenNet_RxResumeStart(xennet_info_t *xi)
   2.665  VOID
   2.666  XenNet_BufferAlloc(xennet_info_t *xi)
   2.667  {
   2.668 -  NDIS_STATUS status;
   2.669 +  //NDIS_STATUS status;
   2.670    int i;
   2.671    
   2.672    xi->rx_id_free = NET_RX_RING_SIZE;
   2.673 @@ -900,41 +985,10 @@ XenNet_BufferAlloc(xennet_info_t *xi)
   2.674  
   2.675    for (i = 0; i < NET_RX_RING_SIZE; i++)
   2.676    {
   2.677 -    xi->rx_ring_pbs[i] = 0xFFFF;
   2.678 +    xi->rx_ring_pbs[i] = NULL;
   2.679    }
   2.680 -  
   2.681 -  for (i = 0; i < RX_PAGE_BUFFERS; i++)
   2.682 -  {
   2.683 -    xi->rx_pbs[i].id = (USHORT)i;
   2.684 -    status = NdisAllocateMemoryWithTag(&xi->rx_pbs[i].virtual, PAGE_SIZE, XENNET_POOL_TAG);
   2.685 -    if (status != STATUS_SUCCESS)
   2.686 -    {
   2.687 -      break;
   2.688 -    }
   2.689 -    xi->rx_pbs[i].gref = (grant_ref_t)xi->vectors.GntTbl_GrantAccess(xi->vectors.context, 0,
   2.690 -              (ULONG)(MmGetPhysicalAddress(xi->rx_pbs[i].virtual).QuadPart >> PAGE_SHIFT), FALSE, INVALID_GRANT_REF, (ULONG)'XNRX');
   2.691 -    if (xi->rx_pbs[i].gref == INVALID_GRANT_REF)
   2.692 -    {
   2.693 -      NdisFreeMemory(xi->rx_pbs[i].virtual, PAGE_SIZE, 0);
   2.694 -      break;
   2.695 -    }
   2.696 -    xi->rx_pbs[i].offset = (USHORT)(ULONG_PTR)xi->rx_pbs[i].virtual & (PAGE_SIZE - 1);
   2.697 -    NdisAllocateBuffer(&status, &xi->rx_pbs[i].buffer, xi->rx_buffer_pool, (PUCHAR)xi->rx_pbs[i].virtual, PAGE_SIZE);
   2.698 -    if (status != STATUS_SUCCESS)
   2.699 -    {
   2.700 -      xi->vectors.GntTbl_EndAccess(xi->vectors.context,
   2.701 -          xi->rx_pbs[i].gref, FALSE, (ULONG)'XNRX');
   2.702 -      NdisFreeMemory(xi->rx_pbs[i].virtual, PAGE_SIZE, 0);
   2.703 -      break;
   2.704 -    }
   2.705 -    xi->rx_pbs[i].ref_count = 1; /* when we put it back it will go to zero */
   2.706 -    put_pb_on_freelist(xi, &xi->rx_pbs[i]);
   2.707 -  }
   2.708 -  if (i == 0)
   2.709 -    KdPrint((__DRIVER_NAME "     Unable to allocate any SharedMemory buffers\n"));
   2.710  }
   2.711  
   2.712 -
   2.713  VOID
   2.714  XenNet_RxResumeEnd(xennet_info_t *xi)
   2.715  {
   2.716 @@ -962,9 +1016,18 @@ XenNet_RxInit(xennet_info_t *xi)
   2.717    KeInitializeEvent(&xi->packet_returned_event, SynchronizationEvent, FALSE);
   2.718    KeInitializeTimer(&xi->rx_timer);
   2.719    KeInitializeDpc(&xi->rx_dpc, XenNet_RxBufferCheck, xi);
   2.720 -  KeSetTargetProcessorDpc(&xi->rx_dpc, 0);
   2.721 +  //KeSetTargetProcessorDpc(&xi->rx_dpc, 0);
   2.722    //KeSetImportanceDpc(&xi->rx_dpc, HighImportance);
   2.723    //KeInitializeDpc(&xi->rx_timer_dpc, XenNet_RxTimerDpc, xi);
   2.724 +  status = NdisAllocateMemoryWithTag((PVOID)&xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount(), XENNET_POOL_TAG);
   2.725 +  if (status != NDIS_STATUS_SUCCESS)
   2.726 +  {
   2.727 +    KdPrint(("NdisAllocateMemoryWithTag failed with 0x%x\n", status));
   2.728 +    return FALSE;
   2.729 +  }
   2.730 +  NdisZeroMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount());
   2.731 +
   2.732 +  stack_new(&xi->rx_pb_stack, NET_RX_RING_SIZE * 4);
   2.733  
   2.734    XenNet_BufferAlloc(xi);
   2.735    
   2.736 @@ -1015,10 +1078,10 @@ XenNet_RxShutdown(xennet_info_t *xi)
   2.737  
   2.738    //KeAcquireSpinLock(&xi->rx_lock, &old_irql);
   2.739  
   2.740 +  NdisFreeMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount(), 0);
   2.741 +
   2.742    XenNet_BufferFree(xi);
   2.743  
   2.744 -  packet_freelist_dispose(xi);
   2.745 -
   2.746    NdisFreePacketPool(xi->rx_packet_pool);
   2.747  
   2.748    NdisDeleteNPagedLookasideList(&xi->rx_lookaside_list);