win-pvdrivers

view xennet/xennet.h @ 875:8d7560d67376

xennet rx performance increases. not well tested yet. measurably faster and more scalable under SMP.
author James Harper <james.harper@bendigoit.com.au>
date Sun Mar 13 10:13:35 2011 +1100 (2011-03-13)
parents 182a8e70c562
children 16f6219acf13
line source
1 /*
2 PV Drivers for Windows Xen HVM Domains
3 Copyright (C) 2007 James Harper
4 Copyright (C) 2007 Andrew Grover <andy.grover@oracle.com>
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
21 #pragma warning(disable: 4201)
22 #pragma warning(disable: 4214)
24 #define DDKAPI
25 #include <ntddk.h>
26 #include <wdm.h>
27 #define NDIS_MINIPORT_DRIVER
28 #if NTDDI_VERSION < NTDDI_WINXP
29 # define NDIS50_MINIPORT 1
30 #else
31 # define NDIS51_MINIPORT 1
32 #endif
33 #include <ndis.h>
34 #define NTSTRSAFE_LIB
35 #include <ntstrsafe.h>
36 #include <liblfds.h>
38 #define VENDOR_DRIVER_VERSION_MAJOR 0
39 #define VENDOR_DRIVER_VERSION_MINOR 10
41 #define VENDOR_DRIVER_VERSION (((VENDOR_DRIVER_VERSION_MAJOR) << 16) | (VENDOR_DRIVER_VERSION_MINOR))
43 #define __DRIVER_NAME "XenNet"
45 #include <xen_windows.h>
46 #include <memory.h>
47 #include <grant_table.h>
48 #include <event_channel.h>
49 #include <hvm/params.h>
50 #include <hvm/hvm_op.h>
51 #include <xen_public.h>
52 #include <io/ring.h>
53 #include <io/netif.h>
54 #include <io/xenbus.h>
55 #include <stdlib.h>
/* Pool tag passed to tagged allocations made by this driver. */
#define XENNET_POOL_TAG ((ULONG) 'XenN')

/* Xen macros use these, so they need to be redefined to Win equivs */
#define wmb() KeMemoryBarrier()
#define mb() KeMemoryBarrier()

/* Grant reference value treated as "no grant". */
#define GRANT_INVALID_REF 0

#define NAME_SIZE 64

/* Length in bytes of an Ethernet MAC address. */
#define ETH_ALEN 6
69 /*
70 #define __NET_USHORT_BYTE_0(x) ((USHORT)(x & 0xFF))
71 #define __NET_USHORT_BYTE_1(x) ((USHORT)((PUCHAR)&x)[1] & 0xFF)
73 #define GET_NET_USHORT(x) ((__NET_USHORT_BYTE_0(x) << 8) | __NET_USHORT_BYTE_1(x))
74 #define SET_NET_USHORT(y, x) *((USHORT *)&(y)) = ((__NET_USHORT_BYTE_0(x) << 8) | __NET_USHORT_BYTE_1(x))
75 */
77 static FORCEINLINE USHORT
78 GET_NET_USHORT(USHORT data)
79 {
80 return (data << 8) | (data >> 8);
81 }
83 static FORCEINLINE USHORT
84 GET_NET_PUSHORT(PVOID pdata)
85 {
86 return (*((PUSHORT)pdata) << 8) | (*((PUSHORT)pdata) >> 8);
87 }
89 static FORCEINLINE VOID
90 SET_NET_USHORT(PVOID ptr, USHORT data)
91 {
92 *((PUSHORT)ptr) = GET_NET_USHORT(data);
93 }
95 static FORCEINLINE ULONG
96 GET_NET_ULONG(ULONG data)
97 {
98 ULONG tmp;
100 tmp = ((data & 0x00ff00ff) << 8) | ((data & 0xff00ff00) >> 8);
101 return (tmp << 16) | (tmp >> 16);
102 }
104 static FORCEINLINE ULONG
105 GET_NET_PULONG(PVOID pdata)
106 {
107 ULONG tmp;
109 tmp = ((*((PULONG)pdata) & 0x00ff00ff) << 8) | ((*((PULONG)pdata) & 0xff00ff00) >> 8);
110 return (tmp << 16) | (tmp >> 16);
111 }
113 static FORCEINLINE VOID
114 SET_NET_ULONG(PVOID ptr, ULONG data)
115 {
116 *((PULONG)ptr) = GET_NET_ULONG(data);
117 }
118 /*
119 #define GET_NET_ULONG(x) ((GET_NET_USHORT(x) << 16) | GET_NET_USHORT(((PUCHAR)&x)[2]))
120 #define SET_NET_ULONG(y, x) *((ULONG *)&(y)) = ((GET_NET_USHORT(x) << 16) | GET_NET_USHORT(((PUCHAR)&x)[2]))
121 */
/* Bitmask of the NDIS packet filter types listed below. */
#define SUPPORTED_PACKET_FILTERS \
  ( NDIS_PACKET_TYPE_DIRECTED      \
  | NDIS_PACKET_TYPE_MULTICAST     \
  | NDIS_PACKET_TYPE_BROADCAST     \
  | NDIS_PACKET_TYPE_PROMISCUOUS   \
  | NDIS_PACKET_TYPE_ALL_MULTICAST )
/*
 * The regular Xen ring macros could not be made to work here, so the ring
 * size is computed locally: the number of sring entries that fit in _sz
 * bytes once the sring header (which already contains one entry) is
 * accounted for, passed through __RD32.
 * NOTE(review): __RD32 is not defined in this header - presumably it comes
 * from Xen's io/ring.h; confirm.
 * The size argument is parenthesised so that any expression may be passed.
 */
#define __NET_RING_SIZE(type, _sz) \
  (__RD32( \
    ((_sz) - sizeof(struct type##_sring) + sizeof(union type##_sring_entry)) \
    / sizeof(union type##_sring_entry)))

#define NET_TX_RING_SIZE __NET_RING_SIZE(netif_tx, PAGE_SIZE)
#define NET_RX_RING_SIZE __NET_RING_SIZE(netif_rx, PAGE_SIZE)
139 #pragma warning(disable: 4127) // conditional expression is constant
#define MIN_LARGE_SEND_SEGMENTS 4

/* TODO: crank this up if we support higher mtus? */
#define XN_HDR_SIZE 14
#define XN_MAX_DATA_SIZE 1500
#define XN_MIN_FRAME_SIZE 60
/* Header plus maximum payload; built from XN_MAX_DATA_SIZE (XN_DATA_SIZE is not defined anywhere here). */
#define XN_MAX_FRAME_SIZE (XN_HDR_SIZE + XN_MAX_DATA_SIZE)
148 /*
149 #if !defined(OFFLOAD_LARGE_SEND)
150 #define XN_MAX_PKT_SIZE (XN_HDR_SIZE + XN_DATA_SIZE)
151 #else
152 #define XN_MAX_PKT_SIZE MAX_LARGE_SEND_OFFLOAD
153 #endif
154 */
#define XN_MAX_SEND_PKTS            16

#define XENSOURCE_MAC_HDR           0x00163E
#define XN_VENDOR_DESC              "Xensource"
#define MAX_XENBUS_STR_LEN          128

/* Bounds for the receive ring fill target. */
#define RX_MIN_TARGET               8
#define RX_DFL_MIN_TARGET           256
#define RX_MAX_TARGET               min(NET_RX_RING_SIZE, 256)

//#define MAX_BUFFERS_PER_PACKET NET_RX_RING_SIZE

/* Header length limits, in bytes. */
#define MIN_ETH_HEADER_LENGTH       14
#define MAX_ETH_HEADER_LENGTH       14
#define MIN_IP4_HEADER_LENGTH       20
#define MAX_IP4_HEADER_LENGTH       (15 * 4)
#define MIN_TCP_HEADER_LENGTH       20
#define MAX_TCP_HEADER_LENGTH       (15 * 4)
#define MAX_PKT_HEADER_LENGTH       (MAX_ETH_HEADER_LENGTH + MAX_IP4_HEADER_LENGTH + MAX_TCP_HEADER_LENGTH)

#define MIN_LOOKAHEAD_LENGTH        (MAX_IP4_HEADER_LENGTH + MAX_TCP_HEADER_LENGTH)
#define MAX_LOOKAHEAD_LENGTH        256

#define LINUX_MAX_SG_ELEMENTS       19
181 struct _shared_buffer_t;
183 typedef struct _shared_buffer_t shared_buffer_t;
185 struct _shared_buffer_t
186 {
187 struct netif_rx_response rsp;
188 shared_buffer_t *next;
189 grant_ref_t gref;
190 USHORT offset;
191 PVOID virtual;
192 PNDIS_BUFFER buffer;
193 //USHORT id;
194 volatile LONG ref_count;
195 };
197 typedef struct
198 {
199 PNDIS_PACKET packet; /* only set on the last packet */
200 PVOID *cb;
201 grant_ref_t gref;
202 } tx_shadow_t;
204 typedef struct {
205 PNDIS_BUFFER first_buffer;
206 PNDIS_BUFFER curr_buffer;
207 shared_buffer_t *first_pb;
208 shared_buffer_t *curr_pb;
209 PUCHAR first_buffer_virtual;
210 ULONG mdl_count;
211 ULONG curr_mdl_offset;
212 USHORT mss;
213 NDIS_TCP_IP_CHECKSUM_PACKET_INFO csum_info;
214 BOOLEAN csum_blank;
215 BOOLEAN data_validated;
216 BOOLEAN split_required;
217 UCHAR ip_version;
218 PUCHAR header;
219 ULONG first_buffer_length;
220 ULONG header_length;
221 UCHAR ip_proto;
222 ULONG total_length;
223 USHORT ip4_header_length;
224 USHORT ip4_length;
225 USHORT tcp_header_length;
226 BOOLEAN tcp_has_options;
227 USHORT tcp_length;
228 USHORT tcp_remaining;
229 ULONG tcp_seq;
230 /* anything past here doesn't get cleared automatically by the ClearPacketInfo */
231 UCHAR header_data[MAX_LOOKAHEAD_LENGTH + MAX_ETH_HEADER_LENGTH];
232 } packet_info_t;
234 #define PAGE_LIST_SIZE (max(NET_RX_RING_SIZE, NET_TX_RING_SIZE) * 4)
235 #define MULTICAST_LIST_MAX_SIZE 32
237 struct xennet_info
238 {
239 BOOLEAN inactive;
241 /* Base device vars */
242 PDEVICE_OBJECT pdo;
243 PDEVICE_OBJECT fdo;
244 PDEVICE_OBJECT lower_do;
245 //WDFDEVICE wdf_device;
246 WCHAR dev_desc[NAME_SIZE];
248 /* NDIS-related vars */
249 NDIS_HANDLE adapter_handle;
250 NDIS_MINIPORT_INTERRUPT interrupt;
251 ULONG packet_filter;
252 BOOLEAN connected;
253 BOOLEAN shutting_down;
254 BOOLEAN tx_shutting_down;
255 BOOLEAN rx_shutting_down;
256 uint8_t perm_mac_addr[ETH_ALEN];
257 uint8_t curr_mac_addr[ETH_ALEN];
258 ULONG current_lookahead;
259 NDIS_DEVICE_POWER_STATE new_power_state;
260 NDIS_DEVICE_POWER_STATE power_state;
261 PIO_WORKITEM power_workitem;
263 /* Misc. Xen vars */
264 XENPCI_VECTORS vectors;
265 PXENPCI_DEVICE_STATE device_state;
266 evtchn_port_t event_channel;
267 ULONG state;
268 char backend_path[MAX_XENBUS_STR_LEN];
269 ULONG backend_state;
270 PVOID config_page;
271 UCHAR multicast_list[MULTICAST_LIST_MAX_SIZE][6];
272 ULONG multicast_list_size;
273 KDPC suspend_dpc;
274 PIO_WORKITEM resume_work_item;
275 KSPIN_LOCK resume_lock;
277 /* tx related - protected by tx_lock */
278 KSPIN_LOCK tx_lock;
279 LIST_ENTRY tx_waiting_pkt_list;
280 struct netif_tx_front_ring tx;
281 ULONG tx_ring_free;
282 tx_shadow_t tx_shadows[NET_TX_RING_SIZE];
283 NDIS_HANDLE tx_buffer_pool;
284 #define TX_HEADER_BUFFER_SIZE 512
285 //#define TX_COALESCE_BUFFERS (NET_TX_RING_SIZE >> 2)
286 #define TX_COALESCE_BUFFERS (NET_TX_RING_SIZE)
287 KEVENT tx_idle_event;
288 ULONG tx_outstanding;
289 ULONG tx_id_free;
290 USHORT tx_id_list[NET_TX_RING_SIZE];
291 KDPC tx_dpc;
292 NPAGED_LOOKASIDE_LIST tx_lookaside_list;
294 /* rx_related - protected by rx_lock */
295 KSPIN_LOCK rx_lock;
296 struct netif_rx_front_ring rx;
297 ULONG rx_id_free;
298 packet_info_t *rxpi;
299 KEVENT packet_returned_event;
300 //NDIS_MINIPORT_TIMER rx_timer;
301 KDPC rx_dpc;
302 KTIMER rx_timer;
303 KDPC rx_timer_dpc;
304 NDIS_HANDLE rx_packet_pool;
305 NDIS_HANDLE rx_buffer_pool;
306 volatile LONG rx_pb_free;
307 struct stack_state *rx_pb_stack;
308 shared_buffer_t *rx_ring_pbs[NET_RX_RING_SIZE];
309 NPAGED_LOOKASIDE_LIST rx_lookaside_list;
310 /* Receive-ring batched refills. */
311 ULONG rx_target;
312 ULONG rx_max_target;
313 ULONG rx_min_target;
314 shared_buffer_t *rx_partial_buf;
315 BOOLEAN rx_partial_extra_info_flag ;
316 BOOLEAN rx_partial_more_data_flag;
318 /* how many packets are in the net stack atm */
319 ULONG rx_outstanding;
321 /* config vars from registry */
322 ULONG config_sg;
323 ULONG config_csum;
324 ULONG config_csum_rx_check;
325 ULONG config_gso;
326 ULONG config_mtu;
327 ULONG config_rx_interrupt_moderation;
329 NDIS_TASK_TCP_IP_CHECKSUM setting_csum;
330 ULONG setting_max_offload;
332 /* config stuff calculated from the above */
333 ULONG config_max_pkt_size;
335 /* stats */
336 ULONG64 stat_tx_ok;
337 ULONG64 stat_rx_ok;
338 ULONG64 stat_tx_error;
339 ULONG64 stat_rx_error;
340 ULONG64 stat_rx_no_buffer;
342 } typedef xennet_info_t;
344 VOID DDKAPI
345 XenNet_ReturnPacket(
346 IN NDIS_HANDLE MiniportAdapterContext,
347 IN PNDIS_PACKET Packet
348 );
350 BOOLEAN
351 XenNet_RxInit(xennet_info_t *xi);
353 BOOLEAN
354 XenNet_RxShutdown(xennet_info_t *xi);
356 VOID
357 XenNet_RxResumeStart(xennet_info_t *xi);
359 VOID
360 XenNet_RxResumeEnd(xennet_info_t *xi);
362 VOID
363 XenNet_TxResumeStart(xennet_info_t *xi);
365 VOID
366 XenNet_TxResumeEnd(xennet_info_t *xi);
368 VOID DDKAPI
369 XenNet_SendPackets(
370 IN NDIS_HANDLE MiniportAdapterContext,
371 IN PPNDIS_PACKET PacketArray,
372 IN UINT NumberOfPackets
373 );
375 VOID
376 XenNet_CancelSendPackets(
377 NDIS_HANDLE MiniportAdapterContext,
378 PVOID CancelId);
380 BOOLEAN
381 XenNet_TxInit(xennet_info_t *xi);
383 BOOLEAN
384 XenNet_TxShutdown(xennet_info_t *xi);
386 NDIS_STATUS DDKAPI
387 XenNet_QueryInformation(
388 IN NDIS_HANDLE MiniportAdapterContext,
389 IN NDIS_OID Oid,
390 IN PVOID InformationBuffer,
391 IN ULONG InformationBufferLength,
392 OUT PULONG BytesWritten,
393 OUT PULONG BytesNeeded);
395 NDIS_STATUS DDKAPI
396 XenNet_SetInformation(
397 IN NDIS_HANDLE MiniportAdapterContext,
398 IN NDIS_OID Oid,
399 IN PVOID InformationBuffer,
400 IN ULONG InformationBufferLength,
401 OUT PULONG BytesRead,
402 OUT PULONG BytesNeeded
403 );
405 NDIS_STATUS
406 XenNet_D0Entry(struct xennet_info *xi);
407 NDIS_STATUS
408 XenNet_D0Exit(struct xennet_info *xi);
409 IO_WORKITEM_ROUTINE
410 XenNet_SetPower;
412 /* return values */
413 #define PARSE_OK 0
414 #define PARSE_TOO_SMALL 1 /* first buffer is too small */
415 #define PARSE_UNKNOWN_TYPE 2
417 BOOLEAN
418 XenNet_BuildHeader(packet_info_t *pi, PVOID header, ULONG new_header_size);
419 ULONG
420 XenNet_ParsePacketHeader(packet_info_t *pi, PUCHAR buffer, ULONG min_header_size);
421 BOOLEAN
422 XenNet_FilterAcceptPacket(struct xennet_info *xi,packet_info_t *pi);
424 VOID
425 XenNet_SumIpHeader(
426 PUCHAR header,
427 USHORT ip4_header_length
428 );
430 static __forceinline VOID
431 XenNet_ClearPacketInfo(packet_info_t *pi)
432 {
433 #if 1
434 RtlZeroMemory(pi, sizeof(packet_info_t) - FIELD_OFFSET(packet_info_t, header_data));
435 #else
436 pi->mdl_count = 0;
437 pi->mss = 0;
438 pi->ip4_header_length = 0;
439 pi->tcp_header_length = 0;
440 pi->curr_mdl_index = pi->curr_mdl_offset = 0;
441 pi->extra_info = pi->more_frags = pi->csum_blank =
442 pi->data_validated = pi->split_required = 0;
443 #endif
444 }
446 /* Get some data from the current packet, but don't cross a page boundry. */
447 static __forceinline ULONG
448 XenNet_QueryData(packet_info_t *pi, ULONG length)
449 {
450 ULONG offset_in_page;
452 if (length > MmGetMdlByteCount(pi->curr_buffer) - pi->curr_mdl_offset)
453 length = MmGetMdlByteCount(pi->curr_buffer) - pi->curr_mdl_offset;
455 offset_in_page = (MmGetMdlByteOffset(pi->curr_buffer) + pi->curr_mdl_offset) & (PAGE_SIZE - 1);
456 if (offset_in_page + length > PAGE_SIZE)
457 length = PAGE_SIZE - offset_in_page;
459 return length;
460 }
462 /* Move the pointers forward by the given amount. No error checking is done. */
463 static __forceinline VOID
464 XenNet_EatData(packet_info_t *pi, ULONG length)
465 {
466 pi->curr_mdl_offset += length;
467 if (pi->curr_mdl_offset >= MmGetMdlByteCount(pi->curr_buffer))
468 {
469 pi->curr_mdl_offset -= MmGetMdlByteCount(pi->curr_buffer);
470 NdisGetNextBuffer(pi->curr_buffer, &pi->curr_buffer);
471 }
472 }