win-pvdrivers

view xennet/xennet.h @ 1070:05ece536b204

Fix LSO bug on FIN packets. Add RxCoalesce option (default on) to work around Cisco VPN issues
author James Harper <james.harper@bendigoit.com.au>
date Wed Nov 13 07:56:13 2013 +1100 (2013-11-13)
parents 2ef536c2d9fe
children 27bd2a5a4704
line source
1 /*
2 PV Drivers for Windows Xen HVM Domains
3 Copyright (C) 2007 James Harper
4 Copyright (C) 2007 Andrew Grover <andy.grover@oracle.com>
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
21 #pragma warning(disable: 4201)
22 #pragma warning(disable: 4214)
24 #include <ntddk.h>
25 #include <wdm.h>
26 #define NDIS_MINIPORT_DRIVER 1
27 #if NTDDI_VERSION < NTDDI_WINXP
28 # define NDIS50_MINIPORT 1
29 #elif NTDDI_VERSION < NTDDI_VISTA
30 # define NDIS51_MINIPORT 1
31 #else
32 # define NDIS61_MINIPORT 1
33 #endif
34 #include <ndis.h>
35 #define NTSTRSAFE_LIB
36 #include <ntstrsafe.h>
37 #include <liblfds.h>
/* Driver version: major in the upper 16 bits, minor in the lower 16 bits. */
#define VENDOR_DRIVER_VERSION_MAJOR 0
#define VENDOR_DRIVER_VERSION_MINOR 11
#define VENDOR_DRIVER_VERSION (((VENDOR_DRIVER_VERSION_MAJOR) << 16) | (VENDOR_DRIVER_VERSION_MINOR))

/* there is not really any theoretical maximum... */
/* NOTE(review): the value does not fit in 32 bits; confirm the compiler
   gives this literal a 64-bit type despite the L suffix. */
#define MAX_LINK_SPEED 10000000000L

/* Name used to identify this driver. */
#define __DRIVER_NAME "XenNet"
/*
 * Per-packet driver state stored in the NDIS_PACKET MiniportReservedEx area.
 * The RX "first pb" slot and the TX list entry share the same offset.
 *
 * Previous layout, kept for reference:
 *   PACKET_NEXT_PACKET_FIELD MiniportReservedEx[sizeof(PVOID)]  (RX & TX)
 *   PACKET_FIRST_PB_FIELD    MiniportReservedEx[0]              (RX)
 */
#define PACKET_NEXT_PACKET_FIELD MiniportReservedEx[0]             /* RX & TX */
#define PACKET_FIRST_PB_FIELD    MiniportReservedEx[sizeof(PVOID)] /* RX */
#define PACKET_LIST_ENTRY_FIELD  MiniportReservedEx[sizeof(PVOID)] /* TX (2 entries) */

/* Typed lvalue accessors for the fields above. */
#define PACKET_NEXT_PACKET(_packet) (*(PNDIS_PACKET *)&(_packet)->PACKET_NEXT_PACKET_FIELD)
#define PACKET_LIST_ENTRY(_packet)  (*(PLIST_ENTRY)&(_packet)->PACKET_LIST_ENTRY_FIELD)
#define PACKET_FIRST_PB(_packet)    (*(shared_buffer_t **)&(_packet)->PACKET_FIRST_PB_FIELD)

/*
 * Per-NET_BUFFER driver state stored in MiniportReserved. The TX list entry
 * and the RX "first pb" pointer both start at slot 0; the owning NBL pointer
 * (TX) lives in slot 2.
 */
#define NB_LIST_ENTRY_FIELD MiniportReserved[0] /* TX (2 entries) */
#define NB_FIRST_PB_FIELD   MiniportReserved[0] /* RX */
#define NB_NBL_FIELD        MiniportReserved[2] /* TX */

#define NB_LIST_ENTRY(_nb) (*(PLIST_ENTRY)&(_nb)->NB_LIST_ENTRY_FIELD)
#define NB_NBL(_nb)        (*(PNET_BUFFER_LIST *)&(_nb)->NB_NBL_FIELD)
#define NB_FIRST_PB(_nb)   (*(shared_buffer_t **)&(_nb)->NB_FIRST_PB_FIELD)

/* Per-NET_BUFFER_LIST reference count kept in MiniportReserved[0] (TX). */
#define NBL_REF_FIELD MiniportReserved[0]
#define NBL_REF(_nbl) (*(ULONG_PTR *)&(_nbl)->NBL_REF_FIELD)

#define NDIS_STATUS_RESOURCES_MAX_LENGTH 64
69 #include <xen_windows.h>
70 #include <memory.h>
71 #include <grant_table.h>
72 #include <event_channel.h>
73 #include <hvm/params.h>
74 #include <hvm/hvm_op.h>
75 #include <xen_public.h>
76 #include <io/ring.h>
77 #include <io/netif.h>
78 #include <io/xenbus.h>
79 #include <stdlib.h>
/* Pool tag for this driver's allocations. The whole expansion is
   parenthesised so the cast cannot rebind when the macro is used inside a
   larger expression (e.g. as an operand of sizeof). */
#define XENNET_POOL_TAG ((ULONG)'XenN')

/* Xen macros use these, so they need to be redefined to Win equivs */
#define wmb() KeMemoryBarrier()
#define mb() KeMemoryBarrier()

/* Grant reference value treated as "no grant". */
#define GRANT_INVALID_REF 0

#define NAME_SIZE 64

/* Length of an Ethernet MAC address in bytes. */
#define ETH_ALEN 6
92 static FORCEINLINE USHORT
93 GET_NET_USHORT(USHORT data) {
94 return (data << 8) | (data >> 8);
95 }
97 static FORCEINLINE USHORT
98 GET_NET_PUSHORT(PVOID pdata) {
99 return (*((PUSHORT)pdata) << 8) | (*((PUSHORT)pdata) >> 8);
100 }
102 static FORCEINLINE VOID
103 SET_NET_USHORT(PVOID ptr, USHORT data) {
104 *((PUSHORT)ptr) = GET_NET_USHORT(data);
105 }
107 static FORCEINLINE ULONG
108 GET_NET_ULONG(ULONG data) {
109 ULONG tmp;
111 tmp = ((data & 0x00ff00ff) << 8) | ((data & 0xff00ff00) >> 8);
112 return (tmp << 16) | (tmp >> 16);
113 }
115 static FORCEINLINE ULONG
116 GET_NET_PULONG(PVOID pdata) {
117 ULONG tmp;
119 tmp = ((*((PULONG)pdata) & 0x00ff00ff) << 8) | ((*((PULONG)pdata) & 0xff00ff00) >> 8);
120 return (tmp << 16) | (tmp >> 16);
121 }
123 static FORCEINLINE VOID
124 SET_NET_ULONG(PVOID ptr, ULONG data) {
125 *((PULONG)ptr) = GET_NET_ULONG(data);
126 }
127 /*
128 #define GET_NET_ULONG(x) ((GET_NET_USHORT(x) << 16) | GET_NET_USHORT(((PUCHAR)&x)[2]))
129 #define SET_NET_ULONG(y, x) *((ULONG *)&(y)) = ((GET_NET_USHORT(x) << 16) | GET_NET_USHORT(((PUCHAR)&x)[2]))
130 */
/* Bitmask of every NDIS packet filter type this driver accepts. */
#define SUPPORTED_PACKET_FILTERS ( \
  NDIS_PACKET_TYPE_DIRECTED      | \
  NDIS_PACKET_TYPE_BROADCAST     | \
  NDIS_PACKET_TYPE_MULTICAST     | \
  NDIS_PACKET_TYPE_ALL_MULTICAST | \
  NDIS_PACKET_TYPE_PROMISCUOUS)
/* couldn't get regular xen ring macros to work...*/
/*
 * Number of entries of ring `type` that fit in a shared ring of `_sz` bytes:
 * the space left after the sring header (which already contains one entry)
 * divided by the entry size, passed through __RD32.
 * `_sz` is parenthesised so that any expression can be passed safely.
 */
#define __NET_RING_SIZE(type, _sz) \
  (__RD32( \
    ((_sz) - sizeof(struct type##_sring) + sizeof(union type##_sring_entry)) \
    / sizeof(union type##_sring_entry)))

#define NET_TX_RING_SIZE __NET_RING_SIZE(netif_tx, PAGE_SIZE)
#define NET_RX_RING_SIZE __NET_RING_SIZE(netif_rx, PAGE_SIZE)
148 #pragma warning(disable: 4127) // conditional expression is constant
#define MIN_LARGE_SEND_SEGMENTS 4

/* TODO: crank this up if we support higher mtus? */
#define XN_HDR_SIZE 14
#define XN_MAX_DATA_SIZE 1500
#define XN_MIN_FRAME_SIZE 60
/* Header plus maximum payload. This previously referenced the undefined
   XN_DATA_SIZE, which would fail to compile at any point of use. */
#define XN_MAX_FRAME_SIZE (XN_HDR_SIZE + XN_MAX_DATA_SIZE)
/*
#if !defined(OFFLOAD_LARGE_SEND)
  #define XN_MAX_PKT_SIZE (XN_HDR_SIZE + XN_MAX_DATA_SIZE)
#else
  #define XN_MAX_PKT_SIZE MAX_LARGE_SEND_OFFLOAD
#endif
*/

#define XN_MAX_SEND_PKTS 16

#define XENSOURCE_MAC_HDR 0x00163E
#define XN_VENDOR_DESC "Xensource"
#define MAX_XENBUS_STR_LEN 128
/* Receive-side sizing, all scaled from the rx ring size. */
/* disabled: #define RX_MIN_TARGET 8 */
#define RX_DEFAULT_TARGET         256
/* disabled: #define RX_MAX_TARGET min(NET_RX_RING_SIZE, 256) */
#define RX_MAX_PB_FREELIST        (NET_RX_RING_SIZE * 4)
#define RX_PACKET_MAX             (NET_RX_RING_SIZE * 4)
#define RX_PACKET_HIGH_WATER_MARK (RX_PACKET_MAX * 3 / 4)

/* disabled: #define MAX_BUFFERS_PER_PACKET NET_RX_RING_SIZE */

/* Protocol header length bounds, in bytes. */
#define MIN_ETH_HEADER_LENGTH 14
#define MAX_ETH_HEADER_LENGTH 14
#define MIN_IP4_HEADER_LENGTH 20
#define MAX_IP4_HEADER_LENGTH (15 * 4)
#define MIN_TCP_HEADER_LENGTH 20
#define MAX_TCP_HEADER_LENGTH (15 * 4)
#define MAX_PKT_HEADER_LENGTH (MAX_ETH_HEADER_LENGTH + MAX_IP4_HEADER_LENGTH + MAX_TCP_HEADER_LENGTH)

/* Lookahead must at least cover the largest IPv4 + TCP header. */
#define MIN_LOOKAHEAD_LENGTH (MAX_IP4_HEADER_LENGTH + MAX_TCP_HEADER_LENGTH)
/* disabled: #define MAX_LOOKAHEAD_LENGTH PAGE_SIZE */
/* optimise the size of header buffers: shared_buffer_t + ethernet header +
   lookahead add up to 512 bytes */
#define MAX_LOOKAHEAD_LENGTH (512 - sizeof(shared_buffer_t) - MAX_ETH_HEADER_LENGTH)

#define LINUX_MAX_SG_ELEMENTS 18

#define PAGE_LIST_SIZE          (max(NET_RX_RING_SIZE, NET_TX_RING_SIZE) * 4)
#define MULTICAST_LIST_MAX_SIZE 32

/* Transmit-side sizing. */
#define TX_HEADER_BUFFER_SIZE 512
#define TX_COALESCE_BUFFERS   (NET_TX_RING_SIZE)
/* How incoming large (LSO) packets are split before being indicated. */
#define RX_LSO_SPLIT_MSS  0 /* split into MSS sized chunks */
#define RX_LSO_SPLIT_HALF 1 /* split in half, to not invoke the delayed ack timer */
#define RX_LSO_SPLIT_NONE 2 /* don't split. not really useful */

/* Device connection states; the trailing note gives the allowed next state. */
#define DEVICE_STATE_DISCONNECTED  0 /* -> INITIALISING */
#define DEVICE_STATE_INITIALISING  1 /* -> ACTIVE or INACTIVE */
#define DEVICE_STATE_INACTIVE      2
#define DEVICE_STATE_ACTIVE        3 /* -> DISCONNECTING */
#define DEVICE_STATE_DISCONNECTING 4 /* -> DISCONNECTED */
214 struct _shared_buffer_t;
216 typedef struct _shared_buffer_t shared_buffer_t;
218 struct _shared_buffer_t {
219 struct netif_rx_response rsp;
220 shared_buffer_t *next;
221 grant_ref_t gref;
222 //USHORT offset;
223 PVOID virtual;
224 PMDL mdl;
225 //USHORT id;
226 volatile LONG ref_count;
227 };
229 typedef struct {
230 #if NTDDI_VERSION < NTDDI_VISTA
231 PNDIS_PACKET packet; /* only set on the last packet */
232 #else
233 PNET_BUFFER packet; /* only set on the last packet */
234 #endif
235 PVOID *cb;
236 grant_ref_t gref;
237 } tx_shadow_t;
239 typedef struct {
240 ULONG parse_result;
241 PMDL first_mdl;
242 MDL first_mdl_storage;
243 PPFN_NUMBER first_mdl_pfns[17]; /* maximum possible packet size */
244 PMDL curr_mdl;
245 shared_buffer_t *first_pb;
246 shared_buffer_t *curr_pb;
247 PUCHAR first_mdl_virtual;
248 //ULONG mdl_count;
249 ULONG first_mdl_offset;
250 ULONG first_mdl_length;
251 ULONG curr_mdl_offset;
252 USHORT mss;
253 //NDIS_TCP_IP_CHECKSUM_PACKET_INFO csum_info;
254 BOOLEAN csum_blank;
255 BOOLEAN data_validated;
256 BOOLEAN split_required;
257 UCHAR ip_version;
258 PUCHAR header;
259 ULONG header_length;
260 UCHAR ip_proto;
261 BOOLEAN ip_has_options;
262 ULONG total_length;
263 USHORT ip4_header_length;
264 USHORT ip4_length;
265 USHORT tcp_header_length;
266 BOOLEAN tcp_has_options;
267 USHORT tcp_length;
268 USHORT tcp_remaining;
269 ULONG tcp_seq;
270 BOOLEAN is_multicast;
271 BOOLEAN is_broadcast;
272 /* anything past here doesn't get cleared automatically by the ClearPacketInfo */
273 UCHAR header_data[MAX_LOOKAHEAD_LENGTH + MAX_ETH_HEADER_LENGTH];
274 } packet_info_t;
276 struct xennet_info
277 {
278 ULONG device_state;
280 /* Base device vars */
281 PDEVICE_OBJECT pdo;
282 PDEVICE_OBJECT fdo;
283 PDEVICE_OBJECT lower_do;
284 // WCHAR dev_desc[NAME_SIZE];
286 /* NDIS-related vars */
287 NDIS_HANDLE adapter_handle;
288 ULONG packet_filter;
289 uint8_t perm_mac_addr[ETH_ALEN];
290 uint8_t curr_mac_addr[ETH_ALEN];
291 ULONG current_lookahead;
293 /* Misc. Xen vars */
294 XN_HANDLE handle;
296 evtchn_port_t event_channel;
297 ULONG backend_state;
298 KEVENT backend_event;
299 UCHAR multicast_list[MULTICAST_LIST_MAX_SIZE][6];
300 ULONG multicast_list_size;
301 KDPC rxtx_dpc;
303 /* tx related - protected by tx_lock */
304 KSPIN_LOCK tx_lock; /* always acquire rx_lock before tx_lock */
305 LIST_ENTRY tx_waiting_pkt_list;
306 netif_tx_sring_t *tx_sring;
307 grant_ref_t tx_sring_gref;
308 struct netif_tx_front_ring tx_ring;
309 ULONG tx_ring_free;
310 tx_shadow_t tx_shadows[NET_TX_RING_SIZE];
311 ULONG tx_outstanding;
312 ULONG tx_id_free;
313 USHORT tx_id_list[NET_TX_RING_SIZE];
314 NPAGED_LOOKASIDE_LIST tx_lookaside_list;
315 KEVENT tx_idle_event;
317 /* rx_related - protected by rx_lock */
318 KSPIN_LOCK rx_lock; /* always acquire rx_lock before tx_lock */
319 netif_rx_sring_t *rx_sring;
320 grant_ref_t rx_sring_gref;
321 struct netif_rx_front_ring rx_ring;
322 ULONG rx_id_free;
323 packet_info_t *rxpi;
324 #if NTDDI_VERSION < NTDDI_VISTA
325 #else
326 NDIS_HANDLE rx_nbl_pool;
327 #endif
328 NDIS_HANDLE rx_packet_pool;
329 volatile LONG rx_pb_free;
330 struct stack_state *rx_pb_stack;
331 volatile LONG rx_hb_free;
332 struct stack_state *rx_hb_stack;
333 shared_buffer_t *rx_ring_pbs[NET_RX_RING_SIZE];
334 /* Receive-ring batched refills. */
335 ULONG rx_target;
336 ULONG rx_max_target;
337 ULONG rx_min_target;
338 shared_buffer_t *rx_partial_buf;
339 BOOLEAN rx_partial_extra_info_flag ;
340 BOOLEAN rx_partial_more_data_flag;
341 KEVENT rx_idle_event;
342 /* how many packets are in the net stack atm */
343 LONG rx_outstanding;
346 /* config vars from registry */
347 /* the frontend_* indicate our willingness to support */
348 BOOLEAN frontend_sg_supported;
349 BOOLEAN frontend_csum_supported;
350 ULONG frontend_gso_value;
351 ULONG frontend_mtu_value;
352 ULONG frontend_gso_rx_split_type; /* RX_LSO_SPLIT_* */
354 BOOLEAN backend_sg_supported;
355 BOOLEAN backend_csum_supported;
356 ULONG backend_gso_value;
358 BOOLEAN current_sg_supported;
359 BOOLEAN current_csum_supported;
360 ULONG current_gso_value;
361 ULONG current_mtu_value;
362 ULONG current_gso_rx_split_type;
364 BOOLEAN config_csum_rx_check;
365 BOOLEAN config_csum_rx_dont_fix;
366 BOOLEAN config_rx_coalesce;
368 #if NTDDI_VERSION < NTDDI_VISTA
369 NDIS_TASK_TCP_IP_CHECKSUM setting_csum;
370 #else
371 #endif
373 /* config stuff calculated from the above */
374 ULONG config_max_pkt_size;
376 /* stats */
377 #if NTDDI_VERSION < NTDDI_VISTA
378 ULONG64 stat_tx_ok;
379 ULONG64 stat_rx_ok;
380 ULONG64 stat_tx_error;
381 ULONG64 stat_rx_error;
382 ULONG64 stat_rx_no_buffer;
383 #else
384 NDIS_STATISTICS_INFO stats;
385 #endif
387 } typedef xennet_info_t;
389 extern USHORT ndis_os_major_version;
390 extern USHORT ndis_os_minor_version;
392 typedef NDIS_STATUS (*XEN_OID_REQUEST)(NDIS_HANDLE context, PVOID information_buffer, ULONG information_buffer_length, PULONG bytes_read, PULONG bytes_needed);
394 struct xennet_oids_t {
395 ULONG oid;
396 char *oid_name;
397 ULONG min_length;
398 XEN_OID_REQUEST query_routine;
399 XEN_OID_REQUEST set_routine;
400 };
402 extern struct xennet_oids_t xennet_oids[];
404 #if NTDDI_VERSION < NTDDI_VISTA
405 NDIS_STATUS
406 XenNet_QueryInformation(
407 IN NDIS_HANDLE MiniportAdapterContext,
408 IN NDIS_OID Oid,
409 IN PVOID InformationBuffer,
410 IN ULONG InformationBufferLength,
411 OUT PULONG BytesWritten,
412 OUT PULONG BytesNeeded);
414 NDIS_STATUS
415 XenNet_SetInformation(
416 IN NDIS_HANDLE MiniportAdapterContext,
417 IN NDIS_OID Oid,
418 IN PVOID InformationBuffer,
419 IN ULONG InformationBufferLength,
420 OUT PULONG BytesRead,
421 OUT PULONG BytesNeeded
422 );
424 VOID
425 XenNet_SendPackets(
426 IN NDIS_HANDLE MiniportAdapterContext,
427 IN PPNDIS_PACKET PacketArray,
428 IN UINT NumberOfPackets
429 );
431 VOID
432 XenNet_ReturnPacket(
433 IN NDIS_HANDLE MiniportAdapterContext,
434 IN PNDIS_PACKET Packet
435 );
436 #else
438 MINIPORT_OID_REQUEST XenNet_OidRequest;
439 MINIPORT_CANCEL_OID_REQUEST XenNet_CancelOidRequest;
441 MINIPORT_SEND_NET_BUFFER_LISTS XenNet_SendNetBufferLists;
442 MINIPORT_CANCEL_SEND XenNet_CancelSend;
444 MINIPORT_RETURN_NET_BUFFER_LISTS XenNet_ReturnNetBufferLists;
445 #endif
447 NTSTATUS XenNet_Connect(PVOID context, BOOLEAN suspend);
448 NTSTATUS XenNet_Disconnect(PVOID context, BOOLEAN suspend);
449 VOID XenNet_DeviceCallback(PVOID context, ULONG callback_type, PVOID value);
452 BOOLEAN XenNet_RxInit(xennet_info_t *xi);
453 VOID XenNet_RxShutdown(xennet_info_t *xi);
454 BOOLEAN XenNet_RxBufferCheck(struct xennet_info *xi);
456 BOOLEAN XenNet_TxInit(xennet_info_t *xi);
457 BOOLEAN XenNet_TxShutdown(xennet_info_t *xi);
458 VOID XenNet_TxBufferGC(struct xennet_info *xi, BOOLEAN dont_set_event);
461 /* return values */
462 #define PARSE_OK 0
463 #define PARSE_TOO_SMALL 1 /* first buffer is too small */
464 #define PARSE_UNKNOWN_TYPE 2
466 BOOLEAN XenNet_BuildHeader(packet_info_t *pi, PVOID header, ULONG new_header_size);
467 VOID XenNet_ParsePacketHeader(packet_info_t *pi, PUCHAR buffer, ULONG min_header_size);
468 BOOLEAN XenNet_FilterAcceptPacket(struct xennet_info *xi, packet_info_t *pi);
470 BOOLEAN XenNet_CheckIpHeaderSum(PUCHAR header, USHORT ip4_header_length);
471 VOID XenNet_SumIpHeader(PUCHAR header, USHORT ip4_header_length);
473 static __forceinline VOID
474 XenNet_ClearPacketInfo(packet_info_t *pi) {
475 RtlZeroMemory(pi, sizeof(packet_info_t) - FIELD_OFFSET(packet_info_t, header_data));
476 }
478 /* Get some data from the current packet, but don't cross a page boundry. */
479 static __forceinline ULONG
480 XenNet_QueryData(packet_info_t *pi, ULONG length) {
481 ULONG offset_in_page;
483 if (length > MmGetMdlByteCount(pi->curr_mdl) - pi->curr_mdl_offset)
484 length = MmGetMdlByteCount(pi->curr_mdl) - pi->curr_mdl_offset;
486 offset_in_page = (MmGetMdlByteOffset(pi->curr_mdl) + pi->curr_mdl_offset) & (PAGE_SIZE - 1);
487 if (offset_in_page + length > PAGE_SIZE)
488 length = PAGE_SIZE - offset_in_page;
490 return length;
491 }
493 /* Move the pointers forward by the given amount. No error checking is done. */
494 static __forceinline VOID
495 XenNet_EatData(packet_info_t *pi, ULONG length) {
496 pi->curr_mdl_offset += length;
497 if (pi->curr_mdl_offset >= MmGetMdlByteCount(pi->curr_mdl)) {
498 pi->curr_mdl_offset -= MmGetMdlByteCount(pi->curr_mdl);
499 #if NTDDI_VERSION < NTDDI_VISTA
500 NdisGetNextBuffer(pi->curr_mdl, &pi->curr_mdl);
501 #else
502 NdisGetNextMdl(pi->curr_mdl, &pi->curr_mdl);
503 #endif
504 }
505 }