win-pvdrivers

view xennet/xennet6.h @ 950:916ea40186fc

Attempted fix for memory hogging in xennet
author James Harper <james.harper@bendigoit.com.au>
date Thu Sep 22 23:47:46 2011 +1000 (2011-09-22)
parents 81f132396f9e
children 278b479f3f7d
line source
1 /*
2 PV Drivers for Windows Xen HVM Domains
3 Copyright (C) 2007 James Harper
4 Copyright (C) 2007 Andrew Grover <andy.grover@oracle.com>
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
21 #pragma warning(disable: 4201)
22 #pragma warning(disable: 4214)
24 #include <ntddk.h>
25 #include <wdm.h>
26 #define NDIS_MINIPORT_DRIVER 1
27 #define NDIS60_MINIPORT 1
28 #define NDIS_SUPPORT_NDIS6 1
29 #include <ndis.h>
30 #define NTSTRSAFE_LIB
31 #include <ntstrsafe.h>
32 #include <liblfds.h>
34 #define VENDOR_DRIVER_VERSION_MAJOR 0
35 #define VENDOR_DRIVER_VERSION_MINOR 10
/* There is not really any theoretical maximum, so just report 10 Gbit/s.
 * Previous definition had a trailing semicolon (a syntax hazard wherever the
 * macro expands mid-expression) and an 'L' suffix, which overflows the 32-bit
 * 'long' used by MSVC on Windows; 'ULL' guarantees a 64-bit constant. */
#define MAX_LINK_SPEED 10000000000ULL
39 #define VENDOR_DRIVER_VERSION (((VENDOR_DRIVER_VERSION_MAJOR) << 16) | (VENDOR_DRIVER_VERSION_MINOR))
41 #define __DRIVER_NAME "XenNet"
43 #define NB_LIST_ENTRY_FIELD MiniportReserved[0] // TX (2 entries)
44 #define NB_HEADER_BUF_FIELD MiniportReserved[0] // RX
45 #define NB_NBL_FIELD MiniportReserved[2] // TX
46 #define NB_LIST_ENTRY(_nb) (*(PLIST_ENTRY)&(_nb)->NB_LIST_ENTRY_FIELD)
47 #define NB_NBL(_nb) (*(PNET_BUFFER_LIST *)&(_nb)->NB_NBL_FIELD)
48 #define NB_HEADER_BUF(_nb) (*(shared_buffer_t **)&(_nb)->NB_HEADER_BUF_FIELD)
50 #define NBL_REF_FIELD MiniportReserved[0] // TX
51 //#define NBL_LIST_ENTRY_FIELD MiniportReserved[0] // TX (2 entries) - overlaps with REF_FIELD
52 //#define NBL_PACKET_COUNT_FIELD MiniportReserved[0] // RX
53 //#define NBL_LAST_NB_FIELD MiniportReserved[1] // RX
54 #define NBL_REF(_nbl) (*(ULONG_PTR *)&(_nbl)->NBL_REF_FIELD)
55 //#define NBL_LIST_ENTRY(_nbl) (*(PLIST_ENTRY)&(_nbl)->NBL_LIST_ENTRY_FIELD)
56 //#define NBL_PACKET_COUNT(_nbl) (*(ULONG_PTR *)&(_nbl)->NBL_PACKET_COUNT_FIELD)
57 //#define NBL_LAST_NB(_nbl) (*(PNET_BUFFER *)&(_nbl)->NBL_LAST_NB_FIELD)
59 #include <xen_windows.h>
60 #include <memory.h>
61 #include <grant_table.h>
62 #include <event_channel.h>
63 #include <hvm/params.h>
64 #include <hvm/hvm_op.h>
65 #include <xen_public.h>
66 #include <io/ring.h>
67 #include <io/netif.h>
68 #include <io/xenbus.h>
69 #include <stdlib.h>
70 #define XENNET_POOL_TAG (ULONG) 'XenN'
72 /* Xen macros use these, so they need to be redefined to Win equivs */
73 #define wmb() KeMemoryBarrier()
74 #define mb() KeMemoryBarrier()
76 #define GRANT_INVALID_REF 0
78 #define NAME_SIZE 64
80 #define ETH_ALEN 6
82 /*
83 #define __NET_USHORT_BYTE_0(x) ((USHORT)(x & 0xFF))
84 #define __NET_USHORT_BYTE_1(x) ((USHORT)((PUCHAR)&x)[1] & 0xFF)
86 #define GET_NET_USHORT(x) ((__NET_USHORT_BYTE_0(x) << 8) | __NET_USHORT_BYTE_1(x))
87 #define SET_NET_USHORT(y, x) *((USHORT *)&(y)) = ((__NET_USHORT_BYTE_0(x) << 8) | __NET_USHORT_BYTE_1(x))
88 */
90 static FORCEINLINE USHORT
91 GET_NET_USHORT(USHORT data)
92 {
93 return (data << 8) | (data >> 8);
94 }
96 static FORCEINLINE USHORT
97 GET_NET_PUSHORT(PVOID pdata)
98 {
99 return (*((PUSHORT)pdata) << 8) | (*((PUSHORT)pdata) >> 8);
100 }
102 static FORCEINLINE VOID
103 SET_NET_USHORT(PVOID ptr, USHORT data)
104 {
105 *((PUSHORT)ptr) = GET_NET_USHORT(data);
106 }
108 static FORCEINLINE ULONG
109 GET_NET_ULONG(ULONG data)
110 {
111 ULONG tmp;
113 tmp = ((data & 0x00ff00ff) << 8) | ((data & 0xff00ff00) >> 8);
114 return (tmp << 16) | (tmp >> 16);
115 }
117 static FORCEINLINE ULONG
118 GET_NET_PULONG(PVOID pdata)
119 {
120 ULONG tmp;
122 tmp = ((*((PULONG)pdata) & 0x00ff00ff) << 8) | ((*((PULONG)pdata) & 0xff00ff00) >> 8);
123 return (tmp << 16) | (tmp >> 16);
124 }
126 static FORCEINLINE VOID
127 SET_NET_ULONG(PVOID ptr, ULONG data)
128 {
129 *((PULONG)ptr) = GET_NET_ULONG(data);
130 }
131 /*
132 #define GET_NET_ULONG(x) ((GET_NET_USHORT(x) << 16) | GET_NET_USHORT(((PUCHAR)&x)[2]))
133 #define SET_NET_ULONG(y, x) *((ULONG *)&(y)) = ((GET_NET_USHORT(x) << 16) | GET_NET_USHORT(((PUCHAR)&x)[2]))
134 */
136 #define SUPPORTED_PACKET_FILTERS (\
137 NDIS_PACKET_TYPE_DIRECTED | \
138 NDIS_PACKET_TYPE_MULTICAST | \
139 NDIS_PACKET_TYPE_BROADCAST | \
140 NDIS_PACKET_TYPE_PROMISCUOUS | \
141 NDIS_PACKET_TYPE_ALL_MULTICAST)
143 /* couldn't get regular xen ring macros to work...*/
144 #define __NET_RING_SIZE(type, _sz) \
145 (__RD32( \
146 (_sz - sizeof(struct type##_sring) + sizeof(union type##_sring_entry)) \
147 / sizeof(union type##_sring_entry)))
149 #define NET_TX_RING_SIZE __NET_RING_SIZE(netif_tx, PAGE_SIZE)
150 #define NET_RX_RING_SIZE __NET_RING_SIZE(netif_rx, PAGE_SIZE)
152 #pragma warning(disable: 4127) // conditional expression is constant
154 #define MIN_LARGE_SEND_SEGMENTS 4
/* TODO: crank this up if we support higher mtus? */
#define XN_HDR_SIZE 14        /* ethernet header length */
#define XN_MAX_DATA_SIZE 1500 /* maximum ethernet payload (standard MTU) */
#define XN_MIN_FRAME_SIZE 60  /* minimum ethernet frame length (without FCS) */
/* Was (XN_HDR_SIZE + XN_DATA_SIZE); XN_DATA_SIZE is not defined anywhere in
 * this header - the constant was evidently renamed to XN_MAX_DATA_SIZE without
 * updating this macro, so any use of it would fail to compile. */
#define XN_MAX_FRAME_SIZE (XN_HDR_SIZE + XN_MAX_DATA_SIZE)
161 /*
162 #if !defined(OFFLOAD_LARGE_SEND)
163 #define XN_MAX_PKT_SIZE (XN_HDR_SIZE + XN_DATA_SIZE)
164 #else
165 #define XN_MAX_PKT_SIZE MAX_LARGE_SEND_OFFLOAD
166 #endif
167 */
169 #define XN_MAX_SEND_PKTS 16
171 #define XENSOURCE_MAC_HDR 0x00163E
172 #define XN_VENDOR_DESC "Xensource"
173 #define MAX_XENBUS_STR_LEN 128
175 #define RX_MIN_TARGET 8
176 #define RX_DFL_MIN_TARGET 256
177 #define RX_MAX_TARGET min(NET_RX_RING_SIZE, 256)
178 #define RX_MAX_PB_FREELIST (RX_MAX_TARGET * 4)
180 //#define MAX_BUFFERS_PER_PACKET NET_RX_RING_SIZE
182 #define MIN_ETH_HEADER_LENGTH 14
183 #define MAX_ETH_HEADER_LENGTH 14
184 #define MIN_IP4_HEADER_LENGTH 20
185 #define MAX_IP4_HEADER_LENGTH (15 * 4)
186 #define MIN_TCP_HEADER_LENGTH 20
187 #define MAX_TCP_HEADER_LENGTH (15 * 4)
188 #define MAX_PKT_HEADER_LENGTH (MAX_ETH_HEADER_LENGTH + MAX_IP4_HEADER_LENGTH + MAX_TCP_HEADER_LENGTH)
190 #define MIN_LOOKAHEAD_LENGTH (MAX_IP4_HEADER_LENGTH + MAX_TCP_HEADER_LENGTH)
191 #define MAX_LOOKAHEAD_LENGTH PAGE_SIZE /* don't know if this is a good idea - was 256*/
193 #define LINUX_MAX_SG_ELEMENTS 19
struct _shared_buffer_t;

typedef struct _shared_buffer_t shared_buffer_t;

/* A receive-side buffer page shared with the backend via the grant table.
 * Buffers are chained per-packet through 'next'; presumably recycled via the
 * rx_pb_stack / rx_hb_stack free stacks in xennet_info - confirm at use sites. */
struct _shared_buffer_t
{
  struct netif_rx_response rsp;  /* copy of the rx ring response for this buffer */
  shared_buffer_t *next;         /* next buffer in the current packet chain */
  grant_ref_t gref;              /* grant table reference for the page */
  //USHORT offset;
  PVOID virtual;                 /* kernel virtual address of the buffer */
  PMDL mdl;                      /* MDL describing the buffer */
  //USHORT id;
  volatile LONG ref_count;       /* use count; NOTE(review): volatile LONG suggests
                                    Interlocked* updates - confirm at use sites */
};
/* Shadow state kept for one in-flight transmit ring slot, indexed by the
 * tx ids held in xennet_info.tx_id_list. */
typedef struct
{
  PNET_BUFFER nb; /* only set on the last packet */
  PVOID *cb;      /* NOTE(review): presumably a coalesce/header buffer for this
                     request (see TX_COALESCE_BUFFERS) - confirm at use sites */
  grant_ref_t gref; /* grant reference covering the payload for this slot */
} tx_shadow_t;
/* Parse/assembly state for the packet currently being processed on the
 * receive path. The leading fields are cleared between packets by
 * XenNet_ClearPacketInfo(); header_data is not cleared automatically
 * (see the comment before it). */
typedef struct {
  ULONG parse_result;        /* PARSE_OK / PARSE_TOO_SMALL / PARSE_UNKNOWN_TYPE */
  PMDL first_mdl;            /* first MDL of the packet */
  MDL first_mdl_storage;     /* in-place MDL storage, so first_mdl presumably
                                needs no separate allocation - confirm */
  PPFN_NUMBER first_mdl_pfns[17]; /* maximum possible packet size */
  PMDL curr_mdl;             /* current read position: MDL ... */
  shared_buffer_t *first_pb; /* first shared buffer of the packet chain */
  shared_buffer_t *curr_pb;  /* shared buffer at the current position */
  PUCHAR first_mdl_virtual;  /* virtual address backing first_mdl */
  //ULONG mdl_count;
  ULONG first_mdl_offset;
  ULONG first_mdl_length;
  ULONG curr_mdl_offset;     /* ... and byte offset within curr_mdl; advanced by
                                XenNet_EatData, bounded by XenNet_QueryData */
  USHORT mss;                /* large-send segment size; NOTE(review): presumably
                                0 when LSO is not in effect - confirm */
  //NDIS_TCP_IP_CHECKSUM_PACKET_INFO csum_info;
  BOOLEAN csum_blank;        /* checksum still to be computed - TODO confirm
                                mapping to NETRXF_csum_blank */
  BOOLEAN data_validated;    /* data already validated - TODO confirm mapping
                                to NETRXF_data_validated */
  BOOLEAN split_required;    /* incoming large packet must be split before
                                indication (see RX_LSO_SPLIT_*) */
  UCHAR ip_version;          /* IP version parsed from the header (e.g. 4) */
  PUCHAR header;             /* points at the contiguous header bytes */
  ULONG header_length;       /* number of valid bytes at 'header' */
  UCHAR ip_proto;            /* IP protocol number (e.g. 6 = TCP) */
  ULONG total_length;        /* total packet length */
  USHORT ip4_header_length;  /* parsed IPv4 header length in bytes */
  USHORT ip4_length;         /* IPv4 total-length field */
  USHORT tcp_header_length;  /* parsed TCP header length in bytes */
  BOOLEAN tcp_has_options;
  USHORT tcp_length;         /* TCP payload length */
  USHORT tcp_remaining;      /* TCP payload bytes not yet consumed */
  ULONG tcp_seq;             /* TCP sequence number (used when splitting) */
  BOOLEAN is_multicast;      /* destination MAC is multicast */
  BOOLEAN is_broadcast;      /* destination MAC is broadcast */
  /* anything past here doesn't get cleared automatically by the ClearPacketInfo */
  UCHAR header_data[MAX_LOOKAHEAD_LENGTH + MAX_ETH_HEADER_LENGTH];
} packet_info_t;
254 #define PAGE_LIST_SIZE (max(NET_RX_RING_SIZE, NET_TX_RING_SIZE) * 4)
255 #define MULTICAST_LIST_MAX_SIZE 32
257 /* split incoming large packets into MSS sized chunks */
258 #define RX_LSO_SPLIT_MSS 0
259 /* split incoming large packets in half, to not invoke the delayed ack timer */
260 #define RX_LSO_SPLIT_HALF 1
261 /* don't split incoming large packets. not really useful */
262 #define RX_LSO_SPLIT_NONE 2
/* Per-adapter context for the xennet NDIS6 miniport: one instance per
 * virtual NIC. tx-side fields are protected by tx_lock, rx-side fields by
 * rx_lock, as annotated below. */
struct xennet_info
{
  BOOLEAN inactive;  /* adapter no longer active */

  /* Base device vars */
  PDEVICE_OBJECT pdo;       /* physical device object from the bus driver */
  PDEVICE_OBJECT fdo;       /* our functional device object */
  PDEVICE_OBJECT lower_do;  /* next-lower device object in the stack */
  //WDFDEVICE wdf_device;
  WCHAR dev_desc[NAME_SIZE]; /* human-readable device description */

  /* NDIS-related vars */
  NDIS_HANDLE adapter_handle;
  ULONG packet_filter;      /* current NDIS_PACKET_TYPE_* filter bits
                               (see SUPPORTED_PACKET_FILTERS) */
  BOOLEAN connected;        /* backend connection established */
  BOOLEAN shutting_down;
  BOOLEAN tx_shutting_down;
  BOOLEAN rx_shutting_down;
  uint8_t perm_mac_addr[ETH_ALEN]; /* permanent (burned-in) MAC address */
  uint8_t curr_mac_addr[ETH_ALEN]; /* currently configured MAC address */
  ULONG current_lookahead;  /* current NDIS lookahead size */
  NDIS_DEVICE_POWER_STATE new_power_state; /* requested power state */
  NDIS_DEVICE_POWER_STATE power_state;     /* current power state */
  PIO_WORKITEM power_workitem; /* work item for power-state transitions */

  /* Misc. Xen vars */
  XENPCI_VECTORS vectors;   /* function table provided by xenpci */
  PXENPCI_DEVICE_STATE device_state;
  evtchn_port_t event_channel; /* event channel shared with the backend */
  ULONG state;
  char backend_path[MAX_XENBUS_STR_LEN]; /* xenstore path of the backend */
  ULONG backend_state;      /* last observed xenbus state of the backend */
  PVOID config_page;
  UCHAR multicast_list[MULTICAST_LIST_MAX_SIZE][6]; /* programmed multicast MACs */
  ULONG multicast_list_size; /* number of valid entries in multicast_list */
  KDPC suspend_dpc;
  PIO_WORKITEM resume_work_item;
  KSPIN_LOCK resume_lock;
  KDPC rxtx_dpc;            /* DPC driving both rx and tx ring processing */

  /* tx related - protected by tx_lock */
  KSPIN_LOCK tx_lock;
  LIST_ENTRY tx_waiting_pkt_list; /* NBs queued while the ring is full */
  struct netif_tx_front_ring tx;  /* shared tx ring (frontend view) */
  ULONG tx_ring_free;             /* free request slots in the tx ring */
  tx_shadow_t tx_shadows[NET_TX_RING_SIZE]; /* per-slot shadow state */
  //NDIS_HANDLE tx_buffer_pool;
#define TX_HEADER_BUFFER_SIZE 512
//#define TX_COALESCE_BUFFERS (NET_TX_RING_SIZE >> 2)
#define TX_COALESCE_BUFFERS (NET_TX_RING_SIZE)
  KEVENT tx_idle_event;     /* presumably signalled when tx_outstanding
                               drains to zero during shutdown - confirm */
  ULONG tx_outstanding;     /* sends handed to the ring, not yet completed */
  ULONG tx_id_free;         /* number of free ids in tx_id_list */
  USHORT tx_id_list[NET_TX_RING_SIZE]; /* free-list of tx shadow ids */
  NPAGED_LOOKASIDE_LIST tx_lookaside_list;

  /* rx_related - protected by rx_lock */
  KSPIN_LOCK rx_lock;
  struct netif_rx_front_ring rx;  /* shared rx ring (frontend view) */
  ULONG rx_id_free;
  packet_info_t *rxpi;      /* per-processor(?) packet parse state - confirm */
  KEVENT packet_returned_event;
  NDIS_HANDLE rx_nbl_pool;  /* pool for NET_BUFFER_LISTs indicated upward */
  NDIS_HANDLE rx_nb_pool;   /* pool for NET_BUFFERs indicated upward */
  volatile LONG rx_pb_free; /* count of free packet buffers on rx_pb_stack */
  struct stack_state *rx_pb_stack; /* lock-free stack of free packet buffers */
  volatile LONG rx_hb_free; /* count of free header buffers on rx_hb_stack */
  struct stack_state *rx_hb_stack; /* lock-free stack of free header buffers */
  shared_buffer_t *rx_ring_pbs[NET_RX_RING_SIZE]; /* buffer posted in each rx slot */
  NPAGED_LOOKASIDE_LIST rx_lookaside_list;
  /* Receive-ring batched refills. */
  ULONG rx_target;          /* current refill target */
  ULONG rx_max_target;      /* upper bound (RX_MAX_TARGET) */
  ULONG rx_min_target;      /* lower bound (RX_MIN_TARGET) */
  shared_buffer_t *rx_partial_buf; /* packet spanning a ring-poll boundary */
  BOOLEAN rx_partial_extra_info_flag ;
  BOOLEAN rx_partial_more_data_flag;

  /* how many packets are in the net stack atm */
  LONG rx_outstanding;

  /* config vars from registry */
  /* the frontend_* indicate our willingness to support */
  BOOLEAN frontend_sg_supported;
  BOOLEAN frontend_csum_supported;
  ULONG frontend_gso_value;
  ULONG frontend_mtu_value;
  ULONG frontend_gso_rx_split_type; /* RX_LSO_SPLIT_* */
  /* the backend_* reflect what the backend advertises */
  BOOLEAN backend_sg_supported;
  BOOLEAN backend_csum_supported;
  ULONG backend_gso_value;
  /* the current_* are the negotiated (frontend AND backend) settings */
  BOOLEAN current_sg_supported;
  BOOLEAN current_csum_supported;
  ULONG current_gso_value;
  ULONG current_mtu_value;
  ULONG current_gso_rx_split_type;

  /* config stuff calculated from the above */
  ULONG config_max_pkt_size;

  /* stats */\
  NDIS_STATISTICS_INFO stats;
  //ULONG64 stat_tx_ok;
  //ULONG64 stat_rx_ok;
  //ULONG64 stat_tx_error;
  //ULONG64 stat_rx_error;
  //ULONG64 stat_rx_no_buffer;
} typedef xennet_info_t;
/* One row of the OID dispatch table (xennet_oids[], defined elsewhere). */
struct xennet_oids_t {
  ULONG oid;        /* OID_* code this row handles */
  char *oid_name;   /* printable name, presumably for logging - confirm */
  ULONG min_length; /* minimum information-buffer length for the request */
  MINIPORT_OID_REQUEST *query_routine; /* query handler; presumably NULL when
                                          queries are unsupported - confirm */
  MINIPORT_OID_REQUEST *set_routine;   /* set handler; presumably NULL when
                                          sets are unsupported - confirm */
};
384 extern struct xennet_oids_t xennet_oids[];
386 MINIPORT_OID_REQUEST XenNet_OidRequest;
387 MINIPORT_CANCEL_OID_REQUEST XenNet_CancelOidRequest;
389 MINIPORT_SEND_NET_BUFFER_LISTS XenNet_SendNetBufferLists;
390 MINIPORT_CANCEL_SEND XenNet_CancelSend;
392 MINIPORT_RETURN_NET_BUFFER_LISTS XenNet_ReturnNetBufferLists;
394 BOOLEAN
395 XenNet_RxInit(xennet_info_t *xi);
397 BOOLEAN
398 XenNet_RxShutdown(xennet_info_t *xi);
400 VOID
401 XenNet_RxResumeStart(xennet_info_t *xi);
403 VOID
404 XenNet_RxResumeEnd(xennet_info_t *xi);
406 BOOLEAN
407 XenNet_RxBufferCheck(struct xennet_info *xi);
409 VOID
410 XenNet_TxResumeStart(xennet_info_t *xi);
412 VOID
413 XenNet_TxResumeEnd(xennet_info_t *xi);
415 BOOLEAN
416 XenNet_TxInit(xennet_info_t *xi);
418 BOOLEAN
419 XenNet_TxShutdown(xennet_info_t *xi);
421 VOID
422 XenNet_TxBufferGC(struct xennet_info *xi, BOOLEAN dont_set_event);
424 #if 0
425 NDIS_STATUS
426 XenNet_D0Entry(struct xennet_info *xi);
427 NDIS_STATUS
428 XenNet_D0Exit(struct xennet_info *xi);
429 IO_WORKITEM_ROUTINE
430 XenNet_SetPower;
431 #endif
433 /* return values */
434 #define PARSE_OK 0
435 #define PARSE_TOO_SMALL 1 /* first buffer is too small */
436 #define PARSE_UNKNOWN_TYPE 2
438 BOOLEAN
439 XenNet_BuildHeader(packet_info_t *pi, PVOID header, ULONG new_header_size);
440 VOID
441 XenNet_ParsePacketHeader(packet_info_t *pi, PUCHAR buffer, ULONG min_header_size);
442 BOOLEAN
443 XenNet_FilterAcceptPacket(struct xennet_info *xi,packet_info_t *pi);
445 VOID
446 XenNet_SumIpHeader(
447 PUCHAR header,
448 USHORT ip4_header_length
449 );
451 static __forceinline VOID
452 XenNet_ClearPacketInfo(packet_info_t *pi)
453 {
454 RtlZeroMemory(pi, sizeof(packet_info_t) - FIELD_OFFSET(packet_info_t, header_data));
455 }
457 /* Get some data from the current packet, but don't cross a page boundry. */
458 static __forceinline ULONG
459 XenNet_QueryData(packet_info_t *pi, ULONG length)
460 {
461 ULONG offset_in_page;
463 if (length > MmGetMdlByteCount(pi->curr_mdl) - pi->curr_mdl_offset)
464 length = MmGetMdlByteCount(pi->curr_mdl) - pi->curr_mdl_offset;
466 offset_in_page = (MmGetMdlByteOffset(pi->curr_mdl) + pi->curr_mdl_offset) & (PAGE_SIZE - 1);
467 if (offset_in_page + length > PAGE_SIZE)
468 length = PAGE_SIZE - offset_in_page;
470 return length;
471 }
473 /* Move the pointers forward by the given amount. No error checking is done. */
474 static __forceinline VOID
475 XenNet_EatData(packet_info_t *pi, ULONG length)
476 {
477 pi->curr_mdl_offset += length;
478 if (pi->curr_mdl_offset >= MmGetMdlByteCount(pi->curr_mdl))
479 {
480 pi->curr_mdl_offset -= MmGetMdlByteCount(pi->curr_mdl);
481 NdisGetNextMdl(pi->curr_mdl, &pi->curr_mdl);
482 }
483 }