win-pvdrivers

xennet/xennet6_rx.c @ 962:278b479f3f7d

Fix a problem on rx where tcpip.sys leaks memory if the first MDL contains a fragment of the TCP payload.
Fix a bug in detecting checksum support.
Improve multi-buffer handling in the rx path.
Remove old #ifdef'd-out code.
Add NDIS version detection for version-dependent features.
Start adding header/data split.
author James Harper <james.harper@bendigoit.com.au>
date Fri Dec 30 20:58:01 2011 +1100 (2011-12-30)
parents 013b63248e5d
children 941699790045

/*
PV Net Driver for Windows Xen HVM Domains
Copyright (C) 2007 James Harper
Copyright (C) 2007 Andrew Grover <andy.grover@oracle.com>

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#include "xennet6.h"
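
/* Get a page buffer (pb) from the freelist stack, or allocate a new one
   (buffer memory, MDL and grant ref) if the stack is empty and we are
   not shutting down. Returns NULL on failure. */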
static __inline shared_buffer_t *
get_pb_from_freelist(struct xennet_info *xi)
{
  shared_buffer_t *pb;
  PVOID ptr_ref;

  if (stack_pop(xi->rx_pb_stack, &ptr_ref))
  {
    pb = ptr_ref;
    pb->ref_count = 1;
    InterlockedDecrement(&xi->rx_pb_free);
    return pb;
  }

  /* don't allocate a new one if we are shutting down */
  if (xi->shutting_down)
    return NULL;

  pb = NdisAllocateMemoryWithTagPriority(xi->adapter_handle, sizeof(shared_buffer_t), XENNET_POOL_TAG, LowPoolPriority);
  if (!pb)
    return NULL;
  pb->virtual = NdisAllocateMemoryWithTagPriority(xi->adapter_handle, PAGE_SIZE, XENNET_POOL_TAG, LowPoolPriority);
  if (!pb->virtual)
  {
    NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
    return NULL;
  }
  pb->mdl = IoAllocateMdl(pb->virtual, PAGE_SIZE, FALSE, FALSE, NULL);
  if (!pb->mdl)
  {
    NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
    NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
    return NULL;
  }
  pb->gref = (grant_ref_t)xi->vectors.GntTbl_GrantAccess(xi->vectors.context, 0,
    (ULONG)(MmGetPhysicalAddress(pb->virtual).QuadPart >> PAGE_SHIFT), FALSE, INVALID_GRANT_REF, (ULONG)'XNRX');
  if (pb->gref == INVALID_GRANT_REF)
  {
    IoFreeMdl(pb->mdl);
    NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
    NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
    return NULL;
  }
  MmBuildMdlForNonPagedPool(pb->mdl);
  pb->ref_count = 1;
  return pb;
}

static __inline VOID
ref_pb(struct xennet_info *xi, shared_buffer_t *pb)
{
  UNREFERENCED_PARAMETER(xi);
  InterlockedIncrement(&pb->ref_count);
}
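
/* Drop a reference on a page buffer. When the count reaches zero the pb
   is freed outright if the freelist is already full, otherwise it is
   reset and pushed back onto the freelist stack. */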
static __inline VOID
put_pb_on_freelist(struct xennet_info *xi, shared_buffer_t *pb)
{
  if (InterlockedDecrement(&pb->ref_count) == 0)
  {
    //NdisAdjustBufferLength(pb->buffer, PAGE_SIZE);
    //NDIS_BUFFER_LINKAGE(pb->buffer) = NULL;
    if (xi->rx_pb_free > RX_MAX_PB_FREELIST)
    {
      IoFreeMdl(pb->mdl);
      NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
      NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
      return;
    }
    pb->mdl->ByteCount = PAGE_SIZE;
    pb->mdl->Next = NULL;
    pb->next = NULL;
    stack_push(xi->rx_pb_stack, pb);
    InterlockedIncrement(&xi->rx_pb_free);
  }
}
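
/* Header buffers (hb) hold the rebuilt ethernet/IP/TCP headers for
   multi-buffer and split packets; the data area immediately follows the
   shared_buffer_t header (at hb + 1). */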
static __inline shared_buffer_t *
get_hb_from_freelist(struct xennet_info *xi)
{
  shared_buffer_t *hb;
  PVOID ptr_ref;

  if (stack_pop(xi->rx_hb_stack, &ptr_ref))
  {
    hb = ptr_ref;
    InterlockedDecrement(&xi->rx_hb_free);
    return hb;
  }

  /* don't allocate a new one if we are shutting down */
  if (xi->shutting_down)
    return NULL;

  hb = NdisAllocateMemoryWithTagPriority(xi->adapter_handle, sizeof(shared_buffer_t) + MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH, XENNET_POOL_TAG, LowPoolPriority);
  if (!hb)
    return NULL;
  NdisZeroMemory(hb, sizeof(shared_buffer_t));
  hb->mdl = IoAllocateMdl(hb + 1, MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH, FALSE, FALSE, NULL);
  if (!hb->mdl)
  {
    NdisFreeMemory(hb, sizeof(shared_buffer_t) + MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH, 0);
    return NULL;
  }
  MmBuildMdlForNonPagedPool(hb->mdl);
  return hb;
}

static __inline VOID
put_hb_on_freelist(struct xennet_info *xi, shared_buffer_t *hb)
{
  ASSERT(xi);
  /* the MDL only maps the data area that follows the shared_buffer_t header */
  hb->mdl->ByteCount = MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH;
  hb->mdl->Next = NULL;
  hb->next = NULL;
  stack_push(xi->rx_hb_stack, hb);
  InterlockedIncrement(&xi->rx_hb_free);
}
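
/* Top up the rx ring with page buffers from the freelist and grant each
   one to the backend, notifying the event channel if netback needs it. */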
// Called at DISPATCH_LEVEL with rx lock held
static NDIS_STATUS
XenNet_FillRing(struct xennet_info *xi)
{
  unsigned short id;
  shared_buffer_t *page_buf;
  ULONG i, notify;
  ULONG batch_target;
  RING_IDX req_prod = xi->rx.req_prod_pvt;
  netif_rx_request_t *req;

  //FUNCTION_ENTER();

  batch_target = xi->rx_target - (req_prod - xi->rx.rsp_cons);

  if (batch_target < (xi->rx_target >> 2))
  {
    //FUNCTION_EXIT();
    return NDIS_STATUS_SUCCESS; /* only refill if we are less than 3/4 full already */
  }

  for (i = 0; i < batch_target; i++)
  {
    page_buf = get_pb_from_freelist(xi);
    if (!page_buf)
    {
      KdPrint((__DRIVER_NAME " Added %d out of %d buffers to rx ring (no free pages)\n", i, batch_target));
      break;
    }
    xi->rx_id_free--;

    /* Give to netback */
    id = (USHORT)((req_prod + i) & (NET_RX_RING_SIZE - 1));
    ASSERT(xi->rx_ring_pbs[id] == NULL);
    xi->rx_ring_pbs[id] = page_buf;
    req = RING_GET_REQUEST(&xi->rx, req_prod + i);
    req->id = id;
    req->gref = page_buf->gref;
    ASSERT(req->gref != INVALID_GRANT_REF);
  }
  KeMemoryBarrier();
  xi->rx.req_prod_pvt = req_prod + i;
  RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xi->rx, notify);
  if (notify)
  {
    xi->vectors.EvtChn_Notify(xi->vectors.context, xi->event_channel);
  }

  //FUNCTION_EXIT();

  return NDIS_STATUS_SUCCESS;
}

typedef struct {
  PNET_BUFFER_LIST first_nbl;
  PNET_BUFFER_LIST last_nbl;
  ULONG packet_count;
  ULONG nbl_count;
} rx_context_t;
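
/* Build one NET_BUFFER_LIST/NET_BUFFER for the packet described by pi.
   A single-buffer, non-split packet reuses the ring MDL directly; any
   other packet gets its headers copied into a header buffer with
   partial MDLs chained on for the payload. When splitting a large
   receive, the IP total length and TCP sequence number are rewritten
   for each segment. Returns FALSE if an allocation fails. */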
static BOOLEAN
XenNet_MakePacket(struct xennet_info *xi, rx_context_t *rc, packet_info_t *pi)
{
  PNET_BUFFER_LIST nbl;
  PNET_BUFFER nb;
  PMDL mdl_head, mdl_tail, curr_mdl;
  PUCHAR header_va;
  ULONG out_remaining;
  ULONG header_extra;
  shared_buffer_t *header_buf;
  NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csum_info;

  //FUNCTION_ENTER();

  nbl = NdisAllocateNetBufferList(xi->rx_nbl_pool, 0, 0);
  if (!nbl)
  {
    /* buffers will be freed in MakePackets */
    KdPrint((__DRIVER_NAME " No free nbl's\n"));
    //FUNCTION_EXIT();
    return FALSE;
  }

  nb = NdisAllocateNetBuffer(xi->rx_nb_pool, NULL, 0, 0);
  if (!nb)
  {
    KdPrint((__DRIVER_NAME " No free nb's\n"));
    NdisFreeNetBufferList(nbl);
    //FUNCTION_EXIT();
    return FALSE;
  }

  if (!pi->first_mdl->Next && !pi->split_required) {
    /* a single buffer <= MTU */
    header_buf = NULL;
    XenNet_BuildHeader(pi, pi->first_mdl_virtual, pi->first_mdl_length);
    NET_BUFFER_FIRST_MDL(nb) = pi->first_mdl;
    NET_BUFFER_CURRENT_MDL(nb) = pi->first_mdl;
    NET_BUFFER_CURRENT_MDL_OFFSET(nb) = 0;
    NET_BUFFER_DATA_OFFSET(nb) = 0;
    NET_BUFFER_DATA_LENGTH(nb) = pi->total_length;
    NB_FIRST_PB(nb) = pi->first_pb;
    ref_pb(xi, pi->first_pb);
  } else {
    ASSERT(ndis_os_minor_version >= 1);
    header_buf = get_hb_from_freelist(xi);
    if (!header_buf)
    {
      KdPrint((__DRIVER_NAME " No free header buffers\n"));
      NdisFreeNetBufferList(nbl);
      NdisFreeNetBuffer(nb);
      //FUNCTION_EXIT();
      return FALSE;
    }
    header_va = (PUCHAR)(header_buf + 1);
    NdisMoveMemory(header_va, pi->header, pi->header_length);
    //KdPrint((__DRIVER_NAME " header_length = %d, current_lookahead = %d\n", pi->header_length, xi->current_lookahead));
    //KdPrint((__DRIVER_NAME " ip4_header_length = %d\n", pi->ip4_header_length));
    //KdPrint((__DRIVER_NAME " tcp_header_length = %d\n", pi->tcp_header_length));
    /* make sure only the header is in the first buffer (or the entire packet, but that is done in the above case) */
    XenNet_BuildHeader(pi, header_va, MAX_ETH_HEADER_LENGTH + pi->ip4_header_length + pi->tcp_header_length);
    header_extra = pi->header_length - (MAX_ETH_HEADER_LENGTH + pi->ip4_header_length + pi->tcp_header_length);
    ASSERT(pi->header_length <= MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH);
    header_buf->mdl->ByteCount = pi->header_length;
    mdl_head = mdl_tail = curr_mdl = header_buf->mdl;
    NB_FIRST_PB(nb) = header_buf;
    header_buf->next = pi->curr_pb;
    NET_BUFFER_FIRST_MDL(nb) = mdl_head;
    NET_BUFFER_CURRENT_MDL(nb) = mdl_head;
    NET_BUFFER_CURRENT_MDL_OFFSET(nb) = 0;
    NET_BUFFER_DATA_OFFSET(nb) = 0;
    NET_BUFFER_DATA_LENGTH(nb) = pi->header_length;

    if (pi->split_required)
    {
      ULONG tcp_length;
      USHORT new_ip4_length;
      tcp_length = (USHORT)min(pi->mss, pi->tcp_remaining);
      new_ip4_length = (USHORT)(pi->ip4_header_length + pi->tcp_header_length + tcp_length);
      //KdPrint((__DRIVER_NAME " new_ip4_length = %d\n", new_ip4_length));
      //KdPrint((__DRIVER_NAME " this tcp_length = %d\n", tcp_length));
      SET_NET_USHORT(&header_va[XN_HDR_SIZE + 2], new_ip4_length);
      SET_NET_ULONG(&header_va[XN_HDR_SIZE + pi->ip4_header_length + 4], pi->tcp_seq);
      pi->tcp_seq += tcp_length;
      pi->tcp_remaining = (USHORT)(pi->tcp_remaining - tcp_length);
      /* part of the packet is already present in the header buffer for lookahead */
      out_remaining = tcp_length - header_extra;
      ASSERT((LONG)out_remaining >= 0);
    }
    else
    {
      out_remaining = pi->total_length - pi->header_length;
      ASSERT((LONG)out_remaining >= 0);
    }
    //KdPrint((__DRIVER_NAME " before loop - out_remaining = %d\n", out_remaining));

    while (out_remaining != 0)
    {
      //ULONG in_buffer_offset;
      ULONG in_buffer_length;
      ULONG out_length;

      //KdPrint((__DRIVER_NAME " in loop - out_remaining = %d, curr_buffer = %p, curr_pb = %p\n", out_remaining, pi->curr_mdl, pi->curr_pb));
      if (!pi->curr_mdl || !pi->curr_pb)
      {
        KdPrint((__DRIVER_NAME " out of buffers for packet\n"));
        //KdPrint((__DRIVER_NAME " out_remaining = %d, curr_buffer = %p, curr_pb = %p\n", out_remaining, pi->curr_mdl, pi->curr_pb));
        // TODO: free some stuff or we'll leak
        /* unchain buffers then free packet */
        //FUNCTION_EXIT();
        return FALSE;
      }

      in_buffer_length = MmGetMdlByteCount(pi->curr_mdl);
      out_length = min(out_remaining, in_buffer_length - pi->curr_mdl_offset);
      curr_mdl = IoAllocateMdl((PUCHAR)MmGetMdlVirtualAddress(pi->curr_mdl) + pi->curr_mdl_offset, out_length, FALSE, FALSE, NULL);
      ASSERT(curr_mdl);
      IoBuildPartialMdl(pi->curr_mdl, curr_mdl, (PUCHAR)MmGetMdlVirtualAddress(pi->curr_mdl) + pi->curr_mdl_offset, out_length);
      mdl_tail->Next = curr_mdl;
      mdl_tail = curr_mdl;
      curr_mdl->Next = NULL; /* I think this might be redundant */
      NET_BUFFER_DATA_LENGTH(nb) += out_length;
      ref_pb(xi, pi->curr_pb);
      pi->curr_mdl_offset = (USHORT)(pi->curr_mdl_offset + out_length);
      if (pi->curr_mdl_offset == in_buffer_length)
      {
        pi->curr_mdl = pi->curr_mdl->Next;
        pi->curr_pb = pi->curr_pb->next;
        pi->curr_mdl_offset = 0;
      }
      out_remaining -= out_length;
    }
    if (pi->split_required)
    {
      // TODO: only if IP checksum offload is disabled...
      //XenNet_SumIpHeader(header_va, pi->ip4_header_length);
    }
    if (header_extra > 0)
      pi->header_length -= header_extra;
    //ASSERT(*(shared_buffer_t **)&packet->MiniportReservedEx[0]);
  }

  rc->packet_count++;
  NET_BUFFER_LIST_FIRST_NB(nbl) = nb;
  //NET_BUFFER_NEXT_NB(nb) = NULL; /* is this already done for me? */

  if (pi->parse_result == PARSE_OK)
  {
    BOOLEAN checksum_offload = FALSE;
    csum_info.Value = 0;
    if (pi->csum_blank || pi->data_validated || pi->mss)
    {
      if (pi->ip_proto == 6) // && xi->setting_csum.V4Receive.TcpChecksum)
      {
        // if (!pi->tcp_has_options || xi->setting_csum.V4Receive.TcpOptionsSupported)
        // {
        csum_info.Receive.IpChecksumSucceeded = TRUE;
        csum_info.Receive.TcpChecksumSucceeded = TRUE;
        checksum_offload = TRUE;
        // }
      }
      else if (pi->ip_proto == 17) // && xi->setting_csum.V4Receive.UdpChecksum)
      {
        csum_info.Receive.IpChecksumSucceeded = TRUE;
        csum_info.Receive.UdpChecksumSucceeded = TRUE;
        checksum_offload = TRUE;
      }
    }
    NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo) = csum_info.Value;
  }

  if (!rc->first_nbl)
  {
    rc->first_nbl = nbl;
  }
  else
  {
    NET_BUFFER_LIST_NEXT_NBL(rc->last_nbl) = nbl;
  }
  rc->last_nbl = nbl;
  NET_BUFFER_LIST_NEXT_NBL(nbl) = NULL;
  rc->nbl_count++;
  InterlockedIncrement(&xi->rx_outstanding);
  if (pi->is_multicast)
  {
    /* multicast */
    xi->stats.ifHCInMulticastPkts++;
    xi->stats.ifHCInMulticastOctets += NET_BUFFER_DATA_LENGTH(nb);
  }
  else if (pi->is_broadcast)
  {
    /* broadcast */
    xi->stats.ifHCInBroadcastPkts++;
    xi->stats.ifHCInBroadcastOctets += NET_BUFFER_DATA_LENGTH(nb);
  }
  else
  {
    /* unicast */
    xi->stats.ifHCInUcastPkts++;
    xi->stats.ifHCInUcastOctets += NET_BUFFER_DATA_LENGTH(nb);
  }
  //FUNCTION_EXIT();
  return TRUE;
}
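
/* Parse the buffer chain accumulated in pi and hand it to
   XenNet_MakePacket - once for a normal packet, or once per mss-sized
   segment when a large receive must be split. Finally drop this
   function's references on the page buffers and reset pi. */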
static VOID
XenNet_MakePackets(struct xennet_info *xi, rx_context_t *rc, packet_info_t *pi)
{
  UCHAR psh;
  //PNDIS_BUFFER buffer;
  shared_buffer_t *page_buf;

  //FUNCTION_ENTER();

  XenNet_ParsePacketHeader(pi, NULL, 0);
  //pi->split_required = FALSE;

  if (!XenNet_FilterAcceptPacket(xi, pi))
  {
    goto done;
  }

  if (pi->split_required)
  {
    switch (xi->current_gso_rx_split_type)
    {
    case RX_LSO_SPLIT_HALF:
      pi->mss = max((pi->tcp_length + 1) / 2, pi->mss);
      break;
    case RX_LSO_SPLIT_NONE:
      pi->mss = 65535;
      break;
    }
  }

  switch (pi->ip_proto)
  {
  case 6: // TCP
    if (pi->split_required)
      break;
    /* fall through */
  case 17: // UDP
    if (!XenNet_MakePacket(xi, rc, pi))
    {
      KdPrint((__DRIVER_NAME " Ran out of packets\n"));
      xi->stats.ifInDiscards++;
      goto done;
    }
    goto done;
  default:
    if (!XenNet_MakePacket(xi, rc, pi))
    {
      KdPrint((__DRIVER_NAME " Ran out of packets\n"));
      xi->stats.ifInDiscards++;
      goto done;
    }
    goto done;
  }

  /* this is the split_required code */
  pi->tcp_remaining = pi->tcp_length;

  /* we can make certain assumptions here as the following code is only for tcp4 */
  psh = pi->header[XN_HDR_SIZE + pi->ip4_header_length + 13] & 8;
  while (pi->tcp_remaining)
  {
    if (!XenNet_MakePacket(xi, rc, pi))
    {
      KdPrint((__DRIVER_NAME " Ran out of packets\n"));
      xi->stats.ifInDiscards++;
      break; /* we are out of memory - just drop the packets */
    }
    if (psh)
    {
      //NdisGetFirstBufferFromPacketSafe(packet, &mdl, &header_va, &buffer_length, &total_length, NormalPagePriority);
      if (pi->tcp_remaining)
        pi->header[XN_HDR_SIZE + pi->ip4_header_length + 13] &= ~8;
      else
        pi->header[XN_HDR_SIZE + pi->ip4_header_length + 13] |= 8;
    }
    //XenNet_SumPacketData(pi, packet, TRUE);
    //entry = (PLIST_ENTRY)&packet->MiniportReservedEx[sizeof(PVOID)];
    //InsertTailList(rx_packet_list, entry);
  }
done:
  page_buf = pi->first_pb;
  while (page_buf)
  {
    shared_buffer_t *next_pb = page_buf->next;
    put_pb_on_freelist(xi, page_buf); /* this doesn't actually free the page_buf if there are outstanding references */
    page_buf = next_pb;
  }
  XenNet_ClearPacketInfo(pi);
  //FUNCTION_EXIT();
  return;
}

/* called at <= DISPATCH_LEVEL */
/* it's okay for this to be called while resume_state != RUNNING, as the packets will simply be added back to the freelists; the grants will be fixed up later */
VOID
XenNet_ReturnNetBufferLists(NDIS_HANDLE adapter_context, PNET_BUFFER_LIST curr_nbl, ULONG return_flags)
{
  struct xennet_info *xi = adapter_context;
  UNREFERENCED_PARAMETER(return_flags);

  //FUNCTION_ENTER();

  //KdPrint((__DRIVER_NAME " page_buf = %p\n", page_buf));

  ASSERT(xi);
  while (curr_nbl)
  {
    PNET_BUFFER_LIST next_nbl;
    PNET_BUFFER curr_nb;

    next_nbl = NET_BUFFER_LIST_NEXT_NBL(curr_nbl);
    curr_nb = NET_BUFFER_LIST_FIRST_NB(curr_nbl);
    while (curr_nb)
    {
      PNET_BUFFER next_nb;
      PMDL curr_mdl;
      shared_buffer_t *page_buf;

      next_nb = NET_BUFFER_NEXT_NB(curr_nb);
      curr_mdl = NET_BUFFER_FIRST_MDL(curr_nb);
      page_buf = NB_FIRST_PB(curr_nb);
      while (curr_mdl)
      {
        shared_buffer_t *next_buf;
        PMDL next_mdl;

        ASSERT(page_buf); /* make sure that there is a pb to match this mdl */
        next_mdl = curr_mdl->Next;
        next_buf = page_buf->next;
        if (!page_buf->virtual)
        {
          /* this is an hb not a pb because virtual is NULL (virtual is just the memory after the hb) */
          put_hb_on_freelist(xi, (shared_buffer_t *)MmGetMdlVirtualAddress(curr_mdl) - 1);
        }
        else
        {
          //KdPrint((__DRIVER_NAME " returning page_buf %p with id %d\n", page_buf, page_buf->id));
          if (curr_mdl != page_buf->mdl)
          {
            //KdPrint((__DRIVER_NAME " curr_mdl = %p, page_buf->mdl = %p\n", curr_mdl, page_buf->mdl));
            IoFreeMdl(curr_mdl);
          }
          put_pb_on_freelist(xi, page_buf);
        }
        curr_mdl = next_mdl;
        page_buf = next_buf;
      }

      NdisFreeNetBuffer(curr_nb);
      InterlockedDecrement(&xi->rx_outstanding);

      curr_nb = next_nb;
    }
    NdisFreeNetBufferList(curr_nbl);
    curr_nbl = next_nbl;
  }

  if (!xi->rx_outstanding && xi->rx_shutting_down)
    KeSetEvent(&xi->packet_returned_event, IO_NO_INCREMENT, FALSE);

  //FUNCTION_EXIT();
}

/* We limit the number of packets per interrupt so that acks get a chance
under high rx load. The DPC is immediately re-scheduled */
//#define MAXIMUM_PACKETS_PER_INTERRUPT 32 /* this is calculated before large packet split */
//#define MAXIMUM_DATA_PER_INTERRUPT (MAXIMUM_PACKETS_PER_INTERRUPT * 1500) /* help account for large packets */

#define MAXIMUM_PACKETS_PER_INTERRUPT 2560 /* this is calculated before large packet split */
#define MAXIMUM_DATA_PER_INTERRUPT (MAXIMUM_PACKETS_PER_INTERRUPT * 1500) /* help account for large packets */
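
/* Drain the rx ring: pull responses into a local chain of page buffers
   while holding the rx lock, refill the ring, then (lock released)
   assemble NET_BUFFER_LISTs and indicate them to NDIS. Returns TRUE if
   the DPC was re-queued and the TX path should not set its event. */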
// Called at DISPATCH_LEVEL
BOOLEAN
XenNet_RxBufferCheck(struct xennet_info *xi)
{
  RING_IDX cons, prod;
  ULONG packet_count = 0;
  ULONG packet_data = 0;
  ULONG buffer_count = 0;
  USHORT id;
  int more_to_do = FALSE;
  shared_buffer_t *page_buf;
  //LIST_ENTRY rx_header_only_packet_list;
  //PLIST_ENTRY entry;
  //ULONG nbl_count = 0;
  ULONG interim_packet_data = 0;
  struct netif_extra_info *ei;
  rx_context_t rc;
  packet_info_t *pi = &xi->rxpi[KeGetCurrentProcessorNumber() & 0xff];
  shared_buffer_t *head_buf = NULL;
  shared_buffer_t *tail_buf = NULL;
  shared_buffer_t *last_buf = NULL;
  BOOLEAN extra_info_flag = FALSE;
  BOOLEAN more_data_flag = FALSE;
  BOOLEAN dont_set_event;
  //FUNCTION_ENTER();

  if (!xi->connected)
    return FALSE; /* a delayed DPC could let this come through... just do nothing */

  rc.first_nbl = NULL;
  rc.last_nbl = NULL;
  rc.packet_count = 0;
  rc.nbl_count = 0;

  /* get all the buffers off the ring as quickly as possible so the lock is held for a minimum amount of time */
  KeAcquireSpinLockAtDpcLevel(&xi->rx_lock);

  if (xi->rx_shutting_down)
  {
    /* there is a chance that our Dpc had been queued just before the shutdown... */
    KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);
    return FALSE;
  }

  if (xi->rx_partial_buf)
  {
    head_buf = xi->rx_partial_buf;
    tail_buf = xi->rx_partial_buf;
    while (tail_buf->next)
      tail_buf = tail_buf->next;
    more_data_flag = xi->rx_partial_more_data_flag;
    extra_info_flag = xi->rx_partial_extra_info_flag;
    xi->rx_partial_buf = NULL;
  }

  do {
    prod = xi->rx.sring->rsp_prod;
    KeMemoryBarrier(); /* Ensure we see responses up to 'prod'. */

    for (cons = xi->rx.rsp_cons; cons != prod && packet_count < MAXIMUM_PACKETS_PER_INTERRUPT && packet_data < MAXIMUM_DATA_PER_INTERRUPT; cons++)
    {
      id = (USHORT)(cons & (NET_RX_RING_SIZE - 1));
      page_buf = xi->rx_ring_pbs[id];
      ASSERT(page_buf);
      xi->rx_ring_pbs[id] = NULL;
      xi->rx_id_free++;
      memcpy(&page_buf->rsp, RING_GET_RESPONSE(&xi->rx, cons), max(sizeof(struct netif_rx_response), sizeof(struct netif_extra_info)));
      if (!extra_info_flag)
      {
        if (page_buf->rsp.status <= 0
          || page_buf->rsp.offset + page_buf->rsp.status > PAGE_SIZE)
        {
          KdPrint((__DRIVER_NAME " Error: rsp offset %d, size %d\n",
            page_buf->rsp.offset, page_buf->rsp.status));
          ASSERT(!extra_info_flag);
          put_pb_on_freelist(xi, page_buf);
          continue;
        }
      }

      if (!head_buf)
      {
        head_buf = page_buf;
        tail_buf = page_buf;
      }
      else
      {
        tail_buf->next = page_buf;
        tail_buf = page_buf;
      }
      page_buf->next = NULL;

      if (extra_info_flag)
      {
        ei = (struct netif_extra_info *)&page_buf->rsp;
        extra_info_flag = ei->flags & XEN_NETIF_EXTRA_FLAG_MORE;
      }
      else
      {
        more_data_flag = (BOOLEAN)(page_buf->rsp.flags & NETRXF_more_data);
        extra_info_flag = (BOOLEAN)(page_buf->rsp.flags & NETRXF_extra_info);
        interim_packet_data += page_buf->rsp.status;
      }

      if (!extra_info_flag && !more_data_flag)
      {
        last_buf = page_buf;
        packet_count++;
        packet_data += interim_packet_data;
        interim_packet_data = 0;
      }
      buffer_count++;
    }
    xi->rx.rsp_cons = cons;

    /* Give netback more buffers */
    XenNet_FillRing(xi);

    if (packet_count >= MAXIMUM_PACKETS_PER_INTERRUPT || packet_data >= MAXIMUM_DATA_PER_INTERRUPT)
      break;

    more_to_do = RING_HAS_UNCONSUMED_RESPONSES(&xi->rx);
    if (!more_to_do)
    {
      xi->rx.sring->rsp_event = xi->rx.rsp_cons + 1;
      KeMemoryBarrier();
      more_to_do = RING_HAS_UNCONSUMED_RESPONSES(&xi->rx);
    }
  } while (more_to_do);

  /* anything past last_buf belongs to an incomplete packet... */
  if (last_buf && last_buf->next)
  {
    KdPrint((__DRIVER_NAME " Partial receive\n"));
    xi->rx_partial_buf = last_buf->next;
    xi->rx_partial_more_data_flag = more_data_flag;
    xi->rx_partial_extra_info_flag = extra_info_flag;
    last_buf->next = NULL;
  }

  KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);

  if (packet_count >= MAXIMUM_PACKETS_PER_INTERRUPT || packet_data >= MAXIMUM_DATA_PER_INTERRUPT)
  {
    /* fire again immediately */
    KdPrint((__DRIVER_NAME " Dpc Duration Exceeded\n"));
    /* we want the Dpc on the end of the queue. By definition we are already on the right CPU so we know the Dpc queue will be run immediately */
    // KeSetImportanceDpc(&xi->rxtx_dpc, MediumImportance);
    KeInsertQueueDpc(&xi->rxtx_dpc, NULL, NULL);
    /* don't set an event in the TX path */
    dont_set_event = TRUE;
  }
  else
  {
    /* make sure the Dpc queue is run immediately next interrupt */
    // KeSetImportanceDpc(&xi->rxtx_dpc, HighImportance);
    /* set an event in the TX path */
    dont_set_event = FALSE;
  }

  /* make packets out of the buffers */
  page_buf = head_buf;
  extra_info_flag = FALSE;
  more_data_flag = FALSE;

  while (page_buf)
  {
    shared_buffer_t *next_buf = page_buf->next;
    PMDL mdl;

    page_buf->next = NULL;
    if (extra_info_flag)
    {
      //KdPrint((__DRIVER_NAME " processing extra info\n"));
      ei = (struct netif_extra_info *)&page_buf->rsp;
      extra_info_flag = ei->flags & XEN_NETIF_EXTRA_FLAG_MORE;
      switch (ei->type)
      {
      case XEN_NETIF_EXTRA_TYPE_GSO:
        switch (ei->u.gso.type)
        {
        case XEN_NETIF_GSO_TYPE_TCPV4:
          pi->mss = ei->u.gso.size;
          //KdPrint((__DRIVER_NAME " mss = %d\n", pi->mss));
          // TODO - put this assertion somewhere: ASSERT(header_len + pi->mss <= PAGE_SIZE); // this limits MTU to PAGE_SIZE - XN_HEADER_LEN
          break;
        default:
          KdPrint((__DRIVER_NAME " Unknown GSO type (%d) detected\n", ei->u.gso.type));
          break;
        }
        break;
      default:
        KdPrint((__DRIVER_NAME " Unknown extra info type (%d) detected\n", ei->type));
        break;
      }
      put_pb_on_freelist(xi, page_buf);
    }
    else
    {
      ASSERT(!page_buf->rsp.offset);
      if (!more_data_flag) // handling the packet's 1st buffer
      {
        if (page_buf->rsp.flags & NETRXF_csum_blank)
          pi->csum_blank = TRUE;
        if (page_buf->rsp.flags & NETRXF_data_validated)
          pi->data_validated = TRUE;
      }
      mdl = page_buf->mdl;
      mdl->ByteCount = page_buf->rsp.status; //NdisAdjustBufferLength(mdl, page_buf->rsp.status);
      //KdPrint((__DRIVER_NAME " buffer = %p, pb = %p\n", buffer, page_buf));
      if (pi->first_pb)
      {
        ASSERT(pi->curr_pb);
        //KdPrint((__DRIVER_NAME " additional buffer\n"));
        pi->curr_pb->next = page_buf;
        pi->curr_pb = page_buf;
        ASSERT(pi->curr_mdl);
        pi->curr_mdl->Next = mdl;
        pi->curr_mdl = mdl;
      }
      else
      {
        pi->first_pb = page_buf;
        pi->curr_pb = page_buf;
        pi->first_mdl = mdl;
        pi->curr_mdl = mdl;
      }
      //pi->mdl_count++;
      extra_info_flag = (BOOLEAN)(page_buf->rsp.flags & NETRXF_extra_info);
      more_data_flag = (BOOLEAN)(page_buf->rsp.flags & NETRXF_more_data);
      pi->total_length = pi->total_length + page_buf->rsp.status;
    }

    /* Packet done, add it to the list */
    if (!more_data_flag && !extra_info_flag)
    {
      pi->curr_pb = pi->first_pb;
      pi->curr_mdl = pi->first_mdl;
      XenNet_MakePackets(xi, &rc, pi);
    }

    page_buf = next_buf;
  }
  ASSERT(!more_data_flag && !extra_info_flag);

  if (rc.first_nbl)
  {
    NdisMIndicateReceiveNetBufferLists(xi->adapter_handle, rc.first_nbl,
      NDIS_DEFAULT_PORT_NUMBER, rc.nbl_count,
      NDIS_RECEIVE_FLAGS_DISPATCH_LEVEL
      //| NDIS_RECEIVE_FLAGS_SINGLE_ETHER_TYPE
      | NDIS_RECEIVE_FLAGS_PERFECT_FILTERED);
  }
  //FUNCTION_EXIT();
  return dont_set_event;
}

/*
Free all Rx buffers (on halt, for example).
The ring must be stopped at this point.
*/

static VOID
XenNet_PurgeRing(xennet_info_t *xi)
{
  int i;
  for (i = 0; i < NET_RX_RING_SIZE; i++)
  {
    if (xi->rx_ring_pbs[i] != NULL)
    {
      put_pb_on_freelist(xi, xi->rx_ring_pbs[i]);
      xi->rx_ring_pbs[i] = NULL;
    }
  }
}
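
/* Free all page and header buffers. shutting_down is set by this time,
   so the get_*_from_freelist calls below drain the freelists without
   allocating replacements. */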
static VOID
XenNet_BufferFree(xennet_info_t *xi)
{
  shared_buffer_t *sb;

  XenNet_PurgeRing(xi);

  /* because we are shutting down this won't allocate new ones */
  while ((sb = get_pb_from_freelist(xi)) != NULL)
  {
    xi->vectors.GntTbl_EndAccess(xi->vectors.context,
      sb->gref, FALSE, (ULONG)'XNRX');
    IoFreeMdl(sb->mdl);
    /* sizes must match the allocations in get_pb_from_freelist */
    NdisFreeMemory(sb->virtual, PAGE_SIZE, 0);
    NdisFreeMemory(sb, sizeof(shared_buffer_t), 0);
  }
  while ((sb = get_hb_from_freelist(xi)) != NULL)
  {
    IoFreeMdl(sb->mdl);
    NdisFreeMemory(sb, sizeof(shared_buffer_t) + MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH, 0);
  }
}

VOID
XenNet_BufferAlloc(xennet_info_t *xi)
{
  //NDIS_STATUS status;
  int i;

  xi->rx_id_free = NET_RX_RING_SIZE;
  xi->rx_outstanding = 0;

  for (i = 0; i < NET_RX_RING_SIZE; i++)
  {
    xi->rx_ring_pbs[i] = NULL;
  }
}

VOID
XenNet_RxResumeStart(xennet_info_t *xi)
{
  KIRQL old_irql;

  FUNCTION_ENTER();

  KeAcquireSpinLock(&xi->rx_lock, &old_irql);
  XenNet_PurgeRing(xi);
  KeReleaseSpinLock(&xi->rx_lock, old_irql);

  FUNCTION_EXIT();
}

VOID
XenNet_RxResumeEnd(xennet_info_t *xi)
{
  KIRQL old_irql;

  FUNCTION_ENTER();

  KeAcquireSpinLock(&xi->rx_lock, &old_irql);
  //XenNet_BufferAlloc(xi);
  XenNet_FillRing(xi);
  KeReleaseSpinLock(&xi->rx_lock, old_irql);

  FUNCTION_EXIT();
}
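
/* Allocate all rx state: the per-processor packet_info array, the pb/hb
   freelist stacks, the NBL and NB pools, and an initial ring fill. */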
BOOLEAN
XenNet_RxInit(xennet_info_t *xi)
{
  NET_BUFFER_LIST_POOL_PARAMETERS nbl_pool_parameters;
  NET_BUFFER_POOL_PARAMETERS nb_pool_parameters;
  int ret;

  FUNCTION_ENTER();

  xi->rx_shutting_down = FALSE;
  KeInitializeSpinLock(&xi->rx_lock);
  KeInitializeEvent(&xi->packet_returned_event, SynchronizationEvent, FALSE);
  xi->rxpi = NdisAllocateMemoryWithTagPriority(xi->adapter_handle, sizeof(packet_info_t) * NdisSystemProcessorCount(), XENNET_POOL_TAG, NormalPoolPriority);
  if (!xi->rxpi)
  {
    KdPrint(("NdisAllocateMemoryWithTagPriority failed\n"));
    return FALSE;
  }
  NdisZeroMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount());

  ret = stack_new(&xi->rx_pb_stack, NET_RX_RING_SIZE * 4);
  if (!ret)
  {
    FUNCTION_MSG("Failed to allocate rx_pb_stack\n");
    NdisFreeMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount(), 0);
    return FALSE;
  }
  ret = stack_new(&xi->rx_hb_stack, NET_RX_RING_SIZE * 4);
  if (!ret)
  {
    FUNCTION_MSG("Failed to allocate rx_hb_stack\n");
    stack_delete(xi->rx_pb_stack, NULL, NULL);
    NdisFreeMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount(), 0);
    return FALSE;
  }

  XenNet_BufferAlloc(xi);

  nbl_pool_parameters.Header.Type = NDIS_OBJECT_TYPE_DEFAULT;
  nbl_pool_parameters.Header.Revision = NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1;
  nbl_pool_parameters.Header.Size = NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1;
  nbl_pool_parameters.ProtocolId = NDIS_PROTOCOL_ID_DEFAULT;
  nbl_pool_parameters.fAllocateNetBuffer = FALSE;
  nbl_pool_parameters.ContextSize = 0;
  nbl_pool_parameters.PoolTag = XENNET_POOL_TAG;
  nbl_pool_parameters.DataSize = 0; /* NET_BUFFERs are always allocated separately */

  xi->rx_nbl_pool = NdisAllocateNetBufferListPool(xi->adapter_handle, &nbl_pool_parameters);
  if (!xi->rx_nbl_pool)
  {
    KdPrint(("NdisAllocateNetBufferListPool failed\n"));
    return FALSE;
  }

  nb_pool_parameters.Header.Type = NDIS_OBJECT_TYPE_DEFAULT;
  nb_pool_parameters.Header.Revision = NET_BUFFER_POOL_PARAMETERS_REVISION_1;
  nb_pool_parameters.Header.Size = NDIS_SIZEOF_NET_BUFFER_POOL_PARAMETERS_REVISION_1;
  nb_pool_parameters.PoolTag = XENNET_POOL_TAG;
  nb_pool_parameters.DataSize = 0; /* the buffers come from the ring */
  xi->rx_nb_pool = NdisAllocateNetBufferPool(xi->adapter_handle, &nb_pool_parameters);
  if (!xi->rx_nb_pool)
  {
    KdPrint(("NdisAllocateNetBufferPool (rx_nb_pool) failed\n"));
    return FALSE;
  }

  XenNet_FillRing(xi);

  FUNCTION_EXIT();

  return TRUE;
}
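
/* Flag rx as shutting down, wait for all indicated NBLs to be returned,
   then free every rx resource. */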
BOOLEAN
XenNet_RxShutdown(xennet_info_t *xi)
{
  KIRQL old_irql;
  //PNDIS_PACKET packet;

  FUNCTION_ENTER();

  KeAcquireSpinLock(&xi->rx_lock, &old_irql);
  xi->rx_shutting_down = TRUE;
  KeReleaseSpinLock(&xi->rx_lock, old_irql);

  KeFlushQueuedDpcs();

  while (xi->rx_outstanding)
  {
    KdPrint((__DRIVER_NAME " Waiting for all packets to be returned\n"));
    KeWaitForSingleObject(&xi->packet_returned_event, Executive, KernelMode, FALSE, NULL);
  }

  NdisFreeMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount(), 0);

  XenNet_BufferFree(xi);

  stack_delete(xi->rx_pb_stack, NULL, NULL);
  stack_delete(xi->rx_hb_stack, NULL, NULL);

  NdisFreeNetBufferPool(xi->rx_nb_pool);
  NdisFreeNetBufferListPool(xi->rx_nbl_pool);

  FUNCTION_EXIT();

  return TRUE;
}