win-pvdrivers

view xennet/xennet5_rx.c @ 995:0879eec3f528

Fix packet leak under 2003
author James Harper <james.harper@bendigoit.com.au>
date Mon Oct 22 21:21:16 2012 +1100 (2012-10-22)
parents 8f483a2b2991
children
line source
1 /*
2 PV Net Driver for Windows Xen HVM Domains
3 Copyright (C) 2007 James Harper
4 Copyright (C) 2007 Andrew Grover <andy.grover@oracle.com>
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
21 #include "xennet5.h"
23 static __inline shared_buffer_t *
24 get_pb_from_freelist(struct xennet_info *xi)
25 {
26 NDIS_STATUS status;
27 shared_buffer_t *pb;
28 PVOID ptr_ref;
30 if (stack_pop(xi->rx_pb_stack, &ptr_ref))
31 {
32 pb = ptr_ref;
33 pb->ref_count = 1;
34 InterlockedDecrement(&xi->rx_pb_free);
35 return pb;
36 }
38 /* don't allocate a new one if we are shutting down */
39 if (xi->shutting_down)
40 return NULL;
42 status = NdisAllocateMemoryWithTag(&pb, sizeof(shared_buffer_t), XENNET_POOL_TAG);
43 if (status != STATUS_SUCCESS)
44 {
45 return NULL;
46 }
47 status = NdisAllocateMemoryWithTag(&pb->virtual, PAGE_SIZE, XENNET_POOL_TAG);
48 if (status != STATUS_SUCCESS)
49 {
50 NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
51 return NULL;
52 }
53 pb->gref = (grant_ref_t)xi->vectors.GntTbl_GrantAccess(xi->vectors.context,
54 (ULONG)(MmGetPhysicalAddress(pb->virtual).QuadPart >> PAGE_SHIFT), FALSE, INVALID_GRANT_REF, (ULONG)'XNRX');
55 if (pb->gref == INVALID_GRANT_REF)
56 {
57 NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
58 NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
59 return NULL;
60 }
61 pb->offset = (USHORT)(ULONG_PTR)pb->virtual & (PAGE_SIZE - 1);
62 NdisAllocateBuffer(&status, &pb->buffer, xi->rx_buffer_pool, (PUCHAR)pb->virtual, PAGE_SIZE);
63 if (status != STATUS_SUCCESS)
64 {
65 xi->vectors.GntTbl_EndAccess(xi->vectors.context,
66 pb->gref, FALSE, (ULONG)'XNRX');
67 NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
68 NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
69 return NULL;
70 }
71 pb->ref_count = 1;
72 return pb;
73 }
75 static __inline VOID
76 ref_pb(struct xennet_info *xi, shared_buffer_t *pb)
77 {
78 UNREFERENCED_PARAMETER(xi);
79 InterlockedIncrement(&pb->ref_count);
80 }
82 static __inline VOID
83 put_pb_on_freelist(struct xennet_info *xi, shared_buffer_t *pb)
84 {
85 if (InterlockedDecrement(&pb->ref_count) == 0)
86 {
87 NdisAdjustBufferLength(pb->buffer, PAGE_SIZE);
88 NDIS_BUFFER_LINKAGE(pb->buffer) = NULL;
89 pb->next = NULL;
90 stack_push(xi->rx_pb_stack, pb);
91 InterlockedIncrement(&xi->rx_pb_free);
92 }
93 }
// Called at DISPATCH_LEVEL with rx lock held
/* Top up the rx ring with empty page buffers for netback to fill.
 * Refills only when the ring is less than 3/4 full; running out of free
 * page buffers is not an error (netback just gets fewer slots).
 * Always returns NDIS_STATUS_SUCCESS. */
static NDIS_STATUS
XenNet_FillRing(struct xennet_info *xi)
{
  unsigned short id;
  shared_buffer_t *page_buf;
  ULONG i, notify;
  ULONG batch_target;
  RING_IDX req_prod = xi->rx.req_prod_pvt;
  netif_rx_request_t *req;

  /* how many requests we could add before exceeding rx_target outstanding */
  batch_target = xi->rx_target - (req_prod - xi->rx.rsp_cons);

  if (batch_target < (xi->rx_target >> 2))
  {
    return NDIS_STATUS_SUCCESS; /* only refill if we are less than 3/4 full already */
  }

  for (i = 0; i < batch_target; i++)
  {
    page_buf = get_pb_from_freelist(xi);
    if (!page_buf)
    {
      KdPrint((__DRIVER_NAME " Added %d out of %d buffers to rx ring (no free pages)\n", i, batch_target));
      break;
    }
    xi->rx_id_free--;

    /* Give to netback */
    /* slot id doubles as the index into rx_ring_pbs so the response can be
     * matched back to its page buffer */
    id = (USHORT)((req_prod + i) & (NET_RX_RING_SIZE - 1));
    ASSERT(xi->rx_ring_pbs[id] == NULL);
    xi->rx_ring_pbs[id] = page_buf;
    req = RING_GET_REQUEST(&xi->rx, req_prod + i);
    req->id = id;
    req->gref = page_buf->gref;
    ASSERT(req->gref != INVALID_GRANT_REF);
  }
  KeMemoryBarrier(); /* requests must be visible before the producer index moves */
  xi->rx.req_prod_pvt = req_prod + i;
  RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xi->rx, notify);
  if (notify)
  {
    xi->vectors.EvtChn_Notify(xi->vectors.context, xi->event_channel);
  }

  return NDIS_STATUS_SUCCESS;
}
148 /* lock free */
149 static PNDIS_PACKET
150 get_packet_from_freelist(struct xennet_info *xi)
151 {
152 NDIS_STATUS status;
153 PNDIS_PACKET packet;
154 PVOID ptr_ref;
156 if (stack_pop(xi->rx_packet_stack, &ptr_ref))
157 {
158 packet = ptr_ref;
159 return packet;
160 }
162 if (xi->rx_shutting_down) /* don't keep allocating new packets on shutdown */
163 return NULL;
165 NdisAllocatePacket(&status, &packet, xi->rx_packet_pool);
166 if (status != NDIS_STATUS_SUCCESS) {
167 KdPrint((__DRIVER_NAME " cannot allocate packet status = %08x, rx_outstanding = %d\n", status, xi->rx_outstanding));
168 return NULL;
169 }
170 NDIS_SET_PACKET_HEADER_SIZE(packet, XN_HDR_SIZE);
171 NdisZeroMemory(packet->MiniportReservedEx, sizeof(packet->MiniportReservedEx));
172 return packet;
173 }
175 /* lock free */
176 static VOID
177 put_packet_on_freelist(struct xennet_info *xi, PNDIS_PACKET packet)
178 {
179 PNDIS_TCP_IP_CHECKSUM_PACKET_INFO csum_info;
181 UNREFERENCED_PARAMETER(xi);
182 NdisReinitializePacket(packet);
183 csum_info = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&NDIS_PER_PACKET_INFO_FROM_PACKET(
184 packet, TcpIpChecksumPacketInfo);
185 csum_info->Value = 0;
187 stack_push(xi->rx_packet_stack, packet);
188 }
190 static PNDIS_PACKET
191 XenNet_MakePacket(struct xennet_info *xi, packet_info_t *pi)
192 {
193 NDIS_STATUS status;
194 PNDIS_PACKET packet;
195 PNDIS_BUFFER out_buffer;
196 USHORT new_ip4_length;
197 PUCHAR header_va;
198 ULONG out_remaining;
199 ULONG tcp_length;
200 ULONG header_extra;
201 ULONG packet_length = 0;
202 shared_buffer_t *header_buf;
204 //FUNCTION_ENTER();
206 packet = get_packet_from_freelist(xi);
207 if (packet == NULL)
208 {
209 /* buffers will be freed in MakePackets */
210 //KdPrint((__DRIVER_NAME " No free packets\n"));
211 //FUNCTION_EXIT();
212 return NULL;
213 }
215 if (!pi->split_required && pi->mdl_count == 1)
216 {
217 /* shortcut for the single packet single mdl case */
219 NDIS_SET_PACKET_STATUS(packet, NDIS_STATUS_SUCCESS);
220 NdisCopyBuffer(&status, &out_buffer, xi->rx_buffer_pool, pi->first_buffer, 0, pi->total_length);
221 if (status != STATUS_SUCCESS)
222 {
223 KdPrint((__DRIVER_NAME " No free rx buffers\n"));
224 put_packet_on_freelist(xi, packet);
225 return NULL;
226 }
227 NdisChainBufferAtBack(packet, out_buffer);
228 *(shared_buffer_t **)&packet->MiniportReservedEx[0] = pi->first_pb;
229 ref_pb(xi, pi->first_pb); /* so that the buffer doesn't get freed at the end of MakePackets*/
230 //FUNCTION_EXIT();
231 /* windows gets lazy about ack packets and holds on to them forever under high load situations. we don't like this */
232 if (pi->ip_proto == 6 && pi->total_length <= NDIS_STATUS_RESOURCES_MAX_LENGTH)
233 NDIS_SET_PACKET_STATUS(packet, NDIS_STATUS_RESOURCES);
234 else
235 NDIS_SET_PACKET_STATUS(packet, NDIS_STATUS_SUCCESS);
236 return packet;
237 }
239 header_buf = NdisAllocateFromNPagedLookasideList(&xi->rx_lookaside_list);
240 if (!header_buf)
241 {
242 KdPrint((__DRIVER_NAME " No free header buffers\n"));
243 put_packet_on_freelist(xi, packet);
244 return NULL;
245 }
246 header_va = (PUCHAR)(header_buf + 1);
247 NdisZeroMemory(header_buf, sizeof(shared_buffer_t));
248 NdisMoveMemory(header_va, pi->header, pi->header_length);
250 /* make sure we satisfy the lookahead requirement */
252 if (pi->split_required)
253 {
254 /* for split packets we need to make sure the 'header' is no bigger than header+mss bytes */
255 XenNet_BuildHeader(pi, header_va, min((ULONG)MAX_ETH_HEADER_LENGTH + pi->ip4_header_length + pi->tcp_header_length + pi->mss, MAX_ETH_HEADER_LENGTH + max(MIN_LOOKAHEAD_LENGTH, xi->current_lookahead)));
256 }
257 else
258 {
259 XenNet_BuildHeader(pi, header_va, max(MIN_LOOKAHEAD_LENGTH, xi->current_lookahead) + MAX_ETH_HEADER_LENGTH);
260 }
261 header_extra = pi->header_length - (MAX_ETH_HEADER_LENGTH + pi->ip4_header_length + pi->tcp_header_length);
262 ASSERT(pi->header_length <= MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH);
263 NdisAllocateBuffer(&status, &out_buffer, xi->rx_buffer_pool, header_va, pi->header_length);
264 if (status != STATUS_SUCCESS)
265 {
266 KdPrint((__DRIVER_NAME " No free header buffers\n"));
267 NdisFreeToNPagedLookasideList(&xi->rx_lookaside_list, header_buf);
268 put_packet_on_freelist(xi, packet);
269 return NULL;
270 }
271 NdisChainBufferAtBack(packet, out_buffer);
272 packet_length += pi->header_length;
273 *(shared_buffer_t **)&packet->MiniportReservedEx[0] = header_buf;
274 header_buf->next = pi->curr_pb;
276 // TODO: if there are only a few bytes left on the first buffer then add them to the header buffer too... maybe
278 if (pi->split_required)
279 {
280 tcp_length = (USHORT)min(pi->mss, pi->tcp_remaining);
281 new_ip4_length = (USHORT)(pi->ip4_header_length + pi->tcp_header_length + tcp_length);
282 //KdPrint((__DRIVER_NAME " new_ip4_length = %d\n", new_ip4_length));
283 //KdPrint((__DRIVER_NAME " this tcp_length = %d\n", tcp_length));
284 SET_NET_USHORT(&header_va[XN_HDR_SIZE + 2], new_ip4_length);
285 SET_NET_ULONG(&header_va[XN_HDR_SIZE + pi->ip4_header_length + 4], pi->tcp_seq);
286 pi->tcp_seq += tcp_length;
287 pi->tcp_remaining = (USHORT)(pi->tcp_remaining - tcp_length);
288 /* part of the packet is already present in the header buffer for lookahead */
289 out_remaining = tcp_length - header_extra;
290 ASSERT((LONG)out_remaining >= 0);
291 }
292 else
293 {
294 out_remaining = pi->total_length - pi->header_length;
295 ASSERT((LONG)out_remaining >= 0);
296 }
297 //KdPrint((__DRIVER_NAME " before loop - out_remaining = %d\n", out_remaining));
299 while (out_remaining != 0)
300 {
301 ULONG in_buffer_offset;
302 ULONG in_buffer_length;
303 ULONG out_length;
305 //KdPrint((__DRIVER_NAME " in loop - out_remaining = %d, curr_buffer = %p, curr_pb = %p\n", out_remaining, pi->curr_buffer, pi->curr_pb));
306 if (!pi->curr_buffer || !pi->curr_pb)
307 {
308 KdPrint((__DRIVER_NAME " out of buffers for packet\n"));
309 KdPrint((__DRIVER_NAME " out_remaining = %d, curr_buffer = %p, curr_pb = %p\n", out_remaining, pi->curr_buffer, pi->curr_pb));
310 // TODO: free some stuff or we'll leak
311 /* unchain buffers then free packet */
312 return NULL;
313 }
314 NdisQueryBufferOffset(pi->curr_buffer, &in_buffer_offset, &in_buffer_length);
315 out_length = min(out_remaining, in_buffer_length - pi->curr_mdl_offset);
316 NdisCopyBuffer(&status, &out_buffer, xi->rx_buffer_pool, pi->curr_buffer, pi->curr_mdl_offset, out_length);
317 ASSERT(status == STATUS_SUCCESS); //TODO: properly handle error
318 NdisChainBufferAtBack(packet, out_buffer);
319 packet_length += out_length;
320 ref_pb(xi, pi->curr_pb);
321 pi->curr_mdl_offset = (USHORT)(pi->curr_mdl_offset + out_length);
322 if (pi->curr_mdl_offset == in_buffer_length)
323 {
324 NdisGetNextBuffer(pi->curr_buffer, &pi->curr_buffer);
325 pi->curr_pb = pi->curr_pb->next;
326 pi->curr_mdl_offset = 0;
327 }
328 out_remaining -= out_length;
329 }
330 if (pi->split_required) {
331 XenNet_SumIpHeader(header_va, pi->ip4_header_length);
332 }
333 if (header_extra > 0)
334 pi->header_length -= header_extra;
335 ASSERT(*(shared_buffer_t **)&packet->MiniportReservedEx[0]);
336 /* windows gets lazy about ack packets and holds on to them forever under high load situations. we don't like this */
337 if (pi->ip_proto == 6 && packet_length <= NDIS_STATUS_RESOURCES_MAX_LENGTH)
338 NDIS_SET_PACKET_STATUS(packet, NDIS_STATUS_RESOURCES);
339 else
340 NDIS_SET_PACKET_STATUS(packet, NDIS_STATUS_SUCCESS);
341 //FUNCTION_EXIT();
342 return packet;
343 }
345 /*
346 Windows appears to insist that the checksum on received packets is correct, and won't
347 believe us when we lie about it, which happens when the packet is generated on the
348 same bridge in Dom0. Doh!
349 This is only for TCP and UDP packets. IP checksums appear to be correct anyways.
350 */
/* Compute the TCP (proto 6) or UDP (proto 17) checksum over the packet's
 * pseudo-header and payload, walking the chained NDIS buffers and handling
 * a 16-bit word that spans a buffer boundary.
 * set_csum == TRUE: write the computed checksum into the packet and return TRUE.
 * set_csum == FALSE: return TRUE iff the existing checksum matches.
 * Returns FALSE on malformed input (no buffer, bad length, unknown proto). */
static BOOLEAN
XenNet_SumPacketData(
  packet_info_t *pi,
  PNDIS_PACKET packet,
  BOOLEAN set_csum
)
{
  USHORT i;
  PUCHAR buffer;
  PMDL mdl;
  UINT total_length;
  UINT data_length;
  UINT buffer_length;
  USHORT buffer_offset;
  ULONG csum;
  PUSHORT csum_ptr;
  USHORT remaining;
  USHORT ip4_length;
  BOOLEAN csum_span = TRUE; /* when the USHORT to be checksummed spans a buffer */

  NdisGetFirstBufferFromPacketSafe(packet, &mdl, &buffer, &buffer_length, &total_length, NormalPagePriority);
  if (!buffer) {
    FUNCTION_MSG("NdisGetFirstBufferFromPacketSafe failed, buffer == NULL\n");
    return FALSE;
  }
  ASSERT(mdl);

  /* data_length = ethernet header + everything the IP total-length field claims */
  ip4_length = GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 2]);
  data_length = ip4_length + XN_HDR_SIZE;

  if ((USHORT)data_length > total_length) {
    FUNCTION_MSG("Size Mismatch %d (ip4_length + XN_HDR_SIZE) != %d (total_length)\n", ip4_length + XN_HDR_SIZE, total_length);
    return FALSE;
  }

  /* locate the checksum field inside the first (header) buffer */
  switch (pi->ip_proto)
  {
  case 6:
    ASSERT(buffer_length >= (USHORT)(XN_HDR_SIZE + pi->ip4_header_length + 17));
    csum_ptr = (USHORT *)&buffer[XN_HDR_SIZE + pi->ip4_header_length + 16];
    break;
  case 17:
    ASSERT(buffer_length >= (USHORT)(XN_HDR_SIZE + pi->ip4_header_length + 7));
    csum_ptr = (USHORT *)&buffer[XN_HDR_SIZE + pi->ip4_header_length + 6];
    break;
  default:
    KdPrint((__DRIVER_NAME " Don't know how to calc sum for IP Proto %d\n", pi->ip_proto));
    return FALSE; // should never happen
  }

  if (set_csum)
    *csum_ptr = 0;

  /* pseudo-header: src addr, dst addr, protocol and L4 length */
  csum = 0;
  csum += GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 12]) + GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 14]); // src
  csum += GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 16]) + GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 18]); // dst
  csum += ((USHORT)buffer[XN_HDR_SIZE + 9]);

  remaining = ip4_length - pi->ip4_header_length;

  csum += remaining;

  /* sum the L4 header + payload 16 bits at a time across buffer boundaries */
  csum_span = FALSE;
  buffer_offset = i = XN_HDR_SIZE + pi->ip4_header_length;
  while (i < data_length)
  {
    /* don't include the checksum field itself in the calculation */
    if ((pi->ip_proto == 6 && i == XN_HDR_SIZE + pi->ip4_header_length + 16) || (pi->ip_proto == 17 && i == XN_HDR_SIZE + pi->ip4_header_length + 6))
    {
      /* we know that this always happens in the header buffer so we are guaranteed the full two bytes */
      i += 2;
      buffer_offset += 2;
      continue;
    }
    if (csum_span)
    {
      /* the other half of the next bit */
      ASSERT(buffer_offset == 0);
      csum += (USHORT)buffer[buffer_offset];
      csum_span = FALSE;
      i += 1;
      buffer_offset += 1;
    }
    else if (buffer_offset == buffer_length - 1)
    {
      /* deal with a buffer ending on an odd byte boundary */
      csum += (USHORT)buffer[buffer_offset] << 8;
      csum_span = TRUE;
      i += 1;
      buffer_offset += 1;
    }
    else
    {
      csum += GET_NET_PUSHORT(&buffer[buffer_offset]);
      i += 2;
      buffer_offset += 2;
    }
    /* advance to the next chained buffer when this one is exhausted */
    if (buffer_offset == buffer_length && i < total_length)
    {
      NdisGetNextBuffer(mdl, &mdl);
      if (mdl == NULL)
      {
        KdPrint((__DRIVER_NAME " Ran out of buffers\n"));
        return FALSE; // should never happen
      }
      NdisQueryBufferSafe(mdl, &buffer, &buffer_length, NormalPagePriority);
      ASSERT(buffer_length);
      buffer_offset = 0;
    }
  }

  /* fold the carries back in (one's complement sum) */
  while (csum & 0xFFFF0000)
    csum = (csum & 0xFFFF) + (csum >> 16);

  if (set_csum)
  {
    *csum_ptr = (USHORT)~GET_NET_USHORT((USHORT)csum);
  }
  else
  {
    return (BOOLEAN)(*csum_ptr == (USHORT)~GET_NET_USHORT((USHORT)csum));
  }
  return TRUE;
}
/* Turn the buffer chain currently described by pi into one or more NDIS
 * packets appended to rx_packet_list; returns the number of packets made.
 * TCP receives flagged split_required (GSO) are re-segmented into one
 * packet per mss bytes. On exit the base reference on every page buffer is
 * dropped (packets that kept data hold their own refs via XenNet_MakePacket)
 * and pi is cleared ready for the next receive. */
static ULONG
XenNet_MakePackets(
  struct xennet_info *xi,
  PLIST_ENTRY rx_packet_list,
  packet_info_t *pi
)
{
  ULONG packet_count = 0;
  PNDIS_PACKET packet;
  PLIST_ENTRY entry;
  UCHAR psh;
  PNDIS_TCP_IP_CHECKSUM_PACKET_INFO csum_info;
  ULONG parse_result;
  shared_buffer_t *page_buf;

  parse_result = XenNet_ParsePacketHeader(pi, NULL, 0);

  if (!XenNet_FilterAcceptPacket(xi, pi))
  {
    goto done; /* filtered out - still need to release the page buffers */
  }

  switch (pi->ip_proto)
  {
  case 6:  // TCP
    if (pi->split_required)
      break; /* GSO: fall out of the switch to the re-segmentation loop below */
    // fallthrough
  case 17:  // UDP
    packet = XenNet_MakePacket(xi, pi);
    if (packet == NULL)
    {
      xi->stat_rx_no_buffer++;
      packet_count = 0;
      goto done;
    }
    if (parse_result == PARSE_OK)
    {
      BOOLEAN checksum_offload = FALSE;
      csum_info = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&NDIS_PER_PACKET_INFO_FROM_PACKET(
        packet, TcpIpChecksumPacketInfo);
      ASSERT(csum_info->Value == 0);
      if (pi->csum_blank || pi->data_validated)
      {
        /* backend says the data is validated (or the csum is blank and must
         * be filled in here) - report success to NDIS where configured */
        /* we know this is IPv4, and we know Linux always validates the IPv4 checksum for us */
        if (xi->setting_csum.V4Receive.IpChecksum)
        {
          if (!pi->ip_has_options || xi->setting_csum.V4Receive.IpOptionsSupported)
          {
            if (XenNet_CheckIpHeader(pi->header, pi->ip4_header_length))
              csum_info->Receive.NdisPacketIpChecksumSucceeded = TRUE;
            else
              csum_info->Receive.NdisPacketIpChecksumFailed = TRUE;
          }
        }
        if (xi->setting_csum.V4Receive.TcpChecksum && pi->ip_proto == 6)
        {
          if (!pi->tcp_has_options || xi->setting_csum.V4Receive.TcpOptionsSupported)
          {
            csum_info->Receive.NdisPacketTcpChecksumSucceeded = TRUE;
            checksum_offload = TRUE;
          }
        }
        else if (xi->setting_csum.V4Receive.UdpChecksum && pi->ip_proto == 17)
        {
          csum_info->Receive.NdisPacketUdpChecksumSucceeded = TRUE;
          checksum_offload = TRUE;
        }
        /* a blank checksum must be computed unless offload reporting covers it */
        if (pi->csum_blank && (!xi->config_csum_rx_dont_fix || !checksum_offload))
        {
          XenNet_SumPacketData(pi, packet, TRUE);
        }
      }
      else if (xi->config_csum_rx_check && pi->ip_version == 4)
      {
        /* optionally verify the received checksums ourselves and report the result */
        if (xi->setting_csum.V4Receive.IpChecksum)
        {
          if (!pi->ip_has_options || xi->setting_csum.V4Receive.IpOptionsSupported)
          {
            if (XenNet_CheckIpHeader(pi->header, pi->ip4_header_length))
              csum_info->Receive.NdisPacketIpChecksumSucceeded = TRUE;
            else
              csum_info->Receive.NdisPacketIpChecksumFailed = TRUE;
          }
        }
        if (xi->setting_csum.V4Receive.TcpChecksum && pi->ip_proto == 6)
        {
          if (!pi->tcp_has_options || xi->setting_csum.V4Receive.TcpOptionsSupported)
          {
            if (XenNet_SumPacketData(pi, packet, FALSE))
            {
              csum_info->Receive.NdisPacketTcpChecksumSucceeded = TRUE;
            }
            else
            {
              csum_info->Receive.NdisPacketTcpChecksumFailed = TRUE;
            }
          }
        }
        else if (xi->setting_csum.V4Receive.UdpChecksum && pi->ip_proto == 17)
        {
          if (XenNet_SumPacketData(pi, packet, FALSE))
          {
            csum_info->Receive.NdisPacketUdpChecksumSucceeded = TRUE;
          }
          else
          {
            csum_info->Receive.NdisPacketUdpChecksumFailed = TRUE;
          }
        }
      }
    }
    entry = (PLIST_ENTRY)&packet->MiniportReservedEx[sizeof(PVOID)];
    InsertTailList(rx_packet_list, entry);
    packet_count = 1;
    goto done;
  default:
    /* non TCP/UDP - hand up as-is with no checksum handling */
    packet = XenNet_MakePacket(xi, pi);
    if (packet == NULL)
    {
      xi->stat_rx_no_buffer++;
      packet_count = 0;
      goto done;
    }
    entry = (PLIST_ENTRY)&packet->MiniportReservedEx[sizeof(PVOID)];
    InsertTailList(rx_packet_list, entry);
    packet_count = 1;
    goto done;
  }

  /* only reached for TCP packets that need splitting into mss-sized segments */
  pi->tcp_remaining = pi->tcp_length;

  /* we can make certain assumptions here as the following code is only for tcp4 */
  /* remember the PSH flag - it must only be set on the last segment */
  psh = pi->header[XN_HDR_SIZE + pi->ip4_header_length + 13] & 8;
  while (pi->tcp_remaining)
  {
    PUCHAR header_va;
    PMDL mdl;
    UINT total_length;
    UINT buffer_length;
    packet = XenNet_MakePacket(xi, pi);
    if (!packet)
    {
      xi->stat_rx_no_buffer++;
      break; /* we are out of memory - just drop the packets */
    }
    if (xi->setting_csum.V4Receive.TcpChecksum)
    {
      csum_info = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&NDIS_PER_PACKET_INFO_FROM_PACKET(
        packet, TcpIpChecksumPacketInfo);
      csum_info->Receive.NdisPacketIpChecksumSucceeded = TRUE;
      csum_info->Receive.NdisPacketTcpChecksumSucceeded = TRUE;
    }
    if (psh)
    {
      /* clear PSH on all but the final segment, set it on the final one */
      NdisGetFirstBufferFromPacketSafe(packet, &mdl, &header_va, &buffer_length, &total_length, NormalPagePriority);
      if (pi->tcp_remaining)
        header_va[XN_HDR_SIZE + pi->ip4_header_length + 13] &= ~8;
      else
        header_va[XN_HDR_SIZE + pi->ip4_header_length + 13] |= 8;
    }
    /* each segment gets a freshly computed TCP checksum */
    XenNet_SumPacketData(pi, packet, TRUE);
    entry = (PLIST_ENTRY)&packet->MiniportReservedEx[sizeof(PVOID)];
    InsertTailList(rx_packet_list, entry);
    packet_count++;
  }

done:
  /* drop the base ref on every page buffer of this receive; buffers still
   * referenced by packets stay alive until XenNet_ReturnPacket */
  page_buf = pi->first_pb;
  while (page_buf)
  {
    shared_buffer_t *next_pb;

    next_pb = page_buf->next;
    put_pb_on_freelist(xi, page_buf);
    page_buf = next_pb;
  }
  XenNet_ClearPacketInfo(pi);
  return packet_count;
}
/* called at DISPATCH_LEVEL */
/* it's okay for return packet to be called while resume_state != RUNNING as the packet will simply be added back to the freelist, the grants will be fixed later */
/* NDIS gives a previously indicated packet back to us. The NDIS buffer
 * chain and the shared_buffer_t chain (rooted at MiniportReservedEx[0])
 * are walked in lockstep - one pb per buffer. Header buffers are
 * recognised by pb->virtual == NULL and go back to the lookaside list;
 * data buffers drop one pb reference. Finally the packet itself is
 * recycled, and if this was the last outstanding packet the rx ring is
 * refilled (or the shutdown waiter is woken). */
VOID
XenNet_ReturnPacket(
  IN NDIS_HANDLE MiniportAdapterContext,
  IN PNDIS_PACKET Packet
)
{
  struct xennet_info *xi = MiniportAdapterContext;
  PNDIS_BUFFER buffer;
  shared_buffer_t *page_buf = *(shared_buffer_t **)&Packet->MiniportReservedEx[0];

  NdisUnchainBufferAtFront(Packet, &buffer);

  while (buffer)
  {
    shared_buffer_t *next_buf;
    ASSERT(page_buf);
    next_buf = page_buf->next;
    if (!page_buf->virtual)
    {
      /* this isn't actually a share_buffer, it is some memory allocated for the header - just free it */
      PUCHAR va;
      UINT len;
#pragma warning(suppress:28193) /* va is valid because it was mapped earlier */
      NdisQueryBufferSafe(buffer, &va, &len, NormalPagePriority);
      /* the shared_buffer_t header sits immediately before the header bytes */
      NdisFreeToNPagedLookasideList(&xi->rx_lookaside_list, va - sizeof(shared_buffer_t));
      NdisFreeBuffer(buffer);
    }
    else
    {
      /* pb->buffer itself is recycled with the pb; copies made by
       * NdisCopyBuffer must be freed separately */
      if (buffer != page_buf->buffer)
        NdisFreeBuffer(buffer);
      put_pb_on_freelist(xi, page_buf);
    }
    NdisUnchainBufferAtFront(Packet, &buffer);
    page_buf = next_buf;
  }

  put_packet_on_freelist(xi, Packet);
  if (!InterlockedDecrement(&xi->rx_outstanding)) {
    if (xi->rx_shutting_down) {
      KeSetEvent(&xi->packet_returned_event, IO_NO_INCREMENT, FALSE);
    } else {
      /* check performance of this - only happens on ring empty */
      KeAcquireSpinLockAtDpcLevel(&xi->rx_lock);
      XenNet_FillRing(xi);
      KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);
    }
  }
}
729 #define MAXIMUM_PACKETS_PER_INDICATE 32
731 /* We limit the number of packets per interrupt so that acks get a chance
732 under high rx load. The DPC is immediately re-scheduled */
733 #define MAXIMUM_PACKETS_PER_INTERRUPT 32 /* this is calculated before large packet split */
734 #define MAXIMUM_DATA_PER_INTERRUPT (MAXIMUM_PACKETS_PER_INTERRUPT * 1500) /* help account for large packets */
// Called at DISPATCH_LEVEL
/* Drain rx responses from the shared ring (under the rx lock), link the
 * page buffers into per-packet chains, then - outside the lock - build NDIS
 * packets and indicate them in batches. Responses belonging to a packet
 * whose tail has not arrived yet are parked in xi->rx_partial_buf for the
 * next pass. Returns TRUE when the DPC re-queued itself (caller should not
 * set the tx event).
 * NOTE(review): closing braces near the end of this function were missing
 * from the captured text and have been reconstructed from the structure. */
BOOLEAN
XenNet_RxBufferCheck(struct xennet_info *xi)
{
  RING_IDX cons, prod;
  LIST_ENTRY rx_packet_list;
  LIST_ENTRY rx_header_only_packet_list;
  PLIST_ENTRY entry;
  PNDIS_PACKET packets[MAXIMUM_PACKETS_PER_INDICATE];
  ULONG packet_count = 0;
  ULONG buffer_count = 0;
  ULONG packet_data = 0;
  ULONG interim_packet_data = 0;
  struct netif_extra_info *ei;
  USHORT id;
  int more_to_do = FALSE;
  packet_info_t *pi = &xi->rxpi[KeGetCurrentProcessorNumber() & 0xff];
  shared_buffer_t *page_buf;
  shared_buffer_t *head_buf = NULL;
  shared_buffer_t *tail_buf = NULL;
  shared_buffer_t *last_buf = NULL;
  BOOLEAN extra_info_flag = FALSE;
  BOOLEAN more_data_flag = FALSE;
  PNDIS_BUFFER buffer;
  BOOLEAN dont_set_event;

  if (!xi->connected)
    return FALSE; /* a delayed DPC could let this come through... just do nothing */

  InitializeListHead(&rx_packet_list);

  /* get all the buffers off the ring as quickly as possible so the lock is held for a minimum amount of time */
  KeAcquireSpinLockAtDpcLevel(&xi->rx_lock);

  if (xi->rx_shutting_down)
  {
    /* there is a chance that our Dpc had been queued just before the shutdown... */
    KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);
    return FALSE;
  }

  /* resume a packet that was left incomplete by the previous pass */
  if (xi->rx_partial_buf)
  {
    head_buf = xi->rx_partial_buf;
    tail_buf = xi->rx_partial_buf;
    while (tail_buf->next)
      tail_buf = tail_buf->next;
    more_data_flag = xi->rx_partial_more_data_flag;
    extra_info_flag = xi->rx_partial_extra_info_flag;
    xi->rx_partial_buf = NULL;
  }

  do {
    prod = xi->rx.sring->rsp_prod;
    KeMemoryBarrier(); /* Ensure we see responses up to 'prod'. */

    for (cons = xi->rx.rsp_cons; cons != prod && packet_count < MAXIMUM_PACKETS_PER_INTERRUPT && packet_data < MAXIMUM_DATA_PER_INTERRUPT; cons++)
    {
      id = (USHORT)(cons & (NET_RX_RING_SIZE - 1));
      page_buf = xi->rx_ring_pbs[id];
      ASSERT(page_buf);
      xi->rx_ring_pbs[id] = NULL;
      xi->rx_id_free++;
      /* snapshot the response into the pb - the ring slot gets reused */
      memcpy(&page_buf->rsp, RING_GET_RESPONSE(&xi->rx, cons), max(sizeof(struct netif_rx_response), sizeof(struct netif_extra_info)));
      if (!extra_info_flag)
      {
        if (page_buf->rsp.status <= 0
          || page_buf->rsp.offset + page_buf->rsp.status > PAGE_SIZE)
        {
          KdPrint((__DRIVER_NAME " Error: rsp offset %d, size %d\n",
            page_buf->rsp.offset, page_buf->rsp.status));
          ASSERT(!extra_info_flag);
          put_pb_on_freelist(xi, page_buf);
          continue;
        }
      }

      /* append to the chain of buffers harvested this pass */
      if (!head_buf)
      {
        head_buf = page_buf;
        tail_buf = page_buf;
      }
      else
      {
        tail_buf->next = page_buf;
        tail_buf = page_buf;
      }
      page_buf->next = NULL;

      if (extra_info_flag)
      {
        /* this slot carries extra info (e.g. GSO), not packet data */
        ei = (struct netif_extra_info *)&page_buf->rsp;
        extra_info_flag = ei->flags & XEN_NETIF_EXTRA_FLAG_MORE;
      }
      else
      {
        more_data_flag = (BOOLEAN)(page_buf->rsp.flags & NETRXF_more_data);
        extra_info_flag = (BOOLEAN)(page_buf->rsp.flags & NETRXF_extra_info);
        interim_packet_data += page_buf->rsp.status;
      }

      /* neither flag set means this buffer completes a packet */
      if (!extra_info_flag && !more_data_flag)
      {
        last_buf = page_buf;
        packet_count++;
        packet_data += interim_packet_data;
        interim_packet_data = 0;
      }
      buffer_count++;
    }
    xi->rx.rsp_cons = cons;

    /* Give netback more buffers */
    XenNet_FillRing(xi);

    if (packet_count >= MAXIMUM_PACKETS_PER_INTERRUPT || packet_data >= MAXIMUM_DATA_PER_INTERRUPT)
      break;

    more_to_do = RING_HAS_UNCONSUMED_RESPONSES(&xi->rx);
    if (!more_to_do)
    {
      /* re-arm the event then re-check to close the race with netback */
      xi->rx.sring->rsp_event = xi->rx.rsp_cons + 1;
      KeMemoryBarrier();
      more_to_do = RING_HAS_UNCONSUMED_RESPONSES(&xi->rx);
    }
  } while (more_to_do);

  /* anything past last_buf belongs to an incomplete packet... */
  if (last_buf && last_buf->next)
  {
    KdPrint((__DRIVER_NAME " Partial receive\n"));
    xi->rx_partial_buf = last_buf->next;
    xi->rx_partial_more_data_flag = more_data_flag;
    xi->rx_partial_extra_info_flag = extra_info_flag;
    last_buf->next = NULL;
  }

  KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);

  if (packet_count >= MAXIMUM_PACKETS_PER_INTERRUPT || packet_data >= MAXIMUM_DATA_PER_INTERRUPT)
  {
    /* fire again immediately */
    /* we want the Dpc on the end of the queue. By definition we are already on the right CPU so we know the Dpc queue will be run immediately */
    KeSetImportanceDpc(&xi->rxtx_dpc, MediumImportance);
    KeInsertQueueDpc(&xi->rxtx_dpc, NULL, NULL);
    /* dont set an event in TX path */
    dont_set_event = TRUE;
  }
  else
  {
    /* make sure the Dpc queue is run immediately next interrupt */
    KeSetImportanceDpc(&xi->rxtx_dpc, HighImportance);
    /* set an event in TX path */
    dont_set_event = FALSE;
  }

  /* make packets out of the buffers */
  page_buf = head_buf;
  extra_info_flag = FALSE;
  more_data_flag = FALSE;
  while (page_buf)
  {
    shared_buffer_t *next_buf = page_buf->next;

    page_buf->next = NULL;
    if (extra_info_flag)
    {
      ei = (struct netif_extra_info *)&page_buf->rsp;
      extra_info_flag = ei->flags & XEN_NETIF_EXTRA_FLAG_MORE;
      switch (ei->type)
      {
      case XEN_NETIF_EXTRA_TYPE_GSO:
        switch (ei->u.gso.type)
        {
        case XEN_NETIF_GSO_TYPE_TCPV4:
          /* record mss so MakePackets can re-segment this large receive */
          pi->mss = ei->u.gso.size;
          break;
        default:
          KdPrint((__DRIVER_NAME " Unknown GSO type (%d) detected\n", ei->u.gso.type));
          break;
        }
        break;
      default:
        KdPrint((__DRIVER_NAME " Unknown extra info type (%d) detected\n", ei->type));
        break;
      }
      put_pb_on_freelist(xi, page_buf);
    }
    else
    {
      ASSERT(!page_buf->rsp.offset);
      if (!more_data_flag) // handling the packet's 1st buffer
      {
        if (page_buf->rsp.flags & NETRXF_csum_blank)
          pi->csum_blank = TRUE;
        if (page_buf->rsp.flags & NETRXF_data_validated)
          pi->data_validated = TRUE;
      }
      buffer = page_buf->buffer;
      NdisAdjustBufferLength(buffer, page_buf->rsp.status);
      if (pi->first_pb)
      {
        /* additional buffer of the current packet */
        ASSERT(pi->curr_pb);
        pi->curr_pb->next = page_buf;
        pi->curr_pb = page_buf;
        ASSERT(pi->curr_buffer);
        NDIS_BUFFER_LINKAGE(pi->curr_buffer) = buffer;
        pi->curr_buffer = buffer;
      }
      else
      {
        pi->first_pb = page_buf;
        pi->curr_pb = page_buf;
        pi->first_buffer = buffer;
        pi->curr_buffer = buffer;
      }
      pi->mdl_count++;
      extra_info_flag = (BOOLEAN)(page_buf->rsp.flags & NETRXF_extra_info);
      more_data_flag = (BOOLEAN)(page_buf->rsp.flags & NETRXF_more_data);
      pi->total_length = pi->total_length + page_buf->rsp.status;
    }

    /* Packet done, add it to the list */
    if (!more_data_flag && !extra_info_flag)
    {
      pi->curr_pb = pi->first_pb;
      pi->curr_buffer = pi->first_buffer;
      XenNet_MakePackets(xi, &rx_packet_list, pi);
    }

    page_buf = next_buf;
  }
  ASSERT(!more_data_flag && !extra_info_flag);

  xi->stat_rx_ok += packet_count;

  /* indicate packets to NDIS */
  entry = RemoveHeadList(&rx_packet_list);
  InitializeListHead(&rx_header_only_packet_list);
  packet_count = 0;

  while (entry != &rx_packet_list) {
    PNDIS_PACKET packet = CONTAINING_RECORD(entry, NDIS_PACKET, MiniportReservedEx[sizeof(PVOID)]);
    NDIS_STATUS status;
    ASSERT(*(shared_buffer_t **)&packet->MiniportReservedEx[0]);
    status = NDIS_GET_PACKET_STATUS(packet);
    if (status == NDIS_STATUS_RESOURCES)
      InsertTailList(&rx_header_only_packet_list, entry);
    packets[packet_count++] = packet;
    InterlockedIncrement(&xi->rx_outstanding);
    entry = RemoveHeadList(&rx_packet_list);
    /* if we indicate a packet with NDIS_STATUS_RESOURCES then any following packet can't be NDIS_STATUS_SUCCESS */
    if (packet_count == MAXIMUM_PACKETS_PER_INDICATE || entry == &rx_packet_list
      || (NDIS_GET_PACKET_STATUS(CONTAINING_RECORD(entry, NDIS_PACKET, MiniportReservedEx[sizeof(PVOID)])) == NDIS_STATUS_SUCCESS
      && status == NDIS_STATUS_RESOURCES))
    {
      NdisMIndicateReceivePacket(xi->adapter_handle, packets, packet_count);
      packet_count = 0;
    }
  }

  /* now return the packets for which we indicated NDIS_STATUS_RESOURCES */
  entry = RemoveHeadList(&rx_header_only_packet_list);
  while (entry != &rx_header_only_packet_list) {
    PNDIS_PACKET packet = CONTAINING_RECORD(entry, NDIS_PACKET, MiniportReservedEx[sizeof(PVOID)]);
    entry = RemoveHeadList(&rx_header_only_packet_list);
    XenNet_ReturnPacket(xi, packet);
  }

  return dont_set_event;
}
1016 /*
1017 Free all Rx buffers (on halt, for example)
1018 The ring must be stopped at this point.
1019 */
1021 static VOID
1022 XenNet_PurgeRing(struct xennet_info *xi)
1024 int i;
1025 for (i = 0; i < NET_RX_RING_SIZE; i++)
1027 if (xi->rx_ring_pbs[i] != NULL)
1029 put_pb_on_freelist(xi, xi->rx_ring_pbs[i]);
1030 xi->rx_ring_pbs[i] = NULL;
1035 static VOID
1036 XenNet_BufferFree(struct xennet_info *xi)
1038 shared_buffer_t *pb;
1040 XenNet_PurgeRing(xi);
1042 while ((pb = get_pb_from_freelist(xi)) != NULL)
1044 NdisFreeBuffer(pb->buffer);
1045 xi->vectors.GntTbl_EndAccess(xi->vectors.context,
1046 pb->gref, FALSE, (ULONG)'XNRX');
1047 NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
1048 NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
1052 VOID
1053 XenNet_RxResumeStart(xennet_info_t *xi)
1055 KIRQL old_irql;
1057 FUNCTION_ENTER();
1059 KeAcquireSpinLock(&xi->rx_lock, &old_irql);
1060 XenNet_PurgeRing(xi);
1061 KeReleaseSpinLock(&xi->rx_lock, old_irql);
1063 FUNCTION_EXIT();
1066 VOID
1067 XenNet_BufferAlloc(xennet_info_t *xi)
1069 //NDIS_STATUS status;
1070 int i;
1072 xi->rx_id_free = NET_RX_RING_SIZE;
1073 xi->rx_outstanding = 0;
1075 for (i = 0; i < NET_RX_RING_SIZE; i++)
1077 xi->rx_ring_pbs[i] = NULL;
1081 VOID
1082 XenNet_RxResumeEnd(xennet_info_t *xi)
1084 KIRQL old_irql;
1086 FUNCTION_ENTER();
1088 KeAcquireSpinLock(&xi->rx_lock, &old_irql);
1089 //XenNet_BufferAlloc(xi);
1090 XenNet_FillRing(xi);
1091 KeReleaseSpinLock(&xi->rx_lock, old_irql);
1093 FUNCTION_EXIT();
1096 BOOLEAN
1097 XenNet_RxInit(xennet_info_t *xi)
1099 NDIS_STATUS status;
1101 FUNCTION_ENTER();
1103 xi->rx_shutting_down = FALSE;
1104 KeInitializeSpinLock(&xi->rx_lock);
1105 KeInitializeEvent(&xi->packet_returned_event, SynchronizationEvent, FALSE);
1106 KeInitializeTimer(&xi->rx_timer);
1107 status = NdisAllocateMemoryWithTag((PVOID)&xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount(), XENNET_POOL_TAG);
1108 if (status != NDIS_STATUS_SUCCESS)
1110 KdPrint(("NdisAllocateMemoryWithTag failed with 0x%x\n", status));
1111 return FALSE;
1113 NdisZeroMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount());
1115 stack_new(&xi->rx_pb_stack, NET_RX_RING_SIZE * 4);
1117 XenNet_BufferAlloc(xi);
1119 NdisAllocatePacketPool(&status, &xi->rx_packet_pool, NET_RX_RING_SIZE * 4,
1120 PROTOCOL_RESERVED_SIZE_IN_PACKET);
1121 if (status != NDIS_STATUS_SUCCESS)
1123 KdPrint(("NdisAllocatePacketPool failed with 0x%x\n", status));
1124 return FALSE;
1126 stack_new(&xi->rx_packet_stack, NET_RX_RING_SIZE * 4);
1128 NdisInitializeNPagedLookasideList(&xi->rx_lookaside_list, NULL, NULL, 0,
1129 MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH + sizeof(shared_buffer_t), XENNET_POOL_TAG, 0);
1131 XenNet_FillRing(xi);
1133 FUNCTION_EXIT();
1135 return TRUE;
1138 BOOLEAN
1139 XenNet_RxShutdown(xennet_info_t *xi)
1141 KIRQL old_irql;
1142 PNDIS_PACKET packet;
1144 FUNCTION_ENTER();
1146 KeAcquireSpinLock(&xi->rx_lock, &old_irql);
1147 xi->rx_shutting_down = TRUE;
1148 KeReleaseSpinLock(&xi->rx_lock, old_irql);
1150 if (xi->config_rx_interrupt_moderation)
1152 KeCancelTimer(&xi->rx_timer);
1155 #if (NTDDI_VERSION >= NTDDI_WINXP)
1156 KeFlushQueuedDpcs();
1157 #endif
1159 while (xi->rx_outstanding)
1161 KdPrint((__DRIVER_NAME " Waiting for all packets to be returned\n"));
1162 KeWaitForSingleObject(&xi->packet_returned_event, Executive, KernelMode, FALSE, NULL);
1165 //KeAcquireSpinLock(&xi->rx_lock, &old_irql);
1167 NdisFreeMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount(), 0);
1169 XenNet_BufferFree(xi);
1171 /* this works because get_packet_from_freelist won't allocate new packets when rx_shutting_down */
1172 while ((packet = get_packet_from_freelist(xi)) != NULL)
1173 NdisFreePacket(packet);
1174 stack_delete(xi->rx_packet_stack, NULL, NULL);
1175 NdisFreePacketPool(xi->rx_packet_pool);
1177 NdisDeleteNPagedLookasideList(&xi->rx_lookaside_list);
1179 stack_delete(xi->rx_pb_stack, NULL, NULL);
1180 //KeReleaseSpinLock(&xi->rx_lock, old_irql);
1182 FUNCTION_EXIT();
1184 return TRUE;