win-pvdrivers

view xennet/xennet_rx.c @ 875:8d7560d67376

xennet rx performance increases. not well tested yet. measurably faster and more scalable under SMP.
author James Harper <james.harper@bendigoit.com.au>
date Sun Mar 13 10:13:35 2011 +1100 (2011-03-13)
parents c3e9661e4f7e
children 254b8424e23b
line source
1 /*
2 PV Net Driver for Windows Xen HVM Domains
3 Copyright (C) 2007 James Harper
4 Copyright (C) 2007 Andrew Grover <andy.grover@oracle.com>
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
21 #include "xennet.h"
23 /* Not really necessary but keeps PREfast happy */
24 #if (NTDDI_VERSION >= NTDDI_WINXP)
25 static KDEFERRED_ROUTINE XenNet_RxBufferCheck;
26 #endif
28 LONG rx_pb_outstanding = 0;
30 static __inline shared_buffer_t *
31 get_pb_from_freelist(struct xennet_info *xi)
32 {
33 NDIS_STATUS status;
34 shared_buffer_t *pb;
35 PVOID ptr_ref;
37 if (stack_pop(xi->rx_pb_stack, &ptr_ref))
38 {
39 pb = ptr_ref;
40 pb->ref_count = 1;
41 InterlockedDecrement(&xi->rx_pb_free);
42 InterlockedIncrement(&rx_pb_outstanding);
43 return pb;
44 }
46 status = NdisAllocateMemoryWithTag(&pb, sizeof(shared_buffer_t), XENNET_POOL_TAG);
47 if (status != STATUS_SUCCESS)
48 {
49 return NULL;
50 }
51 status = NdisAllocateMemoryWithTag(&pb->virtual, PAGE_SIZE, XENNET_POOL_TAG);
52 if (status != STATUS_SUCCESS)
53 {
54 NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
55 return NULL;
56 }
57 pb->gref = (grant_ref_t)xi->vectors.GntTbl_GrantAccess(xi->vectors.context, 0,
58 (ULONG)(MmGetPhysicalAddress(pb->virtual).QuadPart >> PAGE_SHIFT), FALSE, INVALID_GRANT_REF, (ULONG)'XNRX');
59 if (pb->gref == INVALID_GRANT_REF)
60 {
61 NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
62 NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
63 return NULL;
64 }
65 pb->offset = (USHORT)(ULONG_PTR)pb->virtual & (PAGE_SIZE - 1);
66 NdisAllocateBuffer(&status, &pb->buffer, xi->rx_buffer_pool, (PUCHAR)pb->virtual, PAGE_SIZE);
67 if (status != STATUS_SUCCESS)
68 {
69 xi->vectors.GntTbl_EndAccess(xi->vectors.context,
70 pb->gref, FALSE, (ULONG)'XNRX');
71 NdisFreeMemory(pb, sizeof(shared_buffer_t), 0);
72 NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
73 return NULL;
74 }
75 InterlockedIncrement(&rx_pb_outstanding);
76 pb->ref_count = 1;
77 return pb;
78 }
80 static __inline VOID
81 ref_pb(struct xennet_info *xi, shared_buffer_t *pb)
82 {
83 UNREFERENCED_PARAMETER(xi);
84 InterlockedIncrement(&pb->ref_count);
85 }
87 static __inline VOID
88 put_pb_on_freelist(struct xennet_info *xi, shared_buffer_t *pb)
89 {
90 if (InterlockedDecrement(&pb->ref_count) == 0)
91 {
92 NdisAdjustBufferLength(pb->buffer, PAGE_SIZE);
93 NDIS_BUFFER_LINKAGE(pb->buffer) = NULL;
94 pb->next = NULL;
95 stack_push(xi->rx_pb_stack, pb);
96 InterlockedIncrement(&xi->rx_pb_free);
97 InterlockedDecrement(&rx_pb_outstanding);
98 }
99 }
// Called at DISPATCH_LEVEL with rx lock held.
// Posts free page buffers to the shared rx ring so netback has somewhere
// to put incoming packets. Refills only when the ring has dropped below
// 3/4 of rx_target, to amortise the cost.
static NDIS_STATUS
XenNet_FillRing(struct xennet_info *xi)
{
  unsigned short id;
  shared_buffer_t *page_buf;
  ULONG i, notify;
  ULONG batch_target;
  RING_IDX req_prod = xi->rx.req_prod_pvt;
  netif_rx_request_t *req;

  //FUNCTION_ENTER();

  /* number of empty slots between what we last produced and what the
     backend has consumed */
  batch_target = xi->rx_target - (req_prod - xi->rx.rsp_cons);

  if (batch_target < (xi->rx_target >> 2))
  {
    //FUNCTION_EXIT();
    return NDIS_STATUS_SUCCESS; /* only refill if we are less than 3/4 full already */
  }

  for (i = 0; i < batch_target; i++)
  {
    page_buf = get_pb_from_freelist(xi);
    if (!page_buf)
    {
      /* out of memory/grants - post what we have and try again later */
      KdPrint((__DRIVER_NAME " Added %d out of %d buffers to rx ring (no free pages)\n", i, batch_target));
      break;
    }
    xi->rx_id_free--;

    /* Give to netback */
    id = (USHORT)((req_prod + i) & (NET_RX_RING_SIZE - 1));
    ASSERT(xi->rx_ring_pbs[id] == NULL);
    /* remember which pb backs each ring slot so RxBufferCheck can find it */
    xi->rx_ring_pbs[id] = page_buf;
    req = RING_GET_REQUEST(&xi->rx, req_prod + i);
    req->id = id;
    req->gref = page_buf->gref;
    ASSERT(req->gref != INVALID_GRANT_REF);
  }
  /* requests must be globally visible before the producer index moves */
  KeMemoryBarrier();
  xi->rx.req_prod_pvt = req_prod + i;
  RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xi->rx, notify);
  if (notify)
  {
    xi->vectors.EvtChn_Notify(xi->vectors.context, xi->event_channel);
  }

  //FUNCTION_EXIT();

  return NDIS_STATUS_SUCCESS;
}
154 LONG total_allocated_packets = 0;
155 LARGE_INTEGER last_print_time;
157 /* lock free */
158 static PNDIS_PACKET
159 get_packet_from_freelist(struct xennet_info *xi)
160 {
161 NDIS_STATUS status;
162 PNDIS_PACKET packet;
164 NdisAllocatePacket(&status, &packet, xi->rx_packet_pool);
165 if (status != NDIS_STATUS_SUCCESS)
166 {
167 KdPrint((__DRIVER_NAME " cannot allocate packet\n"));
168 return NULL;
169 }
170 NDIS_SET_PACKET_HEADER_SIZE(packet, XN_HDR_SIZE);
171 NdisZeroMemory(packet->MiniportReservedEx, sizeof(packet->MiniportReservedEx));
172 InterlockedIncrement(&total_allocated_packets);
173 return packet;
174 }
176 /* lock free */
177 static VOID
178 put_packet_on_freelist(struct xennet_info *xi, PNDIS_PACKET packet)
179 {
180 LARGE_INTEGER current_time;
182 InterlockedDecrement(&total_allocated_packets);
183 NdisFreePacket(packet);
184 KeQuerySystemTime(&current_time);
185 if ((int)total_allocated_packets < 0 || (current_time.QuadPart - last_print_time.QuadPart) / 10000 > 1000)
186 {
187 last_print_time.QuadPart = current_time.QuadPart;
188 KdPrint(("total_allocated_packets = %d, rx_pb_outstanding = %d, rx_pb_free = %d\n", total_allocated_packets, rx_pb_outstanding, xi->rx_pb_free));
189 }
190 }
192 static PNDIS_PACKET
193 XenNet_MakePacket(struct xennet_info *xi, packet_info_t *pi)
194 {
195 NDIS_STATUS status;
196 PNDIS_PACKET packet;
197 PNDIS_BUFFER out_buffer;
198 USHORT new_ip4_length;
199 PUCHAR header_va;
200 ULONG out_remaining;
201 ULONG tcp_length;
202 ULONG header_extra;
203 shared_buffer_t *header_buf;
205 //FUNCTION_ENTER();
207 packet = get_packet_from_freelist(xi);
208 if (packet == NULL)
209 {
210 /* buffers will be freed in MakePackets */
211 KdPrint((__DRIVER_NAME " No free packets\n"));
212 //FUNCTION_EXIT();
213 return NULL;
214 }
216 header_buf = NdisAllocateFromNPagedLookasideList(&xi->rx_lookaside_list);
217 if (!header_buf)
218 {
219 KdPrint((__DRIVER_NAME " No free header buffers\n"));
220 put_packet_on_freelist(xi, packet);
221 return NULL;
222 }
223 header_va = (PUCHAR)(header_buf + 1);
224 NdisZeroMemory(header_buf, sizeof(shared_buffer_t));
225 NdisMoveMemory(header_va, pi->header, pi->header_length);
226 //KdPrint((__DRIVER_NAME " header_length = %d, current_lookahead = %d\n", pi->header_length, xi->current_lookahead));
227 //KdPrint((__DRIVER_NAME " ip4_header_length = %d\n", pi->ip4_header_length));
228 //KdPrint((__DRIVER_NAME " tcp_header_length = %d\n", pi->tcp_header_length));
229 /* make sure we satisfy the lookahead requirement */
230 XenNet_BuildHeader(pi, header_va, max(MIN_LOOKAHEAD_LENGTH, xi->current_lookahead) + MAX_ETH_HEADER_LENGTH);
231 header_extra = pi->header_length - (MAX_ETH_HEADER_LENGTH + pi->ip4_header_length + pi->tcp_header_length);
232 ASSERT(pi->header_length <= MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH);
233 NdisAllocateBuffer(&status, &out_buffer, xi->rx_buffer_pool, header_va, pi->header_length);
234 if (status != STATUS_SUCCESS)
235 {
236 KdPrint((__DRIVER_NAME " No free header buffers\n"));
237 NdisFreeToNPagedLookasideList(&xi->rx_lookaside_list, header_buf);
238 put_packet_on_freelist(xi, packet);
239 return NULL;
240 }
241 NdisChainBufferAtBack(packet, out_buffer);
242 *(shared_buffer_t **)&packet->MiniportReservedEx[0] = header_buf;
243 header_buf->next = pi->curr_pb;
245 // TODO: if there are only a few bytes left on the first buffer then add them to the header buffer too... maybe
247 //KdPrint((__DRIVER_NAME " split_required = %d\n", pi->split_required));
248 //KdPrint((__DRIVER_NAME " tcp_length = %d, mss = %d\n", pi->tcp_length, pi->mss));
249 //KdPrint((__DRIVER_NAME " total_length = %d\n", pi->total_length));
250 //KdPrint((__DRIVER_NAME " header_length = %d\n", pi->header_length));
251 //KdPrint((__DRIVER_NAME " header_extra = %d\n", header_extra));
252 if (pi->split_required)
253 {
254 tcp_length = (USHORT)min(pi->mss, pi->tcp_remaining);
255 new_ip4_length = (USHORT)(pi->ip4_header_length + pi->tcp_header_length + tcp_length);
256 //KdPrint((__DRIVER_NAME " new_ip4_length = %d\n", new_ip4_length));
257 //KdPrint((__DRIVER_NAME " this tcp_length = %d\n", tcp_length));
258 SET_NET_USHORT(&header_va[XN_HDR_SIZE + 2], new_ip4_length);
259 SET_NET_ULONG(&header_va[XN_HDR_SIZE + pi->ip4_header_length + 4], pi->tcp_seq);
260 pi->tcp_seq += tcp_length;
261 pi->tcp_remaining = (USHORT)(pi->tcp_remaining - tcp_length);
262 /* part of the packet is already present in the header buffer for lookahead */
263 out_remaining = tcp_length - header_extra;
264 }
265 else
266 {
267 out_remaining = pi->total_length - pi->header_length;
268 }
269 //KdPrint((__DRIVER_NAME " before loop - out_remaining = %d\n", out_remaining));
271 while (out_remaining != 0)
272 {
273 ULONG in_buffer_offset;
274 ULONG in_buffer_length;
275 ULONG out_length;
277 //KdPrint((__DRIVER_NAME " in loop - out_remaining = %d, curr_buffer = %p, curr_pb = %p\n", out_remaining, pi->curr_buffer, pi->curr_pb));
278 if (!pi->curr_buffer || !pi->curr_pb)
279 {
280 KdPrint((__DRIVER_NAME " out of buffers for packet\n"));
281 // TODO: free some stuff or we'll leak
282 return NULL;
283 }
284 NdisQueryBufferOffset(pi->curr_buffer, &in_buffer_offset, &in_buffer_length);
285 out_length = min(out_remaining, in_buffer_length - pi->curr_mdl_offset);
286 NdisCopyBuffer(&status, &out_buffer, xi->rx_buffer_pool, pi->curr_buffer, pi->curr_mdl_offset, out_length);
287 ASSERT(status == STATUS_SUCCESS); //TODO: properly handle error
288 NdisChainBufferAtBack(packet, out_buffer);
289 ref_pb(xi, pi->curr_pb);
290 pi->curr_mdl_offset = (USHORT)(pi->curr_mdl_offset + out_length);
291 if (pi->curr_mdl_offset == in_buffer_length)
292 {
293 NdisGetNextBuffer(pi->curr_buffer, &pi->curr_buffer);
294 pi->curr_pb = pi->curr_pb->next;
295 pi->curr_mdl_offset = 0;
296 }
297 out_remaining -= out_length;
298 }
299 if (pi->split_required)
300 {
301 XenNet_SumIpHeader(header_va, pi->ip4_header_length);
302 }
303 NDIS_SET_PACKET_STATUS(packet, NDIS_STATUS_SUCCESS);
304 if (header_extra > 0)
305 pi->header_length -= header_extra;
306 xi->rx_outstanding++;
307 ASSERT(*(shared_buffer_t **)&packet->MiniportReservedEx[0]);
308 //FUNCTION_EXIT();
309 return packet;
310 }
312 /*
313 Windows appears to insist that the checksum on received packets is correct, and won't
314 believe us when we lie about it, which happens when the packet is generated on the
315 same bridge in Dom0. Doh!
316 This is only for TCP and UDP packets. IP checksums appear to be correct anyways.
317 */
/* Walk the packet's MDL chain computing the TCP (proto 6) or UDP (proto 17)
   checksum over pseudo-header + payload.
   set_csum == TRUE : write the computed checksum into the packet.
   set_csum == FALSE: return TRUE iff the existing checksum matches.
   Returns FALSE on malformed input (length mismatch, unknown protocol,
   or running out of MDLs). Assumes the first MDL holds at least the
   ethernet + ip + transport headers. */
static BOOLEAN
XenNet_SumPacketData(
  packet_info_t *pi,
  PNDIS_PACKET packet,
  BOOLEAN set_csum
)
{
  USHORT i;                    /* logical byte position within the packet */
  PUCHAR buffer;
  PMDL mdl;
  UINT total_length;
  UINT data_length;            /* ethernet header + ip total length */
  UINT buffer_length;
  USHORT buffer_offset;        /* position within the current MDL */
  ULONG csum;
  PUSHORT csum_ptr;            /* where the checksum lives in the header */
  USHORT remaining;
  USHORT ip4_length;
  BOOLEAN csum_span = TRUE; /* when the USHORT to be checksummed spans a buffer */

  //FUNCTION_ENTER();

  NdisGetFirstBufferFromPacketSafe(packet, &mdl, &buffer, &buffer_length, &total_length, NormalPagePriority);
  ASSERT(mdl);

  /* ip total length field at offset 2 of the ip header */
  ip4_length = GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 2]);
  data_length = ip4_length + XN_HDR_SIZE;

  if ((USHORT)data_length > total_length)
  {
    KdPrint((__DRIVER_NAME " Size Mismatch %d (ip4_length + XN_HDR_SIZE) != %d (total_length)\n", ip4_length + XN_HDR_SIZE, total_length));
    return FALSE;
  }

  /* locate the transport checksum field (all in the first buffer) */
  switch (pi->ip_proto)
  {
  case 6:   /* TCP: checksum at offset 16 of the tcp header */
    ASSERT(buffer_length >= (USHORT)(XN_HDR_SIZE + pi->ip4_header_length + 17));
    csum_ptr = (USHORT *)&buffer[XN_HDR_SIZE + pi->ip4_header_length + 16];
    break;
  case 17:  /* UDP: checksum at offset 6 of the udp header */
    ASSERT(buffer_length >= (USHORT)(XN_HDR_SIZE + pi->ip4_header_length + 7));
    csum_ptr = (USHORT *)&buffer[XN_HDR_SIZE + pi->ip4_header_length + 6];
    break;
  default:
    KdPrint((__DRIVER_NAME " Don't know how to calc sum for IP Proto %d\n", pi->ip_proto));
    //FUNCTION_EXIT();
    return FALSE; // should never happen
  }

  if (set_csum)
    *csum_ptr = 0;

  /* pseudo-header: src addr, dst addr, protocol, transport length */
  csum = 0;
  csum += GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 12]) + GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 14]); // src
  csum += GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 16]) + GET_NET_PUSHORT(&buffer[XN_HDR_SIZE + 18]); // dst
  csum += ((USHORT)buffer[XN_HDR_SIZE + 9]);

  remaining = ip4_length - pi->ip4_header_length;

  csum += remaining;

  /* sum the transport header + payload 16 bits at a time, following the
     MDL chain; csum_span handles a 16-bit word straddling two MDLs */
  csum_span = FALSE;
  buffer_offset = i = XN_HDR_SIZE + pi->ip4_header_length;
  while (i < data_length)
  {
    /* don't include the checksum field itself in the calculation */
    if ((pi->ip_proto == 6 && i == XN_HDR_SIZE + pi->ip4_header_length + 16) || (pi->ip_proto == 17 && i == XN_HDR_SIZE + pi->ip4_header_length + 6))
    {
      /* we know that this always happens in the header buffer so we are guaranteed the full two bytes */
      i += 2;
      buffer_offset += 2;
      continue;
    }
    if (csum_span)
    {
      /* the other half of the next bit */
      ASSERT(buffer_offset == 0);
      csum += (USHORT)buffer[buffer_offset];
      csum_span = FALSE;
      i += 1;
      buffer_offset += 1;
    }
    else if (buffer_offset == buffer_length - 1)
    {
      /* deal with a buffer ending on an odd byte boundary */
      csum += (USHORT)buffer[buffer_offset] << 8;
      csum_span = TRUE;
      i += 1;
      buffer_offset += 1;
    }
    else
    {
      csum += GET_NET_PUSHORT(&buffer[buffer_offset]);
      i += 2;
      buffer_offset += 2;
    }
    /* exhausted the current MDL but more packet remains - advance */
    if (buffer_offset == buffer_length && i < total_length)
    {
      NdisGetNextBuffer(mdl, &mdl);
      if (mdl == NULL)
      {
        KdPrint((__DRIVER_NAME " Ran out of buffers\n"));
        return FALSE; // should never happen
      }
      NdisQueryBufferSafe(mdl, &buffer, &buffer_length, NormalPagePriority);
      ASSERT(buffer_length);
      buffer_offset = 0;
    }
  }

  /* fold carries back into the low 16 bits (ones-complement sum) */
  while (csum & 0xFFFF0000)
    csum = (csum & 0xFFFF) + (csum >> 16);

  if (set_csum)
  {
    *csum_ptr = (USHORT)~GET_NET_USHORT((USHORT)csum);
  }
  else
  {
    //FUNCTION_EXIT();
    return (BOOLEAN)(*csum_ptr == (USHORT)~GET_NET_USHORT((USHORT)csum));
  }
  //FUNCTION_EXIT();
  return TRUE;
}
/* Turn the buffer chain described by pi (filled in by RxBufferCheck) into
   one or more NDIS packets appended to rx_packet_list. Large TCP receives
   (pi->split_required) are re-segmented into mss-sized packets. Always
   drops the ring's reference on every pb in the chain and clears pi before
   returning. Returns the number of packets created. */
static ULONG
XenNet_MakePackets(
  struct xennet_info *xi,
  PLIST_ENTRY rx_packet_list,
  packet_info_t *pi
)
{
  ULONG packet_count = 0;
  PNDIS_PACKET packet;
  PLIST_ENTRY entry;
  UCHAR psh;                   /* PSH flag of the original tcp header */
  PNDIS_TCP_IP_CHECKSUM_PACKET_INFO csum_info;
  ULONG parse_result;
  //PNDIS_BUFFER buffer;
  shared_buffer_t *page_buf;

  //FUNCTION_ENTER();

  parse_result = XenNet_ParsePacketHeader(pi, NULL, 0);

  if (!XenNet_FilterAcceptPacket(xi, pi))
  {
    /* packet filter rejected it - fall through to cleanup with count 0 */
    goto done;
  }

  switch (pi->ip_proto)
  {
  case 6:  // TCP
    if (pi->split_required)
      break;  /* large receive - handled by the segmentation loop below */
    // fallthrough
  case 17:  // UDP
    /* single-packet case: build it and fill in checksum offload info */
    packet = XenNet_MakePacket(xi, pi);
    if (packet == NULL)
    {
      KdPrint((__DRIVER_NAME " Ran out of packets\n"));
      xi->stat_rx_no_buffer++;
      packet_count = 0;
      goto done;
    }
    if (parse_result == PARSE_OK)
    {
      csum_info = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&NDIS_PER_PACKET_INFO_FROM_PACKET(
        packet, TcpIpChecksumPacketInfo);
      if (pi->csum_blank || pi->data_validated)
      {
        /* backend says the data is good (or the csum is not yet filled in) */
        if (xi->setting_csum.V4Receive.TcpChecksum && pi->ip_proto == 6)
        {
          if (!pi->tcp_has_options || xi->setting_csum.V4Receive.TcpOptionsSupported)
          {
            csum_info->Receive.NdisPacketTcpChecksumSucceeded = TRUE;
          }
        } else if (xi->setting_csum.V4Receive.UdpChecksum && pi->ip_proto == 17)
        {
          csum_info->Receive.NdisPacketUdpChecksumSucceeded = TRUE;
        }
        if (pi->csum_blank)
        {
          /* checksum was never computed (local/bridged traffic) - do it now */
          XenNet_SumPacketData(pi, packet, TRUE);
        }
      }
      else if (xi->config_csum_rx_check)
      {
        /* optionally verify the checksum in software and report the result */
        if (xi->setting_csum.V4Receive.TcpChecksum && pi->ip_proto == 6)
        {
          if (XenNet_SumPacketData(pi, packet, FALSE))
          {
            csum_info->Receive.NdisPacketTcpChecksumSucceeded = TRUE;
          }
          else
          {
            csum_info->Receive.NdisPacketTcpChecksumFailed = TRUE;
          }
        } else if (xi->setting_csum.V4Receive.UdpChecksum && pi->ip_proto == 17)
        {
          if (XenNet_SumPacketData(pi, packet, FALSE))
          {
            csum_info->Receive.NdisPacketUdpChecksumSucceeded = TRUE;
          }
          else
          {
            csum_info->Receive.NdisPacketUdpChecksumFailed = TRUE;
          }
        }
      }
    }
    /* MiniportReservedEx[sizeof(PVOID)..] doubles as a LIST_ENTRY */
    entry = (PLIST_ENTRY)&packet->MiniportReservedEx[sizeof(PVOID)];
    InsertTailList(rx_packet_list, entry);
    packet_count = 1;
    goto done;
  default:
    /* non-TCP/UDP: hand it up as-is, no checksum offload info */
    packet = XenNet_MakePacket(xi, pi);
    if (packet == NULL)
    {
      KdPrint((__DRIVER_NAME " Ran out of packets\n"));
      xi->stat_rx_no_buffer++;
      packet_count = 0;
      goto done;
    }
    entry = (PLIST_ENTRY)&packet->MiniportReservedEx[sizeof(PVOID)];
    InsertTailList(rx_packet_list, entry);
    packet_count = 1;
    goto done;
  }

  /* software LSO receive: split one large tcp packet into mss-sized ones */
  pi->tcp_remaining = pi->tcp_length;

  /* we can make certain assumptions here as the following code is only for tcp4 */
  psh = pi->header[XN_HDR_SIZE + pi->ip4_header_length + 13] & 8;
  while (pi->tcp_remaining)
  {
    PUCHAR header_va;
    PMDL mdl;
    UINT total_length;
    UINT buffer_length;
    packet = XenNet_MakePacket(xi, pi);
    if (!packet)
    {
      KdPrint((__DRIVER_NAME " Ran out of packets\n"));
      xi->stat_rx_no_buffer++;
      break; /* we are out of memory - just drop the packets */
    }
    if (xi->setting_csum.V4Receive.TcpChecksum)
    {
      /* we compute the checksum ourselves below, so report success */
      csum_info = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&NDIS_PER_PACKET_INFO_FROM_PACKET(
        packet, TcpIpChecksumPacketInfo);
      csum_info->Receive.NdisPacketTcpChecksumSucceeded = TRUE;
    }
    if (psh)
    {
      /* PSH must only be set on the final segment; clear it on the rest */
      NdisGetFirstBufferFromPacketSafe(packet, &mdl, &header_va, &buffer_length, &total_length, NormalPagePriority);
      if (pi->tcp_remaining)
        header_va[XN_HDR_SIZE + pi->ip4_header_length + 13] &= ~8;
      else
        header_va[XN_HDR_SIZE + pi->ip4_header_length + 13] |= 8;
    }
    /* each segment has new lengths/seq, so its checksum must be recomputed */
    XenNet_SumPacketData(pi, packet, TRUE);
    entry = (PLIST_ENTRY)&packet->MiniportReservedEx[sizeof(PVOID)];
    InsertTailList(rx_packet_list, entry);
    packet_count++;
  }

done:
  /* drop the ring's reference on every pb; packets that still reference a
     pb (via ref_pb in MakePacket) keep it alive until ReturnPacket */
  page_buf = pi->first_pb;
  while (page_buf)
  {
    shared_buffer_t *next_pb;

    next_pb = page_buf->next;
    put_pb_on_freelist(xi, page_buf);
    page_buf = next_pb;
  }
  XenNet_ClearPacketInfo(pi);
  //FUNCTION_EXIT();
  return packet_count;
}
603 static BOOLEAN
604 XenNet_RxQueueDpcSynchronized(PVOID context)
605 {
606 struct xennet_info *xi = context;
608 KeInsertQueueDpc(&xi->rx_dpc, NULL, NULL);
610 return TRUE;
611 }
613 #define MAXIMUM_PACKETS_PER_INDICATE 32
615 /* We limit the number of packets per interrupt so that acks get a chance
616 under high rx load. The DPC is immediately re-scheduled */
617 /* this isn't actually done right now */
618 #define MAX_BUFFERS_PER_INTERRUPT 256
620 // Called at DISPATCH_LEVEL
621 static VOID
622 XenNet_RxBufferCheck(PKDPC dpc, PVOID context, PVOID arg1, PVOID arg2)
623 {
624 struct xennet_info *xi = context;
625 RING_IDX cons, prod;
626 LIST_ENTRY rx_packet_list;
627 PLIST_ENTRY entry;
628 PNDIS_PACKET packets[MAXIMUM_PACKETS_PER_INDICATE];
629 ULONG packet_count = 0;
630 ULONG buffer_count = 0;
631 struct netif_extra_info *ei;
632 USHORT id;
633 int more_to_do = FALSE;
634 packet_info_t *pi = &xi->rxpi[KeGetCurrentProcessorNumber() & 0xff];
635 //NDIS_STATUS status;
636 shared_buffer_t *page_buf;
637 shared_buffer_t *head_buf = NULL;
638 shared_buffer_t *tail_buf = NULL;
639 shared_buffer_t *last_buf = NULL;
640 BOOLEAN extra_info_flag = FALSE;
641 BOOLEAN more_data_flag = FALSE;
642 PNDIS_BUFFER buffer;
644 UNREFERENCED_PARAMETER(dpc);
645 UNREFERENCED_PARAMETER(arg1);
646 UNREFERENCED_PARAMETER(arg2);
648 //FUNCTION_ENTER();
650 if (!xi->connected)
651 return; /* a delayed DPC could let this come through... just do nothing */
653 InitializeListHead(&rx_packet_list);
655 /* get all the buffers off the ring as quickly as possible so the lock is held for a minimum amount of time */
657 KeAcquireSpinLockAtDpcLevel(&xi->rx_lock);
659 if (xi->rx_shutting_down)
660 {
661 /* there is a chance that our Dpc had been queued just before the shutdown... */
662 KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);
663 return;
664 }
666 if (xi->rx_partial_buf)
667 {
668 head_buf = xi->rx_partial_buf;
669 tail_buf = xi->rx_partial_buf;
670 while (tail_buf->next)
671 tail_buf = tail_buf->next;
672 more_data_flag = xi->rx_partial_more_data_flag;
673 extra_info_flag = xi->rx_partial_extra_info_flag;
674 xi->rx_partial_buf = NULL;
675 }
677 do {
678 prod = xi->rx.sring->rsp_prod;
679 KeMemoryBarrier(); /* Ensure we see responses up to 'prod'. */
681 for (cons = xi->rx.rsp_cons; cons != prod; cons++)
682 {
683 id = (USHORT)(cons & (NET_RX_RING_SIZE - 1));
684 page_buf = xi->rx_ring_pbs[id];
685 ASSERT(page_buf);
686 xi->rx_ring_pbs[id] = NULL;
687 xi->rx_id_free++;
688 memcpy(&page_buf->rsp, RING_GET_RESPONSE(&xi->rx, cons), max(sizeof(struct netif_rx_response), sizeof(struct netif_extra_info)));
689 if (!extra_info_flag)
690 {
691 if (page_buf->rsp.status <= 0
692 || page_buf->rsp.offset + page_buf->rsp.status > PAGE_SIZE)
693 {
694 KdPrint((__DRIVER_NAME " Error: rsp offset %d, size %d\n",
695 page_buf->rsp.offset, page_buf->rsp.status));
696 ASSERT(!extra_info_flag);
697 put_pb_on_freelist(xi, page_buf);
698 continue;
699 }
700 }
702 if (!head_buf)
703 {
704 head_buf = page_buf;
705 tail_buf = page_buf;
706 }
707 else
708 {
709 tail_buf->next = page_buf;
710 tail_buf = page_buf;
711 }
712 page_buf->next = NULL;
714 if (extra_info_flag)
715 {
716 ei = (struct netif_extra_info *)&page_buf->rsp;
717 extra_info_flag = ei->flags & XEN_NETIF_EXTRA_FLAG_MORE;
718 }
719 else
720 {
721 more_data_flag = page_buf->rsp.flags & NETRXF_more_data;
722 extra_info_flag = page_buf->rsp.flags & NETRXF_extra_info;
723 }
725 if (!extra_info_flag && !more_data_flag)
726 last_buf = page_buf;
727 buffer_count++;
728 }
729 xi->rx.rsp_cons = cons;
731 /* Give netback more buffers */
732 XenNet_FillRing(xi);
734 more_to_do = RING_HAS_UNCONSUMED_RESPONSES(&xi->rx);
735 if (!more_to_do)
736 {
737 xi->rx.sring->rsp_event = xi->rx.rsp_cons + 1;
738 mb();
739 more_to_do = RING_HAS_UNCONSUMED_RESPONSES(&xi->rx);
740 }
741 } while (more_to_do);
743 /* anything past last_buf belongs to an incomplete packet... */
744 if (last_buf && last_buf->next)
745 {
746 KdPrint((__DRIVER_NAME " Partial receive\n"));
747 xi->rx_partial_buf = last_buf->next;
748 xi->rx_partial_more_data_flag = more_data_flag;
749 xi->rx_partial_extra_info_flag = extra_info_flag;
750 last_buf->next = NULL;
751 }
753 KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);
755 #if 0
756 do this on a timer or something during packet manufacture
757 if (buffer_count >= MAX_BUFFERS_PER_INTERRUPT)
758 {
759 /* fire again immediately */
760 KdPrint((__DRIVER_NAME " Dpc Duration Exceeded\n"));
761 KeInsertQueueDpc(&xi->rx_dpc, NULL, NULL);
762 //xi->vectors.EvtChn_Sync(xi->vectors.context, XenNet_RxQueueDpcSynchronized, xi);
763 }
764 #endif
766 /* make packets out of the buffers */
767 page_buf = head_buf;
768 extra_info_flag = FALSE;
769 more_data_flag = FALSE;
770 while (page_buf)
771 {
772 shared_buffer_t *next_buf = page_buf->next;
774 page_buf->next = NULL;
775 if (extra_info_flag)
776 {
777 //KdPrint((__DRIVER_NAME " processing extra info\n"));
778 ei = (struct netif_extra_info *)&page_buf->rsp;
779 extra_info_flag = ei->flags & XEN_NETIF_EXTRA_FLAG_MORE;
780 switch (ei->type)
781 {
782 case XEN_NETIF_EXTRA_TYPE_GSO:
783 switch (ei->u.gso.type)
784 {
785 case XEN_NETIF_GSO_TYPE_TCPV4:
786 pi->mss = ei->u.gso.size;
787 //KdPrint((__DRIVER_NAME " mss = %d\n", pi->mss));
788 // TODO - put this assertion somewhere ASSERT(header_len + pi->mss <= PAGE_SIZE); // this limits MTU to PAGE_SIZE - XN_HEADER_LEN
789 break;
790 default:
791 KdPrint((__DRIVER_NAME " Unknown GSO type (%d) detected\n", ei->u.gso.type));
792 break;
793 }
794 break;
795 default:
796 KdPrint((__DRIVER_NAME " Unknown extra info type (%d) detected\n", ei->type));
797 break;
798 }
799 put_pb_on_freelist(xi, page_buf);
800 }
801 else
802 {
803 ASSERT(!page_buf->rsp.offset);
804 if (!more_data_flag) // handling the packet's 1st buffer
805 {
806 if (page_buf->rsp.flags & NETRXF_csum_blank)
807 pi->csum_blank = TRUE;
808 if (page_buf->rsp.flags & NETRXF_data_validated)
809 pi->data_validated = TRUE;
810 }
811 buffer = page_buf->buffer;
812 NdisAdjustBufferLength(buffer, page_buf->rsp.status);
813 //KdPrint((__DRIVER_NAME " buffer = %p, pb = %p\n", buffer, page_buf));
814 if (pi->first_pb)
815 {
816 ASSERT(pi->curr_pb);
817 //KdPrint((__DRIVER_NAME " additional buffer\n"));
818 pi->curr_pb->next = page_buf;
819 pi->curr_pb = page_buf;
820 ASSERT(pi->curr_buffer);
821 NDIS_BUFFER_LINKAGE(pi->curr_buffer) = buffer;
822 pi->curr_buffer = buffer;
823 }
824 else
825 {
826 pi->first_pb = page_buf;
827 pi->curr_pb = page_buf;
828 pi->first_buffer = buffer;
829 pi->curr_buffer = buffer;
830 }
831 pi->mdl_count++;
832 extra_info_flag = page_buf->rsp.flags & NETRXF_extra_info;
833 more_data_flag = page_buf->rsp.flags & NETRXF_more_data;
834 pi->total_length = pi->total_length + page_buf->rsp.status;
835 }
837 /* Packet done, add it to the list */
838 if (!more_data_flag && !extra_info_flag)
839 {
840 pi->curr_pb = pi->first_pb;
841 pi->curr_buffer = pi->first_buffer;
842 XenNet_MakePackets(xi, &rx_packet_list, pi);
843 }
845 page_buf = next_buf;
846 }
847 ASSERT(!more_data_flag && !extra_info_flag);
849 xi->stat_rx_ok += packet_count;
851 /* indicate packets to NDIS */
852 entry = RemoveHeadList(&rx_packet_list);
853 packet_count = 0;
854 while (entry != &rx_packet_list)
855 {
856 PNDIS_PACKET packet = CONTAINING_RECORD(entry, NDIS_PACKET, MiniportReservedEx[sizeof(PVOID)]);
857 ASSERT(*(shared_buffer_t **)&packet->MiniportReservedEx[0]);
859 packets[packet_count++] = packet;
860 entry = RemoveHeadList(&rx_packet_list);
861 if (packet_count == MAXIMUM_PACKETS_PER_INDICATE || entry == &rx_packet_list)
862 {
863 NdisMIndicateReceivePacket(xi->adapter_handle, packets, packet_count);
864 packet_count = 0;
865 }
866 }
867 //FUNCTION_EXIT();
868 }
/* called at DISPATCH_LEVEL */
/* it's okay for return packet to be called while resume_state != RUNNING as the packet will simply be added back to the freelist, the grants will be fixed later */
/* NDIS gives a previously-indicated packet back to us. Walk the packet's
   NDIS_BUFFER chain in parallel with the pb chain stored in
   MiniportReservedEx[0] (built by XenNet_MakePacket): the header pb has
   virtual == NULL and goes back to the lookaside list; data pbs drop one
   reference. Finally the packet is returned to the pool and the ring
   re-filled. */
VOID
XenNet_ReturnPacket(
  IN NDIS_HANDLE MiniportAdapterContext,
  IN PNDIS_PACKET Packet
)
{
  struct xennet_info *xi = MiniportAdapterContext;
  PNDIS_BUFFER buffer;
  /* head of the pb chain that parallels the packet's buffer chain */
  shared_buffer_t *page_buf = *(shared_buffer_t **)&Packet->MiniportReservedEx[0];

  //FUNCTION_ENTER();

  NdisUnchainBufferAtFront(Packet, &buffer);

  while (buffer)
  {
    shared_buffer_t *next_buf;
    ASSERT(page_buf);
    next_buf = page_buf->next;
    if (!page_buf->virtual)
    {
      /* this isn't actually a share_buffer, it is some memory allocated for the header - just free it */
      PUCHAR va;
      UINT len;
      NdisQueryBufferSafe(buffer, &va, &len, NormalPagePriority);
      /* the shared_buffer_t header sits immediately before the header data
         in the lookaside block (see XenNet_MakePacket) */
      NdisFreeToNPagedLookasideList(&xi->rx_lookaside_list, va - sizeof(shared_buffer_t));
      NdisFreeBuffer(buffer);
    }
    else
    {
      /* free the NdisCopyBuffer sub-buffer (but never the pb's own full-page
         buffer) and drop the reference taken by ref_pb in MakePacket */
      if (buffer != page_buf->buffer)
        NdisFreeBuffer(buffer);
      put_pb_on_freelist(xi, page_buf);
    }
    NdisUnchainBufferAtFront(Packet, &buffer);
    page_buf = next_buf;
  }

  put_packet_on_freelist(xi, Packet);
  xi->rx_outstanding--;

  /* shutdown waits on this event until every indicated packet is back */
  if (!xi->rx_outstanding && xi->rx_shutting_down)
    KeSetEvent(&xi->packet_returned_event, IO_NO_INCREMENT, FALSE);

  KeAcquireSpinLockAtDpcLevel(&xi->rx_lock);

  XenNet_FillRing(xi);

  KeReleaseSpinLockFromDpcLevel(&xi->rx_lock);

  //FUNCTION_EXIT();
}
928 /*
929 Free all Rx buffers (on halt, for example)
930 The ring must be stopped at this point.
931 */
933 static VOID
934 XenNet_PurgeRing(struct xennet_info *xi)
935 {
936 int i;
937 for (i = 0; i < NET_RX_RING_SIZE; i++)
938 {
939 if (xi->rx_ring_pbs[i] != NULL)
940 {
941 put_pb_on_freelist(xi, xi->rx_ring_pbs[i]);
942 xi->rx_ring_pbs[i] = NULL;
943 }
944 }
945 }
947 static VOID
948 XenNet_BufferFree(struct xennet_info *xi)
949 {
950 shared_buffer_t *pb;
952 XenNet_PurgeRing(xi);
954 while ((pb = get_pb_from_freelist(xi)) != NULL)
955 {
956 NdisFreeBuffer(pb->buffer);
957 xi->vectors.GntTbl_EndAccess(xi->vectors.context,
958 pb->gref, FALSE, (ULONG)'XNRX');
959 NdisFreeMemory(pb->virtual, PAGE_SIZE, 0);
960 }
961 }
963 VOID
964 XenNet_RxResumeStart(xennet_info_t *xi)
965 {
966 KIRQL old_irql;
968 FUNCTION_ENTER();
970 KeAcquireSpinLock(&xi->rx_lock, &old_irql);
971 XenNet_PurgeRing(xi);
972 KeReleaseSpinLock(&xi->rx_lock, old_irql);
974 FUNCTION_EXIT();
975 }
977 VOID
978 XenNet_BufferAlloc(xennet_info_t *xi)
979 {
980 //NDIS_STATUS status;
981 int i;
983 xi->rx_id_free = NET_RX_RING_SIZE;
984 xi->rx_outstanding = 0;
986 for (i = 0; i < NET_RX_RING_SIZE; i++)
987 {
988 xi->rx_ring_pbs[i] = NULL;
989 }
990 }
992 VOID
993 XenNet_RxResumeEnd(xennet_info_t *xi)
994 {
995 KIRQL old_irql;
997 FUNCTION_ENTER();
999 KeAcquireSpinLock(&xi->rx_lock, &old_irql);
1000 //XenNet_BufferAlloc(xi);
1001 XenNet_FillRing(xi);
1002 KeReleaseSpinLock(&xi->rx_lock, old_irql);
1004 FUNCTION_EXIT();
1007 BOOLEAN
1008 XenNet_RxInit(xennet_info_t *xi)
1010 NDIS_STATUS status;
1012 FUNCTION_ENTER();
1014 xi->rx_shutting_down = FALSE;
1015 KeInitializeSpinLock(&xi->rx_lock);
1016 KeInitializeEvent(&xi->packet_returned_event, SynchronizationEvent, FALSE);
1017 KeInitializeTimer(&xi->rx_timer);
1018 KeInitializeDpc(&xi->rx_dpc, XenNet_RxBufferCheck, xi);
1019 //KeSetTargetProcessorDpc(&xi->rx_dpc, 0);
1020 //KeSetImportanceDpc(&xi->rx_dpc, HighImportance);
1021 //KeInitializeDpc(&xi->rx_timer_dpc, XenNet_RxTimerDpc, xi);
1022 status = NdisAllocateMemoryWithTag((PVOID)&xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount(), XENNET_POOL_TAG);
1023 if (status != NDIS_STATUS_SUCCESS)
1025 KdPrint(("NdisAllocateMemoryWithTag failed with 0x%x\n", status));
1026 return FALSE;
1028 NdisZeroMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount());
1030 stack_new(&xi->rx_pb_stack, NET_RX_RING_SIZE * 4);
1032 XenNet_BufferAlloc(xi);
1034 NdisAllocatePacketPool(&status, &xi->rx_packet_pool, NET_RX_RING_SIZE * 4,
1035 PROTOCOL_RESERVED_SIZE_IN_PACKET);
1036 if (status != NDIS_STATUS_SUCCESS)
1038 KdPrint(("NdisAllocatePacketPool failed with 0x%x\n", status));
1039 return FALSE;
1042 NdisInitializeNPagedLookasideList(&xi->rx_lookaside_list, NULL, NULL, 0,
1043 MAX_ETH_HEADER_LENGTH + MAX_LOOKAHEAD_LENGTH + sizeof(shared_buffer_t), XENNET_POOL_TAG, 0);
1045 XenNet_FillRing(xi);
1047 FUNCTION_EXIT();
1049 return TRUE;
1052 BOOLEAN
1053 XenNet_RxShutdown(xennet_info_t *xi)
1055 KIRQL old_irql;
1057 FUNCTION_ENTER();
1059 KeAcquireSpinLock(&xi->rx_lock, &old_irql);
1060 xi->rx_shutting_down = TRUE;
1061 KeReleaseSpinLock(&xi->rx_lock, old_irql);
1063 if (xi->config_rx_interrupt_moderation)
1065 KeCancelTimer(&xi->rx_timer);
1068 KeRemoveQueueDpc(&xi->rx_dpc);
1069 #if (NTDDI_VERSION >= NTDDI_WINXP)
1070 KeFlushQueuedDpcs();
1071 #endif
1073 while (xi->rx_outstanding)
1075 KdPrint((__DRIVER_NAME " Waiting for all packets to be returned\n"));
1076 KeWaitForSingleObject(&xi->packet_returned_event, Executive, KernelMode, FALSE, NULL);
1079 //KeAcquireSpinLock(&xi->rx_lock, &old_irql);
1081 NdisFreeMemory(xi->rxpi, sizeof(packet_info_t) * NdisSystemProcessorCount(), 0);
1083 XenNet_BufferFree(xi);
1085 NdisFreePacketPool(xi->rx_packet_pool);
1087 NdisDeleteNPagedLookasideList(&xi->rx_lookaside_list);
1089 //KeReleaseSpinLock(&xi->rx_lock, old_irql);
1091 FUNCTION_EXIT();
1093 return TRUE;