win-pvdrivers

xennet/xennet_tx.c @ 230:93f3ccb775b6

Automated merge with ssh://win-pvdrivers@xenbits.xensource.com/win-pvdrivers.hg
author Andy Grover <andy.grover@oracle.com>
date Wed Mar 26 18:38:16 2008 -0700 (2008-03-26)
parents a9a5b54bee92 c33404027885
children 0893bd5ff2cb
/*
PV Net Driver for Windows Xen HVM Domains
Copyright (C) 2007 James Harper
Copyright (C) 2007 Andrew Grover <andy.grover@oracle.com>

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "xennet.h"

#define FREELIST_ID_ERROR 0xFFFF

/* profile counters compile away to nothing unless XEN_PROFILE is defined */
#ifdef XEN_PROFILE
#define PC_INC(var) var++
#else
#define PC_INC(var)
#endif
/*
 * Tx slot bookkeeping: each in-flight request normally carries a 16-bit id
 * drawn from tx_id_list.  extra_info requests occupy a ring slot but carry
 * no id of their own, so they are tracked separately via tx_no_id_free.
 */
static USHORT
get_id_from_freelist(struct xennet_info *xi)
{
  if (xi->tx_id_free - xi->tx_no_id_free == 0)
    return FREELIST_ID_ERROR;
  xi->tx_id_free--;
  return xi->tx_id_list[xi->tx_id_free];
}

/* reserve a ring slot for a request that carries no id (extra_info) */
static USHORT
get_no_id_from_freelist(struct xennet_info *xi)
{
  if (xi->tx_id_free - xi->tx_no_id_free == 0)
    return FREELIST_ID_ERROR;
  xi->tx_no_id_free--;
  return 0;
}

static VOID
put_id_on_freelist(struct xennet_info *xi, USHORT id)
{
  xi->tx_id_list[xi->tx_id_free] = id;
  xi->tx_id_free++;
}

static VOID
put_no_id_on_freelist(struct xennet_info *xi)
{
  xi->tx_no_id_free++;
}
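
/*
 * Illustrative accounting sketch (not part of the original source): after
 * XenNet_TxInit, tx_id_free == NET_TX_RING_SIZE and tx_no_id_free == 0.
 * Posting an LSO packet whose SG list maps to three ring requests consumes
 * three ids plus one id-less extra_info slot reserved with
 * get_no_id_from_freelist; when the backend later answers that slot with
 * NETIF_RSP_NULL, put_no_id_on_freelist undoes the reservation.
 */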
static grant_ref_t
get_gref_from_freelist(struct xennet_info *xi)
{
  if (xi->tx_gref_free == 0)
    return 0;
  xi->tx_gref_free--;
  return xi->tx_gref_list[xi->tx_gref_free];
}

static VOID
put_gref_on_freelist(struct xennet_info *xi, grant_ref_t gref)
{
  xi->tx_gref_list[xi->tx_gref_free] = gref;
  xi->tx_gref_free++;
}
#define SWAP_USHORT(x) ((USHORT)((((x) & 0xFF) << 8) | (((x) >> 8) & 0xFF)))
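
/*
 * Example, assuming a little-endian host: the IPv4 EtherType 0x0800 is
 * big-endian on the wire, so reading it as a host USHORT yields 0x0008, and
 * SWAP_USHORT(0x0008) == 0x0800 -- the value the switch below tests against.
 */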
/*
 * Windows assumes that if we can do large send offload then we can
 * do IP header csum offload, so we have to fake it!
 */
VOID
XenNet_SumHeader(
  PMDL mdl /* first buffer of the packet - containing the header */
)
{
  PVOID buffer = MmGetSystemAddressForMdlSafe(mdl, NormalPagePriority);
  PUSHORT ushorts = (PUSHORT)buffer;

  USHORT length_in_ushorts;
  USHORT i;
  ULONG csum = 0;

  ASSERT(buffer);
  switch (SWAP_USHORT(ushorts[6])) /* EtherType */
  {
  case 0x0800: /* IPv4 */
    /* TODO: check that the buffer is long enough to contain the ethernet header plus a minimum ip header */
    ushorts = &ushorts[0x07]; /* skip the 14-byte (7 USHORT) ethernet header */
    length_in_ushorts = ((SWAP_USHORT(ushorts[0]) >> 8) & 0x0F) * 2; /* IHL is in 32-bit words */
    /* TODO: check that the buffer is long enough to contain any ip options too */
    break;
  default:
    return;
  }
  ushorts[5] = 0; /* the checksum field (byte offset 10) must be zero while summing */
  for (i = 0; i < length_in_ushorts; i++)
  {
    csum += SWAP_USHORT(ushorts[i]);
  }
  while (csum & 0xFFFF0000) /* fold carries back into the low 16 bits */
    csum = (csum & 0xFFFF) + (csum >> 16);
  ushorts[5] = SWAP_USHORT(~csum);
}
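
/*
 * Worked example of the fold above (values illustrative): if summing a
 * 20-byte header gives csum == 0x1A2B3, one fold produces
 * 0xA2B3 + 0x1 == 0xA2B4, and the stored checksum is the ones' complement
 * ~0xA2B4 == 0x5D4B, byte-swapped back to network order by SWAP_USHORT.
 */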
/* Place a buffer on the tx ring. */
static struct netif_tx_request*
XenNet_PutOnTxRing(struct xennet_info *xi, ULONGLONG addr, size_t len, uint16_t flags)
{
  struct netif_tx_request *tx;
  unsigned short id;
  PFN_NUMBER pfn = (PFN_NUMBER)(addr >> PAGE_SHIFT);
  ULONG offset = BYTE_OFFSET(addr);

  id = get_id_from_freelist(xi);
  /* TODO: check id against FREELIST_ID_ERROR */
  ASSERT(xi->tx_pkts[id] == NULL);
  tx = RING_GET_REQUEST(&xi->tx, xi->tx.req_prod_pvt);

  tx->gref = get_gref_from_freelist(xi);
  ASSERT(tx->gref != 0);
  ASSERT(xi->tx_grefs[id] == 0);
  xi->tx_grefs[id] = tx->gref;

  /* grant the backend access to the page backing this buffer */
  xi->XenInterface.GntTbl_GrantAccess(
    xi->XenInterface.InterfaceHeader.Context, 0,
    pfn, FALSE, tx->gref);
  tx->id = id;
  tx->offset = (uint16_t)offset;
  tx->size = (uint16_t)len;
  tx->flags = flags;
  PC_INC(ProfCount_TxPacketsTotal);

  return tx;
}
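
/*
 * Illustrative address split (assuming PAGE_SHIFT == 12): for
 * addr == 0x12345678, pfn == addr >> 12 == 0x12345 and
 * offset == BYTE_OFFSET(addr) == 0x678, so the backend reads 'len' bytes
 * starting at offset 0x678 of the granted page.
 */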
/* Called at DISPATCH_LEVEL with tx_lock held */

/*
 * Send one NDIS_PACKET.  This may involve multiple entries on the TX ring.
 */
static BOOLEAN
XenNet_HWSendPacket(struct xennet_info *xi, PNDIS_PACKET packet)
{
  struct netif_tx_request *tx = NULL;
  struct netif_extra_info *ei;
  PNDIS_TCP_IP_CHECKSUM_PACKET_INFO csum_info;
  PSCATTER_GATHER_LIST sg_list;
  UINT total_packet_length;
  ULONG sg_num = 0;
  ULONG mss; // 0 if not using large send
  PMDL first_buffer;
  int cycles = 0;
  ULONGLONG sg_elem_addr;
  ULONG sg_elem_len;
  ULONG sg_elem_pages;
  ULONG sg_elem_page;
  ULONG chunk_len;
  uint16_t flags;
#if defined(XEN_PROFILE)
  LARGE_INTEGER tsc, dummy;

  tsc = KeQueryPerformanceCounter(&dummy);
#endif

  NdisQueryPacket(packet, NULL, NULL, &first_buffer, &total_packet_length);

  flags = NETTXF_more_data;
  mss = PtrToUlong(NDIS_PER_PACKET_INFO_FROM_PACKET(packet, TcpLargeSendPacketInfo));
  if (mss > 0)
  {
    flags |= NETTXF_extra_info | NETTXF_csum_blank | NETTXF_data_validated;
    XenNet_SumHeader(first_buffer);
    PC_INC(ProfCount_TxPacketsLargeOffload);
  }

  sg_list = NDIS_PER_PACKET_INFO_FROM_PACKET(packet, ScatterGatherListPacketInfo);

  /*
   * See io/netif.h.  Must put (A) the 1st request, then (B) the optional
   * extra_info, then (C) the rest of the requests on the ring.
   */
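  /*
   * Illustrative ring layout for an LSO packet with a 3-element SG list,
   * assuming no element crosses a page boundary:
   *   slot n:   element 0, NETTXF_more_data | NETTXF_extra_info | csum flags
   *   slot n+1: netif_extra_info carrying the GSO mss (no id; the backend
   *             answers it with NETIF_RSP_NULL)
   *   slot n+2: element 1, NETTXF_more_data
   *   slot n+3: element 2, with NETTXF_more_data cleared at the end of this
   *             function to mark the final request
   */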
  /* (A) */
  /* note: this assumes the first SG element does not cross a page boundary */
  tx = XenNet_PutOnTxRing(xi, sg_list->Elements[0].Address.QuadPart,
    sg_list->Elements[0].Length, flags);
  xi->tx.req_prod_pvt++;

  /* (B) */
  if (mss > 0)
  {
    get_no_id_from_freelist(xi);
    ei = (struct netif_extra_info *)RING_GET_REQUEST(&xi->tx, xi->tx.req_prod_pvt);
    ei->type = XEN_NETIF_EXTRA_TYPE_GSO;
    ei->flags = 0; /* extra_info flags take XEN_NETIF_EXTRA_FLAG_* values; no more extras follow */
    ei->u.gso.size = (USHORT) mss;
    ei->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
    ei->u.gso.pad = 0;
    ei->u.gso.features = 0;

    xi->tx.req_prod_pvt++;
  }

  /* (C) */
  for (sg_num = 1; sg_num < sg_list->NumberOfElements; sg_num++)
  {
    //KdPrint((__DRIVER_NAME " i = %d\n", i));
    ASSERT(cycles++ < 256);

    sg_elem_addr = sg_list->Elements[sg_num].Address.QuadPart;
    sg_elem_len = sg_list->Elements[sg_num].Length;
    sg_elem_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES(sg_elem_addr, sg_elem_len);
    /* an element that crosses page boundaries becomes one request per page */
    for (sg_elem_page = 0; sg_elem_page < sg_elem_pages; sg_elem_page++)
    {
      chunk_len = min(sg_elem_len, PAGE_SIZE - BYTE_OFFSET(sg_elem_addr));

      flags = NETTXF_more_data;
      csum_info = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&NDIS_PER_PACKET_INFO_FROM_PACKET(
        packet, TcpIpChecksumPacketInfo);
      if (csum_info->Transmit.NdisPacketTcpChecksum
        || csum_info->Transmit.NdisPacketUdpChecksum)
      {
        flags |= NETTXF_csum_blank | NETTXF_data_validated;
        PC_INC(ProfCount_TxPacketsCsumOffload);
      }

      tx = XenNet_PutOnTxRing(xi, sg_elem_addr, chunk_len, flags);

      sg_elem_addr += chunk_len;
      sg_elem_len -= chunk_len;

      xi->tx.req_prod_pvt++;
    }
  }

  /* only set the packet on the last request, and clear more_data there */
  ASSERT(tx);
  /* note: id 0 is a valid freelist id, so tx->id itself is not asserted */
  xi->tx_pkts[tx->id] = packet;
  tx->flags &= ~NETTXF_more_data;

  return TRUE;
}
/* Called at DISPATCH_LEVEL with tx_lock held */
/* TODO: calculate the required tx ring slots beforehand and abort if not enough are available */
static VOID
XenNet_SendQueuedPackets(struct xennet_info *xi)
{
  PLIST_ENTRY entry;
  PNDIS_PACKET packet;
  int notify;
#if defined(XEN_PROFILE)
  LARGE_INTEGER tsc, dummy;
#endif

  int cycles = 0;
  BOOLEAN success;

#if defined(XEN_PROFILE)
  tsc = KeQueryPerformanceCounter(&dummy);
#endif

  entry = RemoveHeadList(&xi->tx_waiting_pkt_list);
  /* if the list is empty, RemoveHeadList returns the list head itself, not NULL */
  while (entry != &xi->tx_waiting_pkt_list)
  {
    ASSERT(cycles++ < 256);
    //KdPrint((__DRIVER_NAME " Packet ready to send\n"));
    packet = CONTAINING_RECORD(entry, NDIS_PACKET, MiniportReservedEx[sizeof(PVOID)]);
    success = XenNet_HWSendPacket(xi, packet);
    /* note: a packet dequeued here is not re-queued on failure; harmless
       today because XenNet_HWSendPacket always returns TRUE */
    if (!success)
      break;
    entry = RemoveHeadList(&xi->tx_waiting_pkt_list);
  }

  RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xi->tx, notify);
  if (notify)
  {
    xi->XenInterface.EvtChn_Notify(xi->XenInterface.InterfaceHeader.Context,
      xi->event_channel);
  }

#if defined(XEN_PROFILE)
  ProfTime_SendQueuedPackets.QuadPart += KeQueryPerformanceCounter(&dummy).QuadPart - tsc.QuadPart;
  ProfCount_SendQueuedPackets++;
#endif
}
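
/*
 * Sketch of the notify handshake (per the shared-ring macros in io/ring.h,
 * stated here as an assumption): RING_PUSH_REQUESTS_AND_CHECK_NOTIFY
 * publishes req_prod_pvt to the shared ring and sets 'notify' only if the
 * backend's advertised req_event shows it has already caught up, so
 * EvtChn_Notify is raised just when the backend may have gone idle.
 */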
// Called at DISPATCH_LEVEL
NDIS_STATUS
XenNet_TxBufferGC(struct xennet_info *xi)
{
  RING_IDX cons, prod;
  unsigned short id;
  PNDIS_PACKET packets[NET_TX_RING_SIZE];
  ULONG packet_count = 0;
  int moretodo;
  ULONG i;
  UINT total_packet_length;
  int cycles = 0;
#if defined(XEN_PROFILE)
  LARGE_INTEGER tsc, dummy;
#endif

  ASSERT(xi->connected);
  ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);

//  KdPrint((__DRIVER_NAME " --> " __FUNCTION__ "\n"));

#if defined(XEN_PROFILE)
  tsc = KeQueryPerformanceCounter(&dummy);
#endif

  KeAcquireSpinLockAtDpcLevel(&xi->tx_lock);

  do {
    ASSERT(cycles++ < 256);
    prod = xi->tx.sring->rsp_prod;
    KeMemoryBarrier(); /* Ensure we see responses up to 'prod'. */

    for (cons = xi->tx.rsp_cons; cons != prod; cons++)
    {
      struct netif_tx_response *txrsp;

      ASSERT(cycles++ < 256);

      txrsp = RING_GET_RESPONSE(&xi->tx, cons);
      if (txrsp->status == NETIF_RSP_NULL)
      {
//        KdPrint((__DRIVER_NAME " NETIF_RSP_NULL\n"));
        put_no_id_on_freelist(xi);
        continue; // this is the response to an extra_info request
      }

      id = txrsp->id;
      packets[packet_count] = xi->tx_pkts[id];
      if (packets[packet_count])
      {
        NdisQueryPacket(packets[packet_count], NULL, NULL, NULL, &total_packet_length);
        /* on completion of a large send, NDIS expects TcpLargeSendPacketInfo
           to hold the number of bytes sent */
        if (NDIS_PER_PACKET_INFO_FROM_PACKET(packets[packet_count], TcpLargeSendPacketInfo) != 0)
        {
          NDIS_PER_PACKET_INFO_FROM_PACKET(packets[packet_count], TcpLargeSendPacketInfo) = UlongToPtr(total_packet_length);
          //KdPrint((__DRIVER_NAME " Large Send Response = %d\n", NDIS_PER_PACKET_INFO_FROM_PACKET(packets[packet_count], TcpLargeSendPacketInfo)));
        }
        xi->tx_pkts[id] = NULL;
        packet_count++;
        xi->stat_tx_ok++;
      }
      put_gref_on_freelist(xi, xi->tx_grefs[id]);
      xi->tx_grefs[id] = 0;
      put_id_on_freelist(xi, id);
      xi->tx_outstanding--;
    }

    xi->tx.rsp_cons = prod;

    RING_FINAL_CHECK_FOR_RESPONSES(&xi->tx, moretodo);
  } while (moretodo);

  /* if there are queued packets, send them now */
  XenNet_SendQueuedPackets(xi);

  KeReleaseSpinLockFromDpcLevel(&xi->tx_lock);

  for (i = 0; i < packet_count; i++)
  {
    /* A miniport driver must release any spin lock that it is holding before
       calling NdisMSendComplete. */
    NdisMSendComplete(xi->adapter_handle, packets[i], NDIS_STATUS_SUCCESS);
  }

//  KdPrint((__DRIVER_NAME " <-- " __FUNCTION__ "\n"));

#if defined(XEN_PROFILE)
  ProfTime_TxBufferGC.QuadPart += KeQueryPerformanceCounter(&dummy).QuadPart - tsc.QuadPart;
  ProfCount_TxBufferGC++;
#endif

  return NDIS_STATUS_SUCCESS;
}
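
/*
 * Design note on the function above: completions are batched into packets[]
 * while tx_lock is held and only handed to NdisMSendComplete after the lock
 * is dropped, both to honour the NDIS rule quoted in the loop and to keep
 * the response-processing path short.
 */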
VOID
XenNet_SendPackets(
  IN NDIS_HANDLE MiniportAdapterContext,
  IN PPNDIS_PACKET PacketArray,
  IN UINT NumberOfPackets
  )
{
  struct xennet_info *xi = MiniportAdapterContext;
  PNDIS_PACKET packet;
  UINT i;
  PLIST_ENTRY entry;
  KIRQL OldIrql;
#if defined(XEN_PROFILE)
  LARGE_INTEGER tsc, dummy;
  KIRQL OldIrql2;
#endif

#if defined(XEN_PROFILE)
  KeRaiseIrql(DISPATCH_LEVEL, &OldIrql2);
  tsc = KeQueryPerformanceCounter(&dummy);
#endif

  KeAcquireSpinLock(&xi->tx_lock, &OldIrql);

//  KdPrint((__DRIVER_NAME " --> " __FUNCTION__ "\n"));
  for (i = 0; i < NumberOfPackets; i++)
  {
    packet = PacketArray[i];
    ASSERT(packet);
    *(ULONG *)&packet->MiniportReservedEx = 0;
    /* queue the packet via the LIST_ENTRY embedded in MiniportReservedEx */
    entry = (PLIST_ENTRY)&packet->MiniportReservedEx[sizeof(PVOID)];
    InsertTailList(&xi->tx_waiting_pkt_list, entry);
    xi->tx_outstanding++;
#if defined(XEN_PROFILE)
    ProfCount_PacketsPerSendPackets++;
#endif
  }

  XenNet_SendQueuedPackets(xi);

  KeReleaseSpinLock(&xi->tx_lock, OldIrql);

#if defined(XEN_PROFILE)
  ProfTime_SendPackets.QuadPart += KeQueryPerformanceCounter(&dummy).QuadPart - tsc.QuadPart;
  ProfCount_SendPackets++;
  KeLowerIrql(OldIrql2);
#endif

#if defined(XEN_PROFILE)
  if ((ProfCount_SendPackets & 1023) == 0)
  {
    KdPrint((__DRIVER_NAME " ***\n"));
    KdPrint((__DRIVER_NAME " RxBufferAlloc Count = %10d, Avg Time = %10ld\n", ProfCount_RxBufferAlloc, (ProfCount_RxBufferAlloc == 0)?0:(ProfTime_RxBufferAlloc.QuadPart / ProfCount_RxBufferAlloc)));
    KdPrint((__DRIVER_NAME " ReturnPacket Count = %10d, Avg Time = %10ld\n", ProfCount_ReturnPacket, (ProfCount_ReturnPacket == 0)?0:(ProfTime_ReturnPacket.QuadPart / ProfCount_ReturnPacket)));
    KdPrint((__DRIVER_NAME " RxBufferCheck Count = %10d, Avg Time = %10ld\n", ProfCount_RxBufferCheck, (ProfCount_RxBufferCheck == 0)?0:(ProfTime_RxBufferCheck.QuadPart / ProfCount_RxBufferCheck)));
    KdPrint((__DRIVER_NAME " RxBufferCheckTop Avg Time = %10ld\n", (ProfCount_RxBufferCheck == 0)?0:(ProfTime_RxBufferCheckTopHalf.QuadPart / ProfCount_RxBufferCheck)));
    KdPrint((__DRIVER_NAME " RxBufferCheckBot Avg Time = %10ld\n", (ProfCount_RxBufferCheck == 0)?0:(ProfTime_RxBufferCheckBotHalf.QuadPart / ProfCount_RxBufferCheck)));
    KdPrint((__DRIVER_NAME " Linearize Count = %10d, Avg Time = %10ld\n", ProfCount_Linearize, (ProfCount_Linearize == 0)?0:(ProfTime_Linearize.QuadPart / ProfCount_Linearize)));
    KdPrint((__DRIVER_NAME " SendPackets Count = %10d, Avg Time = %10ld\n", ProfCount_SendPackets, (ProfCount_SendPackets == 0)?0:(ProfTime_SendPackets.QuadPart / ProfCount_SendPackets)));
    KdPrint((__DRIVER_NAME " Packets per SendPackets = %10d\n", (ProfCount_SendPackets == 0)?0:(ProfCount_PacketsPerSendPackets / ProfCount_SendPackets)));
    KdPrint((__DRIVER_NAME " SendQueuedPackets Count = %10d, Avg Time = %10ld\n", ProfCount_SendQueuedPackets, (ProfCount_SendQueuedPackets == 0)?0:(ProfTime_SendQueuedPackets.QuadPart / ProfCount_SendQueuedPackets)));
    KdPrint((__DRIVER_NAME " TxBufferGC Count = %10d, Avg Time = %10ld\n", ProfCount_TxBufferGC, (ProfCount_TxBufferGC == 0)?0:(ProfTime_TxBufferGC.QuadPart / ProfCount_TxBufferGC)));
    KdPrint((__DRIVER_NAME " RxPackets Total = %10d, Csum Offload = %10d, Calls To Receive = %10d\n", ProfCount_RxPacketsTotal, ProfCount_RxPacketsCsumOffload, ProfCount_CallsToIndicateReceive));
    KdPrint((__DRIVER_NAME " TxPackets Total = %10d, Csum Offload = %10d, Large Offload = %10d\n", ProfCount_TxPacketsTotal, ProfCount_TxPacketsCsumOffload, ProfCount_TxPacketsLargeOffload));
  }
#endif
//  KdPrint((__DRIVER_NAME " <-- " __FUNCTION__ "\n"));
}
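
/*
 * MiniportReservedEx layout as used above (our reading of the code, not
 * documented in this file): the leading bytes are scratch space (a ULONG is
 * zeroed on queue), and the LIST_ENTRY that links the packet into
 * tx_waiting_pkt_list lives at offset sizeof(PVOID), matching the
 * CONTAINING_RECORD(..., MiniportReservedEx[sizeof(PVOID)]) used on dequeue.
 */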
static void
XenNet_TxBufferFree(struct xennet_info *xi)
{
  PLIST_ENTRY entry;
  PNDIS_PACKET packet;
  USHORT i;
  grant_ref_t gref;

  ASSERT(!xi->connected);

  /* fail any packets still waiting in the tx queue */
  entry = RemoveHeadList(&xi->tx_waiting_pkt_list);
  while (entry != &xi->tx_waiting_pkt_list)
  {
    packet = CONTAINING_RECORD(entry, NDIS_PACKET, MiniportReservedEx[sizeof(PVOID)]);
    NdisMSendComplete(xi->adapter_handle, packet, NDIS_STATUS_FAILURE);
    entry = RemoveHeadList(&xi->tx_waiting_pkt_list);
  }

  /* fail sent-but-not-completed packets and revoke their grants */
  for (i = 0; i < NET_TX_RING_SIZE; i++)
  {
    packet = xi->tx_pkts[i];
    if (packet != NULL)
      NdisMSendComplete(xi->adapter_handle, packet, NDIS_STATUS_FAILURE);
    gref = xi->tx_grefs[i];
    if (gref != 0)
      xi->XenInterface.GntTbl_EndAccess(
        xi->XenInterface.InterfaceHeader.Context, gref, TRUE);
  }
}
BOOLEAN
XenNet_TxInit(xennet_info_t *xi)
{
  USHORT i;

  xi->tx_mdl = AllocatePage();
  xi->tx_pgs = MmGetMdlVirtualAddress(xi->tx_mdl);
  SHARED_RING_INIT(xi->tx_pgs);
  FRONT_RING_INIT(&xi->tx, xi->tx_pgs, PAGE_SIZE);
  /* grant the backend access to the shared ring page */
  xi->tx_ring_ref = xi->XenInterface.GntTbl_GrantAccess(
    xi->XenInterface.InterfaceHeader.Context, 0,
    *MmGetMdlPfnArray(xi->tx_mdl), FALSE, 0);
  xi->tx_id_free = 0;
  xi->tx_no_id_free = 0;
  for (i = 0; i < NET_TX_RING_SIZE; i++)
  {
    xi->tx_pkts[i] = NULL;
    put_id_on_freelist(xi, i);
  }
  xi->tx_gref_free = 0;
  /* pre-reserve a grant ref for every possible in-flight request */
  for (i = 0; i < NET_TX_RING_SIZE; i++)
  {
    xi->tx_grefs[i] = 0;
    put_gref_on_freelist(xi, xi->XenInterface.GntTbl_GetRef(
      xi->XenInterface.InterfaceHeader.Context));
  }
  return TRUE;
}
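
/*
 * Post-init state (illustrative summary): ids 0..NET_TX_RING_SIZE-1 and
 * NET_TX_RING_SIZE pre-reserved grant refs sit on their freelists, and the
 * backend can map the shared ring page via tx_ring_ref.
 */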
BOOLEAN
XenNet_TxShutdown(xennet_info_t *xi)
{
  ULONG i;

  XenNet_TxBufferFree(xi);

  /* free TX resources */
  if (xi->XenInterface.GntTbl_EndAccess(
    xi->XenInterface.InterfaceHeader.Context, xi->tx_ring_ref, 0))
  {
    xi->tx_ring_ref = GRANT_INVALID_REF;
    FreePages(xi->tx_mdl);
  }
  /* if EndAccess fails then the tx ring page is LEAKED -- it is not safe to
     reuse a page that Dom0 still has access to */
  xi->tx_pgs = NULL;

  for (i = 0; i < NET_TX_RING_SIZE; i++)
  {
    xi->XenInterface.GntTbl_PutRef(
      xi->XenInterface.InterfaceHeader.Context, xi->tx_gref_list[i]);
  }

  return TRUE;
}