Skip to content

Conversation

@moritzbuhl
Copy link
Collaborator

@moritzbuhl moritzbuhl commented Apr 16, 2025

The patch below can enable GRO on Linux (https://lwn.net/Articles/768995/, https://lwn.net/ml/netdev/a8112a7fbbfc39d5b59e5ace0d3c1409824f9261.1539957909.git.pabeni@redhat.com/).
I noticed that it only needs a sockopt and that udpbench does not
need many additional changes to cope with the changes in receive
behavior.

GRO:

root@t0:~/udpbench# ./udpbench -r root@t1 -R /root/udpbench/udpbench -G -m 1024 -t 10 -b 655350000 -l 1420 recv 10.0.0.1
sockname: 10.0.0.1 12345
peername: 10.0.0.2 34723
send: syscalls 4930, packets 5048320, frame 1, payload 1420, ip 1448, ether 1486, begin 1744804762.685554, end 1744804772.682420, duration 9.996866, bit/s 6.003324e+09, start 1744804762.681582, stop 1744804772.682526
recv: syscalls 199, packets 5047393, frame 1, payload 1420, ip 1448, ether 1486, begin 1744804762.686053, end 1744804772.790384, duration 10.104331, bit/s 5.938385e+09, start 1744804761.240173, stop 1744804773.931088

NO GRO:

root@t0:~/udpbench# ./udpbench -r root@t1 -R /root/udpbench/udpbench -m 1024 -t 10 -b 655350000 -l 1420 recv 10.0.0.1
sockname: 10.0.0.1 12345
peername: 10.0.0.2 43523
send: syscalls 4895, packets 5012480, frame 1, payload 1420, ip 1448, ether 1486, begin 1744804969.817430, end 1744804979.815481, duration 9.998051, bit/s 5.959998e+09, start 1744804969.813364, stop 1744804979.815601
recv: syscalls 4906, packets 5011369, frame 1, payload 1420, ip 1448, ether 1486, begin 1744804969.817863, end 1744804979.922393, duration 10.104530, bit/s 5.895886e+09, start 1744804968.007205, stop 1744804981.062712

Currently the bitrate calculation with GRO is hacky: it takes the
total received payload and divides it by rcvlen to get the
number of packets.
By using the UDP_GRO cmsg_type one supposedly could get back the
individual packet size and then divide for each msg in mmsg:
if(cmsg->cmsg_level == SOL_UDP && cmsg->cmsg_type == UDP_GRO) { ...

However, I cannot get it to work. Below is my attempted diff for
that:

--- udpbench.c	2025-04-16 14:11:25.960592761 +0200
+++ udpbench-gro-cmsg.c	2025-04-16 13:58:25.024956595 +0200
@@ -67,6 +67,7 @@
 void	udp_setbuffersize(int, int, int);
 #if defined(__linux__) && defined(UDP_GRO)
 void	udp_setgro(int);
+int	getgro_size(struct msghdr *msg);
 #endif
 void	udp_setrouteralert(int);
 void	udp_send(int, int, unsigned long);
@@ -846,6 +847,22 @@
 	if (setsockopt(udp_socket, IPPROTO_UDP, UDP_GRO, &on, len) == -1)
 		err(1, "setsockopt gro");
 }
+
+int
+getgro_size(struct msghdr *msg)
+{
+	struct cmsghdr *cmsg;
+	int pktsz = 0;
+
+	for(cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		if(cmsg->cmsg_level == SOL_UDP && cmsg->cmsg_type == UDP_GRO) {
+			memcpy(&pktsz, CMSG_DATA(cmsg), sizeof(pktsz));
+			break;
+		}
+	}
+
+	return pktsz;
+}
 #endif

 void
@@ -877,9 +894,10 @@
 	struct mmsghdr *mmsg, *mhdr;
 	struct iovec *iov;
 	char *payload;
-
 #if defined(__linux__) && defined(UDP_GRO)
-	if (!fill && gro)
+	char *cmsgs;
+
+	if (gro && !fill)
 		paylen = IP_MAXPACKET;
 #endif

@@ -893,17 +911,29 @@
 		err(1, "calloc payload");
 	if (fill)
 		arc4random_buf(payload, packets * paylen);
+#if defined(__linux__) && defined(UDP_GRO)
+	if (gro && !fill &&
+	    (payload = calloc(packets, CMSG_SPACE(sizeof(int)))) == NULL)
+		err(1, "calloc cmsg");
+#endif

 	mhdr = mmsg;
 	while (packets > 0) {
 		mhdr->msg_hdr.msg_iov = iov;
 		mhdr->msg_hdr.msg_iovlen = 1;
+#if defined(__linux__) && defined(UDP_GRO)
+		if (gro && !fill) {
+			mhdr->msg_hdr.msg_control = cmsgs;
+			mhdr->msg_hdr.msg_controllen = CMSG_SPACE(sizeof(int));
+		}
+#endif
 		iov->iov_base = payload;
 		iov->iov_len = paylen;

 		mhdr++;
 		iov++;
 		payload += paylen;
+		cmsgs += CMSG_SPACE(sizeof(int));
 		packets--;
 	}

@@ -915,6 +945,7 @@
 {
 	free(mmsg->msg_hdr.msg_iov->iov_base);
 	free(mmsg->msg_hdr.msg_iov);
+	free(mmsg->msg_hdr.msg_control);
 	free(mmsg);
 }

@@ -1002,9 +1033,6 @@
 	struct mmsghdr *mmsg;
 	char *payload;
 	size_t udplen;
-#if defined(__linux__) && defined(UDP_GRO)
-	size_t total_received_payload = 0;
-#endif
 	ssize_t rcvlen;
 	socklen_t len;
 	int pkts;
@@ -1096,14 +1124,27 @@
 		}
 		timerclear(final);
 		bored = 0;
-		packet += pkts;
 #if defined(__linux__) && defined(UDP_GRO)
 		if (gro) {
-			int i;
-			for (i = 0; i < pkts; i++)
-				total_received_payload += mmsg[i].msg_len;
+			int i, packet_size;
+			/* XXX: assume that all msgs have the same GSO size */
+			for (i = 0; i < pkts; i++) {
+				packet_size = getgro_size(&mmsg[i].msg_hdr);
+				if (packet_size == 0) // XXX
+{
+printf("gro fail %u\n", mmsg[i].msg_len);
+					packet_size = mmsg[i].msg_len;
+}
+				else
+					printf("gro works\n");
+				packet += mmsg[i].msg_len / packet_size;
+				if (mmsg[i].msg_len % packet_size != 0)
+					packet++;
+			}
+			pkts = 0;
 		}
 #endif
+		packet += pkts;
 	}

 	if (gettimeofday(&end, NULL) == -1)
@@ -1117,11 +1158,6 @@
 		/* new final is duration without packets */
 		timersub(&tmp, final, final);
 	}
-#if defined(__linux__) && defined(UDP_GRO)
-	/* XXX: assume that all msgs will be of size rcvlen */
-	if (gro)
-		packet = total_received_payload / rcvlen;
-#endif
 	status_init("recv", syscall, packet, paylen, udp_family, &begin, &end);
 	if (mmsglen)
 		mmsg_free(mmsg);
 		

@moritzbuhl
Copy link
Collaborator Author

The new commit also does UDP segment offload for sendmmsg(2).

root@t0:~/udpbench# ./udpbench -r root@t1 -R /root/udpbench/udpbench -m 1024 -t 10 -b 655350000 -l 1420 -G send 10.0.0.2
root@t1's password:
peername: 10.0.0.2 12345
sockname: 10.0.0.1 50563
recv: syscalls 8094, packets 8274991, frame 1, payload 1420, ip 1448, ether 1486, begin 1744876741.400318, end 1744876751.351043, duration 9.950725, bit/s 9.886023e+09, start 1744876741.244992, stop 1744876752.491345
send: syscalls 176, packets 8275354, frame 1, payload 1420, ip 1448, ether 1486, begin 1744876741.400510, end 1744876751.246762, duration 9.846252, bit/s 9.991356e+09, start 1744876741.246667, stop 1744876751.247076

@moritzbuhl
Copy link
Collaborator Author

There are still some problems with -l values less than or equal to 520:

root@t0:~/udpbench# ./udpbench -r root@t1 -R /root/udpbench/udpbench -m 1024 -t 5 -b 655350000 -l 521 -G recv 10.0.0.1
sockname: 10.0.0.1 12345
peername: 10.0.0.2 34380
packet from UDP_GRO=10206582, tot_rcvd/rcvlen=10206581
send: syscalls 81, packets 10321125, frame 1, payload 521, ip 549, ether 587, begin 1744878726.682971, end 1744878731.533548, duration 4.850577, bit/s 9.992214e+09, start 1744878726.533454, stop 1744878731.533816
recv: syscalls 316, packets 10206582, frame 1, payload 521, ip 549, ether 587, begin 1744878726.686358, end 1744878731.642382, duration 4.956024, bit/s 9.671081e+09, start 1744878725.081107, stop 1744878732.782950

root@t0:~/udpbench# ./udpbench -r root@t1 -R /root/udpbench/udpbench -m 1024 -t 5 -b 655350000 -l 520 -G recv 10.0.0.1
sockname: 10.0.0.1 12345
peername: 10.0.0.2 49440
udpbench send: send: Message too long
c^C
root@t0:~/udpbench# ./udpbench -r root@t1 -R /root/udpbench/udpbench -m 1024 -t 10 -b 655350000 -l 20 -G recv 10.0.0.1
sockname: 10.0.0.1 12345
peername: 10.0.0.2 58993
udpbench send: send: Message too long


^C
root@t0:~/udpbench# ./udpbench -r root@t1 -R /root/udpbench/udpbench -m 1024 -t 10 -b 655350000 -l 500 -G recv 10.0.0.1
sockname: 10.0.0.1 12345
peername: 10.0.0.2 41950
udpbench send: send: Invalid argument

@moritzbuhl
Copy link
Collaborator Author

Changing `-m` still led to EMSGSIZE, which means the limit is on the number of packets per msghdr. 0xffff/520 = 126, 0xffff/521 = 125, and since 521 works, 125 pkts per msghdr seems to be the limit.

@moritzbuhl moritzbuhl changed the title Add -G flag to enable GRO in Linux. Add -G flag to enable UDP segment offload in Linux. Apr 17, 2025
@moritzbuhl
Copy link
Collaborator Author

Here is a link to the initial GSO patch: https://lwn.net/ml/netdev/20180417200059.30154-6-willemdebruijn.kernel@gmail.com/

@bluhm
Copy link
Owner

bluhm commented May 23, 2025

Could you please rebase it and provide a man page update and a test in the Linux GNUmakefile?

@bluhm bluhm merged commit 5643123 into bluhm:master May 24, 2025
1 check passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants