From 4b650575cf22dd3bf1b820e027836a331305c4a9 Mon Sep 17 00:00:00 2001 From: Zollner Robert Date: Sat, 13 Feb 2016 07:49:44 +0200 Subject: [PATCH 1/6] RIB Integration Proof of concept. Since the interfaces are still available to the kernel, Quagga can listen and install routes for the interfaces assigned to netmap-fwd. The Ribsync module will listen for those RIB events and will update its internal routing table accordingly. First test: create ribsync module, hook into the event system and listen for RIB events --- Makefile | 4 +- netmap-fwd.c | 8 +++ ribsync.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++ ribsync.h | 6 ++ 4 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 ribsync.c create mode 100644 ribsync.h diff --git a/Makefile b/Makefile index 2520484..7cba961 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ OUT=netmap-fwd OBJS=arp.o cleanup.o cli.o config.o ether.o event.o icmp.o if.o inet.o -OBJS+=ip.o net.o netmap.o netmap-fwd.o radix.o util.o +OBJS+=ip.o net.o netmap.o netmap-fwd.o radix.o util.o ribsync.o INCLUDES=arp.h cleanup.h cli.h config.h counters.h ether.h event.h icmp.h -INCLUDES+=if.h inet.h ip.h net.h netmap.h radix.h util.h +INCLUDES+=if.h inet.h ip.h net.h netmap.h radix.h util.h ribsync.h LDFLAGS=-L/usr/local/lib -levent -lutil -lucl CCFLAGS=-O2 -fPIC -g -Wall -Wshadow -Wcast-qual -Wcast-align -Wwrite-strings diff --git a/netmap-fwd.c b/netmap-fwd.c index 0cf53ff..1038934 100644 --- a/netmap-fwd.c +++ b/netmap-fwd.c @@ -46,6 +46,7 @@ #include "event.h" #include "if.h" #include "inet.h" +#include "ribsync.h" #include "util.h" #ifndef PREFIX @@ -129,6 +130,7 @@ main(int argc, char **argv) printf("error: cannot initialize the inet data structures.\n"); exit(1); } + ribsync_init(); while (argc > 0) { ifn = argv[0]; @@ -161,6 +163,12 @@ main(int argc, char **argv) cleanup(); exit(1); } + if (ribsync_open() == -1) { + printf("cannot open the kernel PF_ROUTE socket.\n"); + cleanup(); + exit(1); + } + 
event_base_dispatch(ev_get_base()); cleanup(); diff --git a/ribsync.c b/ribsync.c new file mode 100644 index 0000000..f454051 --- /dev/null +++ b/ribsync.c @@ -0,0 +1,159 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "cleanup.h" +#include "cli.h" +#include "event.h" +#include "inet.h" +#include "ribsync.h" + +union rtsocket_msg { + char buf[1024]; + struct rt_msghdr rtm; + struct if_msghdr ifm; + struct ifa_msghdr ifam; + struct if_announcemsghdr ifann; +}; + +static int +ribsync_cli_stats(struct cli *cli, struct cli_args *args) +{ + const char *p; + + if (1 == args->args) { + p = "RIBSYNC STATISTICS\n"; + if (cli_obuf_append(cli, p, strlen(p)) == -1) + return (-1); + } + + return (0); +} + +int +ribsync_init(void){ + /* Register the ribsync cli command. */ + cli_cmd_add("ribsync", "ribsync - monitors kernel routing table\n", ribsync_cli_stats, NULL); + + // cleanup_add(ribsync_cleanup, NULL); + return 0; +} + +struct sockaddr_route { + struct sockaddr_in route_dst; + struct sockaddr_in route_mask; + struct sockaddr_in route_gw; + int route_flasgs; +}; + +static struct sockaddr_route +parse_rt_addr(const union rtsocket_msg *msg_data, size_t len, int addrs_mask, size_t ppos, int rt_flags) +{ + size_t i=0; + int maskvec[] = {RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_GENMASK, RTA_IFP, RTA_IFA, RTA_AUTHOR, RTA_BRD}; + + struct sockaddr_route rt_addr; + while (ppos < len && i < sizeof(maskvec)/sizeof(maskvec[0])) { + + if (addrs_mask & maskvec[i]) { + const struct sockaddr *sa = (const struct sockaddr *)((const char *)msg_data + ppos); + + if ( maskvec[i] == RTA_DST) { + rt_addr.route_dst = *(const struct sockaddr_in*)sa; + + }else if ( maskvec[i] == RTA_GATEWAY) { + rt_addr.route_gw = *(const struct sockaddr_in*)sa; + + }else if ( maskvec[i] == RTA_NETMASK) { + rt_addr.route_mask = *(const struct sockaddr_in*)sa; + } + + // jump to next socketaddr stuct + ppos += sa->sa_len; + } + i++; + } 
+ + printf("R:%s", inet_ntoa(rt_addr.route_dst.sin_addr)); + printf("/%s", inet_ntoa(rt_addr.route_mask.sin_addr)); + printf("->%s\n", inet_ntoa(rt_addr.route_gw.sin_addr)); + + return rt_addr; +} + +static void +ribsync_ev_data(evutil_socket_t socket, short event, void *data) { + + union rtsocket_msg recv_data; + struct sockaddr_route rt_addr; + + recv_data.rtm.rtm_msglen = 4; + + int r1 = recv(socket, &recv_data, sizeof(recv_data), 0); + if (-1 == r1) { + printf("[EE] pf_socket recv error"); + return; + } + + if (r1 < 4 || r1 < recv_data.rtm.rtm_msglen) { + printf("SHORT READ (have %d want %hu), SKIPPING.\n", r1, recv_data.rtm.rtm_msglen); + return; + } + + if ( 0 != recv_data.rtm.rtm_errno ) { + printf("Route message contains errors(%d), SKIPPING.\n", recv_data.rtm.rtm_errno); + return; + } + + printf("Received %d bytes. Version %d, Type %#x, Len %d, Err: %d\n", r1, + recv_data.rtm.rtm_version, + recv_data.rtm.rtm_type, + recv_data.rtm.rtm_msglen, + recv_data.rtm.rtm_errno + ); + + switch (recv_data.rtm.rtm_type) { + case RTM_ADD: + printf("Add route: "); + rt_addr = parse_rt_addr(&recv_data, r1,recv_data.rtm.rtm_addrs, sizeof(struct rt_msghdr), recv_data.rtm.rtm_flags); + break; + case RTM_DELETE: + printf("Remove route: "); + rt_addr = parse_rt_addr(&recv_data, r1,recv_data.rtm.rtm_addrs, sizeof(struct rt_msghdr), recv_data.rtm.rtm_flags); + break; + // case RTM_CHANGE: + // case RTM_NEWADDR: + // case RTM_DELADDR: + // case RTM_IFINFO: + // case RTM_IFANNOUNCE: + } +} + +int ribsync_open(void) { + + int rt_socket = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC); + if (-1 == rt_socket) { + return -1; + } + + fd_set fds; + FD_ZERO(&fds); + FD_SET(rt_socket, &fds); + + // Setup the event for pf_route socket. 
+ struct event *ev; + ev = event_new(ev_get_base(), rt_socket, EV_READ | EV_PERSIST, ribsync_ev_data, NULL); + event_add(ev, NULL); + + return 0; +} \ No newline at end of file diff --git a/ribsync.h b/ribsync.h new file mode 100644 index 0000000..65e491d --- /dev/null +++ b/ribsync.h @@ -0,0 +1,6 @@ +#ifndef __RIB_SYNC__ +#define __RIB_SYNC__ +int ribsync_init(void); +int ribsync_open(void); + +#endif \ No newline at end of file From e0da886e98fd45be01c641f1682b4621ab1203b9 Mon Sep 17 00:00:00 2001 From: Zollner Robert Date: Sat, 13 Feb 2016 16:04:49 +0200 Subject: [PATCH 2/6] Fix sockaddr align when parsing rt_sock data. There is some 64-bit (long) alignment involved here; I have no idea where it is documented. I have used the dnsmasq code as a reference: https://github.com/liquidm/dnsmasq/blob/8b285b8cd2e289e532b106a797d540f016eb7ab0/src/bpf.c#L436 --- ribsync.c | 13 ++++++++++--- ribsync.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ribsync.c b/ribsync.c index f454051..bdaa18d 100644 --- a/ribsync.c +++ b/ribsync.c @@ -78,8 +78,15 @@ parse_rt_addr(const union rtsocket_msg *msg_data, size_t len, int addrs_mask, si rt_addr.route_mask = *(const struct sockaddr_in*)sa; } - // jump to next socketaddr stuct - ppos += sa->sa_len; + // jump to next socketaddr struct + size_t diff = sa->sa_len; + if (!diff) { + diff = sizeof(long); + } + ppos += diff; + if (diff & (sizeof(long) - 1)) { + ppos += sizeof(long) - (diff & (sizeof(long) - 1)); + } } i++; } @@ -156,4 +163,4 @@ int ribsync_open(void) { event_add(ev, NULL); return 0; -} \ No newline at end of file +} diff --git a/ribsync.h b/ribsync.h index 65e491d..2354700 100644 --- a/ribsync.h +++ b/ribsync.h @@ -3,4 +3,4 @@ int ribsync_init(void); int ribsync_open(void); -#endif \ No newline at end of file +#endif From f69b6bb8c86eac3383b153924ca0290c7cc01d81 Mon Sep 17 00:00:00 2001 From: Zollner Robert Date: Sat, 13 Feb 2016 18:36:40 +0200 Subject: [PATCH 3/6] Added two public route add/del functions to the inet
module. Could not use existing route(add/del) functions because I would have to expose internal state outside of inet module and I don't want to do that. I basically used inet_cli_route_(add/del) and took out the cli parsing stuff. --- inet.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ inet.h | 3 ++ ribsync.c | 36 +++++++++++++++++---- 3 files changed, 127 insertions(+), 7 deletions(-) diff --git a/inet.c b/inet.c index 99b3d88..4c0bc24 100644 --- a/inet.c +++ b/inet.c @@ -680,3 +680,98 @@ inet_cli_route(struct cli *cli, struct cli_args *args) return (err); } + +/* + * Calculate prefix length of a netmask passed as a sockaddr ipv4 structure + */ +static int +calc_masklen_ipv4(struct sockaddr_in *addr) +{ + int prefixLength = 0; + uint32_t m = ntohl( *(uint32_t*) &addr->sin_addr); + while (m & 0x80000000) { + prefixLength++; + m = m << 1; + } + + if(prefixLength == 0) { + // This is a /32 - host + return 32; + } + return prefixLength; +} + +int +inet_route_add_ipv4( + struct sockaddr_in addr_net, + struct sockaddr_in addr_mask, + struct sockaddr_in addr_gw, + int flags) +{ + struct inet *inet; + inet = &g_inet; + struct inet_rtentry *rt; + struct radix_node *rn; + + if (calc_masklen_ipv4(&addr_mask) < 32) { + addr_net.sin_addr.s_addr &= addr_mask.sin_addr.s_addr; + } + + rn = inet->rnh->rnh_matchaddr( &addr_gw, inet->rnh); + if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0) { + if(rn == NULL) { + printf("\n[DBG]-add- rn is null\n"); + } + printf("\n[WARN]-add- Network is unreachable\n"); + return -1; + } + + rt = (struct inet_rtentry *)rn; + if (inet_addroute(&addr_net, &addr_gw, &addr_mask, flags, rt->nmif) != 0) { + printf ("\n[WARN]-add- Cannot add route..(allready in table?)\n"); + return -1; + } + printf("\t -OK-\n"); + return 0; +} + +int +inet_route_del_ipv4( + struct sockaddr_in addr_net, + struct sockaddr_in addr_mask, + struct sockaddr_in addr_gw, int flags) +{ + + struct inet *inet; + inet = &g_inet; + struct inet_rtentry *rt; + struct 
radix_node *rn; + + if ( calc_masklen_ipv4(&addr_mask) < 32) { + addr_net.sin_addr.s_addr &= addr_mask.sin_addr.s_addr; + } + + rn = inet->rnh->rnh_lookup(&addr_net, &addr_mask, inet->rnh); + if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0){ + printf( "\n[WARN]-del- Route has not been found\n"); + return -1; + } + + rt = (struct inet_rtentry *)rn; + /* Check if the gateway address matches. */ + if ( rt->gw.sin_addr.s_addr != addr_gw.sin_addr.s_addr) { + printf( "\n[WARN]-del- Route has not been found (gw not match)\n\n"); + return -1; + } + + rn = inet->rnh->rnh_deladdr(&addr_net, &addr_mask, inet->rnh); + if (rn == NULL) { + printf( "\n[WARN]-del- Route could not be deleted\n"); + return -1; + } + + rt = (struct inet_rtentry *)rn; + free(rt); + printf("\t -OK-\n"); + return 0; +} diff --git a/inet.h b/inet.h index 3826bca..42704a7 100644 --- a/inet.h +++ b/inet.h @@ -66,3 +66,6 @@ void inet_addr_if_free(struct nm_if *); struct inet_addr *inet_get_if_addr(struct nm_if *); struct inet_addr *inet_our_addr(struct in_addr *); struct inet_addr *inet_our_broadcast(struct in_addr *); + +int inet_route_add_ipv4( struct sockaddr_in, struct sockaddr_in, struct sockaddr_in, int); +int inet_route_del_ipv4( struct sockaddr_in, struct sockaddr_in, struct sockaddr_in, int); diff --git a/ribsync.c b/ribsync.c index bdaa18d..0d22f78 100644 --- a/ribsync.c +++ b/ribsync.c @@ -49,6 +49,13 @@ ribsync_init(void){ return 0; } +void +dump_sockaddr_in(struct sockaddr_in *addr) +{ + printf(" sin_family: %d \n",addr->sin_family); + printf(" sin_addr: %s\n",inet_ntoa(addr->sin_addr)); +} + struct sockaddr_route { struct sockaddr_in route_dst; struct sockaddr_in route_mask; @@ -91,9 +98,9 @@ parse_rt_addr(const union rtsocket_msg *msg_data, size_t len, int addrs_mask, si i++; } - printf("R:%s", inet_ntoa(rt_addr.route_dst.sin_addr)); + printf("%s", inet_ntoa(rt_addr.route_dst.sin_addr)); printf("/%s", inet_ntoa(rt_addr.route_mask.sin_addr)); - printf("->%s\n", 
inet_ntoa(rt_addr.route_gw.sin_addr)); + printf(" -> %s", inet_ntoa(rt_addr.route_gw.sin_addr)); return rt_addr; } @@ -122,21 +129,24 @@ ribsync_ev_data(evutil_socket_t socket, short event, void *data) { return; } - printf("Received %d bytes. Version %d, Type %#x, Len %d, Err: %d\n", r1, + /*printf("Received %d bytes. Version %d, Type %#x, Len %d\n", r1, recv_data.rtm.rtm_version, recv_data.rtm.rtm_type, - recv_data.rtm.rtm_msglen, - recv_data.rtm.rtm_errno - ); + recv_data.rtm.rtm_msglen + );*/ + int rt_status=0; switch (recv_data.rtm.rtm_type) { case RTM_ADD: printf("Add route: "); rt_addr = parse_rt_addr(&recv_data, r1,recv_data.rtm.rtm_addrs, sizeof(struct rt_msghdr), recv_data.rtm.rtm_flags); + rt_status = inet_route_add_ipv4(rt_addr.route_dst, rt_addr.route_mask, rt_addr.route_gw, recv_data.rtm.rtm_flags); break; case RTM_DELETE: - printf("Remove route: "); + printf("Del route: "); rt_addr = parse_rt_addr(&recv_data, r1,recv_data.rtm.rtm_addrs, sizeof(struct rt_msghdr), recv_data.rtm.rtm_flags); + rt_status = inet_route_del_ipv4(rt_addr.route_dst, rt_addr.route_mask, rt_addr.route_gw, recv_data.rtm.rtm_flags); + break; // case RTM_CHANGE: // case RTM_NEWADDR: @@ -144,6 +154,18 @@ ribsync_ev_data(evutil_socket_t socket, short event, void *data) { // case RTM_IFINFO: // case RTM_IFANNOUNCE: } + if( -1 == rt_status) { + printf("[DBG] Route dst\n"); + dump_sockaddr_in(&rt_addr.route_dst); + + printf("[DBG] Route netmask\n"); + dump_sockaddr_in(&rt_addr.route_mask); + + printf("[DBG] Route gateway\n"); + dump_sockaddr_in(&rt_addr.route_gw); + printf("\n"); + } + fflush(stdout); } int ribsync_open(void) { From 73b9e56d9cce0eb39f5460caf2d2c71a5a5e0123 Mon Sep 17 00:00:00 2001 From: Zollner Robert Date: Sat, 13 Feb 2016 19:04:51 +0200 Subject: [PATCH 4/6] Increase cli buffer sizes so it can print full bgp table This will increase mem usage (~200mb) so this should be just a temp. 
workaround --- cli.c | 2 +- util.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cli.c b/cli.c index c8d4553..b258a47 100644 --- a/cli.c +++ b/cli.c @@ -48,7 +48,7 @@ #include "util.h" #define CMDMAXSZ 64 -#define MAXCLIBUF 4096 +#define MAXCLIBUF 16777216 /* CLI client data. */ struct cli { diff --git a/util.c b/util.c index 68c54f2..5bde2c6 100644 --- a/util.c +++ b/util.c @@ -37,7 +37,9 @@ #include "util.h" -#define MAXBUFSZ (BUFSZ * 1024) +// default was 256 * 1024 which is enough fo 70 routes +// 256 * 131072 should be enough for more than ~1.1 mil +#define MAXBUFSZ (BUFSZ * 131072) int dprintf(const char *fmt, ...) From 67fc85c119a3099ca20bd6540967f461781a8d59 Mon Sep 17 00:00:00 2001 From: Zollner Robert Date: Sat, 13 Feb 2016 19:07:26 +0200 Subject: [PATCH 5/6] resid could be greater than MAXCLIBUF, so we should handle that also --- cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli.c b/cli.c index b258a47..6f7859f 100644 --- a/cli.c +++ b/cli.c @@ -500,7 +500,7 @@ cli_ev_read(struct cli *cli) int more; ssize_t i, len; - if (cli->resid == MAXCLIBUF) { + if (cli->resid >= MAXCLIBUF) { DPRINTF( "dropping cli connection - unsupported cli command (%d)\n", cli->fd); From e2b3f9f6b1d8311ef4293b0ed276250f71c61c64 Mon Sep 17 00:00:00 2001 From: Zollner Robert Date: Sat, 13 Feb 2016 19:34:02 +0200 Subject: [PATCH 6/6] Some style formatting to be more in line with the project --- ribsync.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ribsync.c b/ribsync.c index 0d22f78..7462471 100644 --- a/ribsync.c +++ b/ribsync.c @@ -41,7 +41,8 @@ ribsync_cli_stats(struct cli *cli, struct cli_args *args) } int -ribsync_init(void){ +ribsync_init(void) +{ /* Register the ribsync cli command. 
*/ cli_cmd_add("ribsync", "ribsync - monitors kernel routing table\n", ribsync_cli_stats, NULL); @@ -64,7 +65,7 @@ struct sockaddr_route { }; static struct sockaddr_route -parse_rt_addr(const union rtsocket_msg *msg_data, size_t len, int addrs_mask, size_t ppos, int rt_flags) +parse_rt_addr(const union rtsocket_msg *msg_data, size_t len, int addrs_mask, size_t ppos) { size_t i=0; int maskvec[] = {RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_GENMASK, RTA_IFP, RTA_IFA, RTA_AUTHOR, RTA_BRD}; @@ -106,8 +107,8 @@ parse_rt_addr(const union rtsocket_msg *msg_data, size_t len, int addrs_mask, si } static void -ribsync_ev_data(evutil_socket_t socket, short event, void *data) { - +ribsync_ev_data(evutil_socket_t socket, short event, void *data) +{ union rtsocket_msg recv_data; struct sockaddr_route rt_addr; @@ -139,12 +140,12 @@ ribsync_ev_data(evutil_socket_t socket, short event, void *data) { switch (recv_data.rtm.rtm_type) { case RTM_ADD: printf("Add route: "); - rt_addr = parse_rt_addr(&recv_data, r1,recv_data.rtm.rtm_addrs, sizeof(struct rt_msghdr), recv_data.rtm.rtm_flags); + rt_addr = parse_rt_addr(&recv_data, r1,recv_data.rtm.rtm_addrs, sizeof(struct rt_msghdr)); rt_status = inet_route_add_ipv4(rt_addr.route_dst, rt_addr.route_mask, rt_addr.route_gw, recv_data.rtm.rtm_flags); break; case RTM_DELETE: printf("Del route: "); - rt_addr = parse_rt_addr(&recv_data, r1,recv_data.rtm.rtm_addrs, sizeof(struct rt_msghdr), recv_data.rtm.rtm_flags); + rt_addr = parse_rt_addr(&recv_data, r1,recv_data.rtm.rtm_addrs, sizeof(struct rt_msghdr)); rt_status = inet_route_del_ipv4(rt_addr.route_dst, rt_addr.route_mask, rt_addr.route_gw, recv_data.rtm.rtm_flags); break; @@ -168,8 +169,8 @@ ribsync_ev_data(evutil_socket_t socket, short event, void *data) { fflush(stdout); } -int ribsync_open(void) { - +int ribsync_open(void) +{ int rt_socket = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC); if (-1 == rt_socket) { return -1;