From 72f3642bc9d0e972f6b42b33566cc368fad7f13e Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 15 Apr 2014 20:24:13 +0000 Subject: [PATCH 001/148] Force dwarf-2 for now. It's dumb and it's been fixed in subsequent versions of stable/10 and head, but not on what I'm running. dwarf-4 doesn't get recognised by some of the base tools. --- lib/libuinet/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index eb68988..c0dcf80 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -2,7 +2,7 @@ # Derived from FreeBSD auto-generated kernel Makefile and # machine-specific Makefile templates # -DEBUG=-ggdb -O0 +DEBUG=-gdwarf-2 -O0 TOPDIR?=${CURDIR}/../.. S=${TOPDIR}/sys From ea2cd41d3cfc4da49c5ce040d6f3884d402150f1 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 15 Apr 2014 20:34:57 +0000 Subject: [PATCH 002/148] Initial hacked up sysctl unix socket. It's incomplete and totally barnjacked. But right now I just want to verify that the thread is created and the socket is correctly created. --- bin/passive/Makefile | 4 +- bin/passive/passive.c | 10 +++ bin/passive/sysctl_api.c | 128 +++++++++++++++++++++++++++++++++++++++ bin/passive/sysctl_api.h | 12 ++++ 4 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 bin/passive/sysctl_api.c create mode 100644 bin/passive/sysctl_api.h diff --git a/bin/passive/Makefile b/bin/passive/Makefile index 372f659..477ba2c 100644 --- a/bin/passive/Makefile +++ b/bin/passive/Makefile @@ -1,9 +1,9 @@ - - TOPDIR?=${CURDIR}/../.. 
PROG=passive +SRCS=passive.c sysctl_api.c + UINET_LIBS=uinet CFLAGS= -I${TOPDIR}/lib/libev diff --git a/bin/passive/passive.c b/bin/passive/passive.c index 62d2f15..73c80d1 100644 --- a/bin/passive/passive.c +++ b/bin/passive/passive.c @@ -30,10 +30,12 @@ #include #include #include +#include #include #include "uinet_api.h" +#include "sysctl_api.h" #define EV_STANDALONE 1 #define EV_UINET_ENABLE 1 @@ -471,6 +473,7 @@ int main (int argc, char **argv) struct uinet_in_addr tmpinaddr; int ifnetmap_count = 0; int ifpcap_count = 0; + pthread_t sysctl_thr; for (i = 0; i < MAX_INTERFACES; i++) { interfaces[i].loop = NULL; @@ -689,6 +692,10 @@ int main (int argc, char **argv) interface_thread_start, &interfaces[i]); } + error = pthread_create(&sysctl_thr, NULL, passive_sysctl_listener, NULL); + if (error != 0) { + printf("Failed to bring up sysctl thread: %d\n", errno); + } for (i = 0; i < num_interfaces; i++) { if (0 == interfaces[i].thread_create_result) @@ -699,5 +706,8 @@ int main (int argc, char **argv) uinet_ifdestroy_byname(interfaces[i].alias); } + /* XXX only do this if we successfully started the thread! */ + pthread_join(sysctl_thr, NULL); + return (0); } diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c new file mode 100644 index 0000000..7f39498 --- /dev/null +++ b/bin/passive/sysctl_api.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2014 Adrian Chadd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "uinet_api.h" +#include "sysctl_api.h" + + +#define SYSCTL_BUF_LEN 131072 +#define SYSCTL_MAX_BUF_LEN 1048576 + +void * +passive_sysctl_listener(void *arg) +{ + int s, ns, r; + struct sockaddr_un sun; + char *rbuf, *wbuf; + + rbuf = malloc(SYSCTL_BUF_LEN); + if (rbuf == NULL) { + printf("%s: malloc failed: %d\n", __func__, errno); + return (NULL); + } + + bzero(&sun, sizeof(sun)); + strcpy(sun.sun_path, "/tmp/sysctl.sock"); + sun.sun_len = 0; + sun.sun_family = AF_UNIX; + + printf("sysctl_listener: starting listener on %s\n", sun.sun_path); + s = socket(AF_UNIX, SOCK_STREAM, 0); + if (s < 0) { + fprintf(stderr, "%s: socket failed: %d\n", __func__, errno); + return NULL; + } + + r = bind(s, (struct sockaddr *) &sun, sizeof(sun)); + if (r < 0) { + fprintf(stderr, "%s: bind failed: %d\n", __func__, errno); + return NULL; + } + + r = listen(s, 10); + if (r < 0) { + fprintf(stderr, "%s: listen failed: %d\n", __func__, errno); + return NULL; + } + + /* + * Yes, I could make this threaded or non-blocking.. 
+ */ + for (;;) { + struct sockaddr_un sun_n; + socklen_t sl; + int len; + + ns = accept(s, (struct sockaddr *) &sun_n, &sl); + if (ns < 0) { + fprintf(stderr, "%s: accept failed: %d\n", __func__, errno); + continue; + } + + /* Read data - assume we can get it all in one hit */ + len = read(ns, rbuf, SYSCTL_BUF_LEN); + + /* + * If the read is less than the request header, then we + * just turf it for now. + */ + if (len < sizeof(struct sysctl_req_hdr)) { + fprintf(stderr, "%s: fd %d: len=%d, too short\n", __func__, ns, len); + goto next; + } + + /* + * Validate length fields and payload + */ + + /* + * Allocate response buffer + */ + + /* Issue sysctl */ + + /* Write data */ + +next: + /* Close */ + close(ns); + } + + return NULL; +} diff --git a/bin/passive/sysctl_api.h b/bin/passive/sysctl_api.h new file mode 100644 index 0000000..2fa5ca6 --- /dev/null +++ b/bin/passive/sysctl_api.h @@ -0,0 +1,12 @@ +#ifndef __SYSCTL_API_H__ +#define __SYSCTL_API_H__ + +struct sysctl_req_hdr { + uint32_t sysctl_str_len; + uint32_t sysctl_dst_len; + uint32_t sysctl_src_len; +}; + +extern void * passive_sysctl_listener(void *arg); + +#endif From 47175cdd8a40bdd11625c3d4e92a35fa12b3df4f Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 15 Apr 2014 21:55:12 +0000 Subject: [PATCH 003/148] Paranoia? --- lib/libuinet/uinet_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index 19d6430..2079158 100644 --- a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -439,7 +439,7 @@ uinet_soaccept(struct uinet_socket *listener, struct uinet_sockaddr **nam, struc struct socket *head = (struct socket *)listener; struct socket *so; struct sockaddr *sa = NULL; - int error; + int error = 0; if (nam) *nam = NULL; From c404a89d1b7cb62119e3987109831052d18c8600 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 15 Apr 2014 21:55:19 +0000 Subject: [PATCH 004/148] Add in a hack to expose the general sysctlbyname API. 
--- lib/libuinet/api_include/uinet_config.h | 9 +++++++++ lib/libuinet/uinet_api.symlist | 1 + lib/libuinet/uinet_config_kernel.c | 12 +++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/libuinet/api_include/uinet_config.h b/lib/libuinet/api_include/uinet_config.h index 75d336d..7b68d81 100644 --- a/lib/libuinet/api_include/uinet_config.h +++ b/lib/libuinet/api_include/uinet_config.h @@ -150,6 +150,15 @@ const char *uinet_ifgenericname(uinet_ifcookie_t cookie); */ int uinet_config_blackhole(uinet_blackhole_t action); +/* + * general sysctl interface. + * + * XXX doesn't belong here! + */ +int +uinet_sysctl(char *name, char *oldp, size_t *oldplen, + char *newp, size_t newplen, size_t *retval, int flags); + #ifdef __cplusplus } #endif diff --git a/lib/libuinet/uinet_api.symlist b/lib/libuinet/uinet_api.symlist index 346278a..e2e7b51 100644 --- a/lib/libuinet/uinet_api.symlist +++ b/lib/libuinet/uinet_api.symlist @@ -67,3 +67,4 @@ uinet_synfilter_deferral_get_cookie uinet_synfilter_getconninfo uinet_synfilter_getl2info uinet_synfilter_install +uinet_sysctl diff --git a/lib/libuinet/uinet_config_kernel.c b/lib/libuinet/uinet_config_kernel.c index 91798bd..bcfda27 100644 --- a/lib/libuinet/uinet_config_kernel.c +++ b/lib/libuinet/uinet_config_kernel.c @@ -30,7 +30,6 @@ #include "uinet_config.h" #include "uinet_config_internal.h" - int uinet_config_blackhole(uinet_blackhole_t action) { @@ -67,3 +66,14 @@ uinet_config_blackhole(uinet_blackhole_t action) &val, sizeof(int), NULL, 0); return (error); } + +int +uinet_sysctl(char *name, char *oldp, size_t *oldplen, + char *newp, size_t newplen, size_t *retval, int flags) +{ + int error; + + error = kernel_sysctlbyname(curthread, name, oldp, oldplen, + newp, newplen, retval, flags); + return (error); +} From 90f0da5cb5f9a3b0de8608f1305bb628735c60e0 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 15 Apr 2014 21:55:42 +0000 Subject: [PATCH 005/148] Flesh out more of this totally untested code. 
--- bin/passive/sysctl_api.c | 117 +++++++++++++++++++++++++++++++++++++-- bin/passive/sysctl_api.h | 18 +++++- 2 files changed, 129 insertions(+), 6 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 7f39498..9a57e55 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -36,20 +36,23 @@ #include #include #include +#include #include "uinet_api.h" +#include "uinet_config.h" #include "sysctl_api.h" - #define SYSCTL_BUF_LEN 131072 #define SYSCTL_MAX_BUF_LEN 1048576 +#define SYSCTL_MAX_STR_LEN 1024 +#define SYSCTL_MAX_REQ_BUF_LEN 1048576 void * passive_sysctl_listener(void *arg) { int s, ns, r; struct sockaddr_un sun; - char *rbuf, *wbuf; + char *rbuf, *wbuf = NULL; rbuf = malloc(SYSCTL_BUF_LEN); if (rbuf == NULL) { @@ -88,6 +91,16 @@ passive_sysctl_listener(void *arg) struct sockaddr_un sun_n; socklen_t sl; int len; + struct sysctl_req_hdr *hdr; + char *sbuf = NULL; + char *req_str = NULL; + size_t wbuf_len = 0; + size_t sbuf_len = 0; + size_t rval; + int error; + struct sysctl_resp_hdr rhdr; + + bzero(&rhdr, sizeof(rhdr)); ns = accept(s, (struct sockaddr *) &sun_n, &sl); if (ns < 0) { @@ -107,19 +120,115 @@ passive_sysctl_listener(void *arg) goto next; } + hdr = (struct sysctl_req_hdr *) rbuf; + /* * Validate length fields and payload + * + * XXX TODO type, flags, strlen, srclen + */ + if (le32toh(hdr->sysctl_req_len) != len) { + fprintf(stderr, "%s: fd %d: req_len (%d) != len (%d)\n", + __func__, + ns, + le32toh(hdr->sysctl_req_len), + len); + } + if (le32toh(hdr->sysctl_req_len) != + le32toh(hdr->sysctl_str_len) + + le32toh(hdr->sysctl_src_len) + + le32toh(hdr->sysctl_dst_len) + + sizeof(struct sysctl_req_hdr)) { + fprintf(stderr, "%s: fd %d: length mismatch\n", + __func__, + ns); + goto next; + } + + if (le32toh(hdr->sysctl_dst_len) > SYSCTL_MAX_BUF_LEN) { + fprintf(stderr, "%s: fd %d: dst_len %d > %d\n", + __func__, + ns, + le32toh(hdr->sysctl_dst_len), + SYSCTL_MAX_BUF_LEN); + goto next; + } + + /* + * Populate 
the request string. */ + req_str = malloc(le32toh(hdr->sysctl_str_len) + 1); + if (req_str == NULL) { + fprintf(stderr, "%s; fd %d: malloc failed (req_str)\n", + __func__, + ns); + goto next; + } + memcpy(req_str, rbuf + sizeof(struct sysctl_req_hdr), + le32toh(hdr->sysctl_str_len)); + req_str[le32toh(hdr->sysctl_str_len)] = '\0'; /* - * Allocate response buffer + * If there's a request buffer, populate that. */ + if (le32toh(hdr->sysctl_src_len) > 0) { + sbuf = rbuf + le32toh(hdr->sysctl_src_len); + sbuf_len = le32toh(hdr->sysctl_src_len); + } + + /* + * Allocate response buffer if requested. + */ + if (le32toh(hdr->sysctl_dst_len) > 0) { + wbuf = malloc(le32toh(hdr->sysctl_dst_len)); + if (wbuf == NULL) { + fprintf(stderr, "%s: fd %d: malloc failed: %d\n", + __func__, + ns, + errno); + goto next; + } + wbuf_len = le32toh(hdr->sysctl_dst_len); + } /* Issue sysctl */ + fprintf(stderr, + "%s: fd %d: sysctl '%s' src_len=%d, dst_len=%d\n", + __func__, + ns, + req_str, + le32toh(hdr->sysctl_src_len), + le32toh(hdr->sysctl_dst_len)); + + error = uinet_sysctl(req_str, + wbuf, &wbuf_len, + sbuf, sbuf_len, + &rval, + 0); + + fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%d, rval=%d\n", + __func__, + ns, + (int) error, + (int) wbuf_len, + (int) rval); + +#if 0 + /* + * XXX validate the response back from uinet_sysctl() + * is within bounds! 
+ */ - /* Write data */ + /* Construct our response */ + rhdr.sysctl_resp_len = htole32(sizeof(struct sysctl_resp_hdr) + wbuf_len); +#endif next: + if (wbuf != NULL) + free(wbuf); + if (req_str != NULL) + free(req_str); + /* Close */ close(ns); } diff --git a/bin/passive/sysctl_api.h b/bin/passive/sysctl_api.h index 2fa5ca6..02e3cfe 100644 --- a/bin/passive/sysctl_api.h +++ b/bin/passive/sysctl_api.h @@ -2,9 +2,23 @@ #define __SYSCTL_API_H__ struct sysctl_req_hdr { + uint32_t sysctl_req_len; /* length of the whole payload */ + uint32_t sysctl_req_type; /* Type of the message */ + uint32_t sysctl_req_flags; /* Message flags */ + + /* This is the sysctl specific stuff */ uint32_t sysctl_str_len; - uint32_t sysctl_dst_len; - uint32_t sysctl_src_len; + uint32_t sysctl_dst_len; /* result (new) */ + uint32_t sysctl_src_len; /* request (old) */ +}; + +struct sysctl_resp_hdr { + uint32_t sysctl_resp_len; + uint32_t sysctl_resp_type; + + /* This is the sysctl specific stuff */ + uint32_t sysctl_dst_len; /* response buffer length */ + uint32_t sysctl_dst_errno; /* sysctl errno value */ }; extern void * passive_sysctl_listener(void *arg); From 8595c1383bec98c7d0b9e4344a4baae84fac34f3 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 15 Apr 2014 23:43:42 +0000 Subject: [PATCH 006/148] * add a call to uinet_initialize_thread(); - it's required or things like curthread fail. * modify the IO loop to keep reading as required. It's goto hell and thus it must be rewritten. 
--- bin/passive/sysctl_api.c | 57 ++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 9a57e55..563edea 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -54,6 +54,8 @@ passive_sysctl_listener(void *arg) struct sockaddr_un sun; char *rbuf, *wbuf = NULL; + uinet_initialize_thread(); + rbuf = malloc(SYSCTL_BUF_LEN); if (rbuf == NULL) { printf("%s: malloc failed: %d\n", __func__, errno); @@ -99,6 +101,7 @@ passive_sysctl_listener(void *arg) size_t rval; int error; struct sysctl_resp_hdr rhdr; + int rlen = 0; bzero(&rhdr, sizeof(rhdr)); @@ -108,40 +111,60 @@ passive_sysctl_listener(void *arg) continue; } - /* Read data - assume we can get it all in one hit */ - len = read(ns, rbuf, SYSCTL_BUF_LEN); + /* XXX I hate gotos */ +readmore: + /* Read data */ + len = read(ns, rbuf + rlen, SYSCTL_BUF_LEN - rlen); + if (len <= 0) { + fprintf(stderr, "%s: fd %d: read returned %d, errno=%d\n", + __func__, + ns, + len, + errno); + goto next; + } + + rlen += len; /* - * If the read is less than the request header, then we - * just turf it for now. + * Not enough data? Keep reading. */ - if (len < sizeof(struct sysctl_req_hdr)) { - fprintf(stderr, "%s: fd %d: len=%d, too short\n", __func__, ns, len); - goto next; + if (rlen < sizeof(struct sysctl_req_hdr)) { + fprintf(stderr, "%s: fd %d: read %d btyes, rlen is now %d\n", + __func__, + ns, + len, + rlen); + goto readmore; } hdr = (struct sysctl_req_hdr *) rbuf; /* - * Validate length fields and payload - * - * XXX TODO type, flags, strlen, srclen + * Do we have enough data to cover the payload length? */ - if (le32toh(hdr->sysctl_req_len) != len) { - fprintf(stderr, "%s: fd %d: req_len (%d) != len (%d)\n", - __func__, - ns, - le32toh(hdr->sysctl_req_len), - len); + if (le32toh(hdr->sysctl_req_len) < rlen) { + goto readmore; } + + /* + * Ok, validate the various lengths. 
+ */ + if (le32toh(hdr->sysctl_req_len) != le32toh(hdr->sysctl_str_len) + le32toh(hdr->sysctl_src_len) - + le32toh(hdr->sysctl_dst_len) + sizeof(struct sysctl_req_hdr)) { fprintf(stderr, "%s: fd %d: length mismatch\n", __func__, ns); + fprintf(stderr, "%s: fd %d: hdr_len=%d, req_len=%d, str_len=%d, src_len=%d\n", + __func__, + ns, + (int) sizeof(struct sysctl_req_hdr), + le32toh(hdr->sysctl_req_len), + le32toh(hdr->sysctl_str_len), + le32toh(hdr->sysctl_src_len)); goto next; } From 5e7cf697915600ca44e176d801b69f50bf0ca3b3 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 15 Apr 2014 23:44:50 +0000 Subject: [PATCH 007/148] Start fleshing out a test sysctl program. --- bin/sysctl/Makefile | 14 +++++++ bin/sysctl/sysctl.c | 96 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 bin/sysctl/Makefile create mode 100644 bin/sysctl/sysctl.c diff --git a/bin/sysctl/Makefile b/bin/sysctl/Makefile new file mode 100644 index 0000000..68cec67 --- /dev/null +++ b/bin/sysctl/Makefile @@ -0,0 +1,14 @@ +TOPDIR?=${CURDIR}/../.. 
+ +PROG=sysctl + +SRCS=sysctl.c + +UINET_LIBS=uinet + +CFLAGS= -I${TOPDIR}/lib/libev -I../passive/ +LDADD= ${TOPDIR}/lib/libev/.libs/libev.a -lm -lpcap + +DEBUG_FLAGS=-g -O0 + +include ${TOPDIR}/mk/prog.mk diff --git a/bin/sysctl/sysctl.c b/bin/sysctl/sysctl.c new file mode 100644 index 0000000..fc911fc --- /dev/null +++ b/bin/sysctl/sysctl.c @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "sysctl_api.h" + +int +main(int argc, char *argv[]) +{ + + int s; + struct sockaddr_un sun; + int r; + struct sysctl_req_hdr req_hdr; + struct sysctl_resp_hdr *resp_hdr; + char *req_buf, *resp_buf; + char *req_str; + size_t reqbuf_len, respbuf_len; + + /* Fake up a request structure for now */ + req_str = strdup("net.inet.tcp.stats"); + reqbuf_len = 0; + respbuf_len = 1048576; + req_buf = NULL; + resp_buf = NULL; + + /* Ok, allocate req/resp buffers as required */ + if (reqbuf_len > 0) { + req_buf = calloc(1, reqbuf_len); + if (req_buf == NULL) + err(1, "calloc"); + } + + if (respbuf_len > 0) { + resp_buf = calloc(1, respbuf_len); + if (resp_buf == NULL) + err(1, "calloc"); + } + + /* Connect to the destination socket */ + bzero(&sun, sizeof(sun)); + + strcpy(sun.sun_path, "/tmp/sysctl.sock"); + sun.sun_len = 0; + sun.sun_family = AF_UNIX; + + s = socket(AF_UNIX, SOCK_STREAM, 0); + if (s < 0) { + err(1, "socket"); + } + + r = connect(s, (struct sockaddr *) &sun, sizeof(struct sockaddr_un)); + if (r < 0) { + err(1, "connect"); + } + + /* Craft request header */ + bzero(&req_hdr, sizeof(req_hdr)); + req_hdr.sysctl_req_len = htole32(sizeof(req_hdr) + strlen(req_str) + reqbuf_len); + req_hdr.sysctl_req_type = 0; /* XXX */ + req_hdr.sysctl_req_flags = 0; /* XXX */ + req_hdr.sysctl_str_len = htole32(strlen(req_str)); + req_hdr.sysctl_dst_len = htole32(respbuf_len); + req_hdr.sysctl_src_len = htole32(reqbuf_len); + + /* Send request */ + r = write(s, &req_hdr, sizeof(req_hdr)); + if (r 
!= sizeof(req_hdr)) { + err(1, "write (hdr)"); + } + r = write(s, req_str, strlen(req_str)); + if (r != strlen(req_str)) { + err(1, "write (req_str)"); + } + r = write(s, req_buf, reqbuf_len); + if (r != reqbuf_len) { + err(1, "write (req_buf)"); + } + + /* Read response */ + + /* Done */ + close(s); + + exit(0); +} From 5d9854237d9f417aa6cc86a1d04433b9a668ce4c Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 18:53:44 +0000 Subject: [PATCH 008/148] * Add an unused flags header. * comments. --- bin/passive/sysctl_api.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/bin/passive/sysctl_api.h b/bin/passive/sysctl_api.h index 02e3cfe..749933a 100644 --- a/bin/passive/sysctl_api.h +++ b/bin/passive/sysctl_api.h @@ -10,15 +10,22 @@ struct sysctl_req_hdr { uint32_t sysctl_str_len; uint32_t sysctl_dst_len; /* result (new) */ uint32_t sysctl_src_len; /* request (old) */ + + /* sysctl string follows, non-NUL terminated */ + + /* srcbuf follows, if srclen != 0 */ }; struct sysctl_resp_hdr { uint32_t sysctl_resp_len; uint32_t sysctl_resp_type; + uint32_t sysctl_resp_flags; /* This is the sysctl specific stuff */ uint32_t sysctl_dst_len; /* response buffer length */ uint32_t sysctl_dst_errno; /* sysctl errno value */ + + /* Response follows, if sysctl_dst_len != 0 */ }; extern void * passive_sysctl_listener(void *arg); From e4e76b99de856778c8309651282097ff4113bbfc Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 18:53:52 +0000 Subject: [PATCH 009/148] If we succeeded in the call, write out a response. --- bin/passive/sysctl_api.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 563edea..bf84521 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -236,15 +236,29 @@ passive_sysctl_listener(void *arg) (int) wbuf_len, (int) rval); -#if 0 /* - * XXX validate the response back from uinet_sysctl() - * is within bounds! 
+ * XXX Validate the response back from uinet_sysctl() + * is within bounds for the response back to the + * client. */ /* Construct our response */ rhdr.sysctl_resp_len = htole32(sizeof(struct sysctl_resp_hdr) + wbuf_len); -#endif + rhdr.sysctl_resp_type = 0; /* XXX */ + rhdr.sysctl_resp_flags = 0; /* XXX */ + + if (errno == 0) + rhdr.sysctl_dst_len = htole32(rval); + else + rhdr.sysctl_dst_len = 0; + rhdr.sysctl_dst_errno = error; + + write(ns, &rhdr, sizeof(rhdr)); + if (wbuf_len > 0) { + write(ns, wbuf, wbuf_len); + } + + /* Done! */ next: if (wbuf != NULL) From f52004a421ea9372d94a47b8c1dead31de4bbbd4 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 18:54:09 +0000 Subject: [PATCH 010/148] Handle the response from the server. --- bin/sysctl/sysctl.c | 82 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 8 deletions(-) diff --git a/bin/sysctl/sysctl.c b/bin/sysctl/sysctl.c index fc911fc..ee4dc15 100644 --- a/bin/sysctl/sysctl.c +++ b/bin/sysctl/sysctl.c @@ -23,29 +23,39 @@ main(int argc, char *argv[]) int r; struct sysctl_req_hdr req_hdr; struct sysctl_resp_hdr *resp_hdr; - char *req_buf, *resp_buf; + char *req_buf, *read_buf; char *req_str; size_t reqbuf_len, respbuf_len; + size_t readbuf_len; + size_t read_ofs = 0; /* Fake up a request structure for now */ req_str = strdup("net.inet.tcp.stats"); reqbuf_len = 0; respbuf_len = 1048576; req_buf = NULL; - resp_buf = NULL; + read_buf = NULL; - /* Ok, allocate req/resp buffers as required */ + /* Ok, allocate request buffer */ if (reqbuf_len > 0) { req_buf = calloc(1, reqbuf_len); if (req_buf == NULL) err(1, "calloc"); } - if (respbuf_len > 0) { - resp_buf = calloc(1, respbuf_len); - if (resp_buf == NULL) - err(1, "calloc"); - } + /* + * Calculate the readbuf_len. It's the combination + * of the header size and the response payload. + */ + readbuf_len = respbuf_len + sizeof(struct sysctl_resp_hdr); + + /* + * Allocate the response buffer. 
This includes the + * response header and response payload if required. + */ + read_buf = calloc(1, readbuf_len); + if (read_buf == NULL) + err(1, "calloc"); /* Connect to the destination socket */ bzero(&sun, sizeof(sun)); @@ -88,6 +98,62 @@ main(int argc, char *argv[]) } /* Read response */ + while (read_ofs < readbuf_len) { + /* Don't try to read if we have a full buffer */ + if (readbuf_len - read_ofs <= 0) + break; + r = read(s, read_buf + read_ofs, readbuf_len - read_ofs); + if (r < 0) { + err(1, "read"); + } if (r == 0) { + fprintf(stderr, "%s: read early EOF\n", __func__); + break; + } + + read_ofs += r; + + /* if we don't have enough data for the header, continue */ + if (read_ofs < sizeof(struct sysctl_resp_hdr)) { + continue; + } + + /* Grab the response header */ + resp_hdr = (struct sysctl_resp_hdr *) read_buf; + + /* + * Is the response length greater than respbuf_len? + * Then the response is too large. Naughty server. + */ + if (le32toh(resp_hdr->sysctl_resp_len) > + respbuf_len + sizeof(struct sysctl_resp_hdr)) { + fprintf(stderr, "%s: resp_len (%d) is too long!\n", + __func__, + le32toh(resp_hdr->sysctl_resp_len)); + break; + } + + /* Do we have enough data to match the response length? */ + if (read_ofs < le32toh(resp_hdr->sysctl_resp_len)) + continue; + + /* We have enough data - woo! More sanity checks! */ + + /* + * Does the response buffer length exceed what we allocated? + * again, too big a response; bad coder. 
+ */ + + if (le32toh(resp_hdr->sysctl_dst_len) > respbuf_len) { + fprintf(stderr, "%s: dst_len (%d) is too long!\n", + __func__, + le32toh(resp_hdr->sysctl_dst_len)); + break; + } + printf("%s: received response: errno=%d, %d bytes\n", + __func__, + le32toh(resp_hdr->sysctl_dst_errno), + le32toh(resp_hdr->sysctl_dst_len)); + } /* Done */ close(s); From cee453e032325feaf0550d86b5c4ab8d7e5c4a47 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 20:26:21 +0000 Subject: [PATCH 011/148] * Refactor out the string sysctl fetch code into a separate routine. * start fleshing out what the multiple request type thing will look like. --- bin/passive/sysctl_api.c | 284 ++++++++++++++++++++++----------------- bin/passive/sysctl_api.h | 6 + 2 files changed, 166 insertions(+), 124 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index bf84521..18e0986 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -47,12 +47,163 @@ #define SYSCTL_MAX_STR_LEN 1024 #define SYSCTL_MAX_REQ_BUF_LEN 1048576 +/* + * Handle sysctl string type requests. + * + * Returns 1 if the connection should stay open; 0 if + * not. + */ +static int +passive_sysctl_reqtype_str(int ns, char *buf, int len) +{ + struct sysctl_req_hdr *hdr; + int retval = 0; + char *sbuf = NULL; + char *req_str = NULL; + size_t wbuf_len = 0; + size_t sbuf_len = 0; + size_t rval; + int error; + struct sysctl_resp_hdr rhdr; + char *wbuf = NULL; + + /* Request header; zero response header */ + hdr = (struct sysctl_req_hdr *) buf; + bzero(&rhdr, sizeof(rhdr)); + + /* + * Validate the various lengths. 
+ */ + + if (le32toh(hdr->sysctl_req_len) != + le32toh(hdr->sysctl_str_len) + + le32toh(hdr->sysctl_src_len) + + sizeof(struct sysctl_req_hdr)) { + fprintf(stderr, "%s: fd %d: length mismatch\n", + __func__, + ns); + fprintf(stderr, "%s: fd %d: hdr_len=%d, req_len=%d, str_len=%d, src_len=%d\n", + __func__, + ns, + (int) sizeof(struct sysctl_req_hdr), + le32toh(hdr->sysctl_req_len), + le32toh(hdr->sysctl_str_len), + le32toh(hdr->sysctl_src_len)); + retval = 0; + goto finish; + } + + if (le32toh(hdr->sysctl_dst_len) > SYSCTL_MAX_BUF_LEN) { + fprintf(stderr, "%s: fd %d: dst_len %d > %d\n", + __func__, + ns, + le32toh(hdr->sysctl_dst_len), + SYSCTL_MAX_BUF_LEN); + retval = 0; + goto finish; + } + + /* + * Populate the request string. + */ + req_str = malloc(le32toh(hdr->sysctl_str_len) + 1); + if (req_str == NULL) { + fprintf(stderr, "%s; fd %d: malloc failed (req_str)\n", + __func__, + ns); + retval = 0; + goto finish; + } + + memcpy(req_str, buf + sizeof(struct sysctl_req_hdr), + le32toh(hdr->sysctl_str_len)); + req_str[le32toh(hdr->sysctl_str_len)] = '\0'; + + /* + * If there's a request buffer, populate that. + */ + if (le32toh(hdr->sysctl_src_len) > 0) { + sbuf = buf + le32toh(hdr->sysctl_src_len); + sbuf_len = le32toh(hdr->sysctl_src_len); + } + + /* + * Allocate response buffer if requested. 
+ */ + if (le32toh(hdr->sysctl_dst_len) > 0) { + wbuf = malloc(le32toh(hdr->sysctl_dst_len)); + if (wbuf == NULL) { + fprintf(stderr, "%s: fd %d: malloc failed: %d\n", + __func__, + ns, + errno); + retval = 0; + goto finish; + } + wbuf_len = le32toh(hdr->sysctl_dst_len); + } + + /* Issue sysctl */ + fprintf(stderr, + "%s: fd %d: sysctl '%s' src_len=%d, dst_len=%d\n", + __func__, + ns, + req_str, + le32toh(hdr->sysctl_src_len), + le32toh(hdr->sysctl_dst_len)); + + error = uinet_sysctl(req_str, + wbuf, &wbuf_len, + sbuf, sbuf_len, + &rval, + 0); + + fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%d, rval=%d\n", + __func__, + ns, + (int) error, + (int) wbuf_len, + (int) rval); + + /* + * XXX Validate the response back from uinet_sysctl() + * is within bounds for the response back to the + * client. + */ + + /* Construct our response */ + rhdr.sysctl_resp_len = htole32(sizeof(struct sysctl_resp_hdr) + wbuf_len); + rhdr.sysctl_resp_type = 0; /* XXX */ + rhdr.sysctl_resp_flags = 0; /* XXX */ + + if (errno == 0) + rhdr.sysctl_dst_len = htole32(rval); + else + rhdr.sysctl_dst_len = 0; + rhdr.sysctl_dst_errno = error; + + write(ns, &rhdr, sizeof(rhdr)); + if (wbuf_len > 0) { + write(ns, wbuf, wbuf_len); + } + + /* Done! 
*/ + retval = 1; + +finish: + if (req_str != NULL) + free(req_str); + if (wbuf != NULL) + free(wbuf); + return (retval); +} + void * passive_sysctl_listener(void *arg) { int s, ns, r; struct sockaddr_un sun; - char *rbuf, *wbuf = NULL; + char *rbuf; uinet_initialize_thread(); @@ -94,17 +245,8 @@ passive_sysctl_listener(void *arg) socklen_t sl; int len; struct sysctl_req_hdr *hdr; - char *sbuf = NULL; - char *req_str = NULL; - size_t wbuf_len = 0; - size_t sbuf_len = 0; - size_t rval; - int error; - struct sysctl_resp_hdr rhdr; int rlen = 0; - bzero(&rhdr, sizeof(rhdr)); - ns = accept(s, (struct sockaddr *) &sun_n, &sl); if (ns < 0) { fprintf(stderr, "%s: accept failed: %d\n", __func__, errno); @@ -148,124 +290,18 @@ passive_sysctl_listener(void *arg) } /* - * Ok, validate the various lengths. + * We have the entire payload. Let's dispatch based + * on type. */ + (void) passive_sysctl_reqtype_str(ns, rbuf, rlen); - if (le32toh(hdr->sysctl_req_len) != - le32toh(hdr->sysctl_str_len) - + le32toh(hdr->sysctl_src_len) - + sizeof(struct sysctl_req_hdr)) { - fprintf(stderr, "%s: fd %d: length mismatch\n", - __func__, - ns); - fprintf(stderr, "%s: fd %d: hdr_len=%d, req_len=%d, str_len=%d, src_len=%d\n", - __func__, - ns, - (int) sizeof(struct sysctl_req_hdr), - le32toh(hdr->sysctl_req_len), - le32toh(hdr->sysctl_str_len), - le32toh(hdr->sysctl_src_len)); - goto next; - } - - if (le32toh(hdr->sysctl_dst_len) > SYSCTL_MAX_BUF_LEN) { - fprintf(stderr, "%s: fd %d: dst_len %d > %d\n", - __func__, - ns, - le32toh(hdr->sysctl_dst_len), - SYSCTL_MAX_BUF_LEN); - goto next; - } - - /* - * Populate the request string. + /* XXX until we've taught the loop about + * how to consume readbuf data right and + * have the remainder data be moved to the + * head of the queue, let's just close it for + * now. 
*/ - req_str = malloc(le32toh(hdr->sysctl_str_len) + 1); - if (req_str == NULL) { - fprintf(stderr, "%s; fd %d: malloc failed (req_str)\n", - __func__, - ns); - goto next; - } - memcpy(req_str, rbuf + sizeof(struct sysctl_req_hdr), - le32toh(hdr->sysctl_str_len)); - req_str[le32toh(hdr->sysctl_str_len)] = '\0'; - - /* - * If there's a request buffer, populate that. - */ - if (le32toh(hdr->sysctl_src_len) > 0) { - sbuf = rbuf + le32toh(hdr->sysctl_src_len); - sbuf_len = le32toh(hdr->sysctl_src_len); - } - - /* - * Allocate response buffer if requested. - */ - if (le32toh(hdr->sysctl_dst_len) > 0) { - wbuf = malloc(le32toh(hdr->sysctl_dst_len)); - if (wbuf == NULL) { - fprintf(stderr, "%s: fd %d: malloc failed: %d\n", - __func__, - ns, - errno); - goto next; - } - wbuf_len = le32toh(hdr->sysctl_dst_len); - } - - /* Issue sysctl */ - fprintf(stderr, - "%s: fd %d: sysctl '%s' src_len=%d, dst_len=%d\n", - __func__, - ns, - req_str, - le32toh(hdr->sysctl_src_len), - le32toh(hdr->sysctl_dst_len)); - - error = uinet_sysctl(req_str, - wbuf, &wbuf_len, - sbuf, sbuf_len, - &rval, - 0); - - fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%d, rval=%d\n", - __func__, - ns, - (int) error, - (int) wbuf_len, - (int) rval); - - /* - * XXX Validate the response back from uinet_sysctl() - * is within bounds for the response back to the - * client. - */ - - /* Construct our response */ - rhdr.sysctl_resp_len = htole32(sizeof(struct sysctl_resp_hdr) + wbuf_len); - rhdr.sysctl_resp_type = 0; /* XXX */ - rhdr.sysctl_resp_flags = 0; /* XXX */ - - if (errno == 0) - rhdr.sysctl_dst_len = htole32(rval); - else - rhdr.sysctl_dst_len = 0; - rhdr.sysctl_dst_errno = error; - - write(ns, &rhdr, sizeof(rhdr)); - if (wbuf_len > 0) { - write(ns, wbuf, wbuf_len); - } - - /* Done! 
*/ - next: - if (wbuf != NULL) - free(wbuf); - if (req_str != NULL) - free(req_str); - /* Close */ close(ns); } diff --git a/bin/passive/sysctl_api.h b/bin/passive/sysctl_api.h index 749933a..9ddcc43 100644 --- a/bin/passive/sysctl_api.h +++ b/bin/passive/sysctl_api.h @@ -1,6 +1,12 @@ #ifndef __SYSCTL_API_H__ #define __SYSCTL_API_H__ +typedef enum { + SYSCTL_REQ_NONE = 0, + SYSCTL_REQ_STR = 1, + SYSCTL_REQ_OID = 2, +} sysctl_req_type_t; + struct sysctl_req_hdr { uint32_t sysctl_req_len; /* length of the whole payload */ uint32_t sysctl_req_type; /* Type of the message */ From c84933c4931ee788d6e151d49eead4e658717d04 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 20:36:20 +0000 Subject: [PATCH 012/148] It's time I added support for alternate test strings. --- bin/sysctl/sysctl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bin/sysctl/sysctl.c b/bin/sysctl/sysctl.c index ee4dc15..09fd10c 100644 --- a/bin/sysctl/sysctl.c +++ b/bin/sysctl/sysctl.c @@ -29,8 +29,13 @@ main(int argc, char *argv[]) size_t readbuf_len; size_t read_ofs = 0; + if (argc < 2) { + printf("Usage: sysctl \n"); + exit(127); + } + /* Fake up a request structure for now */ - req_str = strdup("net.inet.tcp.stats"); + req_str = strdup(argv[1]); reqbuf_len = 0; respbuf_len = 1048576; req_buf = NULL; From e12180d1cc499b74579c40f21f3a23f2ba627f58 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 20:37:18 +0000 Subject: [PATCH 013/148] Do some more length validation. --- bin/passive/sysctl_api.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 18e0986..4dc0535 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -255,6 +255,13 @@ passive_sysctl_listener(void *arg) /* XXX I hate gotos */ readmore: + + /* Do we have space left in our incoming buffer? 
*/ + if (rlen >= SYSCTL_BUF_LEN) { + fprintf(stderr, "%s: fd %d: read too much?\n", __func__, ns); + goto next; + } + /* Read data */ len = read(ns, rbuf + rlen, SYSCTL_BUF_LEN - rlen); if (len <= 0) { @@ -266,13 +273,15 @@ passive_sysctl_listener(void *arg) goto next; } + /* Keep track of how much data is in the incoming buffer */ rlen += len; /* * Not enough data? Keep reading. */ if (rlen < sizeof(struct sysctl_req_hdr)) { - fprintf(stderr, "%s: fd %d: read %d btyes, rlen is now %d\n", + fprintf(stderr, + "%s: fd %d: read %d btyes, rlen is now %d\n", __func__, ns, len, @@ -282,6 +291,24 @@ passive_sysctl_listener(void *arg) hdr = (struct sysctl_req_hdr *) rbuf; + /* + * Validate sysctl_req_len so we don't try to read way more + * than we have buffer space for. + * + * We assume that we're only getting to this point + * when the header is at the beginning of the buffer; + * not that we're doing pipelined requests. + */ + if (le32toh(hdr->sysctl_req_len) >= SYSCTL_BUF_LEN) { + fprintf(stderr, + "%s: fd %d: req_len (%d) is too big (%d)\n", + __func__, + ns, + le32toh(hdr->sysctl_req_len), + SYSCTL_BUF_LEN); + goto next; + } + /* * Do we have enough data to cover the payload length? */ From e8e23b005562f7a65a6277197dbcd8c727c99289 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 20:48:32 +0000 Subject: [PATCH 014/148] Add in libnv from -HEAD. I just realised how much I don't want to be hand-crafting this crap and libnv should do it for me. Let's try it out. 
--- lib/libnv/Makefile | 161 ++++ lib/libnv/common_impl.h | 37 + lib/libnv/dnv.h | 106 +++ lib/libnv/dnvlist.c | 252 ++++++ lib/libnv/msgio.c | 390 +++++++++ lib/libnv/msgio.h | 50 ++ lib/libnv/nv.3 | 604 ++++++++++++++ lib/libnv/nv.h | 273 +++++++ lib/libnv/nv_impl.h | 130 +++ lib/libnv/nvlist.c | 1707 +++++++++++++++++++++++++++++++++++++++ lib/libnv/nvlist_impl.h | 43 + lib/libnv/nvpair.c | 1333 ++++++++++++++++++++++++++++++ lib/libnv/nvpair_impl.h | 58 ++ 13 files changed, 5144 insertions(+) create mode 100644 lib/libnv/Makefile create mode 100644 lib/libnv/common_impl.h create mode 100644 lib/libnv/dnv.h create mode 100644 lib/libnv/dnvlist.c create mode 100644 lib/libnv/msgio.c create mode 100644 lib/libnv/msgio.h create mode 100644 lib/libnv/nv.3 create mode 100644 lib/libnv/nv.h create mode 100644 lib/libnv/nv_impl.h create mode 100644 lib/libnv/nvlist.c create mode 100644 lib/libnv/nvlist_impl.h create mode 100644 lib/libnv/nvpair.c create mode 100644 lib/libnv/nvpair_impl.h diff --git a/lib/libnv/Makefile b/lib/libnv/Makefile new file mode 100644 index 0000000..7c24ba1 --- /dev/null +++ b/lib/libnv/Makefile @@ -0,0 +1,161 @@ +# $FreeBSD: head/lib/libnv/Makefile 258065 2013-11-12 19:39:14Z pjd $ + +LIB= nv +SHLIBDIR?= /lib +SHLIB_MAJOR= 0 + +SRCS= dnvlist.c +SRCS+= msgio.c +SRCS+= nvlist.c +SRCS+= nvpair.c + +INCS= dnv.h +INCS+= nv.h + +MAN+= nv.3 + +MLINKS+=nv.3 libnv.3 \ + nv.3 nvlist.3 +MLINKS+=nv.3 nvlist_create.3 \ + nv.3 nvlist_destroy.3 \ + nv.3 nvlist_error.3 \ + nv.3 nvlist_empty.3 \ + nv.3 nvlist_clone.3 \ + nv.3 nvlist_dump.3 \ + nv.3 nvlist_fdump.3 \ + nv.3 nvlist_size.3 \ + nv.3 nvlist_pack.3 \ + nv.3 nvlist_unpack.3 \ + nv.3 nvlist_send.3 \ + nv.3 nvlist_recv.3 \ + nv.3 nvlist_xfer.3 \ + nv.3 nvlist_next.3 \ + nv.3 nvlist_exists.3 \ + nv.3 nvlist_exists_type.3 \ + nv.3 nvlist_exists_null.3 \ + nv.3 nvlist_exists_bool.3 \ + nv.3 nvlist_exists_number.3 \ + nv.3 nvlist_exists_string.3 \ + nv.3 nvlist_exists_nvlist.3 \ + nv.3 
nvlist_exists_descriptor.3 \ + nv.3 nvlist_exists_binary.3 \ + nv.3 nvlist_add_null.3 \ + nv.3 nvlist_add_bool.3 \ + nv.3 nvlist_add_number.3 \ + nv.3 nvlist_add_string.3 \ + nv.3 nvlist_add_stringf.3 \ + nv.3 nvlist_add_stringv.3 \ + nv.3 nvlist_add_nvlist.3 \ + nv.3 nvlist_add_descriptor.3 \ + nv.3 nvlist_add_binary.3 \ + nv.3 nvlist_move_string.3 \ + nv.3 nvlist_move_nvlist.3 \ + nv.3 nvlist_move_descriptor.3 \ + nv.3 nvlist_move_binary.3 \ + nv.3 nvlist_get_bool.3 \ + nv.3 nvlist_get_number.3 \ + nv.3 nvlist_get_string.3 \ + nv.3 nvlist_get_nvlist.3 \ + nv.3 nvlist_get_descriptor.3 \ + nv.3 nvlist_get_binary.3 \ + nv.3 nvlist_take_bool.3 \ + nv.3 nvlist_take_number.3 \ + nv.3 nvlist_take_string.3 \ + nv.3 nvlist_take_nvlist.3 \ + nv.3 nvlist_take_descriptor.3 \ + nv.3 nvlist_take_binary.3 \ + nv.3 nvlist_free.3 \ + nv.3 nvlist_free_type.3 \ + nv.3 nvlist_free_null.3 \ + nv.3 nvlist_free_bool.3 \ + nv.3 nvlist_free_number.3 \ + nv.3 nvlist_free_string.3 \ + nv.3 nvlist_free_nvlist.3 \ + nv.3 nvlist_free_descriptor.3 \ + nv.3 nvlist_free_binary.3 +MLINKS+=nv.3 nvlist_existsf.3 \ + nv.3 nvlist_existsf_type.3 \ + nv.3 nvlist_existsf_null.3 \ + nv.3 nvlist_existsf_bool.3 \ + nv.3 nvlist_existsf_number.3 \ + nv.3 nvlist_existsf_string.3 \ + nv.3 nvlist_existsf_nvlist.3 \ + nv.3 nvlist_existsf_descriptor.3 \ + nv.3 nvlist_existsf_binary.3 \ + nv.3 nvlist_addf_null.3 \ + nv.3 nvlist_addf_bool.3 \ + nv.3 nvlist_addf_number.3 \ + nv.3 nvlist_addf_string.3 \ + nv.3 nvlist_addf_nvlist.3 \ + nv.3 nvlist_addf_descriptor.3 \ + nv.3 nvlist_addf_binary.3 \ + nv.3 nvlist_movef_string.3 \ + nv.3 nvlist_movef_nvlist.3 \ + nv.3 nvlist_movef_descriptor.3 \ + nv.3 nvlist_movef_binary.3 \ + nv.3 nvlist_getf_bool.3 \ + nv.3 nvlist_getf_number.3 \ + nv.3 nvlist_getf_string.3 \ + nv.3 nvlist_getf_nvlist.3 \ + nv.3 nvlist_getf_descriptor.3 \ + nv.3 nvlist_getf_binary.3 \ + nv.3 nvlist_takef_bool.3 \ + nv.3 nvlist_takef_number.3 \ + nv.3 nvlist_takef_string.3 \ + nv.3 nvlist_takef_nvlist.3 
\ + nv.3 nvlist_takef_descriptor.3 \ + nv.3 nvlist_takef_binary.3 \ + nv.3 nvlist_freef.3 \ + nv.3 nvlist_freef_type.3 \ + nv.3 nvlist_freef_null.3 \ + nv.3 nvlist_freef_bool.3 \ + nv.3 nvlist_freef_number.3 \ + nv.3 nvlist_freef_string.3 \ + nv.3 nvlist_freef_nvlist.3 \ + nv.3 nvlist_freef_descriptor.3 \ + nv.3 nvlist_freef_binary.3 +MLINKS+=nv.3 nvlist_existsv.3 \ + nv.3 nvlist_existsv_type.3 \ + nv.3 nvlist_existsv_null.3 \ + nv.3 nvlist_existsv_bool.3 \ + nv.3 nvlist_existsv_number.3 \ + nv.3 nvlist_existsv_string.3 \ + nv.3 nvlist_existsv_nvlist.3 \ + nv.3 nvlist_existsv_descriptor.3 \ + nv.3 nvlist_existsv_binary.3 \ + nv.3 nvlist_addv_null.3 \ + nv.3 nvlist_addv_bool.3 \ + nv.3 nvlist_addv_number.3 \ + nv.3 nvlist_addv_string.3 \ + nv.3 nvlist_addv_nvlist.3 \ + nv.3 nvlist_addv_descriptor.3 \ + nv.3 nvlist_addv_binary.3 \ + nv.3 nvlist_movev_string.3 \ + nv.3 nvlist_movev_nvlist.3 \ + nv.3 nvlist_movev_descriptor.3 \ + nv.3 nvlist_movev_binary.3 \ + nv.3 nvlist_getv_bool.3 \ + nv.3 nvlist_getv_number.3 \ + nv.3 nvlist_getv_string.3 \ + nv.3 nvlist_getv_nvlist.3 \ + nv.3 nvlist_getv_descriptor.3 \ + nv.3 nvlist_getv_binary.3 \ + nv.3 nvlist_takev_bool.3 \ + nv.3 nvlist_takev_number.3 \ + nv.3 nvlist_takev_string.3 \ + nv.3 nvlist_takev_nvlist.3 \ + nv.3 nvlist_takev_descriptor.3 \ + nv.3 nvlist_takev_binary.3 \ + nv.3 nvlist_freev.3 \ + nv.3 nvlist_freev_type.3 \ + nv.3 nvlist_freev_null.3 \ + nv.3 nvlist_freev_bool.3 \ + nv.3 nvlist_freev_number.3 \ + nv.3 nvlist_freev_string.3 \ + nv.3 nvlist_freev_nvlist.3 \ + nv.3 nvlist_freev_descriptor.3 \ + nv.3 nvlist_freev_binary.3 + +WARNS?= 6 + +.include diff --git a/lib/libnv/common_impl.h b/lib/libnv/common_impl.h new file mode 100644 index 0000000..04bd453 --- /dev/null +++ b/lib/libnv/common_impl.h @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/lib/libnv/common_impl.h 258065 2013-11-12 19:39:14Z pjd $ + */ + +#ifndef _COMMON_IMPL_H_ +#define _COMMON_IMPL_H_ + +#define fd_is_valid(fd) (fcntl((fd), F_GETFL) != -1 || errno != EBADF) + +#endif /* !_COMMON_IMPL_H_ */ diff --git a/lib/libnv/dnv.h b/lib/libnv/dnv.h new file mode 100644 index 0000000..c4ba65b --- /dev/null +++ b/lib/libnv/dnv.h @@ -0,0 +1,106 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/lib/libnv/dnv.h 258065 2013-11-12 19:39:14Z pjd $ + */ + +#ifndef _DNV_H_ +#define _DNV_H_ + +#include + +#include +#include +#include + +#ifndef _NVLIST_T_DECLARED +#define _NVLIST_T_DECLARED +struct nvlist; + +typedef struct nvlist nvlist_t; +#endif + +/* + * The dnvlist_get functions returns value associated with the given name. + * If it returns a pointer, the pointer represents internal buffer and should + * not be freed by the caller. + * If no element of the given name and type exists, the function will return + * provided default value. 
+ */ + +bool dnvlist_get_bool(const nvlist_t *nvl, const char *name, bool defval); +uint64_t dnvlist_get_number(const nvlist_t *nvl, const char *name, uint64_t defval); +const char *dnvlist_get_string(const nvlist_t *nvl, const char *name, const char *defval); +const nvlist_t *dnvlist_get_nvlist(const nvlist_t *nvl, const char *name, const nvlist_t *defval); +int dnvlist_get_descriptor(const nvlist_t *nvl, const char *name, int defval); +const void *dnvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep, const void *defval, size_t defsize); + +bool dnvlist_getf_bool(const nvlist_t *nvl, bool defval, const char *namefmt, ...) __printflike(3, 4); +uint64_t dnvlist_getf_number(const nvlist_t *nvl, uint64_t defval, const char *namefmt, ...) __printflike(3, 4); +const char *dnvlist_getf_string(const nvlist_t *nvl, const char *defval, const char *namefmt, ...) __printflike(3, 4); +const nvlist_t *dnvlist_getf_nvlist(const nvlist_t *nvl, const nvlist_t *defval, const char *namefmt, ...) __printflike(3, 4); +int dnvlist_getf_descriptor(const nvlist_t *nvl, int defval, const char *namefmt, ...) __printflike(3, 4); +const void *dnvlist_getf_binary(const nvlist_t *nvl, size_t *sizep, const void *defval, size_t defsize, const char *namefmt, ...) 
__printflike(5, 6); + +bool dnvlist_getv_bool(const nvlist_t *nvl, bool defval, const char *namefmt, va_list nameap) __printflike(3, 0); +uint64_t dnvlist_getv_number(const nvlist_t *nvl, uint64_t defval, const char *namefmt, va_list nameap) __printflike(3, 0); +const char *dnvlist_getv_string(const nvlist_t *nvl, const char *defval, const char *namefmt, va_list nameap) __printflike(3, 0); +const nvlist_t *dnvlist_getv_nvlist(const nvlist_t *nvl, const nvlist_t *defval, const char *namefmt, va_list nameap) __printflike(3, 0); +int dnvlist_getv_descriptor(const nvlist_t *nvl, int defval, const char *namefmt, va_list nameap) __printflike(3, 0); +const void *dnvlist_getv_binary(const nvlist_t *nvl, size_t *sizep, const void *defval, size_t defsize, const char *namefmt, va_list nameap) __printflike(5, 0); + +/* + * The dnvlist_take functions returns value associated with the given name and + * remove corresponding nvpair. + * If it returns a pointer, the caller has to free it. + * If no element of the given name and type exists, the function will return + * provided default value. + */ + +bool dnvlist_take_bool(nvlist_t *nvl, const char *name, bool defval); +uint64_t dnvlist_take_number(nvlist_t *nvl, const char *name, uint64_t defval); +char *dnvlist_take_string(nvlist_t *nvl, const char *name, char *defval); +nvlist_t *dnvlist_take_nvlist(nvlist_t *nvl, const char *name, nvlist_t *defval); +int dnvlist_take_descriptor(nvlist_t *nvl, const char *name, int defval); +void *dnvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep, void *defval, size_t defsize); + +bool dnvlist_takef_bool(nvlist_t *nvl, bool defval, const char *namefmt, ...) __printflike(3, 4); +uint64_t dnvlist_takef_number(nvlist_t *nvl, uint64_t defval, const char *namefmt, ...) __printflike(3, 4); +char *dnvlist_takef_string(nvlist_t *nvl, char *defval, const char *namefmt, ...) __printflike(3, 4); +nvlist_t *dnvlist_takef_nvlist(nvlist_t *nvl, nvlist_t *defval, const char *namefmt, ...) 
__printflike(3, 4); +int dnvlist_takef_descriptor(nvlist_t *nvl, int defval, const char *namefmt, ...) __printflike(3, 4); +void *dnvlist_takef_binary(nvlist_t *nvl, size_t *sizep, void *defval, size_t defsize, const char *namefmt, ...) __printflike(5, 6); + +bool dnvlist_takev_bool(nvlist_t *nvl, bool defval, const char *namefmt, va_list nameap) __printflike(3, 0); +uint64_t dnvlist_takev_number(nvlist_t *nvl, uint64_t defval, const char *namefmt, va_list nameap) __printflike(3, 0); +char *dnvlist_takev_string(nvlist_t *nvl, char *defval, const char *namefmt, va_list nameap) __printflike(3, 0); +nvlist_t *dnvlist_takev_nvlist(nvlist_t *nvl, nvlist_t *defval, const char *namefmt, va_list nameap) __printflike(3, 0); +int dnvlist_takev_descriptor(nvlist_t *nvl, int defval, const char *namefmt, va_list nameap) __printflike(3, 0); +void *dnvlist_takev_binary(nvlist_t *nvl, size_t *sizep, void *defval, size_t defsize, const char *namefmt, va_list nameap) __printflike(5, 0); + +#endif /* !_DNV_H_ */ diff --git a/lib/libnv/dnvlist.c b/lib/libnv/dnvlist.c new file mode 100644 index 0000000..97d02ee --- /dev/null +++ b/lib/libnv/dnvlist.c @@ -0,0 +1,252 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: head/lib/libnv/dnvlist.c 258065 2013-11-12 19:39:14Z pjd $"); + +#include +#include +#include + +#include "nv.h" +#include "nv_impl.h" + +#include "dnv.h" + +#define DNVLIST_GET(ftype, type) \ +ftype \ +dnvlist_get_##type(const nvlist_t *nvl, const char *name, ftype defval) \ +{ \ + \ + return (dnvlist_getf_##type(nvl, defval, "%s", name)); \ +} + +DNVLIST_GET(bool, bool) +DNVLIST_GET(uint64_t, number) +DNVLIST_GET(const char *, string) +DNVLIST_GET(const nvlist_t *, nvlist) +DNVLIST_GET(int, descriptor) + +#undef DNVLIST_GET + +const void * +dnvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep, + const void *defval, size_t defsize) +{ + + return (dnvlist_getf_binary(nvl, sizep, defval, defsize, "%s", name)); +} + +#define DNVLIST_GETF(ftype, type) \ +ftype \ +dnvlist_getf_##type(const nvlist_t *nvl, ftype defval, \ + const char *namefmt, ...) 
\ +{ \ + va_list nameap; \ + ftype value; \ + \ + va_start(nameap, namefmt); \ + value = dnvlist_getv_##type(nvl, defval, namefmt, nameap); \ + va_end(nameap); \ + \ + return (value); \ +} + +DNVLIST_GETF(bool, bool) +DNVLIST_GETF(uint64_t, number) +DNVLIST_GETF(const char *, string) +DNVLIST_GETF(const nvlist_t *, nvlist) +DNVLIST_GETF(int, descriptor) + +#undef DNVLIST_GETF + +const void * +dnvlist_getf_binary(const nvlist_t *nvl, size_t *sizep, const void *defval, + size_t defsize, const char *namefmt, ...) +{ + va_list nameap; + const void *value; + + va_start(nameap, namefmt); + value = dnvlist_getv_binary(nvl, sizep, defval, defsize, namefmt, + nameap); + va_end(nameap); + + return (value); +} + +#define DNVLIST_GETV(ftype, type) \ +ftype \ +dnvlist_getv_##type(const nvlist_t *nvl, ftype defval, \ + const char *namefmt, va_list nameap) \ +{ \ + va_list cnameap; \ + ftype value; \ + \ + va_copy(cnameap, nameap); \ + if (nvlist_existsv_##type(nvl, namefmt, cnameap)) \ + value = nvlist_getv_##type(nvl, namefmt, nameap); \ + else \ + value = defval; \ + va_end(cnameap); \ + return (value); \ +} + +DNVLIST_GETV(bool, bool) +DNVLIST_GETV(uint64_t, number) +DNVLIST_GETV(const char *, string) +DNVLIST_GETV(const nvlist_t *, nvlist) +DNVLIST_GETV(int, descriptor) + +#undef DNVLIST_GETV + +const void * +dnvlist_getv_binary(const nvlist_t *nvl, size_t *sizep, const void *defval, + size_t defsize, const char *namefmt, va_list nameap) +{ + va_list cnameap; + const void *value; + + va_copy(cnameap, nameap); + if (nvlist_existsv_binary(nvl, namefmt, cnameap)) { + value = nvlist_getv_binary(nvl, sizep, namefmt, nameap); + } else { + if (sizep != NULL) + *sizep = defsize; + value = defval; + } + va_end(cnameap); + return (value); +} + +#define DNVLIST_TAKE(ftype, type) \ +ftype \ +dnvlist_take_##type(nvlist_t *nvl, const char *name, ftype defval) \ +{ \ + \ + return (dnvlist_takef_##type(nvl, defval, "%s", name)); \ +} + +DNVLIST_TAKE(bool, bool) +DNVLIST_TAKE(uint64_t, 
number) +DNVLIST_TAKE(char *, string) +DNVLIST_TAKE(nvlist_t *, nvlist) +DNVLIST_TAKE(int, descriptor) + +#undef DNVLIST_TAKE + +void * +dnvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep, + void *defval, size_t defsize) +{ + + return (dnvlist_takef_binary(nvl, sizep, defval, defsize, "%s", name)); +} + +#define DNVLIST_TAKEF(ftype, type) \ +ftype \ +dnvlist_takef_##type(nvlist_t *nvl, ftype defval, \ + const char *namefmt, ...) \ +{ \ + va_list nameap; \ + ftype value; \ + \ + va_start(nameap, namefmt); \ + value = dnvlist_takev_##type(nvl, defval, namefmt, nameap); \ + va_end(nameap); \ + \ + return (value); \ +} + +DNVLIST_TAKEF(bool, bool) +DNVLIST_TAKEF(uint64_t, number) +DNVLIST_TAKEF(char *, string) +DNVLIST_TAKEF(nvlist_t *, nvlist) +DNVLIST_TAKEF(int, descriptor) + +#undef DNVLIST_TAKEF + +void * +dnvlist_takef_binary(nvlist_t *nvl, size_t *sizep, void *defval, + size_t defsize, const char *namefmt, ...) +{ + va_list nameap; + void *value; + + va_start(nameap, namefmt); + value = dnvlist_takev_binary(nvl, sizep, defval, defsize, namefmt, + nameap); + va_end(nameap); + + return (value); +} + +#define DNVLIST_TAKEV(ftype, type) \ +ftype \ +dnvlist_takev_##type(nvlist_t *nvl, ftype defval, const char *namefmt, \ + va_list nameap) \ +{ \ + va_list cnameap; \ + ftype value; \ + \ + va_copy(cnameap, nameap); \ + if (nvlist_existsv_##type(nvl, namefmt, cnameap)) \ + value = nvlist_takev_##type(nvl, namefmt, nameap); \ + else \ + value = defval; \ + va_end(cnameap); \ + return (value); \ +} + +DNVLIST_TAKEV(bool, bool) +DNVLIST_TAKEV(uint64_t, number) +DNVLIST_TAKEV(char *, string) +DNVLIST_TAKEV(nvlist_t *, nvlist) +DNVLIST_TAKEV(int, descriptor) + +#undef DNVLIST_TAKEV + +void * +dnvlist_takev_binary(nvlist_t *nvl, size_t *sizep, void *defval, + size_t defsize, const char *namefmt, va_list nameap) +{ + va_list cnameap; + void *value; + + va_copy(cnameap, nameap); + if (nvlist_existsv_binary(nvl, namefmt, cnameap)) { + value = 
nvlist_takev_binary(nvl, sizep, namefmt, nameap); + } else { + if (sizep != NULL) + *sizep = defsize; + value = defval; + } + va_end(cnameap); + return (value); +} diff --git a/lib/libnv/msgio.c b/lib/libnv/msgio.c new file mode 100644 index 0000000..f896b16 --- /dev/null +++ b/lib/libnv/msgio.c @@ -0,0 +1,390 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * Copyright (c) 2013 Mariusz Zaborski + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD: head/lib/libnv/msgio.c 261408 2014-02-02 19:06:00Z pjd $"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_PJDLOG +#include +#endif + +#include "common_impl.h" +#include "msgio.h" + +#ifndef HAVE_PJDLOG +#include +#define PJDLOG_ASSERT(...) assert(__VA_ARGS__) +#define PJDLOG_RASSERT(expr, ...) assert(expr) +#define PJDLOG_ABORT(...) abort() +#endif + +static int +msghdr_add_fd(struct cmsghdr *cmsg, int fd) +{ + + PJDLOG_ASSERT(fd >= 0); + + if (!fd_is_valid(fd)) { + errno = EBADF; + return (-1); + } + + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); + bcopy(&fd, CMSG_DATA(cmsg), sizeof(fd)); + + return (0); +} + +static int +msghdr_get_fd(struct cmsghdr *cmsg) +{ + int fd; + + if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET || + cmsg->cmsg_type != SCM_RIGHTS || + cmsg->cmsg_len != CMSG_LEN(sizeof(fd))) { + errno = EINVAL; + return (-1); + } + + bcopy(CMSG_DATA(cmsg), &fd, sizeof(fd)); +#ifndef MSG_CMSG_CLOEXEC + /* + * If the MSG_CMSG_CLOEXEC flag is not available we cannot set the + * close-on-exec flag atomically, but we still want to set it for + * consistency. + */ + (void) fcntl(fd, F_SETFD, FD_CLOEXEC); +#endif + + return (fd); +} + +static void +fd_wait(int fd, bool doread) +{ + fd_set fds; + + PJDLOG_ASSERT(fd >= 0); + + FD_ZERO(&fds); + FD_SET(fd, &fds); + (void)select(fd + 1, doread ? &fds : NULL, doread ? 
NULL : &fds, + NULL, NULL); +} + +static int +msg_recv(int sock, struct msghdr *msg) +{ + int flags; + + PJDLOG_ASSERT(sock >= 0); + +#ifdef MSG_CMSG_CLOEXEC + flags = MSG_CMSG_CLOEXEC; +#else + flags = 0; +#endif + + for (;;) { + fd_wait(sock, true); + if (recvmsg(sock, msg, flags) == -1) { + if (errno == EINTR) + continue; + return (-1); + } + break; + } + + return (0); +} + +static int +msg_send(int sock, const struct msghdr *msg) +{ + + PJDLOG_ASSERT(sock >= 0); + + for (;;) { + fd_wait(sock, false); + if (sendmsg(sock, msg, 0) == -1) { + if (errno == EINTR) + continue; + return (-1); + } + break; + } + + return (0); +} + +int +cred_send(int sock) +{ + unsigned char credbuf[CMSG_SPACE(sizeof(struct cmsgcred))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + uint8_t dummy; + + bzero(credbuf, sizeof(credbuf)); + bzero(&msg, sizeof(msg)); + bzero(&iov, sizeof(iov)); + + /* + * XXX: We send one byte along with the control message, because + * setting msg_iov to NULL only works if this is the first + * packet send over the socket. Once we send some data we + * won't be able to send credentials anymore. This is most + * likely a kernel bug. 
+ */ + dummy = 0; + iov.iov_base = &dummy; + iov.iov_len = sizeof(dummy); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = credbuf; + msg.msg_controllen = sizeof(credbuf); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(sizeof(struct cmsgcred)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_CREDS; + + if (msg_send(sock, &msg) == -1) + return (-1); + + return (0); +} + +int +cred_recv(int sock, struct cmsgcred *cred) +{ + unsigned char credbuf[CMSG_SPACE(sizeof(struct cmsgcred))]; + struct msghdr msg; + struct cmsghdr *cmsg; + struct iovec iov; + uint8_t dummy; + + bzero(credbuf, sizeof(credbuf)); + bzero(&msg, sizeof(msg)); + bzero(&iov, sizeof(iov)); + + iov.iov_base = &dummy; + iov.iov_len = sizeof(dummy); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = credbuf; + msg.msg_controllen = sizeof(credbuf); + + if (msg_recv(sock, &msg) == -1) + return (-1); + + cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL || + cmsg->cmsg_len != CMSG_LEN(sizeof(struct cmsgcred)) || + cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_CREDS) { + errno = EINVAL; + return (-1); + } + bcopy(CMSG_DATA(cmsg), cred, sizeof(*cred)); + + return (0); +} + +int +fd_send(int sock, const int *fds, size_t nfds) +{ + struct msghdr msg; + struct cmsghdr *cmsg; + unsigned int i; + int serrno, ret; + + if (nfds == 0 || fds == NULL) { + errno = EINVAL; + return (-1); + } + + bzero(&msg, sizeof(msg)); + msg.msg_iov = NULL; + msg.msg_iovlen = 0; + msg.msg_controllen = nfds * CMSG_SPACE(sizeof(int)); + msg.msg_control = calloc(1, msg.msg_controllen); + if (msg.msg_control == NULL) + return (-1); + + ret = -1; + + for (i = 0, cmsg = CMSG_FIRSTHDR(&msg); i < nfds && cmsg != NULL; + i++, cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (msghdr_add_fd(cmsg, fds[i]) == -1) + goto end; + } + + if (msg_send(sock, &msg) == -1) + goto end; + + ret = 0; +end: + serrno = errno; + free(msg.msg_control); + errno = serrno; + return (ret); +} + +int +fd_recv(int sock, 
int *fds, size_t nfds) +{ + struct msghdr msg; + struct cmsghdr *cmsg; + unsigned int i; + int serrno, ret; + + if (nfds == 0 || fds == NULL) { + errno = EINVAL; + return (-1); + } + + bzero(&msg, sizeof(msg)); + msg.msg_iov = NULL; + msg.msg_iovlen = 0; + msg.msg_controllen = nfds * CMSG_SPACE(sizeof(int)); + msg.msg_control = calloc(1, msg.msg_controllen); + if (msg.msg_control == NULL) + return (-1); + + ret = -1; + + if (msg_recv(sock, &msg) == -1) + goto end; + + for (i = 0, cmsg = CMSG_FIRSTHDR(&msg); i < nfds && cmsg != NULL; + i++, cmsg = CMSG_NXTHDR(&msg, cmsg)) { + fds[i] = msghdr_get_fd(cmsg); + if (fds[i] < 0) + break; + } + + if (cmsg != NULL || i < nfds) { + int fd; + + /* + * We need to close all received descriptors, even if we have + * different control message (eg. SCM_CREDS) in between. + */ + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + fd = msghdr_get_fd(cmsg); + if (fd >= 0) + close(fd); + } + errno = EINVAL; + goto end; + } + + ret = 0; +end: + serrno = errno; + free(msg.msg_control); + errno = serrno; + return (ret); +} + +int +buf_send(int sock, void *buf, size_t size) +{ + ssize_t done; + unsigned char *ptr; + + PJDLOG_ASSERT(sock >= 0); + PJDLOG_ASSERT(size > 0); + PJDLOG_ASSERT(buf != NULL); + + ptr = buf; + do { + fd_wait(sock, false); + done = send(sock, ptr, size, 0); + if (done == -1) { + if (errno == EINTR) + continue; + return (-1); + } else if (done == 0) { + errno = ENOTCONN; + return (-1); + } + size -= done; + ptr += done; + } while (size > 0); + + return (0); +} + +int +buf_recv(int sock, void *buf, size_t size) +{ + ssize_t done; + unsigned char *ptr; + + PJDLOG_ASSERT(sock >= 0); + PJDLOG_ASSERT(buf != NULL); + + ptr = buf; + while (size > 0) { + fd_wait(sock, true); + done = recv(sock, ptr, size, 0); + if (done == -1) { + if (errno == EINTR) + continue; + return (-1); + } else if (done == 0) { + errno = ENOTCONN; + return (-1); + } + size -= done; + ptr += done; + } + + return (0); +} 
diff --git a/lib/libnv/msgio.h b/lib/libnv/msgio.h new file mode 100644 index 0000000..8d888d4 --- /dev/null +++ b/lib/libnv/msgio.h @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Copyright (c) 2013 Mariusz Zaborski + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: head/lib/libnv/msgio.h 259430 2013-12-15 22:58:09Z pjd $ + */ + +#ifndef _MSGIO_H_ +#define _MSGIO_H_ + +struct cmsgcred; +struct iovec; +struct msghdr; + +int cred_send(int sock); +int cred_recv(int sock, struct cmsgcred *cred); + +int fd_send(int sock, const int *fds, size_t nfds); +int fd_recv(int sock, int *fds, size_t nfds); + +int buf_send(int sock, void *buf, size_t size); +int buf_recv(int sock, void *buf, size_t size); + +#endif /* !_MSGIO_H_ */ diff --git a/lib/libnv/nv.3 b/lib/libnv/nv.3 new file mode 100644 index 0000000..c24afd1 --- /dev/null +++ b/lib/libnv/nv.3 @@ -0,0 +1,604 @@ +.\" +.\" Copyright (c) 2013 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by Pawel Jakub Dawidek under sponsorship +.\" the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD: head/lib/libnv/nv.3 263479 2014-03-21 15:30:31Z bdrewery $ +.\" +.Dd March 21, 2014 +.Dt NV 3 +.Os +.Sh NAME +.Nm nvlist_create , +.Nm nvlist_destroy , +.Nm nvlist_error , +.Nm nvlist_empty , +.Nm nvlist_exists , +.Nm nvlist_free , +.Nm nvlist_clone , +.Nm nvlist_dump , +.Nm nvlist_fdump , +.Nm nvlist_size , +.Nm nvlist_pack , +.Nm nvlist_unpack , +.Nm nvlist_send , +.Nm nvlist_recv , +.Nm nvlist_xfer , +.Nm nvlist_next , +.Nm nvlist_add , +.Nm nvlist_move , +.Nm nvlist_get , +.Nm nvlist_take +.Nd "library for name/value pairs" +.Sh LIBRARY +.Lb libnv +.Sh SYNOPSIS +.In nv.h +.Ft "nvlist_t *" +.Fn nvlist_create "int flags" +.Ft void +.Fn nvlist_destroy "nvlist_t *nvl" +.Ft int +.Fn nvlist_error "const nvlist_t *nvl" +.Ft bool +.Fn nvlist_empty "const nvlist_t *nvl" +.\" +.Ft "nvlist_t *" +.Fn nvlist_clone "const nvlist_t *nvl" +.\" +.Ft void +.Fn nvlist_dump "const nvlist_t *nvl, int fd" +.Ft void +.Fn nvlist_fdump "const nvlist_t *nvl, FILE *fp" +.\" +.Ft size_t +.Fn nvlist_size "const nvlist_t *nvl" +.Ft "void *" +.Fn nvlist_pack "const nvlist_t *nvl" "size_t *sizep" +.Ft "nvlist_t *" +.Fn nvlist_unpack "const void *buf" "size_t size" +.\" +.Ft int +.Fn nvlist_send "int sock" "const nvlist_t *nvl" +.Ft "nvlist_t *" +.Fn nvlist_recv "int sock" +.Ft "nvlist_t *" +.Fn nvlist_xfer "int sock" "nvlist_t *nvl" +.\" +.Ft "const char *" +.Fn nvlist_next "const nvlist_t *nvl" "int *typep" "void **cookiep" +.\" +.Ft bool 
+.Fn nvlist_exists "const nvlist_t *nvl" "const char *name" +.Ft bool +.Fn nvlist_exists_type "const nvlist_t *nvl" "const char *name" "int type" +.Ft bool +.Fn nvlist_exists_null "const nvlist_t *nvl" "const char *name" +.Ft bool +.Fn nvlist_exists_bool "const nvlist_t *nvl" "const char *name" +.Ft bool +.Fn nvlist_exists_number "const nvlist_t *nvl" "const char *name" +.Ft bool +.Fn nvlist_exists_string "const nvlist_t *nvl" "const char *name" +.Ft bool +.Fn nvlist_exists_nvlist "const nvlist_t *nvl" "const char *name" +.Ft bool +.Fn nvlist_exists_descriptor "const nvlist_t *nvl" "const char *name" +.Ft bool +.Fn nvlist_exists_binary "const nvlist_t *nvl" "const char *name" +.\" +.Ft void +.Fn nvlist_add_null "nvlist_t *nvl" "const char *name" +.Ft void +.Fn nvlist_add_bool "nvlist_t *nvl" "const char *name" "bool value" +.Ft void +.Fn nvlist_add_number "nvlist_t *nvl" "const char *name" "uint64_t value" +.Ft void +.Fn nvlist_add_string "nvlist_t *nvl" "const char *name" "const char *value" +.Ft void +.Fn nvlist_add_stringf "nvlist_t *nvl" "const char *name" "const char *valuefmt" "..." 
+.Ft void +.Fn nvlist_add_stringv "nvlist_t *nvl" "const char *name" "const char *valuefmt" "va_list valueap" +.Ft void +.Fn nvlist_add_nvlist "nvlist_t *nvl" "const char *name" "const nvlist_t *value" +.Ft void +.Fn nvlist_add_descriptor "nvlist_t *nvl" "const char *name" "int value" +.Ft void +.Fn nvlist_add_binary "nvlist_t *nvl" "const char *name" "const void *value" "size_t size" +.\" +.Ft void +.Fn nvlist_move_string "nvlist_t *nvl" "const char *name" "char *value" +.Ft void +.Fn nvlist_move_nvlist "nvlist_t *nvl" "const char *name" "nvlist_t *value" +.Ft void +.Fn nvlist_move_descriptor "nvlist_t *nvl" "const char *name" "int value" +.Ft void +.Fn nvlist_move_binary "nvlist_t *nvl" "const char *name" "void *value" "size_t size" +.\" +.Ft bool +.Fn nvlist_get_bool "const nvlist_t *nvl" "const char *name" +.Ft uint64_t +.Fn nvlist_get_number "const nvlist_t *nvl" "const char *name" +.Ft "const char *" +.Fn nvlist_get_string "const nvlist_t *nvl" "const char *name" +.Ft "const nvlist_t *" +.Fn nvlist_get_nvlist "const nvlist_t *nvl" "const char *name" +.Ft int +.Fn nvlist_get_descriptor "const nvlist_t *nvl" "const char *name" +.Ft "const void *" +.Fn nvlist_get_binary "const nvlist_t *nvl" "const char *name" "size_t *sizep" +.\" +.Ft bool +.Fn nvlist_take_bool "nvlist_t *nvl" "const char *name" +.Ft uint64_t +.Fn nvlist_take_number "nvlist_t *nvl" "const char *name" +.Ft "char *" +.Fn nvlist_take_string "nvlist_t *nvl" "const char *name" +.Ft "nvlist_t *" +.Fn nvlist_take_nvlist "nvlist_t *nvl" "const char *name" +.Ft int +.Fn nvlist_take_descriptor "nvlist_t *nvl" "const char *name" +.Ft "void *" +.Fn nvlist_take_binary "nvlist_t *nvl" "const char *name" "size_t *sizep" +.\" +.Ft void +.Fn nvlist_free "nvlist_t *nvl" "const char *name" +.Ft void +.Fn nvlist_free_type "nvlist_t *nvl" "const char *name" "int type" +.\" +.Ft void +.Fn nvlist_free_null "nvlist_t *nvl" "const char *name" +.Ft void +.Fn nvlist_free_bool "nvlist_t *nvl" "const char *name" +.Ft void 
+.Fn nvlist_free_number "nvlist_t *nvl" "const char *name" +.Ft void +.Fn nvlist_free_string "nvlist_t *nvl" "const char *name" +.Ft void +.Fn nvlist_free_nvlist "nvlist_t *nvl" "const char *name" +.Ft void +.Fn nvlist_free_descriptor "nvlist_t *nvl" "const char *name" +.Ft void +.Fn nvlist_free_binary "nvlist_t *nvl" "const char *name" +.Sh DESCRIPTION +The +.Nm libnv +library allows to easily manage name value pairs as well as send and receive +them over sockets. +A group (list) of name value pairs is called an +.Nm nvlist . +The API supports the following data types: +.Bl -ohang -offset indent +.It Sy null ( NV_TYPE_NULL ) +There is no data associated with the name. +.It Sy bool ( NV_TYPE_BOOL ) +The value can be either +.Dv true +or +.Dv false . +.It Sy number ( NV_TYPE_NUMBER ) +The value is a number stored as +.Vt uint64_t . +.It Sy string ( NV_TYPE_STRING ) +The value is a C string. +.It Sy nvlist ( NV_TYPE_NVLIST ) +The value is a nested nvlist. +.It Sy descriptor ( NV_TYPE_DESCRIPTOR ) +The value is a file descriptor. +Note that file descriptors can be sent only over +.Xr unix 4 +domain sockets. +.It Sy binary ( NV_TYPE_BINARY ) +The value is a binary buffer. +.El +.Pp +The +.Fn nvlist_create +function allocates memory and initializes an nvlist. +.Pp +The following flag can be provided: +.Pp +.Bl -tag -width "NV_FLAG_IGNORE_CASE" -compact -offset indent +.It Dv NV_FLAG_IGNORE_CASE +Perform case-insensitive lookups of provided names. +.El +.Pp +The +.Fn nvlist_destroy +function destroys the given nvlist. +Function does nothing if +.Dv NULL +nvlist is provided. +Function never modifies the +.Va errno +global variable. +.Pp +The +.Fn nvlist_error +function returns any error value that the nvlist accumulated. +If the given nvlist is +.Dv NULL +the +.Er ENOMEM +error will be returned. +.Pp +The +.Fn nvlist_empty +function returns +.Dv true +if the given nvlist is empty and +.Dv false +otherwise. +The nvlist must not be in error state.
+.Pp +The +.Fn nvlist_clone +function clones the given nvlist. +The clone shares no resources with its origin. +This also means that all file descriptors that are part of the nvlist will be +duplicated with the +.Xr dup 2 +system call before placing them in the clone. +.Pp +The +.Fn nvlist_dump +dumps nvlist content for debugging purposes to the given file descriptor +.Fa fd . +.Pp +The +.Fn nvlist_fdump +dumps nvlist content for debugging purposes to the given file stream +.Fa fp . +.Pp +The +.Fn nvlist_size +function returns the size of the given nvlist after converting it to binary +buffer with the +.Fn nvlist_pack +function. +.Pp +The +.Fn nvlist_pack +function converts the given nvlist to a binary buffer. +The function allocates memory for the buffer, which should be freed with the +.Xr free 3 +function. +If the +.Fa sizep +argument is not +.Dv NULL , +the size of the buffer will be stored there. +The function returns +.Dv NULL +in case of an error (allocation failure). +If the nvlist contains any file descriptors +.Dv NULL +will be returned. +The nvlist must not be in error state. +.Pp +The +.Fn nvlist_unpack +function converts the given buffer to the nvlist. +The function returns +.Dv NULL +in case of an error. +.Pp +The +.Fn nvlist_send +function sends the given nvlist over the socket given by the +.Fa sock +argument. +Note that nvlist that contains file descriptors can only be sent over +.Xr unix 4 +domain sockets. +.Pp +The +.Fn nvlist_recv +function receives nvlist over the socket given by the +.Fa sock +argument. +.Pp +The +.Fn nvlist_xfer +function sends the given nvlist over the socket given by the +.Fa sock +argument and receives nvlist over the same socket. +The given nvlist is always destroyed. +.Pp +The +.Fn nvlist_next +function iterates over the given nvlist returning names and types of subsequent +elements. +The +.Fa cookiep +argument allows the function to figure out which element should be returned +next.
+The +.Va *cookiep +should be set to +.Dv NULL +for the first call and should not be changed later. +Returning +.Dv NULL +means there are no more elements on the nvlist. +The +.Fa typep +argument can be NULL. +Elements may not be removed from the nvlist while traversing it. +The nvlist must not be in error state. +.Pp +The +.Fn nvlist_exists +function returns +.Dv true +if element of the given name exists (regardless of its type) or +.Dv false +otherwise. +The nvlist must not be in error state. +.Pp +The +.Fn nvlist_exists_type +function returns +.Dv true +if element of the given name and the given type exists or +.Dv false +otherwise. +The nvlist must not be in error state. +.Pp +The +.Fn nvlist_exists_null , +.Fn nvlist_exists_bool , +.Fn nvlist_exists_number , +.Fn nvlist_exists_string , +.Fn nvlist_exists_nvlist , +.Fn nvlist_exists_descriptor , +.Fn nvlist_exists_binary +functions return +.Dv true +if element of the given name and the given type determined by the function name +exists or +.Dv false +otherwise. +The nvlist must not be in error state. +.Pp +The +.Fn nvlist_add_null , +.Fn nvlist_add_bool , +.Fn nvlist_add_number , +.Fn nvlist_add_string , +.Fn nvlist_add_stringf , +.Fn nvlist_add_stringv , +.Fn nvlist_add_nvlist , +.Fn nvlist_add_descriptor , +.Fn nvlist_add_binary +functions add element to the given nvlist. +When adding string or binary buffer the functions will allocate memory +and copy the data over. +When adding nvlist, the nvlist will be cloned and clone will be added. +When adding descriptor, the descriptor will be duplicated using the +.Xr dup 2 +system call and the new descriptor will be added. +If an error occurs while adding new element, internal error is set which can be +examined using the +.Fn nvlist_error +function.
+.Pp +The +.Fn nvlist_move_string , +.Fn nvlist_move_nvlist , +.Fn nvlist_move_descriptor , +.Fn nvlist_move_binary +functions add new element to the given nvlist, but unlike +.Fn nvlist_add_ +functions they will consume the given resource. +If an error occurs while adding new element, the resource is destroyed and +internal error is set which can be examined using the +.Fn nvlist_error +function. +.Pp +The +.Fn nvlist_get_bool , +.Fn nvlist_get_number , +.Fn nvlist_get_string , +.Fn nvlist_get_nvlist , +.Fn nvlist_get_descriptor , +.Fn nvlist_get_binary +functions allow to obtain value of the given name. +In case of string, nvlist, descriptor or binary, returned resource should +not be modified - it still belongs to the nvlist. +If element of the given name does not exist, the program will be aborted. +To avoid that the caller should check for existence before trying to obtain +the value or use +.Xr dnvlist 3 +extension, which allows to provide default value for a missing element. +The nvlist must not be in error state. +.Pp +The +.Fn nvlist_take_bool , +.Fn nvlist_take_number , +.Fn nvlist_take_string , +.Fn nvlist_take_nvlist , +.Fn nvlist_take_descriptor , +.Fn nvlist_take_binary +functions return value associated with the given name and remove the element +from the nvlist. +In case of string and binary values, the caller is responsible for freeing returned +memory using the +.Xr free 3 +function. +In case of nvlist, the caller is responsible for destroying returned nvlist +using the +.Fn nvlist_destroy +function. +In case of descriptor, the caller is responsible for closing returned descriptor +using the +.Xr close 2 +system call. +If element of the given name does not exist, the program will be aborted. +To avoid that the caller should check for existence before trying to obtain +the value or use +.Xr dnvlist 3 +extension, which allows to provide default value for a missing element. +The nvlist must not be in error state.
+.Pp +The +.Fn nvlist_free +function removes element of the given name from the nvlist (regardless of its type) +and frees all resources associated with it. +If element of the given name does not exist, the program will be aborted. +The nvlist must not be in error state. +.Pp +The +.Fn nvlist_free_type +function removes element of the given name and the given type from the nvlist +and frees all resources associated with it. +If element of the given name and the given type does not exist, the program +will be aborted. +The nvlist must not be in error state. +.Pp +The +.Fn nvlist_free_null , +.Fn nvlist_free_bool , +.Fn nvlist_free_number , +.Fn nvlist_free_string , +.Fn nvlist_free_nvlist , +.Fn nvlist_free_descriptor , +.Fn nvlist_free_binary +functions remove element of the given name and the given type determined by the +function name from the nvlist and free all resources associated with it. +If element of the given name and the given type does not exist, the program +will be aborted. +The nvlist must not be in error state. +.Sh EXAMPLES +The following example demonstrates how to prepare an nvlist and send it over +.Xr unix 4 +domain socket. +.Bd -literal +nvlist_t *nvl; +int fd; + +fd = open("/tmp/foo", O_RDONLY); +if (fd < 0) + err(1, "open(\\"/tmp/foo\\") failed"); + +nvl = nvlist_create(0); +/* + * There is no need to check if nvlist_create() succeeded, + * as the nvlist_add_() functions can cope. + * If it failed, nvlist_send() will fail. + */ +nvlist_add_string(nvl, "filename", "/tmp/foo"); +nvlist_add_number(nvl, "flags", O_RDONLY); +/* + * We just want to send the descriptor, so we can give it + * for the nvlist to consume (that's why we use nvlist_move + * not nvlist_add).
+ */ +nvlist_move_descriptor(nvl, "fd", fd); +if (nvlist_send(sock, nvl) < 0) { + nvlist_destroy(nvl); + err(1, "nvlist_send() failed"); +} +nvlist_destroy(nvl); +.Ed +.Pp +Receiving nvlist and getting data: +.Bd -literal +nvlist_t *nvl; +const char *command; +char *filename; +int fd; + +nvl = nvlist_recv(sock); +if (nvl == NULL) + err(1, "nvlist_recv() failed"); + +/* For command we take pointer to nvlist's buffer. */ +command = nvlist_get_string(nvl, "command"); +/* + * For filename we remove it from the nvlist and take + * ownership of the buffer. + */ +filename = nvlist_take_string(nvl, "filename"); +/* The same for the descriptor. */ +fd = nvlist_take_descriptor(nvl, "fd"); + +printf("command=%s filename=%s fd=%d\n", command, filename, fd); + +nvlist_destroy(nvl); +free(filename); +close(fd); +/* command was freed by nvlist_destroy() */ +.Ed +.Pp +Iterating over nvlist: +.Bd -literal +nvlist_t *nvl; +const char *name; +void *cookie; +int type; + +nvl = nvlist_recv(sock); +if (nvl == NULL) + err(1, "nvlist_recv() failed"); + +cookie = NULL; +while ((name = nvlist_next(nvl, &type, &cookie)) != NULL) { + printf("%s=", name); + switch (type) { + case NV_TYPE_NUMBER: + printf("%ju", (uintmax_t)nvlist_get_number(nvl, name)); + break; + case NV_TYPE_STRING: + printf("%s", nvlist_get_string(nvl, name)); + break; + default: + printf("N/A"); + break; + } + printf("\\n"); +} +.Ed +.Sh SEE ALSO +.Xr close 2 , +.Xr dup 2 , +.Xr open 2 , +.Xr err 3 , +.Xr free 3 , +.Xr printf 3 , +.Xr unix 4 +.Sh HISTORY +The +.Nm libnv +library appeared in +.Fx 11.0 . +.Sh AUTHORS +.An -nosplit +The +.Nm libnv +library was implemented by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship from the FreeBSD Foundation. diff --git a/lib/libnv/nv.h b/lib/libnv/nv.h new file mode 100644 index 0000000..85718c5 --- /dev/null +++ b/lib/libnv/nv.h @@ -0,0 +1,273 @@ +/*- + * Copyright (c) 2009-2013 The FreeBSD Foundation + * All rights reserved. 
+ * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/lib/libnv/nv.h 258065 2013-11-12 19:39:14Z pjd $ + */ + +#ifndef _NV_H_ +#define _NV_H_ + +#include + +#include +#include +#include +#include + +#ifndef _NVLIST_T_DECLARED +#define _NVLIST_T_DECLARED +struct nvlist; + +typedef struct nvlist nvlist_t; +#endif + +#define NV_NAME_MAX 2048 + +#define NV_TYPE_NONE 0 + +#define NV_TYPE_NULL 1 +#define NV_TYPE_BOOL 2 +#define NV_TYPE_NUMBER 3 +#define NV_TYPE_STRING 4 +#define NV_TYPE_NVLIST 5 +#define NV_TYPE_DESCRIPTOR 6 +#define NV_TYPE_BINARY 7 + +/* + * Perform case-insensitive lookups of provided names. 
+ */ +#define NV_FLAG_IGNORE_CASE 0x01 + +nvlist_t *nvlist_create(int flags); +void nvlist_destroy(nvlist_t *nvl); +int nvlist_error(const nvlist_t *nvl); +bool nvlist_empty(const nvlist_t *nvl); + +nvlist_t *nvlist_clone(const nvlist_t *nvl); + +void nvlist_dump(const nvlist_t *nvl, int fd); +void nvlist_fdump(const nvlist_t *nvl, FILE *fp); + +size_t nvlist_size(const nvlist_t *nvl); +void *nvlist_pack(const nvlist_t *nvl, size_t *sizep); +nvlist_t *nvlist_unpack(const void *buf, size_t size); + +int nvlist_send(int sock, const nvlist_t *nvl); +nvlist_t *nvlist_recv(int sock); +nvlist_t *nvlist_xfer(int sock, nvlist_t *nvl); + +const char *nvlist_next(const nvlist_t *nvl, int *typep, void **cookiep); + +/* + * The nvlist_exists functions check if the given name (optionally of the given + * type) exists on nvlist. + */ + +bool nvlist_exists(const nvlist_t *nvl, const char *name); +bool nvlist_exists_type(const nvlist_t *nvl, const char *name, int type); + +bool nvlist_exists_null(const nvlist_t *nvl, const char *name); +bool nvlist_exists_bool(const nvlist_t *nvl, const char *name); +bool nvlist_exists_number(const nvlist_t *nvl, const char *name); +bool nvlist_exists_string(const nvlist_t *nvl, const char *name); +bool nvlist_exists_nvlist(const nvlist_t *nvl, const char *name); +bool nvlist_exists_descriptor(const nvlist_t *nvl, const char *name); +bool nvlist_exists_binary(const nvlist_t *nvl, const char *name); + +/* + * The nvlist_add functions add the given name/value pair. + * If a pointer is provided, nvlist_add will internally allocate memory for the + * given data (in other words it won't consume provided buffer). 
+ */ + +void nvlist_add_null(nvlist_t *nvl, const char *name); +void nvlist_add_bool(nvlist_t *nvl, const char *name, bool value); +void nvlist_add_number(nvlist_t *nvl, const char *name, uint64_t value); +void nvlist_add_string(nvlist_t *nvl, const char *name, const char *value); +void nvlist_add_stringf(nvlist_t *nvl, const char *name, const char *valuefmt, ...) __printflike(3, 4); +void nvlist_add_stringv(nvlist_t *nvl, const char *name, const char *valuefmt, va_list valueap) __printflike(3, 0); +void nvlist_add_nvlist(nvlist_t *nvl, const char *name, const nvlist_t *value); +void nvlist_add_descriptor(nvlist_t *nvl, const char *name, int value); +void nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value, size_t size); + +/* + * The nvlist_move functions add the given name/value pair. + * The functions consumes provided buffer. + */ + +void nvlist_move_string(nvlist_t *nvl, const char *name, char *value); +void nvlist_move_nvlist(nvlist_t *nvl, const char *name, nvlist_t *value); +void nvlist_move_descriptor(nvlist_t *nvl, const char *name, int value); +void nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size); + +/* + * The nvlist_get functions returns value associated with the given name. + * If it returns a pointer, the pointer represents internal buffer and should + * not be freed by the caller. + */ + +bool nvlist_get_bool(const nvlist_t *nvl, const char *name); +uint64_t nvlist_get_number(const nvlist_t *nvl, const char *name); +const char *nvlist_get_string(const nvlist_t *nvl, const char *name); +const nvlist_t *nvlist_get_nvlist(const nvlist_t *nvl, const char *name); +int nvlist_get_descriptor(const nvlist_t *nvl, const char *name); +const void *nvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep); + +/* + * The nvlist_take functions returns value associated with the given name and + * remove the given entry from the nvlist. + * The caller is responsible for freeing received data. 
+ */ + +bool nvlist_take_bool(nvlist_t *nvl, const char *name); +uint64_t nvlist_take_number(nvlist_t *nvl, const char *name); +char *nvlist_take_string(nvlist_t *nvl, const char *name); +nvlist_t *nvlist_take_nvlist(nvlist_t *nvl, const char *name); +int nvlist_take_descriptor(nvlist_t *nvl, const char *name); +void *nvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep); + +/* + * The nvlist_free functions removes the given name/value pair from the nvlist + * and frees memory associated with it. + */ + +void nvlist_free(nvlist_t *nvl, const char *name); +void nvlist_free_type(nvlist_t *nvl, const char *name, int type); + +void nvlist_free_null(nvlist_t *nvl, const char *name); +void nvlist_free_bool(nvlist_t *nvl, const char *name); +void nvlist_free_number(nvlist_t *nvl, const char *name); +void nvlist_free_string(nvlist_t *nvl, const char *name); +void nvlist_free_nvlist(nvlist_t *nvl, const char *name); +void nvlist_free_descriptor(nvlist_t *nvl, const char *name); +void nvlist_free_binary(nvlist_t *nvl, const char *name); + +/* + * Below are the same functions, but which operate on format strings and + * variable argument lists. + */ + +bool nvlist_existsf(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +bool nvlist_existsf_type(const nvlist_t *nvl, int type, const char *namefmt, ...) __printflike(3, 4); + +bool nvlist_existsf_null(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +bool nvlist_existsf_bool(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +bool nvlist_existsf_number(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +bool nvlist_existsf_string(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +bool nvlist_existsf_nvlist(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +bool nvlist_existsf_descriptor(const nvlist_t *nvl, const char *namefmt, ...) 
__printflike(2, 3); +bool nvlist_existsf_binary(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); + +bool nvlist_existsv(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +bool nvlist_existsv_type(const nvlist_t *nvl, int type, const char *namefmt, va_list nameap) __printflike(3, 0); + +bool nvlist_existsv_null(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +bool nvlist_existsv_bool(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +bool nvlist_existsv_number(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +bool nvlist_existsv_string(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +bool nvlist_existsv_nvlist(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +bool nvlist_existsv_descriptor(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +bool nvlist_existsv_binary(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); + +void nvlist_addf_null(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +void nvlist_addf_bool(nvlist_t *nvl, bool value, const char *namefmt, ...) __printflike(3, 4); +void nvlist_addf_number(nvlist_t *nvl, uint64_t value, const char *namefmt, ...) __printflike(3, 4); +void nvlist_addf_string(nvlist_t *nvl, const char *value, const char *namefmt, ...) __printflike(3, 4); +void nvlist_addf_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt, ...) __printflike(3, 4); +void nvlist_addf_descriptor(nvlist_t *nvl, int value, const char *namefmt, ...) __printflike(3, 4); +void nvlist_addf_binary(nvlist_t *nvl, const void *value, size_t size, const char *namefmt, ...) 
__printflike(4, 5); + +void nvlist_addv_null(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +void nvlist_addv_bool(nvlist_t *nvl, bool value, const char *namefmt, va_list nameap) __printflike(3, 0); +void nvlist_addv_number(nvlist_t *nvl, uint64_t value, const char *namefmt, va_list nameap) __printflike(3, 0); +void nvlist_addv_string(nvlist_t *nvl, const char *value, const char *namefmt, va_list nameap) __printflike(3, 0); +void nvlist_addv_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt, va_list nameap) __printflike(3, 0); +void nvlist_addv_descriptor(nvlist_t *nvl, int value, const char *namefmt, va_list nameap) __printflike(3, 0); +void nvlist_addv_binary(nvlist_t *nvl, const void *value, size_t size, const char *namefmt, va_list nameap) __printflike(4, 0); + +void nvlist_movef_string(nvlist_t *nvl, char *value, const char *namefmt, ...) __printflike(3, 4); +void nvlist_movef_nvlist(nvlist_t *nvl, nvlist_t *value, const char *namefmt, ...) __printflike(3, 4); +void nvlist_movef_descriptor(nvlist_t *nvl, int value, const char *namefmt, ...) __printflike(3, 4); +void nvlist_movef_binary(nvlist_t *nvl, void *value, size_t size, const char *namefmt, ...) __printflike(4, 5); + +void nvlist_movev_string(nvlist_t *nvl, char *value, const char *namefmt, va_list nameap) __printflike(3, 0); +void nvlist_movev_nvlist(nvlist_t *nvl, nvlist_t *value, const char *namefmt, va_list nameap) __printflike(3, 0); +void nvlist_movev_descriptor(nvlist_t *nvl, int value, const char *namefmt, va_list nameap) __printflike(3, 0); +void nvlist_movev_binary(nvlist_t *nvl, void *value, size_t size, const char *namefmt, va_list nameap) __printflike(4, 0); + +bool nvlist_getf_bool(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +uint64_t nvlist_getf_number(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +const char *nvlist_getf_string(const nvlist_t *nvl, const char *namefmt, ...) 
__printflike(2, 3); +const nvlist_t *nvlist_getf_nvlist(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +int nvlist_getf_descriptor(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +const void *nvlist_getf_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt, ...) __printflike(3, 4); + +bool nvlist_getv_bool(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +uint64_t nvlist_getv_number(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +const char *nvlist_getv_string(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +const nvlist_t *nvlist_getv_nvlist(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +int nvlist_getv_descriptor(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +const void *nvlist_getv_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt, va_list nameap) __printflike(3, 0); + +bool nvlist_takef_bool(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +uint64_t nvlist_takef_number(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +char *nvlist_takef_string(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +nvlist_t *nvlist_takef_nvlist(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +int nvlist_takef_descriptor(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +void *nvlist_takef_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt, ...) 
__printflike(3, 4); + +bool nvlist_takev_bool(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +uint64_t nvlist_takev_number(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +char *nvlist_takev_string(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +nvlist_t *nvlist_takev_nvlist(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +int nvlist_takev_descriptor(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +void *nvlist_takev_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt, va_list nameap) __printflike(3, 0); + +void nvlist_freef(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +void nvlist_freef_type(nvlist_t *nvl, int type, const char *namefmt, ...) __printflike(3, 4); + +void nvlist_freef_null(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +void nvlist_freef_bool(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +void nvlist_freef_number(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +void nvlist_freef_string(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +void nvlist_freef_nvlist(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +void nvlist_freef_descriptor(nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); +void nvlist_freef_binary(nvlist_t *nvl, const char *namefmt, ...) 
__printflike(2, 3); + +void nvlist_freev(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +void nvlist_freev_type(nvlist_t *nvl, int type, const char *namefmt, va_list nameap) __printflike(3, 0); + +void nvlist_freev_null(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +void nvlist_freev_bool(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +void nvlist_freev_number(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +void nvlist_freev_string(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +void nvlist_freev_nvlist(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +void nvlist_freev_descriptor(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); +void nvlist_freev_binary(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); + +#endif /* !_NV_H_ */ diff --git a/lib/libnv/nv_impl.h b/lib/libnv/nv_impl.h new file mode 100644 index 0000000..0c41180 --- /dev/null +++ b/lib/libnv/nv_impl.h @@ -0,0 +1,130 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/lib/libnv/nv_impl.h 258065 2013-11-12 19:39:14Z pjd $ + */ + +#ifndef _NV_IMPL_H_ +#define _NV_IMPL_H_ + +#ifndef _NVPAIR_T_DECLARED +#define _NVPAIR_T_DECLARED +struct nvpair; + +typedef struct nvpair nvpair_t; +#endif + +#define NV_TYPE_FIRST NV_TYPE_NULL +#define NV_TYPE_LAST NV_TYPE_BINARY + +#define NV_FLAG_BIG_ENDIAN 0x80 + +int *nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp); +size_t nvlist_ndescriptors(const nvlist_t *nvl); + +nvpair_t *nvlist_first_nvpair(const nvlist_t *nvl); +nvpair_t *nvlist_next_nvpair(const nvlist_t *nvl, const nvpair_t *nvp); +nvpair_t *nvlist_prev_nvpair(const nvlist_t *nvl, const nvpair_t *nvp); + +void nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp); + +void nvlist_move_nvpair(nvlist_t *nvl, nvpair_t *nvp); + +const nvpair_t *nvlist_get_nvpair(const nvlist_t *nvl, const char *name); + +nvpair_t *nvlist_take_nvpair(nvlist_t *nvl, const char *name); + +/* Function removes the given nvpair from the nvlist. 
*/ +void nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp); + +void nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp); + +int nvpair_type(const nvpair_t *nvp); +const char *nvpair_name(const nvpair_t *nvp); + +nvpair_t *nvpair_clone(const nvpair_t *nvp); + +nvpair_t *nvpair_create_null(const char *name); +nvpair_t *nvpair_create_bool(const char *name, bool value); +nvpair_t *nvpair_create_number(const char *name, uint64_t value); +nvpair_t *nvpair_create_string(const char *name, const char *value); +nvpair_t *nvpair_create_stringf(const char *name, const char *valuefmt, ...) __printflike(2, 3); +nvpair_t *nvpair_create_stringv(const char *name, const char *valuefmt, va_list valueap) __printflike(2, 0); +nvpair_t *nvpair_create_nvlist(const char *name, const nvlist_t *value); +nvpair_t *nvpair_create_descriptor(const char *name, int value); +nvpair_t *nvpair_create_binary(const char *name, const void *value, size_t size); + +nvpair_t *nvpair_move_string(const char *name, char *value); +nvpair_t *nvpair_move_nvlist(const char *name, nvlist_t *value); +nvpair_t *nvpair_move_descriptor(const char *name, int value); +nvpair_t *nvpair_move_binary(const char *name, void *value, size_t size); + +bool nvpair_get_bool(const nvpair_t *nvp); +uint64_t nvpair_get_number(const nvpair_t *nvp); +const char *nvpair_get_string(const nvpair_t *nvp); +const nvlist_t *nvpair_get_nvlist(const nvpair_t *nvp); +int nvpair_get_descriptor(const nvpair_t *nvp); +const void *nvpair_get_binary(const nvpair_t *nvp, size_t *sizep); + +void nvpair_free(nvpair_t *nvp); + +const nvpair_t *nvlist_getf_nvpair(const nvlist_t *nvl, const char *namefmt, ...) __printflike(2, 3); + +const nvpair_t *nvlist_getv_nvpair(const nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); + +nvpair_t *nvlist_takef_nvpair(nvlist_t *nvl, const char *namefmt, ...) 
__printflike(2, 3); + +nvpair_t *nvlist_takev_nvpair(nvlist_t *nvl, const char *namefmt, va_list nameap) __printflike(2, 0); + +nvpair_t *nvpair_createf_null(const char *namefmt, ...) __printflike(1, 2); +nvpair_t *nvpair_createf_bool(bool value, const char *namefmt, ...) __printflike(2, 3); +nvpair_t *nvpair_createf_number(uint64_t value, const char *namefmt, ...) __printflike(2, 3); +nvpair_t *nvpair_createf_string(const char *value, const char *namefmt, ...) __printflike(2, 3); +nvpair_t *nvpair_createf_nvlist(const nvlist_t *value, const char *namefmt, ...) __printflike(2, 3); +nvpair_t *nvpair_createf_descriptor(int value, const char *namefmt, ...) __printflike(2, 3); +nvpair_t *nvpair_createf_binary(const void *value, size_t size, const char *namefmt, ...) __printflike(3, 4); + +nvpair_t *nvpair_createv_null(const char *namefmt, va_list nameap) __printflike(1, 0); +nvpair_t *nvpair_createv_bool(bool value, const char *namefmt, va_list nameap) __printflike(2, 0); +nvpair_t *nvpair_createv_number(uint64_t value, const char *namefmt, va_list nameap) __printflike(2, 0); +nvpair_t *nvpair_createv_string(const char *value, const char *namefmt, va_list nameap) __printflike(2, 0); +nvpair_t *nvpair_createv_nvlist(const nvlist_t *value, const char *namefmt, va_list nameap) __printflike(2, 0); +nvpair_t *nvpair_createv_descriptor(int value, const char *namefmt, va_list nameap) __printflike(2, 0); +nvpair_t *nvpair_createv_binary(const void *value, size_t size, const char *namefmt, va_list nameap) __printflike(3, 0); + +nvpair_t *nvpair_movef_string(char *value, const char *namefmt, ...) __printflike(2, 3); +nvpair_t *nvpair_movef_nvlist(nvlist_t *value, const char *namefmt, ...) __printflike(2, 3); +nvpair_t *nvpair_movef_descriptor(int value, const char *namefmt, ...) __printflike(2, 3); +nvpair_t *nvpair_movef_binary(void *value, size_t size, const char *namefmt, ...) 
__printflike(3, 4); + +nvpair_t *nvpair_movev_string(char *value, const char *namefmt, va_list nameap) __printflike(2, 0); +nvpair_t *nvpair_movev_nvlist(nvlist_t *value, const char *namefmt, va_list nameap) __printflike(2, 0); +nvpair_t *nvpair_movev_descriptor(int value, const char *namefmt, va_list nameap) __printflike(2, 0); +nvpair_t *nvpair_movev_binary(void *value, size_t size, const char *namefmt, va_list nameap) __printflike(3, 0); + +#endif /* !_NV_IMPL_H_ */ diff --git a/lib/libnv/nvlist.c b/lib/libnv/nvlist.c new file mode 100644 index 0000000..f4e1d6f --- /dev/null +++ b/lib/libnv/nvlist.c @@ -0,0 +1,1707 @@ +/*- + * Copyright (c) 2009-2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: head/lib/libnv/nvlist.c 264021 2014-04-01 21:30:54Z jilles $"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#define _WITH_DPRINTF +#include +#include +#include +#include + +#ifdef HAVE_PJDLOG +#include +#endif + +#include "msgio.h" +#include "nv.h" +#include "nv_impl.h" +#include "nvlist_impl.h" +#include "nvpair_impl.h" + +#ifndef HAVE_PJDLOG +#include +#define PJDLOG_ASSERT(...) assert(__VA_ARGS__) +#define PJDLOG_RASSERT(expr, ...) assert(expr) +#define PJDLOG_ABORT(...) 
do { \ + fprintf(stderr, "%s:%u: ", __FILE__, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + abort(); \ +} while (0) +#endif + +#define NV_FLAG_PRIVATE_MASK (NV_FLAG_BIG_ENDIAN) +#define NV_FLAG_PUBLIC_MASK (NV_FLAG_IGNORE_CASE) +#define NV_FLAG_ALL_MASK (NV_FLAG_PRIVATE_MASK | NV_FLAG_PUBLIC_MASK) + +#define NVLIST_MAGIC 0x6e766c /* "nvl" */ +struct nvlist { + int nvl_magic; + int nvl_error; + int nvl_flags; + struct nvl_head nvl_head; +}; + +#define NVLIST_ASSERT(nvl) do { \ + PJDLOG_ASSERT((nvl) != NULL); \ + PJDLOG_ASSERT((nvl)->nvl_magic == NVLIST_MAGIC); \ +} while (0) + +#define NVPAIR_ASSERT(nvp) nvpair_assert(nvp) + +#define NVLIST_HEADER_MAGIC 0x6c +#define NVLIST_HEADER_VERSION 0x00 +struct nvlist_header { + uint8_t nvlh_magic; + uint8_t nvlh_version; + uint8_t nvlh_flags; + uint64_t nvlh_descriptors; + uint64_t nvlh_size; +} __packed; + +nvlist_t * +nvlist_create(int flags) +{ + nvlist_t *nvl; + + PJDLOG_ASSERT((flags & ~(NV_FLAG_PUBLIC_MASK)) == 0); + + if ((nvl = malloc(sizeof(*nvl))) == NULL) + return (NULL); /* out of memory; nvlist_error(NULL) reports ENOMEM */ + nvl->nvl_error = 0; + nvl->nvl_flags = flags; + TAILQ_INIT(&nvl->nvl_head); + nvl->nvl_magic = NVLIST_MAGIC; + return (nvl); +} + +void +nvlist_destroy(nvlist_t *nvl) +{ + nvpair_t *nvp; + int serrno; + + if (nvl == NULL) + return; + + serrno = errno; + + NVLIST_ASSERT(nvl); + + while ((nvp = nvlist_first_nvpair(nvl)) != NULL) { + nvlist_remove_nvpair(nvl, nvp); + nvpair_free(nvp); + } + nvl->nvl_magic = 0; + free(nvl); + + errno = serrno; +} + +int +nvlist_error(const nvlist_t *nvl) +{ + + if (nvl == NULL) + return (ENOMEM); + + NVLIST_ASSERT(nvl); + + return (nvl->nvl_error); +} + +bool +nvlist_empty(const nvlist_t *nvl) +{ + + NVLIST_ASSERT(nvl); + PJDLOG_ASSERT(nvl->nvl_error == 0); + + return (nvlist_first_nvpair(nvl) == NULL); +} + +static void +nvlist_report_missing(int type, const char *namefmt, va_list nameap) +{ + char *name; + + vasprintf(&name, namefmt, nameap); + PJDLOG_ABORT("Element '%s' of type %s doesn't exist.", + name != 
NULL ? name : "N/A", nvpair_type_string(type)); +} + +static nvpair_t * +nvlist_findv(const nvlist_t *nvl, int type, const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + char *name; + + NVLIST_ASSERT(nvl); + PJDLOG_ASSERT(nvl->nvl_error == 0); + PJDLOG_ASSERT(type == NV_TYPE_NONE || + (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST)); + + if (vasprintf(&name, namefmt, nameap) < 0) + return (NULL); + + for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + if (type != NV_TYPE_NONE && nvpair_type(nvp) != type) + continue; + if ((nvl->nvl_flags & NV_FLAG_IGNORE_CASE) != 0) { + if (strcasecmp(nvpair_name(nvp), name) != 0) + continue; + } else { + if (strcmp(nvpair_name(nvp), name) != 0) + continue; + } + break; + } + + free(name); + + if (nvp == NULL) + errno = ENOENT; + + return (nvp); +} + +bool +nvlist_exists_type(const nvlist_t *nvl, const char *name, int type) +{ + + return (nvlist_existsf_type(nvl, type, "%s", name)); +} + +bool +nvlist_existsf_type(const nvlist_t *nvl, int type, const char *namefmt, ...) +{ + va_list nameap; + bool ret; + + va_start(nameap, namefmt); + ret = nvlist_existsv_type(nvl, type, namefmt, nameap); + va_end(nameap); + + return (ret); +} + +bool +nvlist_existsv_type(const nvlist_t *nvl, int type, const char *namefmt, + va_list nameap) +{ + + NVLIST_ASSERT(nvl); + PJDLOG_ASSERT(nvl->nvl_error == 0); + PJDLOG_ASSERT(type == NV_TYPE_NONE || + (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST)); + + return (nvlist_findv(nvl, type, namefmt, nameap) != NULL); +} + +void +nvlist_free_type(nvlist_t *nvl, const char *name, int type) +{ + + nvlist_freef_type(nvl, type, "%s", name); +} + +void +nvlist_freef_type(nvlist_t *nvl, int type, const char *namefmt, ...) 
+{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_freev_type(nvl, type, namefmt, nameap); + va_end(nameap); +} + +void +nvlist_freev_type(nvlist_t *nvl, int type, const char *namefmt, va_list nameap) +{ + va_list cnameap; + nvpair_t *nvp; + + NVLIST_ASSERT(nvl); + PJDLOG_ASSERT(nvl->nvl_error == 0); + PJDLOG_ASSERT(type == NV_TYPE_NONE || + (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST)); + + va_copy(cnameap, nameap); + nvp = nvlist_findv(nvl, type, namefmt, cnameap); + va_end(cnameap); + if (nvp != NULL) + nvlist_free_nvpair(nvl, nvp); + else + nvlist_report_missing(type, namefmt, nameap); +} + +nvlist_t * +nvlist_clone(const nvlist_t *nvl) +{ + nvlist_t *newnvl; + nvpair_t *nvp, *newnvp; + + NVLIST_ASSERT(nvl); + + if (nvl->nvl_error != 0) { + errno = nvl->nvl_error; + return (NULL); + } + + newnvl = nvlist_create(nvl->nvl_flags & NV_FLAG_PUBLIC_MASK); + for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + newnvp = nvpair_clone(nvp); + if (newnvp == NULL) + break; + nvlist_move_nvpair(newnvl, newnvp); + } + if (nvp != NULL) { + nvlist_destroy(newnvl); + return (NULL); + } + return (newnvl); +} + +/* + * Dump content of nvlist. + */ +static void +nvlist_xdump(const nvlist_t *nvl, int fd, int level) +{ + nvpair_t *nvp; + + PJDLOG_ASSERT(level < 3); + + if (nvlist_error(nvl) != 0) { + dprintf(fd, "%*serror: %d\n", level * 4, "", + nvlist_error(nvl)); + return; + } + + for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + dprintf(fd, "%*s%s (%s):", level * 4, "", nvpair_name(nvp), + nvpair_type_string(nvpair_type(nvp))); + switch (nvpair_type(nvp)) { + case NV_TYPE_NULL: + dprintf(fd, " null\n"); + break; + case NV_TYPE_BOOL: + dprintf(fd, " %s\n", nvpair_get_bool(nvp) ? 
+ "TRUE" : "FALSE"); + break; + case NV_TYPE_NUMBER: + dprintf(fd, " %ju (%jd) (0x%jx)\n", + (uintmax_t)nvpair_get_number(nvp), + (intmax_t)nvpair_get_number(nvp), + (uintmax_t)nvpair_get_number(nvp)); + break; + case NV_TYPE_STRING: + dprintf(fd, " [%s]\n", nvpair_get_string(nvp)); + break; + case NV_TYPE_NVLIST: + dprintf(fd, "\n"); + nvlist_xdump(nvpair_get_nvlist(nvp), fd, level + 1); + break; + case NV_TYPE_DESCRIPTOR: + dprintf(fd, " %d\n", nvpair_get_descriptor(nvp)); + break; + case NV_TYPE_BINARY: + { + const unsigned char *binary; + unsigned int ii; + size_t size; + + binary = nvpair_get_binary(nvp, &size); + dprintf(fd, " %zu ", size); + for (ii = 0; ii < size; ii++) + dprintf(fd, "%02hhx", binary[ii]); + dprintf(fd, "\n"); + break; + } + default: + PJDLOG_ABORT("Unknown type: %d.", nvpair_type(nvp)); + } + } +} + +void +nvlist_dump(const nvlist_t *nvl, int fd) +{ + + nvlist_xdump(nvl, fd, 0); +} + +void +nvlist_fdump(const nvlist_t *nvl, FILE *fp) +{ + + fflush(fp); + nvlist_dump(nvl, fileno(fp)); +} + +/* + * The function obtains size of the nvlist after nvlist_pack(). + * Additional argument 'level' allows to track how deep are we as we obtain + * size of the NV_TYPE_NVLIST elements using recursion. We allow at most + * three levels of recursion. 
+ */ +static size_t +nvlist_xsize(const nvlist_t *nvl, int level) +{ + const nvpair_t *nvp; + size_t size; + + NVLIST_ASSERT(nvl); + PJDLOG_ASSERT(nvl->nvl_error == 0); + PJDLOG_ASSERT(level < 3); + + size = sizeof(struct nvlist_header); + for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + size += nvpair_header_size(); + size += strlen(nvpair_name(nvp)) + 1; + if (nvpair_type(nvp) == NV_TYPE_NVLIST) + size += nvlist_xsize(nvpair_get_nvlist(nvp), level + 1); + else + size += nvpair_size(nvp); + } + + return (size); +} + +size_t +nvlist_size(const nvlist_t *nvl) +{ + + return (nvlist_xsize(nvl, 0)); +} + +static int * +nvlist_xdescriptors(const nvlist_t *nvl, int *descs, int level) +{ + const nvpair_t *nvp; + + NVLIST_ASSERT(nvl); + PJDLOG_ASSERT(nvl->nvl_error == 0); + PJDLOG_ASSERT(level < 3); + + for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + switch (nvpair_type(nvp)) { + case NV_TYPE_DESCRIPTOR: + *descs = nvpair_get_descriptor(nvp); + descs++; + break; + case NV_TYPE_NVLIST: + descs = nvlist_xdescriptors(nvpair_get_nvlist(nvp), + descs, level + 1); + break; + } + } + + return (descs); +} + +int * +nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp) +{ + size_t nitems; + int *fds; + + nitems = nvlist_ndescriptors(nvl); + fds = malloc(sizeof(fds[0]) * (nitems + 1)); + if (fds == NULL) + return (NULL); + if (nitems > 0) + nvlist_xdescriptors(nvl, fds, 0); + fds[nitems] = -1; + if (nitemsp != NULL) + *nitemsp = nitems; + return (fds); +} + +static size_t +nvlist_xndescriptors(const nvlist_t *nvl, int level) +{ + const nvpair_t *nvp; + size_t ndescs; + + NVLIST_ASSERT(nvl); + PJDLOG_ASSERT(nvl->nvl_error == 0); + PJDLOG_ASSERT(level < 3); + + ndescs = 0; + for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + switch (nvpair_type(nvp)) { + case NV_TYPE_DESCRIPTOR: + ndescs++; + break; + case NV_TYPE_NVLIST: + ndescs += 
nvlist_xndescriptors(nvpair_get_nvlist(nvp), + level + 1); + break; + } + } + + return (ndescs); +} + +size_t +nvlist_ndescriptors(const nvlist_t *nvl) +{ + + return (nvlist_xndescriptors(nvl, 0)); +} + +static unsigned char * +nvlist_pack_header(const nvlist_t *nvl, unsigned char *ptr, size_t *leftp) +{ + struct nvlist_header nvlhdr; + + NVLIST_ASSERT(nvl); + + nvlhdr.nvlh_magic = NVLIST_HEADER_MAGIC; + nvlhdr.nvlh_version = NVLIST_HEADER_VERSION; + nvlhdr.nvlh_flags = nvl->nvl_flags; +#if BYTE_ORDER == BIG_ENDIAN + nvlhdr.nvlh_flags |= NV_FLAG_BIG_ENDIAN; +#endif + nvlhdr.nvlh_descriptors = nvlist_ndescriptors(nvl); + nvlhdr.nvlh_size = *leftp - sizeof(nvlhdr); + PJDLOG_ASSERT(*leftp >= sizeof(nvlhdr)); + memcpy(ptr, &nvlhdr, sizeof(nvlhdr)); + ptr += sizeof(nvlhdr); + *leftp -= sizeof(nvlhdr); + + return (ptr); +} + +void * +nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep) +{ + unsigned char *buf, *ptr; + size_t left, size; + nvpair_t *nvp; + + NVLIST_ASSERT(nvl); + + if (nvl->nvl_error != 0) { + errno = nvl->nvl_error; + return (NULL); + } + + size = nvlist_size(nvl); + buf = malloc(size); + if (buf == NULL) + return (NULL); + + ptr = buf; + left = size; + + ptr = nvlist_pack_header(nvl, ptr, &left); + + for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; + nvp = nvlist_next_nvpair(nvl, nvp)) { + ptr = nvpair_pack(nvp, ptr, fdidxp, &left); + if (ptr == NULL) { + free(buf); + return (NULL); + } + } + + if (sizep != NULL) + *sizep = size; + return (buf); +} + +void * +nvlist_pack(const nvlist_t *nvl, size_t *sizep) +{ + + NVLIST_ASSERT(nvl); + + if (nvl->nvl_error != 0) { + errno = nvl->nvl_error; + return (NULL); + } + + if (nvlist_ndescriptors(nvl) > 0) { + errno = EOPNOTSUPP; + return (NULL); + } + + return (nvlist_xpack(nvl, NULL, sizep)); +} + +static bool +nvlist_check_header(struct nvlist_header *nvlhdrp) +{ + + if (nvlhdrp->nvlh_magic != NVLIST_HEADER_MAGIC) { + errno = EINVAL; + return (false); + } + if ((nvlhdrp->nvlh_flags & 
~NV_FLAG_ALL_MASK) != 0) { + errno = EINVAL; + return (false); + } +#if BYTE_ORDER == BIG_ENDIAN + if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) == 0) { + nvlhdrp->nvlh_size = le64toh(nvlhdrp->nvlh_size); + nvlhdrp->nvlh_descriptors = le64toh(nvlhdrp->nvlh_descriptors); + } +#else + if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) != 0) { + nvlhdrp->nvlh_size = be64toh(nvlhdrp->nvlh_size); + nvlhdrp->nvlh_descriptors = be64toh(nvlhdrp->nvlh_descriptors); + } +#endif + return (true); +} + +static const unsigned char * +nvlist_unpack_header(nvlist_t *nvl, const unsigned char *ptr, size_t nfds, + int *flagsp, size_t *leftp) +{ + struct nvlist_header nvlhdr; + + if (*leftp < sizeof(nvlhdr)) + goto failed; + + memcpy(&nvlhdr, ptr, sizeof(nvlhdr)); + + if (!nvlist_check_header(&nvlhdr)) + goto failed; + + if (nvlhdr.nvlh_size != *leftp - sizeof(nvlhdr)) + goto failed; + + /* + * nvlh_descriptors might be smaller than nfds in embedded nvlists. + */ + if (nvlhdr.nvlh_descriptors > nfds) + goto failed; + + if ((nvlhdr.nvlh_flags & ~NV_FLAG_ALL_MASK) != 0) + goto failed; + + nvl->nvl_flags = (nvlhdr.nvlh_flags & NV_FLAG_PUBLIC_MASK); + + ptr += sizeof(nvlhdr); + *flagsp = (int)nvlhdr.nvlh_flags; + *leftp -= sizeof(nvlhdr); + + return (ptr); +failed: + errno = EINVAL; + return (NULL); +} + +nvlist_t * +nvlist_xunpack(const void *buf, size_t size, const int *fds, size_t nfds) +{ + const unsigned char *ptr; + nvlist_t *nvl; + nvpair_t *nvp; + size_t left; + int flags; + + left = size; + ptr = buf; + + nvl = nvlist_create(0); + if (nvl == NULL) + goto failed; + + ptr = nvlist_unpack_header(nvl, ptr, nfds, &flags, &left); + if (ptr == NULL) + goto failed; + + while (left > 0) { + ptr = nvpair_unpack(flags, ptr, &left, fds, nfds, &nvp); + if (ptr == NULL) + goto failed; + nvlist_move_nvpair(nvl, nvp); + } + + return (nvl); +failed: + nvlist_destroy(nvl); + return (NULL); +} + +nvlist_t * +nvlist_unpack(const void *buf, size_t size) +{ + + return (nvlist_xunpack(buf, size, NULL, 
0)); +} + +int +nvlist_send(int sock, const nvlist_t *nvl) +{ + size_t datasize, nfds; + int *fds; + void *data; + int64_t fdidx; + int serrno, ret; + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return (-1); + } + + fds = nvlist_descriptors(nvl, &nfds); + if (fds == NULL) + return (-1); + + ret = -1; + data = NULL; + fdidx = 0; + + data = nvlist_xpack(nvl, &fdidx, &datasize); + if (data == NULL) + goto out; + + if (buf_send(sock, data, datasize) == -1) + goto out; + + if (nfds > 0) { + if (fd_send(sock, fds, nfds) == -1) + goto out; + } + + ret = 0; +out: + serrno = errno; + free(fds); + free(data); + errno = serrno; + return (ret); +} + +nvlist_t * +nvlist_recv(int sock) +{ + struct nvlist_header nvlhdr; + nvlist_t *nvl, *ret; + unsigned char *buf; + size_t nfds, size; + int serrno, *fds; + + if (buf_recv(sock, &nvlhdr, sizeof(nvlhdr)) == -1) + return (NULL); + + if (!nvlist_check_header(&nvlhdr)) + return (NULL); + + nfds = (size_t)nvlhdr.nvlh_descriptors; + size = sizeof(nvlhdr) + (size_t)nvlhdr.nvlh_size; + + buf = malloc(size); + if (buf == NULL) + return (NULL); + + memcpy(buf, &nvlhdr, sizeof(nvlhdr)); + + ret = NULL; + fds = NULL; + + if (buf_recv(sock, buf + sizeof(nvlhdr), size - sizeof(nvlhdr)) == -1) + goto out; + + if (nfds > 0) { + fds = malloc(nfds * sizeof(fds[0])); + if (fds == NULL) + goto out; + if (fd_recv(sock, fds, nfds) == -1) + goto out; + } + + nvl = nvlist_xunpack(buf, size, fds, nfds); + if (nvl == NULL) + goto out; + + ret = nvl; +out: + serrno = errno; + free(buf); + free(fds); + errno = serrno; + + return (ret); +} + +nvlist_t * +nvlist_xfer(int sock, nvlist_t *nvl) +{ + + if (nvlist_send(sock, nvl) < 0) { + nvlist_destroy(nvl); + return (NULL); + } + nvlist_destroy(nvl); + return (nvlist_recv(sock)); +} + +nvpair_t * +nvlist_first_nvpair(const nvlist_t *nvl) +{ + + NVLIST_ASSERT(nvl); + + return (TAILQ_FIRST(&nvl->nvl_head)); +} + +nvpair_t * +nvlist_next_nvpair(const nvlist_t *nvl, const nvpair_t *nvp) +{ + 
nvpair_t *retnvp; + + NVLIST_ASSERT(nvl); + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); + + retnvp = nvpair_next(nvp); + PJDLOG_ASSERT(retnvp == NULL || nvpair_nvlist(retnvp) == nvl); + + return (retnvp); + +} + +nvpair_t * +nvlist_prev_nvpair(const nvlist_t *nvl, const nvpair_t *nvp) +{ + nvpair_t *retnvp; + + NVLIST_ASSERT(nvl); + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); + + retnvp = nvpair_prev(nvp); + PJDLOG_ASSERT(retnvp == NULL || nvpair_nvlist(retnvp) == nvl); /* tolerate NULL like nvlist_next_nvpair() does */ + + return (retnvp); +} + +const char * +nvlist_next(const nvlist_t *nvl, int *typep, void **cookiep) +{ + nvpair_t *nvp; + + NVLIST_ASSERT(nvl); + PJDLOG_ASSERT(cookiep != NULL); + + if (*cookiep == NULL) + nvp = nvlist_first_nvpair(nvl); + else + nvp = nvlist_next_nvpair(nvl, *cookiep); + if (nvp == NULL) + return (NULL); + if (typep != NULL) + *typep = nvpair_type(nvp); + *cookiep = nvp; + return (nvpair_name(nvp)); +} + +bool +nvlist_exists(const nvlist_t *nvl, const char *name) +{ + + return (nvlist_existsf(nvl, "%s", name)); +} + +#define NVLIST_EXISTS(type) \ +bool \ +nvlist_exists_##type(const nvlist_t *nvl, const char *name) \ +{ \ + \ + return (nvlist_existsf_##type(nvl, "%s", name)); \ +} + +NVLIST_EXISTS(null) +NVLIST_EXISTS(bool) +NVLIST_EXISTS(number) +NVLIST_EXISTS(string) +NVLIST_EXISTS(nvlist) +NVLIST_EXISTS(descriptor) +NVLIST_EXISTS(binary) + +#undef NVLIST_EXISTS + +bool +nvlist_existsf(const nvlist_t *nvl, const char *namefmt, ...) +{ + va_list nameap; + bool ret; + + va_start(nameap, namefmt); + ret = nvlist_existsv(nvl, namefmt, nameap); + va_end(nameap); + return (ret); +} + +#define NVLIST_EXISTSF(type) \ +bool \ +nvlist_existsf_##type(const nvlist_t *nvl, const char *namefmt, ...) 
\ +{ \ + va_list nameap; \ + bool ret; \ + \ + va_start(nameap, namefmt); \ + ret = nvlist_existsv_##type(nvl, namefmt, nameap); \ + va_end(nameap); \ + return (ret); \ +} + +NVLIST_EXISTSF(null) +NVLIST_EXISTSF(bool) +NVLIST_EXISTSF(number) +NVLIST_EXISTSF(string) +NVLIST_EXISTSF(nvlist) +NVLIST_EXISTSF(descriptor) +NVLIST_EXISTSF(binary) + +#undef NVLIST_EXISTSF + +bool +nvlist_existsv(const nvlist_t *nvl, const char *namefmt, va_list nameap) +{ + + return (nvlist_findv(nvl, NV_TYPE_NONE, namefmt, nameap) != NULL); +} + +#define NVLIST_EXISTSV(type, TYPE) \ +bool \ +nvlist_existsv_##type(const nvlist_t *nvl, const char *namefmt, \ + va_list nameap) \ +{ \ + \ + return (nvlist_findv(nvl, NV_TYPE_##TYPE, namefmt, nameap) != \ + NULL); \ +} + +NVLIST_EXISTSV(null, NULL) +NVLIST_EXISTSV(bool, BOOL) +NVLIST_EXISTSV(number, NUMBER) +NVLIST_EXISTSV(string, STRING) +NVLIST_EXISTSV(nvlist, NVLIST) +NVLIST_EXISTSV(descriptor, DESCRIPTOR) +NVLIST_EXISTSV(binary, BINARY) + +#undef NVLIST_EXISTSV + +void +nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp) +{ + nvpair_t *newnvp; + + NVPAIR_ASSERT(nvp); + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return; + } + if (nvlist_exists(nvl, nvpair_name(nvp))) { + nvl->nvl_error = errno = EEXIST; + return; + } + + newnvp = nvpair_clone(nvp); + if (newnvp == NULL) { + nvl->nvl_error = errno = (errno != 0 ? 
errno : ENOMEM); + return; + } + + nvpair_insert(&nvl->nvl_head, newnvp, nvl); +} + +void +nvlist_add_null(nvlist_t *nvl, const char *name) +{ + + nvlist_addf_null(nvl, "%s", name); +} + +void +nvlist_add_bool(nvlist_t *nvl, const char *name, bool value) +{ + + nvlist_addf_bool(nvl, value, "%s", name); +} + +void +nvlist_add_number(nvlist_t *nvl, const char *name, uint64_t value) +{ + + nvlist_addf_number(nvl, value, "%s", name); +} + +void +nvlist_add_string(nvlist_t *nvl, const char *name, const char *value) +{ + + nvlist_addf_string(nvl, value, "%s", name); +} + +void +nvlist_add_stringf(nvlist_t *nvl, const char *name, const char *valuefmt, ...) +{ + va_list valueap; + + va_start(valueap, valuefmt); + nvlist_add_stringv(nvl, name, valuefmt, valueap); + va_end(valueap); +} + +void +nvlist_add_stringv(nvlist_t *nvl, const char *name, const char *valuefmt, + va_list valueap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_create_stringv(name, valuefmt, valueap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_add_nvlist(nvlist_t *nvl, const char *name, const nvlist_t *value) +{ + + nvlist_addf_nvlist(nvl, value, "%s", name); +} + +void +nvlist_add_descriptor(nvlist_t *nvl, const char *name, int value) +{ + + nvlist_addf_descriptor(nvl, value, "%s", name); +} + +void +nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value, + size_t size) +{ + + nvlist_addf_binary(nvl, value, size, "%s", name); +} + +void +nvlist_addf_null(nvlist_t *nvl, const char *namefmt, ...) +{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_addv_null(nvl, namefmt, nameap); + va_end(nameap); +} + +void +nvlist_addf_bool(nvlist_t *nvl, bool value, const char *namefmt, ...) 
+{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_addv_bool(nvl, value, namefmt, nameap); + va_end(nameap); +} + +void +nvlist_addf_number(nvlist_t *nvl, uint64_t value, const char *namefmt, ...) +{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_addv_number(nvl, value, namefmt, nameap); + va_end(nameap); +} + +void +nvlist_addf_string(nvlist_t *nvl, const char *value, const char *namefmt, ...) +{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_addv_string(nvl, value, namefmt, nameap); + va_end(nameap); +} + +void +nvlist_addf_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt, + ...) +{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_addv_nvlist(nvl, value, namefmt, nameap); + va_end(nameap); +} + +void +nvlist_addf_descriptor(nvlist_t *nvl, int value, const char *namefmt, ...) +{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_addv_descriptor(nvl, value, namefmt, nameap); + va_end(nameap); +} + +void +nvlist_addf_binary(nvlist_t *nvl, const void *value, size_t size, + const char *namefmt, ...) +{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_addv_binary(nvl, value, size, namefmt, nameap); + va_end(nameap); +} + +void +nvlist_addv_null(nvlist_t *nvl, const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_createv_null(namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_addv_bool(nvlist_t *nvl, bool value, const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_createv_bool(value, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? 
errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_addv_number(nvlist_t *nvl, uint64_t value, const char *namefmt, + va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_createv_number(value, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_addv_string(nvlist_t *nvl, const char *value, const char *namefmt, + va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_createv_string(value, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_addv_nvlist(nvlist_t *nvl, const nvlist_t *value, const char *namefmt, + va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_createv_nvlist(value, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_addv_descriptor(nvlist_t *nvl, int value, const char *namefmt, + va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_createv_descriptor(value, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_addv_binary(nvlist_t *nvl, const void *value, size_t size, + const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_createv_binary(value, size, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? 
errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_move_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvpair_nvlist(nvp) == NULL); + + if (nvlist_error(nvl) != 0) { + nvpair_free(nvp); + errno = nvlist_error(nvl); + return; + } + if (nvlist_exists(nvl, nvpair_name(nvp))) { + nvpair_free(nvp); + nvl->nvl_error = errno = EEXIST; + return; + } + + nvpair_insert(&nvl->nvl_head, nvp, nvl); +} + +#define NVLIST_MOVE(vtype, type) \ +void \ +nvlist_move_##type(nvlist_t *nvl, const char *name, vtype value) \ +{ \ + \ + nvlist_movef_##type(nvl, value, "%s", name); \ +} + +NVLIST_MOVE(char *, string) +NVLIST_MOVE(nvlist_t *, nvlist) +NVLIST_MOVE(int, descriptor) + +#undef NVLIST_MOVE + +void +nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size) +{ + + nvlist_movef_binary(nvl, value, size, "%s", name); +} + +#define NVLIST_MOVEF(vtype, type) \ +void \ +nvlist_movef_##type(nvlist_t *nvl, vtype value, const char *namefmt, \ + ...) \ +{ \ + va_list nameap; \ + \ + va_start(nameap, namefmt); \ + nvlist_movev_##type(nvl, value, namefmt, nameap); \ + va_end(nameap); \ +} + +NVLIST_MOVEF(char *, string) +NVLIST_MOVEF(nvlist_t *, nvlist) +NVLIST_MOVEF(int, descriptor) + +#undef NVLIST_MOVEF + +void +nvlist_movef_binary(nvlist_t *nvl, void *value, size_t size, + const char *namefmt, ...) +{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_movev_binary(nvl, value, size, namefmt, nameap); + va_end(nameap); +} + +void +nvlist_movev_string(nvlist_t *nvl, char *value, const char *namefmt, + va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + free(value); + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_movev_string(value, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? 
errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_movev_nvlist(nvlist_t *nvl, nvlist_t *value, const char *namefmt, + va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + nvlist_destroy(value); + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_movev_nvlist(value, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_movev_descriptor(nvlist_t *nvl, int value, const char *namefmt, + va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + close(value); + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_movev_descriptor(value, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +void +nvlist_movev_binary(nvlist_t *nvl, void *value, size_t size, + const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + + if (nvlist_error(nvl) != 0) { + free(value); + errno = nvlist_error(nvl); + return; + } + + nvp = nvpair_movev_binary(value, size, namefmt, nameap); + if (nvp == NULL) + nvl->nvl_error = errno = (errno != 0 ? errno : ENOMEM); + else + nvlist_move_nvpair(nvl, nvp); +} + +#define NVLIST_GET(ftype, type) \ +ftype \ +nvlist_get_##type(const nvlist_t *nvl, const char *name) \ +{ \ + \ + return (nvlist_getf_##type(nvl, "%s", name)); \ +} + +NVLIST_GET(const nvpair_t *, nvpair) +NVLIST_GET(bool, bool) +NVLIST_GET(uint64_t, number) +NVLIST_GET(const char *, string) +NVLIST_GET(const nvlist_t *, nvlist) +NVLIST_GET(int, descriptor) + +#undef NVLIST_GET + +const void * +nvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep) +{ + + return (nvlist_getf_binary(nvl, sizep, "%s", name)); +} + +#define NVLIST_GETF(ftype, type) \ +ftype \ +nvlist_getf_##type(const nvlist_t *nvl, const char *namefmt, ...) 
\ +{ \ + va_list nameap; \ + ftype value; \ + \ + va_start(nameap, namefmt); \ + value = nvlist_getv_##type(nvl, namefmt, nameap); \ + va_end(nameap); \ + \ + return (value); \ +} + +NVLIST_GETF(const nvpair_t *, nvpair) +NVLIST_GETF(bool, bool) +NVLIST_GETF(uint64_t, number) +NVLIST_GETF(const char *, string) +NVLIST_GETF(const nvlist_t *, nvlist) +NVLIST_GETF(int, descriptor) + +#undef NVLIST_GETF + +const void * +nvlist_getf_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt, ...) +{ + va_list nameap; + const void *value; + + va_start(nameap, namefmt); + value = nvlist_getv_binary(nvl, sizep, namefmt, nameap); + va_end(nameap); + + return (value); +} + +const nvpair_t * +nvlist_getv_nvpair(const nvlist_t *nvl, const char *namefmt, va_list nameap) +{ + + return (nvlist_findv(nvl, NV_TYPE_NONE, namefmt, nameap)); +} + +#define NVLIST_GETV(ftype, type, TYPE) \ +ftype \ +nvlist_getv_##type(const nvlist_t *nvl, const char *namefmt, \ + va_list nameap) \ +{ \ + va_list cnameap; \ + const nvpair_t *nvp; \ + \ + va_copy(cnameap, nameap); \ + nvp = nvlist_findv(nvl, NV_TYPE_##TYPE, namefmt, cnameap); \ + va_end(cnameap); \ + if (nvp == NULL) \ + nvlist_report_missing(NV_TYPE_##TYPE, namefmt, nameap); \ + return (nvpair_get_##type(nvp)); \ +} + +NVLIST_GETV(bool, bool, BOOL) +NVLIST_GETV(uint64_t, number, NUMBER) +NVLIST_GETV(const char *, string, STRING) +NVLIST_GETV(const nvlist_t *, nvlist, NVLIST) +NVLIST_GETV(int, descriptor, DESCRIPTOR) + +#undef NVLIST_GETV + +const void * +nvlist_getv_binary(const nvlist_t *nvl, size_t *sizep, const char *namefmt, + va_list nameap) +{ + va_list cnameap; + const nvpair_t *nvp; + + va_copy(cnameap, nameap); + nvp = nvlist_findv(nvl, NV_TYPE_BINARY, namefmt, cnameap); + va_end(cnameap); + if (nvp == NULL) + nvlist_report_missing(NV_TYPE_BINARY, namefmt, nameap); + + return (nvpair_get_binary(nvp, sizep)); +} + +#define NVLIST_TAKE(ftype, type) \ +ftype \ +nvlist_take_##type(nvlist_t *nvl, const char *name) \ +{ \ + \ + 
return (nvlist_takef_##type(nvl, "%s", name)); \ +} + +NVLIST_TAKE(nvpair_t *, nvpair) +NVLIST_TAKE(bool, bool) +NVLIST_TAKE(uint64_t, number) +NVLIST_TAKE(char *, string) +NVLIST_TAKE(nvlist_t *, nvlist) +NVLIST_TAKE(int, descriptor) + +#undef NVLIST_TAKE + +void * +nvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep) +{ + + return (nvlist_takef_binary(nvl, sizep, "%s", name)); +} + +#define NVLIST_TAKEF(ftype, type) \ +ftype \ +nvlist_takef_##type(nvlist_t *nvl, const char *namefmt, ...) \ +{ \ + va_list nameap; \ + ftype value; \ + \ + va_start(nameap, namefmt); \ + value = nvlist_takev_##type(nvl, namefmt, nameap); \ + va_end(nameap); \ + \ + return (value); \ +} + +NVLIST_TAKEF(nvpair_t *, nvpair) +NVLIST_TAKEF(bool, bool) +NVLIST_TAKEF(uint64_t, number) +NVLIST_TAKEF(char *, string) +NVLIST_TAKEF(nvlist_t *, nvlist) +NVLIST_TAKEF(int, descriptor) + +#undef NVLIST_TAKEF + +void * +nvlist_takef_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt, ...) +{ + va_list nameap; + void *value; + + va_start(nameap, namefmt); + value = nvlist_takev_binary(nvl, sizep, namefmt, nameap); + va_end(nameap); + + return (value); +} + +nvpair_t * +nvlist_takev_nvpair(nvlist_t *nvl, const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + + nvp = nvlist_findv(nvl, NV_TYPE_NONE, namefmt, nameap); + if (nvp != NULL) + nvlist_remove_nvpair(nvl, nvp); + return (nvp); +} + +#define NVLIST_TAKEV(ftype, type, TYPE) \ +ftype \ +nvlist_takev_##type(nvlist_t *nvl, const char *namefmt, va_list nameap) \ +{ \ + va_list cnameap; \ + nvpair_t *nvp; \ + ftype value; \ + \ + va_copy(cnameap, nameap); \ + nvp = nvlist_findv(nvl, NV_TYPE_##TYPE, namefmt, cnameap); \ + va_end(cnameap); \ + if (nvp == NULL) \ + nvlist_report_missing(NV_TYPE_##TYPE, namefmt, nameap); \ + value = (ftype)(intptr_t)nvpair_get_##type(nvp); \ + nvlist_remove_nvpair(nvl, nvp); \ + nvpair_free_structure(nvp); \ + return (value); \ +} + +NVLIST_TAKEV(bool, bool, BOOL) +NVLIST_TAKEV(uint64_t, number, 
NUMBER) +NVLIST_TAKEV(char *, string, STRING) +NVLIST_TAKEV(nvlist_t *, nvlist, NVLIST) +NVLIST_TAKEV(int, descriptor, DESCRIPTOR) + +#undef NVLIST_TAKEV + +void * +nvlist_takev_binary(nvlist_t *nvl, size_t *sizep, const char *namefmt, + va_list nameap) +{ + va_list cnameap; + nvpair_t *nvp; + void *value; + + va_copy(cnameap, nameap); + nvp = nvlist_findv(nvl, NV_TYPE_BINARY, namefmt, cnameap); + va_end(cnameap); + if (nvp == NULL) + nvlist_report_missing(NV_TYPE_BINARY, namefmt, nameap); + + value = (void *)(intptr_t)nvpair_get_binary(nvp, sizep); + nvlist_remove_nvpair(nvl, nvp); + nvpair_free_structure(nvp); + return (value); +} + +void +nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + + NVLIST_ASSERT(nvl); + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); + + nvpair_remove(&nvl->nvl_head, nvp, nvl); +} + +void +nvlist_free(nvlist_t *nvl, const char *name) +{ + + nvlist_freef(nvl, "%s", name); +} + +#define NVLIST_FREE(type) \ +void \ +nvlist_free_##type(nvlist_t *nvl, const char *name) \ +{ \ + \ + nvlist_freef_##type(nvl, "%s", name); \ +} + +NVLIST_FREE(null) +NVLIST_FREE(bool) +NVLIST_FREE(number) +NVLIST_FREE(string) +NVLIST_FREE(nvlist) +NVLIST_FREE(descriptor) +NVLIST_FREE(binary) + +#undef NVLIST_FREE + +void +nvlist_freef(nvlist_t *nvl, const char *namefmt, ...) +{ + va_list nameap; + + va_start(nameap, namefmt); + nvlist_freev(nvl, namefmt, nameap); + va_end(nameap); +} + +#define NVLIST_FREEF(type) \ +void \ +nvlist_freef_##type(nvlist_t *nvl, const char *namefmt, ...) 
\ +{ \ + va_list nameap; \ + \ + va_start(nameap, namefmt); \ + nvlist_freev_##type(nvl, namefmt, nameap); \ + va_end(nameap); \ +} + +NVLIST_FREEF(null) +NVLIST_FREEF(bool) +NVLIST_FREEF(number) +NVLIST_FREEF(string) +NVLIST_FREEF(nvlist) +NVLIST_FREEF(descriptor) +NVLIST_FREEF(binary) + +#undef NVLIST_FREEF + +void +nvlist_freev(nvlist_t *nvl, const char *namefmt, va_list nameap) +{ + + nvlist_freev_type(nvl, NV_TYPE_NONE, namefmt, nameap); +} + +#define NVLIST_FREEV(type, TYPE) \ +void \ +nvlist_freev_##type(nvlist_t *nvl, const char *namefmt, va_list nameap) \ +{ \ + \ + nvlist_freev_type(nvl, NV_TYPE_##TYPE, namefmt, nameap); \ +} + +NVLIST_FREEV(null, NULL) +NVLIST_FREEV(bool, BOOL) +NVLIST_FREEV(number, NUMBER) +NVLIST_FREEV(string, STRING) +NVLIST_FREEV(nvlist, NVLIST) +NVLIST_FREEV(descriptor, DESCRIPTOR) +NVLIST_FREEV(binary, BINARY) +#undef NVLIST_FREEV + +void +nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp) +{ + + NVLIST_ASSERT(nvl); + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); + + nvlist_remove_nvpair(nvl, nvp); + nvpair_free(nvp); +} diff --git a/lib/libnv/nvlist_impl.h b/lib/libnv/nvlist_impl.h new file mode 100644 index 0000000..693b71c --- /dev/null +++ b/lib/libnv/nvlist_impl.h @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: head/lib/libnv/nvlist_impl.h 258065 2013-11-12 19:39:14Z pjd $ + */ + +#ifndef _NVLIST_IMPL_H_ +#define _NVLIST_IMPL_H_ + +#include + +#include "nv.h" + +void *nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep); +nvlist_t *nvlist_xunpack(const void *buf, size_t size, const int *fds, + size_t nfds); + +#endif /* !_NVLIST_IMPL_H_ */ diff --git a/lib/libnv/nvpair.c b/lib/libnv/nvpair.c new file mode 100644 index 0000000..8415e8b --- /dev/null +++ b/lib/libnv/nvpair.c @@ -0,0 +1,1333 @@ +/*- + * Copyright (c) 2009-2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: head/lib/libnv/nvpair.c 258594 2013-11-25 20:45:30Z pjd $"); + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_PJDLOG +#include +#endif + +#include "common_impl.h" +#include "nv.h" +#include "nv_impl.h" +#include "nvlist_impl.h" +#include "nvpair_impl.h" + +#ifndef HAVE_PJDLOG +#include +#define PJDLOG_ASSERT(...) assert(__VA_ARGS__) +#define PJDLOG_RASSERT(expr, ...) assert(expr) +#define PJDLOG_ABORT(...) abort() +#endif + +#define NVPAIR_MAGIC 0x6e7670 /* "nvp" */ +struct nvpair { + int nvp_magic; + char *nvp_name; + int nvp_type; + uint64_t nvp_data; + size_t nvp_datasize; + nvlist_t *nvp_list; /* Used for sanity checks. 
*/ + TAILQ_ENTRY(nvpair) nvp_next; +}; + +#define NVPAIR_ASSERT(nvp) do { \ + PJDLOG_ASSERT((nvp) != NULL); \ + PJDLOG_ASSERT((nvp)->nvp_magic == NVPAIR_MAGIC); \ +} while (0) + +struct nvpair_header { + uint8_t nvph_type; + uint16_t nvph_namesize; + uint64_t nvph_datasize; +} __packed; + + +void +nvpair_assert(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); +} + +const nvlist_t * +nvpair_nvlist(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + + return (nvp->nvp_list); +} + +nvpair_t * +nvpair_next(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_list != NULL); + + return (TAILQ_NEXT(nvp, nvp_next)); +} + +nvpair_t * +nvpair_prev(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_list != NULL); + + return (TAILQ_PREV(nvp, nvl_head, nvp_next)); +} + +void +nvpair_insert(struct nvl_head *head, nvpair_t *nvp, nvlist_t *nvl) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_list == NULL); + PJDLOG_ASSERT(!nvlist_exists(nvl, nvpair_name(nvp))); + + TAILQ_INSERT_TAIL(head, nvp, nvp_next); + nvp->nvp_list = nvl; +} + +void +nvpair_remove(struct nvl_head *head, nvpair_t *nvp, const nvlist_t *nvl) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_list == nvl); + + TAILQ_REMOVE(head, nvp, nvp_next); + nvp->nvp_list = NULL; +} + +nvpair_t * +nvpair_clone(const nvpair_t *nvp) +{ + nvpair_t *newnvp; + const char *name; + const void *data; + size_t datasize; + + NVPAIR_ASSERT(nvp); + + name = nvpair_name(nvp); + + switch (nvpair_type(nvp)) { + case NV_TYPE_NULL: + newnvp = nvpair_create_null(name); + break; + case NV_TYPE_BOOL: + newnvp = nvpair_create_bool(name, nvpair_get_bool(nvp)); + break; + case NV_TYPE_NUMBER: + newnvp = nvpair_create_number(name, nvpair_get_number(nvp)); + break; + case NV_TYPE_STRING: + newnvp = nvpair_create_string(name, nvpair_get_string(nvp)); + break; + case NV_TYPE_NVLIST: + newnvp = nvpair_create_nvlist(name, nvpair_get_nvlist(nvp)); + break; + case NV_TYPE_DESCRIPTOR: + newnvp = 
nvpair_create_descriptor(name, + nvpair_get_descriptor(nvp)); + break; + case NV_TYPE_BINARY: + data = nvpair_get_binary(nvp, &datasize); + newnvp = nvpair_create_binary(name, data, datasize); + break; + default: + PJDLOG_ABORT("Unknown type: %d.", nvpair_type(nvp)); + } + + return (newnvp); +} + +size_t +nvpair_header_size(void) +{ + + return (sizeof(struct nvpair_header)); +} + +size_t +nvpair_size(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + + return (nvp->nvp_datasize); +} + +static unsigned char * +nvpair_pack_header(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp) +{ + struct nvpair_header nvphdr; + size_t namesize; + + NVPAIR_ASSERT(nvp); + + nvphdr.nvph_type = nvp->nvp_type; + namesize = strlen(nvp->nvp_name) + 1; + PJDLOG_ASSERT(namesize > 0 && namesize <= UINT16_MAX); + nvphdr.nvph_namesize = namesize; + nvphdr.nvph_datasize = nvp->nvp_datasize; + PJDLOG_ASSERT(*leftp >= sizeof(nvphdr)); + memcpy(ptr, &nvphdr, sizeof(nvphdr)); + ptr += sizeof(nvphdr); + *leftp -= sizeof(nvphdr); + + PJDLOG_ASSERT(*leftp >= namesize); + memcpy(ptr, nvp->nvp_name, namesize); + ptr += namesize; + *leftp -= namesize; + + return (ptr); +} + +static unsigned char * +nvpair_pack_null(const nvpair_t *nvp, unsigned char *ptr, + size_t *leftp __unused) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NULL); + + return (ptr); +} + +static unsigned char * +nvpair_pack_bool(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp) +{ + uint8_t value; + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BOOL); + + value = (uint8_t)nvp->nvp_data; + + PJDLOG_ASSERT(*leftp >= sizeof(value)); + memcpy(ptr, &value, sizeof(value)); + ptr += sizeof(value); + *leftp -= sizeof(value); + + return (ptr); +} + +static unsigned char * +nvpair_pack_number(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp) +{ + uint64_t value; + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NUMBER); + + value = (uint64_t)nvp->nvp_data; + + 
PJDLOG_ASSERT(*leftp >= sizeof(value)); + memcpy(ptr, &value, sizeof(value)); + ptr += sizeof(value); + *leftp -= sizeof(value); + + return (ptr); +} + +static unsigned char * +nvpair_pack_string(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_STRING); + + PJDLOG_ASSERT(*leftp >= nvp->nvp_datasize); + memcpy(ptr, (const void *)(intptr_t)nvp->nvp_data, nvp->nvp_datasize); + ptr += nvp->nvp_datasize; + *leftp -= nvp->nvp_datasize; + + return (ptr); +} + +static unsigned char * +nvpair_pack_nvlist(const nvpair_t *nvp, unsigned char *ptr, int64_t *fdidxp, + size_t *leftp) +{ + unsigned char *data; + size_t size; + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NVLIST); + + if (nvp->nvp_datasize == 0) + return (ptr); + + data = nvlist_xpack((const nvlist_t *)(intptr_t)nvp->nvp_data, fdidxp, + &size); + if (data == NULL) + return (NULL); + + PJDLOG_ASSERT(size == nvp->nvp_datasize); + PJDLOG_ASSERT(*leftp >= nvp->nvp_datasize); + + memcpy(ptr, data, nvp->nvp_datasize); + free(data); + + ptr += nvp->nvp_datasize; + *leftp -= nvp->nvp_datasize; + + return (ptr); +} + +static unsigned char * +nvpair_pack_descriptor(const nvpair_t *nvp, unsigned char *ptr, int64_t *fdidxp, + size_t *leftp) +{ + int64_t value; + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR); + + value = (int64_t)nvp->nvp_data; + if (value != -1) { + /* + * If there is a real descriptor here, we change its number + * to position in the array of descriptors send via control + * message. 
+ */ + PJDLOG_ASSERT(fdidxp != NULL); + + value = *fdidxp; + (*fdidxp)++; + } + + PJDLOG_ASSERT(*leftp >= sizeof(value)); + memcpy(ptr, &value, sizeof(value)); + ptr += sizeof(value); + *leftp -= sizeof(value); + + return (ptr); +} + +static unsigned char * +nvpair_pack_binary(const nvpair_t *nvp, unsigned char *ptr, size_t *leftp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BINARY); + + PJDLOG_ASSERT(*leftp >= nvp->nvp_datasize); + memcpy(ptr, (const void *)(intptr_t)nvp->nvp_data, nvp->nvp_datasize); + ptr += nvp->nvp_datasize; + *leftp -= nvp->nvp_datasize; + + return (ptr); +} + +unsigned char * +nvpair_pack(nvpair_t *nvp, unsigned char *ptr, int64_t *fdidxp, size_t *leftp) +{ + + NVPAIR_ASSERT(nvp); + + /* + * We have to update datasize for NV_TYPE_NVLIST on every pack, + * so that proper datasize is placed into nvpair_header + * during the nvpair_pack_header() call below. + */ + if (nvp->nvp_type == NV_TYPE_NVLIST) { + if (nvp->nvp_data == 0) { + nvp->nvp_datasize = 0; + } else { + nvp->nvp_datasize = + nvlist_size((const nvlist_t *)(intptr_t)nvp->nvp_data); + } + } + + ptr = nvpair_pack_header(nvp, ptr, leftp); + if (ptr == NULL) + return (NULL); + + switch (nvp->nvp_type) { + case NV_TYPE_NULL: + ptr = nvpair_pack_null(nvp, ptr, leftp); + break; + case NV_TYPE_BOOL: + ptr = nvpair_pack_bool(nvp, ptr, leftp); + break; + case NV_TYPE_NUMBER: + ptr = nvpair_pack_number(nvp, ptr, leftp); + break; + case NV_TYPE_STRING: + ptr = nvpair_pack_string(nvp, ptr, leftp); + break; + case NV_TYPE_NVLIST: + ptr = nvpair_pack_nvlist(nvp, ptr, fdidxp, leftp); + break; + case NV_TYPE_DESCRIPTOR: + ptr = nvpair_pack_descriptor(nvp, ptr, fdidxp, leftp); + break; + case NV_TYPE_BINARY: + ptr = nvpair_pack_binary(nvp, ptr, leftp); + break; + default: + PJDLOG_ABORT("Invalid type (%d).", nvp->nvp_type); + } + + return (ptr); +} + +static const unsigned char * +nvpair_unpack_header(int flags, nvpair_t *nvp, const unsigned char *ptr, + size_t *leftp) +{ + 
struct nvpair_header nvphdr; + + if (*leftp < sizeof(nvphdr)) + goto failed; + + memcpy(&nvphdr, ptr, sizeof(nvphdr)); + ptr += sizeof(nvphdr); + *leftp -= sizeof(nvphdr); + +#if NV_TYPE_FIRST > 0 + if (nvphdr.nvph_type < NV_TYPE_FIRST) + goto failed; +#endif + if (nvphdr.nvph_type > NV_TYPE_LAST) + goto failed; + +#if BYTE_ORDER == BIG_ENDIAN + if ((flags & NV_FLAG_BIG_ENDIAN) == 0) { + nvphdr.nvph_namesize = le16toh(nvphdr.nvph_namesize); + nvphdr.nvph_datasize = le64toh(nvphdr.nvph_datasize); + } +#else + if ((flags & NV_FLAG_BIG_ENDIAN) != 0) { + nvphdr.nvph_namesize = be16toh(nvphdr.nvph_namesize); + nvphdr.nvph_datasize = be64toh(nvphdr.nvph_datasize); + } +#endif + + if (nvphdr.nvph_namesize > NV_NAME_MAX) + goto failed; + if (*leftp < nvphdr.nvph_namesize) + goto failed; + if (nvphdr.nvph_namesize < 1) + goto failed; + if (strnlen((const char *)ptr, nvphdr.nvph_namesize) != + (size_t)(nvphdr.nvph_namesize - 1)) { + goto failed; + } + + memcpy(nvp->nvp_name, ptr, nvphdr.nvph_namesize); + ptr += nvphdr.nvph_namesize; + *leftp -= nvphdr.nvph_namesize; + + if (*leftp < nvphdr.nvph_datasize) + goto failed; + + nvp->nvp_type = nvphdr.nvph_type; + nvp->nvp_data = 0; + nvp->nvp_datasize = nvphdr.nvph_datasize; + + return (ptr); +failed: + errno = EINVAL; + return (NULL); +} + +static const unsigned char * +nvpair_unpack_null(int flags __unused, nvpair_t *nvp, const unsigned char *ptr, + size_t *leftp __unused) +{ + + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NULL); + + if (nvp->nvp_datasize != 0) { + errno = EINVAL; + return (NULL); + } + + return (ptr); +} + +static const unsigned char * +nvpair_unpack_bool(int flags __unused, nvpair_t *nvp, const unsigned char *ptr, + size_t *leftp) +{ + uint8_t value; + + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BOOL); + + if (nvp->nvp_datasize != sizeof(value)) { + errno = EINVAL; + return (NULL); + } + if (*leftp < sizeof(value)) { + errno = EINVAL; + return (NULL); + } + + memcpy(&value, ptr, sizeof(value)); + ptr += 
sizeof(value); + *leftp -= sizeof(value); + + if (value != 0 && value != 1) { + errno = EINVAL; + return (NULL); + } + + nvp->nvp_data = (uint64_t)value; + + return (ptr); +} + +static const unsigned char * +nvpair_unpack_number(int flags, nvpair_t *nvp, const unsigned char *ptr, + size_t *leftp) +{ + + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NUMBER); + + if (nvp->nvp_datasize != sizeof(uint64_t)) { + errno = EINVAL; + return (NULL); + } + if (*leftp < sizeof(uint64_t)) { + errno = EINVAL; + return (NULL); + } + + if ((flags & NV_FLAG_BIG_ENDIAN) != 0) + nvp->nvp_data = be64dec(ptr); + else + nvp->nvp_data = le64dec(ptr); + ptr += sizeof(uint64_t); + *leftp -= sizeof(uint64_t); + + return (ptr); +} + +static const unsigned char * +nvpair_unpack_string(int flags __unused, nvpair_t *nvp, + const unsigned char *ptr, size_t *leftp) +{ + + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_STRING); + + if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) { + errno = EINVAL; + return (NULL); + } + + if (strnlen((const char *)ptr, nvp->nvp_datasize) != + nvp->nvp_datasize - 1) { + errno = EINVAL; + return (NULL); + } + + nvp->nvp_data = (uint64_t)(uintptr_t)strdup((const char *)ptr); + if (nvp->nvp_data == 0) + return (NULL); + + ptr += nvp->nvp_datasize; + *leftp -= nvp->nvp_datasize; + + return (ptr); +} + +static const unsigned char * +nvpair_unpack_nvlist(int flags __unused, nvpair_t *nvp, + const unsigned char *ptr, size_t *leftp, const int *fds, size_t nfds) +{ + nvlist_t *value; + + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NVLIST); + + if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) { + errno = EINVAL; + return (NULL); + } + + value = nvlist_xunpack(ptr, nvp->nvp_datasize, fds, nfds); + if (value == NULL) + return (NULL); + + nvp->nvp_data = (uint64_t)(uintptr_t)value; + + ptr += nvp->nvp_datasize; + *leftp -= nvp->nvp_datasize; + + return (ptr); +} + +static const unsigned char * +nvpair_unpack_descriptor(int flags, nvpair_t *nvp, const unsigned char *ptr, + 
size_t *leftp, const int *fds, size_t nfds) +{ + int64_t idx; + + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR); + + if (nvp->nvp_datasize != sizeof(idx)) { + errno = EINVAL; + return (NULL); + } + if (*leftp < sizeof(idx)) { + errno = EINVAL; + return (NULL); + } + + if ((flags & NV_FLAG_BIG_ENDIAN) != 0) + idx = be64dec(ptr); + else + idx = le64dec(ptr); + + if (idx < 0) { + errno = EINVAL; + return (NULL); + } + + if ((size_t)idx >= nfds) { + errno = EINVAL; + return (NULL); + } + + nvp->nvp_data = (uint64_t)fds[idx]; + + ptr += sizeof(idx); + *leftp -= sizeof(idx); + + return (ptr); +} + +static const unsigned char * +nvpair_unpack_binary(int flags __unused, nvpair_t *nvp, + const unsigned char *ptr, size_t *leftp) +{ + void *value; + + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BINARY); + + if (*leftp < nvp->nvp_datasize || nvp->nvp_datasize == 0) { + errno = EINVAL; + return (NULL); + } + + value = malloc(nvp->nvp_datasize); + if (value == NULL) + return (NULL); + + memcpy(value, ptr, nvp->nvp_datasize); + ptr += nvp->nvp_datasize; + *leftp -= nvp->nvp_datasize; + + nvp->nvp_data = (uint64_t)(uintptr_t)value; + + return (ptr); +} + +const unsigned char * +nvpair_unpack(int flags, const unsigned char *ptr, size_t *leftp, + const int *fds, size_t nfds, nvpair_t **nvpp) +{ + nvpair_t *nvp, *tmp; + + nvp = calloc(1, sizeof(*nvp) + NV_NAME_MAX); + if (nvp == NULL) + return (NULL); + nvp->nvp_name = (char *)(nvp + 1); + + ptr = nvpair_unpack_header(flags, nvp, ptr, leftp); + if (ptr == NULL) + goto failed; + tmp = realloc(nvp, sizeof(*nvp) + strlen(nvp->nvp_name) + 1); + if (tmp == NULL) + goto failed; + nvp = tmp; + /* Update nvp_name after realloc(). 
*/ + nvp->nvp_name = (char *)(nvp + 1); + + switch (nvp->nvp_type) { + case NV_TYPE_NULL: + ptr = nvpair_unpack_null(flags, nvp, ptr, leftp); + break; + case NV_TYPE_BOOL: + ptr = nvpair_unpack_bool(flags, nvp, ptr, leftp); + break; + case NV_TYPE_NUMBER: + ptr = nvpair_unpack_number(flags, nvp, ptr, leftp); + break; + case NV_TYPE_STRING: + ptr = nvpair_unpack_string(flags, nvp, ptr, leftp); + break; + case NV_TYPE_NVLIST: + ptr = nvpair_unpack_nvlist(flags, nvp, ptr, leftp, fds, + nfds); + break; + case NV_TYPE_DESCRIPTOR: + ptr = nvpair_unpack_descriptor(flags, nvp, ptr, leftp, fds, + nfds); + break; + case NV_TYPE_BINARY: + ptr = nvpair_unpack_binary(flags, nvp, ptr, leftp); + break; + default: + PJDLOG_ABORT("Invalid type (%d).", nvp->nvp_type); + } + + if (ptr == NULL) + goto failed; + + nvp->nvp_magic = NVPAIR_MAGIC; + *nvpp = nvp; + return (ptr); +failed: + free(nvp); + return (NULL); +} + +int +nvpair_type(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + + return (nvp->nvp_type); +} + +const char * +nvpair_name(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + + return (nvp->nvp_name); +} + +static nvpair_t * +nvpair_allocv(int type, uint64_t data, size_t datasize, const char *namefmt, + va_list nameap) +{ + nvpair_t *nvp; + char *name; + int namelen; + + PJDLOG_ASSERT(type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST); + + namelen = vasprintf(&name, namefmt, nameap); + if (namelen < 0) + return (NULL); + + PJDLOG_ASSERT(namelen > 0); + if (namelen >= NV_NAME_MAX) { + free(name); + errno = ENAMETOOLONG; + return (NULL); + } + + nvp = calloc(1, sizeof(*nvp) + namelen + 1); + if (nvp != NULL) { + nvp->nvp_name = (char *)(nvp + 1); + memcpy(nvp->nvp_name, name, namelen + 1); + nvp->nvp_type = type; + nvp->nvp_data = data; + nvp->nvp_datasize = datasize; + nvp->nvp_magic = NVPAIR_MAGIC; + } + free(name); + + return (nvp); +}; + +nvpair_t * +nvpair_create_null(const char *name) +{ + + return (nvpair_createf_null("%s", name)); +} + +nvpair_t * 
+nvpair_create_bool(const char *name, bool value) +{ + + return (nvpair_createf_bool(value, "%s", name)); +} + +nvpair_t * +nvpair_create_number(const char *name, uint64_t value) +{ + + return (nvpair_createf_number(value, "%s", name)); +} + +nvpair_t * +nvpair_create_string(const char *name, const char *value) +{ + + return (nvpair_createf_string(value, "%s", name)); +} + +nvpair_t * +nvpair_create_stringf(const char *name, const char *valuefmt, ...) +{ + va_list valueap; + nvpair_t *nvp; + + va_start(valueap, valuefmt); + nvp = nvpair_create_stringv(name, valuefmt, valueap); + va_end(valueap); + + return (nvp); +} + +nvpair_t * +nvpair_create_stringv(const char *name, const char *valuefmt, va_list valueap) +{ + nvpair_t *nvp; + char *str; + int len; + + len = vasprintf(&str, valuefmt, valueap); + if (len < 0) + return (NULL); + nvp = nvpair_create_string(name, str); + if (nvp == NULL) + free(str); + return (nvp); +} + +nvpair_t * +nvpair_create_nvlist(const char *name, const nvlist_t *value) +{ + + return (nvpair_createf_nvlist(value, "%s", name)); +} + +nvpair_t * +nvpair_create_descriptor(const char *name, int value) +{ + + return (nvpair_createf_descriptor(value, "%s", name)); +} + +nvpair_t * +nvpair_create_binary(const char *name, const void *value, size_t size) +{ + + return (nvpair_createf_binary(value, size, "%s", name)); +} + +nvpair_t * +nvpair_createf_null(const char *namefmt, ...) +{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_createv_null(namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_createf_bool(bool value, const char *namefmt, ...) +{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_createv_bool(value, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_createf_number(uint64_t value, const char *namefmt, ...) 
+{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_createv_number(value, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_createf_string(const char *value, const char *namefmt, ...) +{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_createv_string(value, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_createf_nvlist(const nvlist_t *value, const char *namefmt, ...) +{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_createv_nvlist(value, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_createf_descriptor(int value, const char *namefmt, ...) +{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_createv_descriptor(value, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_createf_binary(const void *value, size_t size, const char *namefmt, ...) +{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_createv_binary(value, size, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_createv_null(const char *namefmt, va_list nameap) +{ + + return (nvpair_allocv(NV_TYPE_NULL, 0, 0, namefmt, nameap)); +} + +nvpair_t * +nvpair_createv_bool(bool value, const char *namefmt, va_list nameap) +{ + + return (nvpair_allocv(NV_TYPE_BOOL, value ? 
1 : 0, sizeof(uint8_t), + namefmt, nameap)); +} + +nvpair_t * +nvpair_createv_number(uint64_t value, const char *namefmt, va_list nameap) +{ + + return (nvpair_allocv(NV_TYPE_NUMBER, value, sizeof(value), namefmt, + nameap)); +} + +nvpair_t * +nvpair_createv_string(const char *value, const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + size_t size; + char *data; + + if (value == NULL) { + errno = EINVAL; + return (NULL); + } + + data = strdup(value); + if (data == NULL) + return (NULL); + size = strlen(value) + 1; + + nvp = nvpair_allocv(NV_TYPE_STRING, (uint64_t)(uintptr_t)data, size, + namefmt, nameap); + if (nvp == NULL) + free(data); + + return (nvp); +} + +nvpair_t * +nvpair_createv_nvlist(const nvlist_t *value, const char *namefmt, + va_list nameap) +{ + nvlist_t *nvl; + nvpair_t *nvp; + + if (value == NULL) { + errno = EINVAL; + return (NULL); + } + + nvl = nvlist_clone(value); + if (nvl == NULL) + return (NULL); + + nvp = nvpair_allocv(NV_TYPE_NVLIST, (uint64_t)(uintptr_t)nvl, 0, + namefmt, nameap); + if (nvp == NULL) + nvlist_destroy(nvl); + + return (nvp); +} + +nvpair_t * +nvpair_createv_descriptor(int value, const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + + if (value < 0 || !fd_is_valid(value)) { + errno = EBADF; + return (NULL); + } + + value = fcntl(value, F_DUPFD_CLOEXEC, 0); + if (value < 0) + return (NULL); + + nvp = nvpair_allocv(NV_TYPE_DESCRIPTOR, (uint64_t)value, + sizeof(int64_t), namefmt, nameap); + if (nvp == NULL) + close(value); + + return (nvp); +} + +nvpair_t * +nvpair_createv_binary(const void *value, size_t size, const char *namefmt, + va_list nameap) +{ + nvpair_t *nvp; + void *data; + + if (value == NULL || size == 0) { + errno = EINVAL; + return (NULL); + } + + data = malloc(size); + if (data == NULL) + return (NULL); + memcpy(data, value, size); + + nvp = nvpair_allocv(NV_TYPE_BINARY, (uint64_t)(uintptr_t)data, size, + namefmt, nameap); + if (nvp == NULL) + free(data); + + return (nvp); +} + +nvpair_t * 
+nvpair_move_string(const char *name, char *value) +{ + + return (nvpair_movef_string(value, "%s", name)); +} + +nvpair_t * +nvpair_move_nvlist(const char *name, nvlist_t *value) +{ + + return (nvpair_movef_nvlist(value, "%s", name)); +} + +nvpair_t * +nvpair_move_descriptor(const char *name, int value) +{ + + return (nvpair_movef_descriptor(value, "%s", name)); +} + +nvpair_t * +nvpair_move_binary(const char *name, void *value, size_t size) +{ + + return (nvpair_movef_binary(value, size, "%s", name)); +} + +nvpair_t * +nvpair_movef_string(char *value, const char *namefmt, ...) +{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_movev_string(value, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_movef_nvlist(nvlist_t *value, const char *namefmt, ...) +{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_movev_nvlist(value, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_movef_descriptor(int value, const char *namefmt, ...) +{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_movev_descriptor(value, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_movef_binary(void *value, size_t size, const char *namefmt, ...) 
+{ + va_list nameap; + nvpair_t *nvp; + + va_start(nameap, namefmt); + nvp = nvpair_movev_binary(value, size, namefmt, nameap); + va_end(nameap); + + return (nvp); +} + +nvpair_t * +nvpair_movev_string(char *value, const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + + if (value == NULL) { + errno = EINVAL; + return (NULL); + } + + nvp = nvpair_allocv(NV_TYPE_STRING, (uint64_t)(uintptr_t)value, + strlen(value) + 1, namefmt, nameap); + if (nvp == NULL) + free(value); + + return (nvp); +} + +nvpair_t * +nvpair_movev_nvlist(nvlist_t *value, const char *namefmt, va_list nameap) +{ + nvpair_t *nvp; + + if (value == NULL) { + errno = EINVAL; + return (NULL); + } + + nvp = nvpair_allocv(NV_TYPE_NVLIST, (uint64_t)(uintptr_t)value, 0, + namefmt, nameap); + if (nvp == NULL) + nvlist_destroy(value); + + return (nvp); +} + +nvpair_t * +nvpair_movev_descriptor(int value, const char *namefmt, va_list nameap) +{ + + if (value < 0 || !fd_is_valid(value)) { + errno = EBADF; + return (NULL); + } + + return (nvpair_allocv(NV_TYPE_DESCRIPTOR, (uint64_t)value, + sizeof(int64_t), namefmt, nameap)); +} + +nvpair_t * +nvpair_movev_binary(void *value, size_t size, const char *namefmt, + va_list nameap) +{ + + if (value == NULL || size == 0) { + errno = EINVAL; + return (NULL); + } + + return (nvpair_allocv(NV_TYPE_BINARY, (uint64_t)(uintptr_t)value, size, + namefmt, nameap)); +} + +bool +nvpair_get_bool(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + + return (nvp->nvp_data == 1); +} + +uint64_t +nvpair_get_number(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + + return (nvp->nvp_data); +} + +const char * +nvpair_get_string(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_STRING); + + return ((const char *)(intptr_t)nvp->nvp_data); +} + +const nvlist_t * +nvpair_get_nvlist(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_NVLIST); + + return ((const nvlist_t *)(intptr_t)nvp->nvp_data); +} + +int 
+nvpair_get_descriptor(const nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR); + + return ((int)nvp->nvp_data); +} + +const void * +nvpair_get_binary(const nvpair_t *nvp, size_t *sizep) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_BINARY); + + if (sizep != NULL) + *sizep = nvp->nvp_datasize; + return ((const void *)(intptr_t)nvp->nvp_data); +} + +void +nvpair_free(nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_list == NULL); + + nvp->nvp_magic = 0; + switch (nvp->nvp_type) { + case NV_TYPE_DESCRIPTOR: + close((int)nvp->nvp_data); + break; + case NV_TYPE_NVLIST: + nvlist_destroy((nvlist_t *)(intptr_t)nvp->nvp_data); + break; + case NV_TYPE_STRING: + free((char *)(intptr_t)nvp->nvp_data); + break; + case NV_TYPE_BINARY: + free((void *)(intptr_t)nvp->nvp_data); + break; + } + free(nvp); +} + +void +nvpair_free_structure(nvpair_t *nvp) +{ + + NVPAIR_ASSERT(nvp); + PJDLOG_ASSERT(nvp->nvp_list == NULL); + + nvp->nvp_magic = 0; + free(nvp); +} + +const char * +nvpair_type_string(int type) +{ + + switch (type) { + case NV_TYPE_NULL: + return ("NULL"); + case NV_TYPE_BOOL: + return ("BOOL"); + case NV_TYPE_NUMBER: + return ("NUMBER"); + case NV_TYPE_STRING: + return ("STRING"); + case NV_TYPE_NVLIST: + return ("NVLIST"); + case NV_TYPE_DESCRIPTOR: + return ("DESCRIPTOR"); + case NV_TYPE_BINARY: + return ("BINARY"); + default: + return (""); + } +} diff --git a/lib/libnv/nvpair_impl.h b/lib/libnv/nvpair_impl.h new file mode 100644 index 0000000..64fcfde --- /dev/null +++ b/lib/libnv/nvpair_impl.h @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2009-2013 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: head/lib/libnv/nvpair_impl.h 258065 2013-11-12 19:39:14Z pjd $ + */ + +#ifndef _NVPAIR_IMPL_H_ +#define _NVPAIR_IMPL_H_ + +#include + +#include + +#include "nv.h" + +TAILQ_HEAD(nvl_head, nvpair); + +void nvpair_assert(const nvpair_t *nvp); +const nvlist_t *nvpair_nvlist(const nvpair_t *nvp); +nvpair_t *nvpair_next(const nvpair_t *nvp); +nvpair_t *nvpair_prev(const nvpair_t *nvp); +void nvpair_insert(struct nvl_head *head, nvpair_t *nvp, nvlist_t *nvl); +void nvpair_remove(struct nvl_head *head, nvpair_t *nvp, const nvlist_t *nvl); +size_t nvpair_header_size(void); +size_t nvpair_size(const nvpair_t *nvp); +unsigned char *nvpair_pack(nvpair_t *nvp, unsigned char *ptr, int64_t *fdidxp, + size_t *leftp); +const unsigned char *nvpair_unpack(int flags, const unsigned char *ptr, + size_t *leftp, const int *fds, size_t nfds, nvpair_t **nvpp); +void nvpair_free_structure(nvpair_t *nvp); +const char *nvpair_type_string(int type); + +#endif /* !_NVPAIR_IMPL_H_ */ From 766780de72eac416112f4c3e35ff88a6e81e2d54 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 21:35:18 +0000 Subject: [PATCH 015/148] Rewrite everything to use libnv. This again only supports the string type for now. 
--- bin/passive/Makefile | 4 +- bin/passive/sysctl_api.c | 307 ++++++++++++++++----------------------- bin/passive/sysctl_api.h | 2 + bin/sysctl/Makefile | 4 +- bin/sysctl/sysctl.c | 154 +++++++------------- 5 files changed, 181 insertions(+), 290 deletions(-) diff --git a/bin/passive/Makefile b/bin/passive/Makefile index 477ba2c..aa186e9 100644 --- a/bin/passive/Makefile +++ b/bin/passive/Makefile @@ -6,8 +6,8 @@ SRCS=passive.c sysctl_api.c UINET_LIBS=uinet -CFLAGS= -I${TOPDIR}/lib/libev -LDADD= ${TOPDIR}/lib/libev/.libs/libev.a -lm -lpcap +CFLAGS= -I${TOPDIR}/lib/libev -I${TOPDIR}/lib/libnv +LDADD= ${TOPDIR}/lib/libev/.libs/libev.a ${TOPDIR}/lib/libnv/libnv.a -lm -lpcap DEBUG_FLAGS=-g -O0 diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 4dc0535..fbc0ec0 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -40,6 +40,7 @@ #include "uinet_api.h" #include "uinet_config.h" +#include "nv.h" #include "sysctl_api.h" #define SYSCTL_BUF_LEN 131072 @@ -54,165 +55,141 @@ * not. */ static int -passive_sysctl_reqtype_str(int ns, char *buf, int len) +passive_sysctl_reqtype_str(int ns, nvlist_t *nvl) { struct sysctl_req_hdr *hdr; + nvlist_t *nvl_resp = NULL; int retval = 0; - char *sbuf = NULL; - char *req_str = NULL; + char *wbuf = NULL; size_t wbuf_len = 0; size_t sbuf_len = 0; - size_t rval; + char *req_str = NULL; + const char *sbuf; int error; - struct sysctl_resp_hdr rhdr; - char *wbuf = NULL; - - /* Request header; zero response header */ - hdr = (struct sysctl_req_hdr *) buf; - bzero(&rhdr, sizeof(rhdr)); - - /* - * Validate the various lengths. - */ + size_t rval; - if (le32toh(hdr->sysctl_req_len) != - le32toh(hdr->sysctl_str_len) - + le32toh(hdr->sysctl_src_len) - + sizeof(struct sysctl_req_hdr)) { - fprintf(stderr, "%s: fd %d: length mismatch\n", + /* Validate fields are here */ + if (! 
nvlist_exists_string(nvl, "sysctl_str")) { + fprintf(stderr, "%s: fd %d: missing sysctl_str\n", __func__, ns); - fprintf(stderr, "%s: fd %d: hdr_len=%d, req_len=%d, str_len=%d, src_len=%d\n", - __func__, - ns, - (int) sizeof(struct sysctl_req_hdr), - le32toh(hdr->sysctl_req_len), - le32toh(hdr->sysctl_str_len), - le32toh(hdr->sysctl_src_len)); retval = 0; goto finish; } + req_str = strdup(nvlist_get_string(nvl, "sysctl_str")); - if (le32toh(hdr->sysctl_dst_len) > SYSCTL_MAX_BUF_LEN) { - fprintf(stderr, "%s: fd %d: dst_len %d > %d\n", + /* sysctl_respbuf_len */ + if (! nvlist_exists_number(nvl, "sysctl_respbuf_len")) { + fprintf(stderr, "%s: fd %d: missing sysctl_respbuf_len\n", __func__, - ns, - le32toh(hdr->sysctl_dst_len), - SYSCTL_MAX_BUF_LEN); + ns); retval = 0; goto finish; } - - /* - * Populate the request string. - */ - req_str = malloc(le32toh(hdr->sysctl_str_len) + 1); - if (req_str == NULL) { - fprintf(stderr, "%s; fd %d: malloc failed (req_str)\n", + if (nvlist_get_number(nvl, "sysctl_respbuf_len") > SYSCTL_MAX_REQ_BUF_LEN) { + fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is too big!\n", __func__, ns); retval = 0; goto finish; } + wbuf_len = nvlist_get_number(nvl, "sysctl_respbuf_len"); + wbuf = calloc(1, wbuf_len); + if (wbuf == NULL) { + fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, ns); + retval = 0; + goto finish; + } - memcpy(req_str, buf + sizeof(struct sysctl_req_hdr), - le32toh(hdr->sysctl_str_len)); - req_str[le32toh(hdr->sysctl_str_len)] = '\0'; - - /* - * If there's a request buffer, populate that. 
- */ - if (le32toh(hdr->sysctl_src_len) > 0) { - sbuf = buf + le32toh(hdr->sysctl_src_len); - sbuf_len = le32toh(hdr->sysctl_src_len); + /* sysctl_reqbuf */ + if (nvlist_exists_binary(nvl, "sysctl_reqbuf")) { + sbuf = nvlist_get_binary(nvl, "sysctl_reqbuf", &sbuf_len); + } else { + sbuf = NULL; + sbuf_len = 0; } + /* Issue sysctl */ + fprintf(stderr, + "%s: fd %d: sysctl '%s' src_len=%d, dst_len=%d\n", + __func__, + ns, + req_str, + (int) sbuf_len, + (int) wbuf_len); + + /* XXX typecasting sbuf sucks */ + error = uinet_sysctl(req_str, + wbuf, &wbuf_len, + (char *) sbuf, sbuf_len, + &rval, + 0); + + fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%llu, rval=%llu\n", + __func__, + ns, + (int) error, + (unsigned long long) wbuf_len, + (unsigned long long) rval); + /* - * Allocate response buffer if requested. + * XXX Validate the response back from uinet_sysctl() + * is within bounds for the response back to the + * client. */ - if (le32toh(hdr->sysctl_dst_len) > 0) { - wbuf = malloc(le32toh(hdr->sysctl_dst_len)); - if (wbuf == NULL) { - fprintf(stderr, "%s: fd %d: malloc failed: %d\n", - __func__, - ns, - errno); - retval = 0; - goto finish; - } - wbuf_len = le32toh(hdr->sysctl_dst_len); - } - - /* Issue sysctl */ - fprintf(stderr, - "%s: fd %d: sysctl '%s' src_len=%d, dst_len=%d\n", + if (error == 0 && rval >= wbuf_len) { + fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", __func__, ns, - req_str, - le32toh(hdr->sysctl_src_len), - le32toh(hdr->sysctl_dst_len)); + (unsigned long long) rval, + (unsigned long long) wbuf_len); + retval = 0; + goto finish; + } + + /* Construct our response */ + nvl_resp = nvlist_create(0); + if (nvl_resp == NULL) { + fprintf(stderr, "%s: fd %d: nvlist_create failed\n", __func__, ns); + retval = 0; + goto finish; + } - error = uinet_sysctl(req_str, - wbuf, &wbuf_len, - sbuf, sbuf_len, - &rval, - 0); + nvlist_add_number(nvl_resp, "sysctl_errno", error); + if (error == 0) { + nvlist_add_binary(nvl_resp, "sysctl_respbuf", 
wbuf, rval); + } - fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%d, rval=%d\n", + if (nvlist_send(ns, nvl_resp) < 0) { + fprintf(stderr, "%s: fd %d: nvlist_send failed; errno=%d\n", __func__, ns, - (int) error, - (int) wbuf_len, - (int) rval); - - /* - * XXX Validate the response back from uinet_sysctl() - * is within bounds for the response back to the - * client. - */ - - /* Construct our response */ - rhdr.sysctl_resp_len = htole32(sizeof(struct sysctl_resp_hdr) + wbuf_len); - rhdr.sysctl_resp_type = 0; /* XXX */ - rhdr.sysctl_resp_flags = 0; /* XXX */ - - if (errno == 0) - rhdr.sysctl_dst_len = htole32(rval); - else - rhdr.sysctl_dst_len = 0; - rhdr.sysctl_dst_errno = error; - - write(ns, &rhdr, sizeof(rhdr)); - if (wbuf_len > 0) { - write(ns, wbuf, wbuf_len); - } - - /* Done! */ + errno); retval = 1; + goto finish; + } + + /* Done! */ + retval = 1; finish: if (req_str != NULL) free(req_str); if (wbuf != NULL) free(wbuf); + if (nvl_resp != NULL) + nvlist_destroy(nvl_resp); return (retval); } void * passive_sysctl_listener(void *arg) { - int s, ns, r; + int s, r; struct sockaddr_un sun; - char *rbuf; uinet_initialize_thread(); - rbuf = malloc(SYSCTL_BUF_LEN); - if (rbuf == NULL) { - printf("%s: malloc failed: %d\n", __func__, errno); - return (NULL); - } - bzero(&sun, sizeof(sun)); strcpy(sun.sun_path, "/tmp/sysctl.sock"); sun.sun_len = 0; @@ -243,9 +220,10 @@ passive_sysctl_listener(void *arg) for (;;) { struct sockaddr_un sun_n; socklen_t sl; - int len; - struct sysctl_req_hdr *hdr; - int rlen = 0; + nvlist_t *nvl; + int ns; + int ret; + const char *type; ns = accept(s, (struct sockaddr *) &sun_n, &sl); if (ns < 0) { @@ -253,83 +231,46 @@ passive_sysctl_listener(void *arg) continue; } - /* XXX I hate gotos */ -readmore: - - /* Do we have space left in our incoming buffer? 
*/ - if (rlen >= SYSCTL_BUF_LEN) { - fprintf(stderr, "%s: fd %d: read too much?\n", __func__, ns); - goto next; - } - - /* Read data */ - len = read(ns, rbuf + rlen, SYSCTL_BUF_LEN - rlen); - if (len <= 0) { - fprintf(stderr, "%s: fd %d: read returned %d, errno=%d\n", - __func__, - ns, - len, - errno); - goto next; - } - - /* Keep track of how much data is in the incoming buffer */ - rlen += len; + for (;;) { + nvl = nvlist_recv(ns); + if (nvl == NULL) + break; - /* - * Not enough data? Keep reading. - */ - if (rlen < sizeof(struct sysctl_req_hdr)) { - fprintf(stderr, - "%s: fd %d: read %d btyes, rlen is now %d\n", - __func__, - ns, - len, - rlen); - goto readmore; - } + if (! nvlist_exists_string(nvl, "type")) { + fprintf(stderr, "%s: fd %d: no type; bailing\n", + __func__, + ns); + break; + } + type = nvlist_get_string(nvl, "type"); - hdr = (struct sysctl_req_hdr *) rbuf; - - /* - * Validate sysctl_req_len so we don't try to read way more - * than we have buffer space for. - * - * We assume that we're only getting to this point - * when the header is at the beginning of the buffer; - * not that we're doing pipelined requests. - */ - if (le32toh(hdr->sysctl_req_len) >= SYSCTL_BUF_LEN) { - fprintf(stderr, - "%s: fd %d: req_len (%d) is too big (%d)\n", + fprintf(stderr, "%s: fd %d: type=%s\n", __func__, ns, - le32toh(hdr->sysctl_req_len), - SYSCTL_BUF_LEN); - goto next; - } - - /* - * Do we have enough data to cover the payload length? - */ - if (le32toh(hdr->sysctl_req_len) < rlen) { - goto readmore; + type); + + /* Dispatch as appropriate */ + if (strncmp(type, "sysctl_str", 10) == 0) { + ret = passive_sysctl_reqtype_str(ns, nvl); + } else if (strncmp(type, "sysctl_oid", 10) == 0) { + ret = passive_sysctl_reqtype_str(ns, nvl); + } else { + fprintf(stderr, "%s: fd %d: unknown type=%s\n", + __func__, + ns, + nvlist_get_string(nvl, "type")); + break; + } + + /* Tidyup */ + nvlist_destroy(nvl); + + /* Ret == 0? 
Then we don't wait around */ + if (ret == 0) + break; } - /* - * We have the entire payload. Let's dispatch based - * on type. - */ - (void) passive_sysctl_reqtype_str(ns, rbuf, rlen); - - /* XXX until we've taught the loop about - * how to consume readbuf data right and - * have the remainder data be moved to the - * head of the queue, let's just close it for - * now. - */ -next: - /* Close */ + /* Done; bail */ close(ns); } diff --git a/bin/passive/sysctl_api.h b/bin/passive/sysctl_api.h index 9ddcc43..4b77654 100644 --- a/bin/passive/sysctl_api.h +++ b/bin/passive/sysctl_api.h @@ -7,6 +7,7 @@ typedef enum { SYSCTL_REQ_OID = 2, } sysctl_req_type_t; +#if 0 struct sysctl_req_hdr { uint32_t sysctl_req_len; /* length of the whole payload */ uint32_t sysctl_req_type; /* Type of the message */ @@ -33,6 +34,7 @@ struct sysctl_resp_hdr { /* Response follows, if sysctl_dst_len != 0 */ }; +#endif extern void * passive_sysctl_listener(void *arg); diff --git a/bin/sysctl/Makefile b/bin/sysctl/Makefile index 68cec67..93ab286 100644 --- a/bin/sysctl/Makefile +++ b/bin/sysctl/Makefile @@ -6,8 +6,8 @@ SRCS=sysctl.c UINET_LIBS=uinet -CFLAGS= -I${TOPDIR}/lib/libev -I../passive/ -LDADD= ${TOPDIR}/lib/libev/.libs/libev.a -lm -lpcap +CFLAGS= -I${TOPDIR}/lib/libev -I../passive/ -I${TOPDIR}/lib/libnv +LDADD= ${TOPDIR}/lib/libev/.libs/libev.a ${TOPDIR}/lib/libnv/libnv.a -lm -lpcap DEBUG_FLAGS=-g -O0 diff --git a/bin/sysctl/sysctl.c b/bin/sysctl/sysctl.c index 09fd10c..470e0e3 100644 --- a/bin/sysctl/sysctl.c +++ b/bin/sysctl/sysctl.c @@ -13,6 +13,7 @@ #include #include "sysctl_api.h" +#include "nv.h" int main(int argc, char *argv[]) @@ -21,13 +22,13 @@ main(int argc, char *argv[]) int s; struct sockaddr_un sun; int r; - struct sysctl_req_hdr req_hdr; - struct sysctl_resp_hdr *resp_hdr; - char *req_buf, *read_buf; + nvlist_t *nvl, *nvl_resp; + size_t reqbuf_len = 0, respbuf_len = 0; char *req_str; - size_t reqbuf_len, respbuf_len; - size_t readbuf_len; - size_t read_ofs = 0; + char 
*req_buf = NULL; + const char *resp_buf; + size_t r_len; + int r_errno; if (argc < 2) { printf("Usage: sysctl \n"); @@ -38,29 +39,9 @@ main(int argc, char *argv[]) req_str = strdup(argv[1]); reqbuf_len = 0; respbuf_len = 1048576; - req_buf = NULL; - read_buf = NULL; - /* Ok, allocate request buffer */ - if (reqbuf_len > 0) { - req_buf = calloc(1, reqbuf_len); - if (req_buf == NULL) - err(1, "calloc"); - } + /* XXX Reqbuf when required */ - /* - * Calculate the readbuf_len. It's the combination - * of the header size and the response payload. - */ - readbuf_len = respbuf_len + sizeof(struct sysctl_resp_hdr); - - /* - * Allocate the response buffer. This includes the - * response header and response payload if required. - */ - read_buf = calloc(1, readbuf_len); - if (read_buf == NULL) - err(1, "calloc"); /* Connect to the destination socket */ bzero(&sun, sizeof(sun)); @@ -79,88 +60,55 @@ main(int argc, char *argv[]) err(1, "connect"); } - /* Craft request header */ - bzero(&req_hdr, sizeof(req_hdr)); - req_hdr.sysctl_req_len = htole32(sizeof(req_hdr) + strlen(req_str) + reqbuf_len); - req_hdr.sysctl_req_type = 0; /* XXX */ - req_hdr.sysctl_req_flags = 0; /* XXX */ - req_hdr.sysctl_str_len = htole32(strlen(req_str)); - req_hdr.sysctl_dst_len = htole32(respbuf_len); - req_hdr.sysctl_src_len = htole32(reqbuf_len); - - /* Send request */ - r = write(s, &req_hdr, sizeof(req_hdr)); - if (r != sizeof(req_hdr)) { - err(1, "write (hdr)"); - } - r = write(s, req_str, strlen(req_str)); - if (r != strlen(req_str)) { - err(1, "write (req_str)"); + /* Create nvlist to populate the request into */ + nvl = nvlist_create(0); + if (nvl == NULL) + err(1, "nvlist_create"); + + /* Create nvlist for a sysctl_str request */ + nvlist_add_string(nvl, "type", "sysctl_str"); + nvlist_add_string(nvl, "sysctl_str", req_str); + nvlist_add_number(nvl, "sysctl_respbuf_len", respbuf_len); + if (reqbuf_len > 0) { + nvlist_add_binary(nvl, "sysctl_reqbuf", req_buf, reqbuf_len); } - r = write(s, 
req_buf, reqbuf_len); - if (r != reqbuf_len) { - err(1, "write (req_buf)"); + + /* Send command */ + if (nvlist_send(s, nvl) < 0) { + err(1, "nvlist_send"); } /* Read response */ - while (read_ofs < readbuf_len) { - /* Don't try to read if we have a full buffer */ - if (readbuf_len - read_ofs <= 0) - break; - r = read(s, read_buf + read_ofs, readbuf_len - read_ofs); - if (r < 0) { - err(1, "read"); - } if (r == 0) { - fprintf(stderr, "%s: read early EOF\n", __func__); - break; - } - - read_ofs += r; - - /* if we don't have enough data for the header, continue */ - if (read_ofs < sizeof(struct sysctl_resp_hdr)) { - continue; - } - - /* Grab the response header */ - resp_hdr = (struct sysctl_resp_hdr *) read_buf; - - /* - * Is the response length greater than respbuf_len? - * Then the response is too large. Naughty server. - */ - if (le32toh(resp_hdr->sysctl_resp_len) > - respbuf_len + sizeof(struct sysctl_resp_hdr)) { - fprintf(stderr, "%s: resp_len (%d) is too long!\n", - __func__, - le32toh(resp_hdr->sysctl_resp_len)); - break; - } - - /* Do we have enough data to match the response length? */ - if (read_ofs < le32toh(resp_hdr->sysctl_resp_len)) - continue; - - /* We have enough data - woo! More sanity checks! */ - - /* - * Does the response buffer length exceed what we allocated? - * again, too big a response; bad coder. - */ - - if (le32toh(resp_hdr->sysctl_dst_len) > respbuf_len) { - fprintf(stderr, "%s: dst_len (%d) is too long!\n", - __func__, - le32toh(resp_hdr->sysctl_dst_len)); - break; - } - printf("%s: received response: errno=%d, %d bytes\n", - __func__, - le32toh(resp_hdr->sysctl_dst_errno), - le32toh(resp_hdr->sysctl_dst_len)); + nvl_resp = nvlist_recv(s); + if (nvl_resp == NULL) { + err(1, "nvlist_recv"); } - /* Done */ + if (! 
nvlist_exists_number(nvl_resp, "sysctl_errno")) { + fprintf(stderr, "response: no errno?\n"); + goto done; + } + r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno"); + + if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { + resp_buf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); + } else { + r_len = 0; + } + + printf("%s: str=%s, errno=%d, len=%d\n", + __func__, + req_str, + (int) r_errno, + (int) r_len); + + +done: + /* Done with request/response */ + nvlist_destroy(nvl); + nvlist_destroy(nvl_resp); + + /* Done with socket */ close(s); exit(0); From 63b4cf4fba2cf8e4bd366e0a3f369a2ccd6c4f0c Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 22:00:03 +0000 Subject: [PATCH 016/148] Refactor out the sysctlbyname stuff into a new function, uinet_sysctlbyname(). --- bin/sysctl/sysctl.c | 131 +++++++++++++++++++++++++++++--------------- 1 file changed, 86 insertions(+), 45 deletions(-) diff --git a/bin/sysctl/sysctl.c b/bin/sysctl/sysctl.c index 470e0e3..812501a 100644 --- a/bin/sysctl/sysctl.c +++ b/bin/sysctl/sysctl.c @@ -15,6 +15,78 @@ #include "sysctl_api.h" #include "nv.h" +static int +uinet_sysctlbyname(int ns, + const char *name, + void *oldp, + size_t *oldlenp, + const void *newp, + size_t newlen) +{ + nvlist_t *nvl, *nvl_resp; + int retval = 0; + const char *rbuf; + size_t r_len; + int r_errno; + + /* Create nvlist to populate the request into */ + nvl = nvlist_create(0); + if (nvl == NULL) { + warn("nvlist_create"); + retval = -1; + goto done; + } + + /* Create nvlist for a sysctl_str request */ + nvlist_add_string(nvl, "type", "sysctl_str"); + nvlist_add_string(nvl, "sysctl_str", name); + nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp); + if (newlen > 0) { + nvlist_add_binary(nvl, "sysctl_reqbuf", newp, newlen); + } + + /* Send command */ + if (nvlist_send(ns, nvl) < 0) { + warn("nvlist_send"); + retval = -1; + goto done; + } + + /* Read response */ + nvl_resp = nvlist_recv(ns); + if (nvl_resp == NULL) { + 
warn("nvlist_recv"); + retval = -1; + goto done; + } + + if (! nvlist_exists_number(nvl_resp, "sysctl_errno")) { + fprintf(stderr, "response: no errno?\n"); + goto done; + } + r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno"); + + /* XXX validate r_len versus oldlenp */ + if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { + rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); + memcpy(oldp, rbuf, r_len); + *oldlenp = r_len; + } else { + r_len = 0; + } + + retval = 0; + /* XXX */ + errno = r_errno; + +done: + if (nvl) + nvlist_destroy(nvl); + if (nvl_resp) + nvlist_destroy(nvl_resp); + return (retval); +} + int main(int argc, char *argv[]) { @@ -22,13 +94,11 @@ main(int argc, char *argv[]) int s; struct sockaddr_un sun; int r; - nvlist_t *nvl, *nvl_resp; size_t reqbuf_len = 0, respbuf_len = 0; char *req_str; char *req_buf = NULL; - const char *resp_buf; + char *resp_buf; size_t r_len; - int r_errno; if (argc < 2) { printf("Usage: sysctl \n"); @@ -42,7 +112,6 @@ main(int argc, char *argv[]) /* XXX Reqbuf when required */ - /* Connect to the destination socket */ bzero(&sun, sizeof(sun)); @@ -60,53 +129,25 @@ main(int argc, char *argv[]) err(1, "connect"); } - /* Create nvlist to populate the request into */ - nvl = nvlist_create(0); - if (nvl == NULL) - err(1, "nvlist_create"); - - /* Create nvlist for a sysctl_str request */ - nvlist_add_string(nvl, "type", "sysctl_str"); - nvlist_add_string(nvl, "sysctl_str", req_str); - nvlist_add_number(nvl, "sysctl_respbuf_len", respbuf_len); - if (reqbuf_len > 0) { - nvlist_add_binary(nvl, "sysctl_reqbuf", req_buf, reqbuf_len); - } - - /* Send command */ - if (nvlist_send(s, nvl) < 0) { - err(1, "nvlist_send"); - } - - /* Read response */ - nvl_resp = nvlist_recv(s); - if (nvl_resp == NULL) { - err(1, "nvlist_recv"); - } - - if (! 
nvlist_exists_number(nvl_resp, "sysctl_errno")) { - fprintf(stderr, "response: no errno?\n"); - goto done; - } - r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno"); - - if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { - resp_buf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); - } else { - r_len = 0; - } + resp_buf = calloc(1, respbuf_len); + if (resp_buf == NULL) + err(1, "calloc"); - printf("%s: str=%s, errno=%d, len=%d\n", + /* Do a sysctl */ + r = uinet_sysctlbyname(s, req_str, resp_buf, &respbuf_len, + NULL, 0); + printf("%s: str=%s, r=%d, errno=%d, len=%d\n", __func__, req_str, - (int) r_errno, - (int) r_len); + r, + errno, + (int) respbuf_len); + /* Done */ + if (req_str) + free(req_str); done: - /* Done with request/response */ - nvlist_destroy(nvl); - nvlist_destroy(nvl_resp); /* Done with socket */ close(s); From c952b666fdbf4328c9a01d6fc8982631581d8aea Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 22:04:58 +0000 Subject: [PATCH 017/148] Migrate uinet_sysctl() to uinet_sysctlbyname() - it's the correct name for it. Implement uinet_sysctl(). 
--- bin/passive/sysctl_api.c | 2 +- lib/libuinet/api_include/uinet_config.h | 5 ++++- lib/libuinet/uinet_api.symlist | 1 + lib/libuinet/uinet_config_kernel.c | 13 ++++++++++++- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index fbc0ec0..2b3527c 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -119,7 +119,7 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl) (int) wbuf_len); /* XXX typecasting sbuf sucks */ - error = uinet_sysctl(req_str, + error = uinet_sysctlbyname(req_str, wbuf, &wbuf_len, (char *) sbuf, sbuf_len, &rval, diff --git a/lib/libuinet/api_include/uinet_config.h b/lib/libuinet/api_include/uinet_config.h index 7b68d81..a9d4554 100644 --- a/lib/libuinet/api_include/uinet_config.h +++ b/lib/libuinet/api_include/uinet_config.h @@ -156,8 +156,11 @@ int uinet_config_blackhole(uinet_blackhole_t action); * XXX doesn't belong here! */ int -uinet_sysctl(char *name, char *oldp, size_t *oldplen, +uinet_sysctlbyname(char *name, char *oldp, size_t *oldplen, char *newp, size_t newplen, size_t *retval, int flags); +int +uinet_sysctl(int *name, u_int namelen, void *oid, size_t *oldlenp, + void *new, size_t newlen, size_t *retval, int flags); #ifdef __cplusplus } diff --git a/lib/libuinet/uinet_api.symlist b/lib/libuinet/uinet_api.symlist index e2e7b51..59e867e 100644 --- a/lib/libuinet/uinet_api.symlist +++ b/lib/libuinet/uinet_api.symlist @@ -68,3 +68,4 @@ uinet_synfilter_getconninfo uinet_synfilter_getl2info uinet_synfilter_install uinet_sysctl +uinet_sysctlbyname diff --git a/lib/libuinet/uinet_config_kernel.c b/lib/libuinet/uinet_config_kernel.c index bcfda27..cccf7dd 100644 --- a/lib/libuinet/uinet_config_kernel.c +++ b/lib/libuinet/uinet_config_kernel.c @@ -68,7 +68,7 @@ uinet_config_blackhole(uinet_blackhole_t action) } int -uinet_sysctl(char *name, char *oldp, size_t *oldplen, +uinet_sysctlbyname(char *name, char *oldp, size_t *oldplen, char *newp, size_t newplen, 
size_t *retval, int flags) { int error; @@ -77,3 +77,14 @@ uinet_sysctl(char *name, char *oldp, size_t *oldplen, newp, newplen, retval, flags); return (error); } + +int +uinet_sysctl(int *name, u_int namelen, void *old, size_t *oldlenp, + void *new, size_t newlen, size_t *retval, int flags) +{ + int error; + + error = kernel_sysctl(curthread, name, namelen, old, oldlenp, + new, newlen, retval, flags); + return (error); +} From 648a764756043bc28f276b15a5a24c844d7f0021 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 22:05:36 +0000 Subject: [PATCH 018/148] Oops, naming conflict with the uinet stuff. Fix. --- bin/sysctl/sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/sysctl/sysctl.c b/bin/sysctl/sysctl.c index 812501a..4833747 100644 --- a/bin/sysctl/sysctl.c +++ b/bin/sysctl/sysctl.c @@ -16,7 +16,7 @@ #include "nv.h" static int -uinet_sysctlbyname(int ns, +u_sysctlbyname(int ns, const char *name, void *oldp, size_t *oldlenp, @@ -134,7 +134,7 @@ main(int argc, char *argv[]) err(1, "calloc"); /* Do a sysctl */ - r = uinet_sysctlbyname(s, req_str, resp_buf, &respbuf_len, + r = u_sysctlbyname(s, req_str, resp_buf, &respbuf_len, NULL, 0); printf("%s: str=%s, r=%d, errno=%d, len=%d\n", __func__, From 74d679fc5a1a78a4f2b72cb50fd189c91e791ac0 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 22:21:04 +0000 Subject: [PATCH 019/148] Begin fleshing out (untested for now) OID based sysctl. It's terrible - I'm just passing the oid data in as a binary blob. Any semblence of being endian-agnostic just went out the window. 
--- bin/passive/sysctl_api.c | 147 ++++++++++++++++++++++++++++++++++++++- bin/sysctl/sysctl.c | 89 ++++++++++++++++++++++++ 2 files changed, 235 insertions(+), 1 deletion(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 2b3527c..b3cd14a 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -182,6 +182,151 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl) return (retval); } +/* + * Handle sysctl oid type requests. + * + * Returns 1 if the connection should stay open; 0 if + * not. + * + * XXX this is definitely not endian-clean. + * I'm just passing in sysctl_oid as a binary array. Ew. + */ +static int +passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) +{ + struct sysctl_req_hdr *hdr; + nvlist_t *nvl_resp = NULL; + int retval = 0; + char *wbuf = NULL; + size_t wbuf_len = 0; + size_t sbuf_len = 0; + const int *req_oid = NULL; + const char *sbuf; + int error; + size_t rval; + size_t req_oid_len; + + /* Validate fields are here */ + if (! nvlist_exists_binary(nvl, "sysctl_oid")) { + fprintf(stderr, "%s: fd %d: missing sysctl_oid\n", + __func__, + ns); + retval = 0; + goto finish; + } + req_oid = (const int *) nvlist_get_binary(nvl, "sysctl_oid", + &req_oid_len); + if (req_oid_len % sizeof(int) != 0) { + fprintf(stderr, "%s: fd %d: req_oid_len (%llu) is not a multiple of %d\n", + __func__, + ns, + (unsigned long long) req_oid_len, + (int) sizeof(int)); + retval = 0; + goto finish; + } + + /* sysctl_respbuf_len */ + if (! 
nvlist_exists_number(nvl, "sysctl_respbuf_len")) { + fprintf(stderr, "%s: fd %d: missing sysctl_respbuf_len\n", + __func__, + ns); + retval = 0; + goto finish; + } + if (nvlist_get_number(nvl, "sysctl_respbuf_len") > SYSCTL_MAX_REQ_BUF_LEN) { + fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is too big!\n", + __func__, + ns); + retval = 0; + goto finish; + } + wbuf_len = nvlist_get_number(nvl, "sysctl_respbuf_len"); + wbuf = calloc(1, wbuf_len); + if (wbuf == NULL) { + fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, ns); + retval = 0; + goto finish; + } + + /* sysctl_reqbuf */ + if (nvlist_exists_binary(nvl, "sysctl_reqbuf")) { + sbuf = nvlist_get_binary(nvl, "sysctl_reqbuf", &sbuf_len); + } else { + sbuf = NULL; + sbuf_len = 0; + } + + /* Issue sysctl */ + fprintf(stderr, + "%s: fd %d: sysctl oid src_len=%d, dst_len=%d\n", + __func__, + ns, + (int) sbuf_len, + (int) wbuf_len); + + /* XXX typecasting sbuf and req_oid sucks */ + error = uinet_sysctl((int *) req_oid, req_oid_len / sizeof(int), + wbuf, &wbuf_len, + (char *) sbuf, sbuf_len, + &rval, + 0); + + fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%llu, rval=%llu\n", + __func__, + ns, + (int) error, + (unsigned long long) wbuf_len, + (unsigned long long) rval); + + /* + * XXX Validate the response back from uinet_sysctl() + * is within bounds for the response back to the + * client. 
+ */ + if (error == 0 && rval >= wbuf_len) { + fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", + __func__, + ns, + (unsigned long long) rval, + (unsigned long long) wbuf_len); + retval = 0; + goto finish; + } + + /* Construct our response */ + nvl_resp = nvlist_create(0); + if (nvl_resp == NULL) { + fprintf(stderr, "%s: fd %d: nvlist_create failed\n", __func__, ns); + retval = 0; + goto finish; + } + + nvlist_add_number(nvl_resp, "sysctl_errno", error); + if (error == 0) { + nvlist_add_binary(nvl_resp, "sysctl_respbuf", wbuf, rval); + } + + if (nvlist_send(ns, nvl_resp) < 0) { + fprintf(stderr, "%s: fd %d: nvlist_send failed; errno=%d\n", + __func__, + ns, + errno); + retval = 1; + goto finish; + } + + /* Done! */ + retval = 1; + +finish: + if (wbuf != NULL) + free(wbuf); + if (nvl_resp != NULL) + nvlist_destroy(nvl_resp); + return (retval); +} + void * passive_sysctl_listener(void *arg) { @@ -253,7 +398,7 @@ passive_sysctl_listener(void *arg) if (strncmp(type, "sysctl_str", 10) == 0) { ret = passive_sysctl_reqtype_str(ns, nvl); } else if (strncmp(type, "sysctl_oid", 10) == 0) { - ret = passive_sysctl_reqtype_str(ns, nvl); + ret = passive_sysctl_reqtype_oid(ns, nvl); } else { fprintf(stderr, "%s: fd %d: unknown type=%s\n", __func__, diff --git a/bin/sysctl/sysctl.c b/bin/sysctl/sysctl.c index 4833747..3393a9c 100644 --- a/bin/sysctl/sysctl.c +++ b/bin/sysctl/sysctl.c @@ -87,6 +87,80 @@ u_sysctlbyname(int ns, return (retval); } +static int +u_sysctl(int ns, + int *oid, + u_int namelen, + void *oldp, + size_t *oldlenp, + const void *newp, + size_t newlen) +{ + nvlist_t *nvl, *nvl_resp; + int retval = 0; + const char *rbuf; + size_t r_len; + int r_errno; + + /* Create nvlist to populate the request into */ + nvl = nvlist_create(0); + if (nvl == NULL) { + warn("nvlist_create"); + retval = -1; + goto done; + } + + /* Create nvlist for a sysctl_oid request */ + nvlist_add_string(nvl, "type", "sysctl_oid"); + nvlist_add_binary(nvl, "sysctl_oid", oid, namelen 
* sizeof(int)); + nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp); + if (newlen > 0) { + nvlist_add_binary(nvl, "sysctl_reqbuf", newp, newlen); + } + + /* Send command */ + if (nvlist_send(ns, nvl) < 0) { + warn("nvlist_send"); + retval = -1; + goto done; + } + + /* Read response */ + nvl_resp = nvlist_recv(ns); + if (nvl_resp == NULL) { + warn("nvlist_recv"); + retval = -1; + goto done; + } + + if (! nvlist_exists_number(nvl_resp, "sysctl_errno")) { + fprintf(stderr, "response: no errno?\n"); + goto done; + } + r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno"); + + /* XXX validate r_len versus oldlenp */ + if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { + rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); + memcpy(oldp, rbuf, r_len); + *oldlenp = r_len; + } else { + r_len = 0; + } + + retval = 0; + /* XXX */ + errno = r_errno; + +done: + if (nvl) + nvlist_destroy(nvl); + if (nvl_resp) + nvlist_destroy(nvl_resp); + return (retval); +} + + int main(int argc, char *argv[]) { @@ -133,6 +207,7 @@ main(int argc, char *argv[]) if (resp_buf == NULL) err(1, "calloc"); +#if 0 /* Do a sysctl */ r = u_sysctlbyname(s, req_str, resp_buf, &respbuf_len, NULL, 0); @@ -142,6 +217,20 @@ main(int argc, char *argv[]) r, errno, (int) respbuf_len); +#else + /* Do a sysctl */ + int oida[2]; + oida[0] = 1; + oida[1] = 6; + r = u_sysctl(s, oida, 2, resp_buf, &respbuf_len, + NULL, 0); + printf("%s: str=%s, r=%d, errno=%d, len=%d\n", + __func__, + req_str, + r, + errno, + (int) respbuf_len); +#endif /* Done */ if (req_str) From f12ed2b7c080f099da1fc8ca0ad759f0787d2936 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 22:26:54 +0000 Subject: [PATCH 020/148] Break out the sysctl glue to external functions. 
--- bin/sysctl/Makefile | 2 +- bin/sysctl/sysctl.c | 165 +----------------------------------- bin/sysctl/u_sysctl.c | 188 ++++++++++++++++++++++++++++++++++++++++++ bin/sysctl/u_sysctl.h | 12 +++ 4 files changed, 204 insertions(+), 163 deletions(-) create mode 100644 bin/sysctl/u_sysctl.c create mode 100644 bin/sysctl/u_sysctl.h diff --git a/bin/sysctl/Makefile b/bin/sysctl/Makefile index 93ab286..43aefeb 100644 --- a/bin/sysctl/Makefile +++ b/bin/sysctl/Makefile @@ -2,7 +2,7 @@ TOPDIR?=${CURDIR}/../.. PROG=sysctl -SRCS=sysctl.c +SRCS=sysctl.c u_sysctl.c UINET_LIBS=uinet diff --git a/bin/sysctl/sysctl.c b/bin/sysctl/sysctl.c index 3393a9c..539b511 100644 --- a/bin/sysctl/sysctl.c +++ b/bin/sysctl/sysctl.c @@ -12,161 +12,14 @@ #include #include -#include "sysctl_api.h" +#include "u_sysctl.h" #include "nv.h" -static int -u_sysctlbyname(int ns, - const char *name, - void *oldp, - size_t *oldlenp, - const void *newp, - size_t newlen) -{ - nvlist_t *nvl, *nvl_resp; - int retval = 0; - const char *rbuf; - size_t r_len; - int r_errno; - - /* Create nvlist to populate the request into */ - nvl = nvlist_create(0); - if (nvl == NULL) { - warn("nvlist_create"); - retval = -1; - goto done; - } - - /* Create nvlist for a sysctl_str request */ - nvlist_add_string(nvl, "type", "sysctl_str"); - nvlist_add_string(nvl, "sysctl_str", name); - nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp); - if (newlen > 0) { - nvlist_add_binary(nvl, "sysctl_reqbuf", newp, newlen); - } - - /* Send command */ - if (nvlist_send(ns, nvl) < 0) { - warn("nvlist_send"); - retval = -1; - goto done; - } - - /* Read response */ - nvl_resp = nvlist_recv(ns); - if (nvl_resp == NULL) { - warn("nvlist_recv"); - retval = -1; - goto done; - } - - if (! 
nvlist_exists_number(nvl_resp, "sysctl_errno")) { - fprintf(stderr, "response: no errno?\n"); - goto done; - } - r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno"); - - /* XXX validate r_len versus oldlenp */ - if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { - rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); - memcpy(oldp, rbuf, r_len); - *oldlenp = r_len; - } else { - r_len = 0; - } - - retval = 0; - /* XXX */ - errno = r_errno; - -done: - if (nvl) - nvlist_destroy(nvl); - if (nvl_resp) - nvlist_destroy(nvl_resp); - return (retval); -} - -static int -u_sysctl(int ns, - int *oid, - u_int namelen, - void *oldp, - size_t *oldlenp, - const void *newp, - size_t newlen) -{ - nvlist_t *nvl, *nvl_resp; - int retval = 0; - const char *rbuf; - size_t r_len; - int r_errno; - - /* Create nvlist to populate the request into */ - nvl = nvlist_create(0); - if (nvl == NULL) { - warn("nvlist_create"); - retval = -1; - goto done; - } - - /* Create nvlist for a sysctl_oid request */ - nvlist_add_string(nvl, "type", "sysctl_oid"); - nvlist_add_binary(nvl, "sysctl_oid", oid, namelen * sizeof(int)); - nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp); - if (newlen > 0) { - nvlist_add_binary(nvl, "sysctl_reqbuf", newp, newlen); - } - - /* Send command */ - if (nvlist_send(ns, nvl) < 0) { - warn("nvlist_send"); - retval = -1; - goto done; - } - - /* Read response */ - nvl_resp = nvlist_recv(ns); - if (nvl_resp == NULL) { - warn("nvlist_recv"); - retval = -1; - goto done; - } - - if (! 
nvlist_exists_number(nvl_resp, "sysctl_errno")) { - fprintf(stderr, "response: no errno?\n"); - goto done; - } - r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno"); - - /* XXX validate r_len versus oldlenp */ - if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { - rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); - memcpy(oldp, rbuf, r_len); - *oldlenp = r_len; - } else { - r_len = 0; - } - - retval = 0; - /* XXX */ - errno = r_errno; - -done: - if (nvl) - nvlist_destroy(nvl); - if (nvl_resp) - nvlist_destroy(nvl_resp); - return (retval); -} - - int main(int argc, char *argv[]) { int s; - struct sockaddr_un sun; int r; size_t reqbuf_len = 0, respbuf_len = 0; char *req_str; @@ -186,28 +39,16 @@ main(int argc, char *argv[]) /* XXX Reqbuf when required */ - /* Connect to the destination socket */ - bzero(&sun, sizeof(sun)); - - strcpy(sun.sun_path, "/tmp/sysctl.sock"); - sun.sun_len = 0; - sun.sun_family = AF_UNIX; - - s = socket(AF_UNIX, SOCK_STREAM, 0); + s = u_sysctl_open(); if (s < 0) { err(1, "socket"); } - r = connect(s, (struct sockaddr *) &sun, sizeof(struct sockaddr_un)); - if (r < 0) { - err(1, "connect"); - } - resp_buf = calloc(1, respbuf_len); if (resp_buf == NULL) err(1, "calloc"); -#if 0 +#if 1 /* Do a sysctl */ r = u_sysctlbyname(s, req_str, resp_buf, &respbuf_len, NULL, 0); diff --git a/bin/sysctl/u_sysctl.c b/bin/sysctl/u_sysctl.c new file mode 100644 index 0000000..55668bc --- /dev/null +++ b/bin/sysctl/u_sysctl.c @@ -0,0 +1,188 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "sysctl_api.h" +#include "nv.h" + +int +u_sysctlbyname(int ns, + const char *name, + void *oldp, + size_t *oldlenp, + const void *newp, + size_t newlen) +{ + nvlist_t *nvl, *nvl_resp; + int retval = 0; + const char *rbuf; + size_t r_len; + int r_errno; + + /* Create nvlist to populate the request into */ + nvl = nvlist_create(0); + if (nvl == NULL) { + 
warn("nvlist_create"); + retval = -1; + goto done; + } + + /* Create nvlist for a sysctl_str request */ + nvlist_add_string(nvl, "type", "sysctl_str"); + nvlist_add_string(nvl, "sysctl_str", name); + nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp); + if (newlen > 0) { + nvlist_add_binary(nvl, "sysctl_reqbuf", newp, newlen); + } + + /* Send command */ + if (nvlist_send(ns, nvl) < 0) { + warn("nvlist_send"); + retval = -1; + goto done; + } + + /* Read response */ + nvl_resp = nvlist_recv(ns); + if (nvl_resp == NULL) { + warn("nvlist_recv"); + retval = -1; + goto done; + } + + if (! nvlist_exists_number(nvl_resp, "sysctl_errno")) { + fprintf(stderr, "response: no errno?\n"); + goto done; + } + r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno"); + + /* XXX validate r_len versus oldlenp */ + if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { + rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); + memcpy(oldp, rbuf, r_len); + *oldlenp = r_len; + } else { + r_len = 0; + } + + retval = 0; + /* XXX */ + errno = r_errno; + +done: + if (nvl) + nvlist_destroy(nvl); + if (nvl_resp) + nvlist_destroy(nvl_resp); + return (retval); +} + +int +u_sysctl(int ns, + int *oid, + u_int namelen, + void *oldp, + size_t *oldlenp, + const void *newp, + size_t newlen) +{ + nvlist_t *nvl, *nvl_resp; + int retval = 0; + const char *rbuf; + size_t r_len; + int r_errno; + + /* Create nvlist to populate the request into */ + nvl = nvlist_create(0); + if (nvl == NULL) { + warn("nvlist_create"); + retval = -1; + goto done; + } + + /* Create nvlist for a sysctl_oid request */ + nvlist_add_string(nvl, "type", "sysctl_oid"); + nvlist_add_binary(nvl, "sysctl_oid", oid, namelen * sizeof(int)); + nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp); + if (newlen > 0) { + nvlist_add_binary(nvl, "sysctl_reqbuf", newp, newlen); + } + + /* Send command */ + if (nvlist_send(ns, nvl) < 0) { + warn("nvlist_send"); + retval = -1; + goto done; + } + + /* Read response */ + nvl_resp 
= nvlist_recv(ns); + if (nvl_resp == NULL) { + warn("nvlist_recv"); + retval = -1; + goto done; + } + + if (! nvlist_exists_number(nvl_resp, "sysctl_errno")) { + fprintf(stderr, "response: no errno?\n"); + goto done; + } + r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno"); + + /* XXX validate r_len versus oldlenp */ + if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { + rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); + memcpy(oldp, rbuf, r_len); + *oldlenp = r_len; + } else { + r_len = 0; + } + + retval = 0; + /* XXX */ + errno = r_errno; + +done: + if (nvl) + nvlist_destroy(nvl); + if (nvl_resp) + nvlist_destroy(nvl_resp); + return (retval); +} + +int +u_sysctl_open(void) +{ + int s; + struct sockaddr_un sun; + int r; + + /* Connect to the destination socket */ + bzero(&sun, sizeof(sun)); + + strcpy(sun.sun_path, "/tmp/sysctl.sock"); + sun.sun_len = 0; + sun.sun_family = AF_UNIX; + + s = socket(AF_UNIX, SOCK_STREAM, 0); + if (s < 0) { + err(1, "socket"); + } + + r = connect(s, (struct sockaddr *) &sun, sizeof(struct sockaddr_un)); + if (r < 0) { + err(1, "connect"); + } + + return (s); +} diff --git a/bin/sysctl/u_sysctl.h b/bin/sysctl/u_sysctl.h new file mode 100644 index 0000000..3f1b20e --- /dev/null +++ b/bin/sysctl/u_sysctl.h @@ -0,0 +1,12 @@ +#ifndef __U_SYSCTL_H__ +#define __U_SYSCTL_H__ + +extern int u_sysctlbyname(int ns, const char *name, void *oldp, + size_t *oldlenp, const void *newp, size_t newlen); + +extern int u_sysctl(int ns, int *oid, u_int namelen, void *oldp, + size_t *oldlenp, const void *newp, size_t newlen); + +extern int u_sysctl_open(void); + +#endif /* __U_SYSCTL_H__ */ From 8da79f73a1aec8d01324c576aacca3b69b373ce4 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 16 Apr 2014 22:34:55 +0000 Subject: [PATCH 021/148] Dirty - but sysctl -a (which doesn't yet work) requires allproc_lock. 
--- lib/libuinet/uinet_kern_mutex.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_kern_mutex.c b/lib/libuinet/uinet_kern_mutex.c index 9ad2dfa..9092dc4 100644 --- a/lib/libuinet/uinet_kern_mutex.c +++ b/lib/libuinet/uinet_kern_mutex.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "uinet_host_interface.h" @@ -104,8 +105,9 @@ void mutex_init(void) { mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); - mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); + /* XXX? */ + sx_init(&allproc_lock, "allproc_lock"); } void From 02c9d9b6a31b1145c24d656c4e9fbb43f4efc3db Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Apr 2014 20:31:59 +0000 Subject: [PATCH 022/148] * Make sure we set oldlenp based on rlen. * If we get no response buffer but we do have the length then set it to that. The "how big is X?" API involves doing a query for an OID, with no old buffer but with an oldlenp set. The size of the response is then put in oldlenp. 
--- bin/sysctl/u_sysctl.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bin/sysctl/u_sysctl.c b/bin/sysctl/u_sysctl.c index 55668bc..7231acd 100644 --- a/bin/sysctl/u_sysctl.c +++ b/bin/sysctl/u_sysctl.c @@ -102,6 +102,15 @@ u_sysctl(int ns, size_t r_len; int r_errno; +#if 0 + printf("sysctl: nl=%d, oldp=%p, oldlen=%d, newp=%p, newlen=%d\n", + namelen, + oldp, + (int) *oldlenp, + newp, + (int) newlen); +#endif + /* Create nvlist to populate the request into */ nvl = nvlist_create(0); if (nvl == NULL) { @@ -143,10 +152,12 @@ u_sysctl(int ns, if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); memcpy(oldp, rbuf, r_len); - *oldlenp = r_len; + } else if (nvlist_exists_number(nvl_resp, "sysctl_respbuf_len")) { + r_len = nvlist_get_number(nvl_resp, "sysctl_respbuf_len"); } else { r_len = 0; } + *oldlenp = r_len; retval = 0; /* XXX */ From 98d8e0870c1063911687d5a408a9600bc7df9209 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Apr 2014 20:33:23 +0000 Subject: [PATCH 023/148] I'm now hacking on the freebsd supplied sysctl; move the old sysctl.c out of the way. --- bin/sysctl/sysctl.c | 935 ++++++++++++++++++++++++++++++++++++--- bin/sysctl/sysctl_hack.c | 86 ++++ 2 files changed, 949 insertions(+), 72 deletions(-) create mode 100644 bin/sysctl/sysctl_hack.c diff --git a/bin/sysctl/sysctl.c b/bin/sysctl/sysctl.c index 539b511..6e5741a 100644 --- a/bin/sysctl/sysctl.c +++ b/bin/sysctl/sysctl.c @@ -1,86 +1,877 @@ -#include -#include +/* + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static const char copyright[] = +"@(#) Copyright (c) 1993\n\ + The Regents of the University of California. 
All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)from: sysctl.c 8.1 (Berkeley) 6/6/93"; +#endif +static const char rcsid[] = + "$FreeBSD: stable/10/sbin/sysctl/sysctl.c 260193 2014-01-02 13:48:54Z trasz $"; +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include + +#include +#include #include +#include +#include +#include #include -#include -#include #include -#include - -#include -#include -#include -#include +#include +#include #include "u_sysctl.h" -#include "nv.h" + +static int u_sock = -1; + +static const char *conffile; + +static int aflag, bflag, dflag, eflag, hflag, iflag; +static int Nflag, nflag, oflag, qflag, Tflag, Wflag, xflag; + +static int oidfmt(int *, int, char *, u_int *); +static int parsefile(const char *); +static int parse(const char *, int); +static int show_var(int *, int); +static int sysctl_all(int *oid, int len); +static int name2oid(char *, int *); + +static int set_IK(const char *, int *); + +static void +usage(void) +{ + + (void)fprintf(stderr, "%s\n%s\n", + "usage: sysctl [-bdehiNnoqTWx] [-f filename] name[=value] ...", + " sysctl [-bdehNnoqTWx] -a"); + exit(1); +} int -main(int argc, char *argv[]) +main(int argc, char **argv) { + int ch; + int warncount = 0; - int s; - int r; - size_t reqbuf_len = 0, respbuf_len = 0; - char *req_str; - char *req_buf = NULL; - char *resp_buf; - size_t r_len; - - if (argc < 2) { - printf("Usage: sysctl \n"); - exit(127); - } - - /* Fake up a request structure for now */ - req_str = strdup(argv[1]); - reqbuf_len = 0; - respbuf_len = 1048576; - - /* XXX Reqbuf when required */ - - s = u_sysctl_open(); - if (s < 0) { - err(1, "socket"); - } - - resp_buf = calloc(1, respbuf_len); - if (resp_buf == NULL) - err(1, "calloc"); - -#if 1 - /* Do a sysctl */ - r = u_sysctlbyname(s, req_str, resp_buf, &respbuf_len, - NULL, 0); - printf("%s: str=%s, r=%d, errno=%d, len=%d\n", - __func__, - req_str, - r, - errno, - (int) respbuf_len); 
-#else - /* Do a sysctl */ - int oida[2]; - oida[0] = 1; - oida[1] = 6; - r = u_sysctl(s, oida, 2, resp_buf, &respbuf_len, - NULL, 0); - printf("%s: str=%s, r=%d, errno=%d, len=%d\n", - __func__, - req_str, - r, - errno, - (int) respbuf_len); -#endif + setlocale(LC_NUMERIC, ""); + setbuf(stdout,0); + setbuf(stderr,0); + + while ((ch = getopt(argc, argv, "Aabdef:hiNnoqTwWxX")) != -1) { + switch (ch) { + case 'A': + /* compatibility */ + aflag = oflag = 1; + break; + case 'a': + aflag = 1; + break; + case 'b': + bflag = 1; + break; + case 'd': + dflag = 1; + break; + case 'e': + eflag = 1; + break; + case 'f': + conffile = optarg; + break; + case 'h': + hflag = 1; + break; + case 'i': + iflag = 1; + break; + case 'N': + Nflag = 1; + break; + case 'n': + nflag = 1; + break; + case 'o': + oflag = 1; + break; + case 'q': + qflag = 1; + break; + case 'T': + Tflag = 1; + break; + case 'w': + /* compatibility */ + /* ignored */ + break; + case 'W': + Wflag = 1; + break; + case 'X': + /* compatibility */ + aflag = xflag = 1; + break; + case 'x': + xflag = 1; + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + /* open local socket */ + u_sock = u_sysctl_open(); + if (u_sock < 0) + err(1, "u_sysctl_open"); + + if (Nflag && nflag) + usage(); + if (aflag && argc == 0) + exit(sysctl_all(0, 0)); + if (argc == 0 && conffile == NULL) + usage(); + + warncount = 0; + if (conffile != NULL) + warncount += parsefile(conffile); + + while (argc-- > 0) + warncount += parse(*argv++, 0); + + return (warncount); +} + +/* + * Parse a name into a MIB entry. + * Lookup and print out the MIB entry if it exists. + * Set a new value if requested. 
+ */ +static int +parse(const char *string, int lineno) +{ + int len, i, j; + void *newval = 0; + int intval; + unsigned int uintval; + long longval; + unsigned long ulongval; + size_t newsize = 0; + int64_t i64val; + uint64_t u64val; + int mib[CTL_MAXNAME]; + char *cp, *bufp, buf[BUFSIZ], *endptr, fmt[BUFSIZ], line[BUFSIZ]; + u_int kind; + + if (lineno) + snprintf(line, sizeof(line), " at line %d", lineno); + else + line[0] = '\0'; + + cp = buf; + if (snprintf(buf, BUFSIZ, "%s", string) >= BUFSIZ) { + warnx("oid too long: '%s'%s", string, line); + return (1); + } + bufp = strsep(&cp, "=:"); + if (cp != NULL) { + /* Tflag just lists tunables, do not allow assignment */ + if (Tflag || Wflag) { + warnx("Can't set variables when using -T or -W"); + usage(); + } + while (isspace(*cp)) + cp++; + /* Strip a pair of " or ' if any. */ + switch (*cp) { + case '\"': + case '\'': + if (cp[strlen(cp) - 1] == *cp) + cp[strlen(cp) - 1] = '\0'; + cp++; + } + newval = cp; + newsize = strlen(cp); + } + len = name2oid(bufp, mib); + + if (len < 0) { + if (iflag) + return (0); + if (qflag) + return (1); + else { + warn("unknown oid '%s'%s", bufp, line); + return (1); + } + } + + if (oidfmt(mib, len, fmt, &kind)) { + warn("couldn't find format of oid '%s'%s", bufp, line); + if (iflag) + return (1); + else + exit(1); + } + + if (newval == NULL || dflag) { + if ((kind & CTLTYPE) == CTLTYPE_NODE) { + if (dflag) { + i = show_var(mib, len); + if (!i && !bflag) + putchar('\n'); + } + sysctl_all(mib, len); + } else { + i = show_var(mib, len); + if (!i && !bflag) + putchar('\n'); + } + } else { + if ((kind & CTLTYPE) == CTLTYPE_NODE) { + warnx("oid '%s' isn't a leaf node%s", bufp, line); + return (1); + } + + if (!(kind & CTLFLAG_WR)) { + if (kind & CTLFLAG_TUN) { + warnx("oid '%s' is a read only tunable%s", bufp, line); + warnx("Tunable values are set in /boot/loader.conf"); + } else + warnx("oid '%s' is read only%s", bufp, line); + return (1); + } + + if ((kind & CTLTYPE) == CTLTYPE_INT || + 
(kind & CTLTYPE) == CTLTYPE_UINT || + (kind & CTLTYPE) == CTLTYPE_LONG || + (kind & CTLTYPE) == CTLTYPE_ULONG || + (kind & CTLTYPE) == CTLTYPE_S64 || + (kind & CTLTYPE) == CTLTYPE_U64) { + if (strlen(newval) == 0) { + warnx("empty numeric value"); + return (1); + } + } + + switch (kind & CTLTYPE) { + case CTLTYPE_INT: + if (strcmp(fmt, "IK") == 0) { + if (!set_IK(newval, &intval)) { + warnx("invalid value '%s'%s", + (char *)newval, line); + return (1); + } + } else { + intval = (int)strtol(newval, &endptr, + 0); + if (endptr == newval || *endptr != '\0') { + warnx("invalid integer '%s'%s", + (char *)newval, line); + return (1); + } + } + newval = &intval; + newsize = sizeof(intval); + break; + case CTLTYPE_UINT: + uintval = (int) strtoul(newval, &endptr, 0); + if (endptr == newval || *endptr != '\0') { + warnx("invalid unsigned integer '%s'%s", + (char *)newval, line); + return (1); + } + newval = &uintval; + newsize = sizeof(uintval); + break; + case CTLTYPE_LONG: + longval = strtol(newval, &endptr, 0); + if (endptr == newval || *endptr != '\0') { + warnx("invalid long integer '%s'%s", + (char *)newval, line); + return (1); + } + newval = &longval; + newsize = sizeof(longval); + break; + case CTLTYPE_ULONG: + ulongval = strtoul(newval, &endptr, 0); + if (endptr == newval || *endptr != '\0') { + warnx("invalid unsigned long integer" + " '%s'%s", (char *)newval, line); + return (1); + } + newval = &ulongval; + newsize = sizeof(ulongval); + break; + case CTLTYPE_STRING: + break; + case CTLTYPE_S64: + i64val = strtoimax(newval, &endptr, 0); + if (endptr == newval || *endptr != '\0') { + warnx("invalid int64_t '%s'%s", + (char *)newval, line); + return (1); + } + newval = &i64val; + newsize = sizeof(i64val); + break; + case CTLTYPE_U64: + u64val = strtoumax(newval, &endptr, 0); + if (endptr == newval || *endptr != '\0') { + warnx("invalid uint64_t '%s'%s", + (char *)newval, line); + return (1); + } + newval = &u64val; + newsize = sizeof(u64val); + break; + case 
CTLTYPE_OPAQUE: + /* FALLTHROUGH */ + default: + warnx("oid '%s' is type %d," + " cannot set that%s", bufp, + kind & CTLTYPE, line); + return (1); + } + + i = show_var(mib, len); + if (u_sysctl(u_sock, mib, len, 0, 0, newval, newsize) == -1) { + if (!i && !bflag) + putchar('\n'); + switch (errno) { + case EOPNOTSUPP: + warnx("%s: value is not available%s", + string, line); + return (1); + case ENOTDIR: + warnx("%s: specification is incomplete%s", + string, line); + return (1); + case ENOMEM: + warnx("%s: type is unknown to this program%s", + string, line); + return (1); + default: + warn("%s%s", string, line); + return (1); + } + } + if (!bflag) + printf(" -> "); + i = nflag; + nflag = 1; + j = show_var(mib, len); + if (!j && !bflag) + putchar('\n'); + nflag = i; + } + + return (0); +} + +static int +parsefile(const char *filename) +{ + FILE *file; + char line[BUFSIZ], *p, *pq, *pdq; + int warncount = 0, lineno = 0; + + file = fopen(filename, "r"); + if (file == NULL) + err(EX_NOINPUT, "%s", filename); + while (fgets(line, sizeof(line), file) != NULL) { + lineno++; + p = line; + pq = strchr(line, '\''); + pdq = strchr(line, '\"'); + /* Replace the first # with \0. */ + while((p = strchr(p, '#')) != NULL) { + if (pq != NULL && p > pq) { + if ((p = strchr(pq+1, '\'')) != NULL) + *(++p) = '\0'; + break; + } else if (pdq != NULL && p > pdq) { + if ((p = strchr(pdq+1, '\"')) != NULL) + *(++p) = '\0'; + break; + } else if (p == line || *(p-1) != '\\') { + *p = '\0'; + break; + } + p++; + } + /* Trim spaces */ + p = line + strlen(line) - 1; + while (p >= line && isspace((int)*p)) { + *p = '\0'; + p--; + } + p = line; + while (isspace((int)*p)) + p++; + if (*p == '\0') + continue; + else + warncount += parse(p, lineno); + } + fclose(file); + + return (warncount); +} + +/* These functions will dump out various interesting structures. 
*/ + +static int +S_clockinfo(int l2, void *p) +{ + struct clockinfo *ci = (struct clockinfo*)p; + + if (l2 != sizeof(*ci)) { + warnx("S_clockinfo %d != %zu", l2, sizeof(*ci)); + return (1); + } + printf(hflag ? "{ hz = %'d, tick = %'d, profhz = %'d, stathz = %'d }" : + "{ hz = %d, tick = %d, profhz = %d, stathz = %d }", + ci->hz, ci->tick, ci->profhz, ci->stathz); + return (0); +} + +static int +S_loadavg(int l2, void *p) +{ + struct loadavg *tv = (struct loadavg*)p; - /* Done */ - if (req_str) - free(req_str); + if (l2 != sizeof(*tv)) { + warnx("S_loadavg %d != %zu", l2, sizeof(*tv)); + return (1); + } + printf(hflag ? "{ %'.2f %'.2f %'.2f }" : "{ %.2f %.2f %.2f }", + (double)tv->ldavg[0]/(double)tv->fscale, + (double)tv->ldavg[1]/(double)tv->fscale, + (double)tv->ldavg[2]/(double)tv->fscale); + return (0); +} + +static int +S_timeval(int l2, void *p) +{ + struct timeval *tv = (struct timeval*)p; + time_t tv_sec; + char *p1, *p2; + + if (l2 != sizeof(*tv)) { + warnx("S_timeval %d != %zu", l2, sizeof(*tv)); + return (1); + } + printf(hflag ? 
"{ sec = %'jd, usec = %'ld } " : + "{ sec = %jd, usec = %ld } ", + (intmax_t)tv->tv_sec, tv->tv_usec); + tv_sec = tv->tv_sec; + p1 = strdup(ctime(&tv_sec)); + for (p2=p1; *p2 ; p2++) + if (*p2 == '\n') + *p2 = '\0'; + fputs(p1, stdout); + free(p1); + return (0); +} + +static int +S_vmtotal(int l2, void *p) +{ + struct vmtotal *v = (struct vmtotal *)p; + int pageKilo = getpagesize() / 1024; + + if (l2 != sizeof(*v)) { + warnx("S_vmtotal %d != %zu", l2, sizeof(*v)); + return (1); + } + + printf( + "\nSystem wide totals computed every five seconds:" + " (values in kilobytes)\n"); + printf("===============================================\n"); + printf( + "Processes:\t\t(RUNQ: %hd Disk Wait: %hd Page Wait: " + "%hd Sleep: %hd)\n", + v->t_rq, v->t_dw, v->t_pw, v->t_sl); + printf( + "Virtual Memory:\t\t(Total: %dK Active: %dK)\n", + v->t_vm * pageKilo, v->t_avm * pageKilo); + printf("Real Memory:\t\t(Total: %dK Active: %dK)\n", + v->t_rm * pageKilo, v->t_arm * pageKilo); + printf("Shared Virtual Memory:\t(Total: %dK Active: %dK)\n", + v->t_vmshr * pageKilo, v->t_avmshr * pageKilo); + printf("Shared Real Memory:\t(Total: %dK Active: %dK)\n", + v->t_rmshr * pageKilo, v->t_armshr * pageKilo); + printf("Free Memory:\t%dK\n", v->t_free * pageKilo); + + return (0); +} + +static int +set_IK(const char *str, int *val) +{ + float temp; + int len, kelv; + const char *p; + char *endptr; + + if ((len = strlen(str)) == 0) + return (0); + p = &str[len - 1]; + if (*p == 'C' || *p == 'F') { + temp = strtof(str, &endptr); + if (endptr == str || endptr != p) + return (0); + if (*p == 'F') + temp = (temp - 32) * 5 / 9; + kelv = temp * 10 + 2732; + } else { + kelv = (int)strtol(str, &endptr, 10); + if (endptr == str || *endptr != '\0') + return (0); + } + *val = kelv; + return (1); +} + +/* + * These functions uses a presently undocumented interface to the kernel + * to walk the tree and get the type so it can print the value. 
+ * This interface is under work and consideration, and should probably + * be killed with a big axe by the first person who can find the time. + * (be aware though, that the proper interface isn't as obvious as it + * may seem, there are various conflicting requirements. + */ + +static int +name2oid(char *name, int *oidp) +{ + int oid[2]; + int i; + size_t j; + + oid[0] = 0; + oid[1] = 3; + + j = CTL_MAXNAME * sizeof(int); + i = u_sysctl(u_sock, oid, 2, oidp, &j, name, strlen(name)); + if (i < 0) + return (i); + j /= sizeof(int); + return (j); +} + +static int +oidfmt(int *oid, int len, char *fmt, u_int *kind) +{ + int qoid[CTL_MAXNAME+2]; + u_char buf[BUFSIZ]; + int i; + size_t j; + + qoid[0] = 0; + qoid[1] = 4; + memcpy(qoid + 2, oid, len * sizeof(int)); + + j = sizeof(buf); + i = u_sysctl(u_sock, qoid, len + 2, buf, &j, 0, 0); + if (i) + err(1, "sysctl fmt %d %zu %d", i, j, errno); + + if (kind) + *kind = *(u_int *)buf; + + if (fmt) + strcpy(fmt, (char *)(buf + sizeof(u_int))); + return (0); +} -done: +static int ctl_sign[CTLTYPE+1] = { + [CTLTYPE_INT] = 1, + [CTLTYPE_LONG] = 1, + [CTLTYPE_S64] = 1, +}; - /* Done with socket */ - close(s); +static int ctl_size[CTLTYPE+1] = { + [CTLTYPE_INT] = sizeof(int), + [CTLTYPE_UINT] = sizeof(u_int), + [CTLTYPE_LONG] = sizeof(long), + [CTLTYPE_ULONG] = sizeof(u_long), + [CTLTYPE_S64] = sizeof(int64_t), + [CTLTYPE_U64] = sizeof(int64_t), +}; - exit(0); +/* + * This formats and outputs the value of one variable + * + * Returns zero if anything was actually output. + * Returns one if didn't know what to do with this. + * Return minus one if we had errors. + */ +static int +show_var(int *oid, int nlen) +{ + u_char buf[BUFSIZ], *val, *oval, *p; + char name[BUFSIZ], fmt[BUFSIZ]; + const char *sep, *sep1; + int qoid[CTL_MAXNAME+2]; + uintmax_t umv; + intmax_t mv; + int i, hexlen, sign, ctltype; + size_t intlen; + size_t j, len; + u_int kind; + int (*func)(int, void *); + + /* Silence GCC. 
*/ + umv = mv = intlen = 0; + + bzero(buf, BUFSIZ); + bzero(fmt, BUFSIZ); + bzero(name, BUFSIZ); + qoid[0] = 0; + memcpy(qoid + 2, oid, nlen * sizeof(int)); + + qoid[1] = 1; + j = sizeof(name); + i = u_sysctl(u_sock, qoid, nlen + 2, name, &j, 0, 0); + if (i || !j) + err(1, "sysctl name %d %zu %d", i, j, errno); + + oidfmt(oid, nlen, fmt, &kind); + /* if Wflag then only list sysctls that are writeable and not stats. */ + if (Wflag && ((kind & CTLFLAG_WR) == 0 || (kind & CTLFLAG_STATS) != 0)) + return 1; + + /* if Tflag then only list sysctls that are tuneables. */ + if (Tflag && (kind & CTLFLAG_TUN) == 0) + return 1; + + if (Nflag) { + printf("%s", name); + return (0); + } + + if (eflag) + sep = "="; + else + sep = ": "; + + if (dflag) { /* just print description */ + qoid[1] = 5; + j = sizeof(buf); + i = u_sysctl(u_sock, qoid, nlen + 2, buf, &j, 0, 0); + if (!nflag) + printf("%s%s", name, sep); + printf("%s", buf); + return (0); + } + /* find an estimate of how much we need for this var */ + j = 0; + i = u_sysctl(u_sock, oid, nlen, 0, &j, 0, 0); + j += j; /* we want to be sure :-) */ + + val = oval = malloc(j + 1); + if (val == NULL) { + warnx("malloc failed"); + return (1); + } + len = j; + i = u_sysctl(u_sock, oid, nlen, val, &len, 0, 0); + if (i || !len) { + free(oval); + return (1); + } + + if (bflag) { + fwrite(val, 1, len, stdout); + free(oval); + return (0); + } + val[len] = '\0'; + p = val; + ctltype = (kind & CTLTYPE); + sign = ctl_sign[ctltype]; + intlen = ctl_size[ctltype]; + + switch (ctltype) { + case CTLTYPE_STRING: + if (!nflag) + printf("%s%s", name, sep); + printf("%.*s", (int)len, p); + free(oval); + return (0); + + case CTLTYPE_INT: + case CTLTYPE_UINT: + case CTLTYPE_LONG: + case CTLTYPE_ULONG: + case CTLTYPE_S64: + case CTLTYPE_U64: + if (!nflag) + printf("%s%s", name, sep); + hexlen = 2 + (intlen * CHAR_BIT + 3) / 4; + sep1 = ""; + while (len >= intlen) { + switch (kind & CTLTYPE) { + case CTLTYPE_INT: + case CTLTYPE_UINT: + umv = *(u_int *)p; 
+ mv = *(int *)p; + break; + case CTLTYPE_LONG: + case CTLTYPE_ULONG: + umv = *(u_long *)p; + mv = *(long *)p; + break; + case CTLTYPE_S64: + case CTLTYPE_U64: + umv = *(uint64_t *)p; + mv = *(int64_t *)p; + break; + } + fputs(sep1, stdout); + if (xflag) + printf("%#0*jx", hexlen, umv); + else if (!sign) + printf(hflag ? "%'ju" : "%ju", umv); + else if (fmt[1] == 'K') { + if (mv < 0) + printf("%jd", mv); + else + printf("%.1fC", (mv - 2732.0) / 10); + } else + printf(hflag ? "%'jd" : "%jd", mv); + sep1 = " "; + len -= intlen; + p += intlen; + } + free(oval); + return (0); + + case CTLTYPE_OPAQUE: + i = 0; + if (strcmp(fmt, "S,clockinfo") == 0) + func = S_clockinfo; + else if (strcmp(fmt, "S,timeval") == 0) + func = S_timeval; + else if (strcmp(fmt, "S,loadavg") == 0) + func = S_loadavg; + else if (strcmp(fmt, "S,vmtotal") == 0) + func = S_vmtotal; + else + func = NULL; + if (func) { + if (!nflag) + printf("%s%s", name, sep); + i = (*func)(len, p); + free(oval); + return (i); + } + /* FALLTHROUGH */ + default: + if (!oflag && !xflag) { + free(oval); + return (1); + } + if (!nflag) + printf("%s%s", name, sep); + printf("Format:%s Length:%zu Dump:0x", fmt, len); + while (len-- && (xflag || p < val + 16)) + printf("%02x", *p++); + if (!xflag && len > 16) + printf("..."); + free(oval); + return (0); + } + free(oval); + return (1); +} + +static int +sysctl_all(int *oid, int len) +{ + int name1[22], name2[22]; + int i, j; + size_t l1, l2; + + name1[0] = 0; + name1[1] = 2; + l1 = 2; + if (len) { + memcpy(name1+2, oid, len * sizeof(int)); + l1 += len; + } else { + name1[2] = 1; + l1++; + } + for (;;) { + l2 = sizeof(name2); + j = u_sysctl(u_sock, name1, l1, name2, &l2, 0, 0); + if (j < 0) { + if (errno == ENOENT) + return (0); + else + err(1, "sysctl(getnext) %d %zu", j, l2); + } + + l2 /= sizeof(int); + + if (len < 0 || l2 < (unsigned int)len) + return (0); + + for (i = 0; i < len; i++) + if (name2[i] != oid[i]) + return (0); + + i = show_var(name2, l2); + if (!i && 
!bflag) + putchar('\n'); + + memcpy(name1+2, name2, l2 * sizeof(int)); + l1 = 2 + l2; + } } diff --git a/bin/sysctl/sysctl_hack.c b/bin/sysctl/sysctl_hack.c new file mode 100644 index 0000000..539b511 --- /dev/null +++ b/bin/sysctl/sysctl_hack.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "u_sysctl.h" +#include "nv.h" + +int +main(int argc, char *argv[]) +{ + + int s; + int r; + size_t reqbuf_len = 0, respbuf_len = 0; + char *req_str; + char *req_buf = NULL; + char *resp_buf; + size_t r_len; + + if (argc < 2) { + printf("Usage: sysctl \n"); + exit(127); + } + + /* Fake up a request structure for now */ + req_str = strdup(argv[1]); + reqbuf_len = 0; + respbuf_len = 1048576; + + /* XXX Reqbuf when required */ + + s = u_sysctl_open(); + if (s < 0) { + err(1, "socket"); + } + + resp_buf = calloc(1, respbuf_len); + if (resp_buf == NULL) + err(1, "calloc"); + +#if 1 + /* Do a sysctl */ + r = u_sysctlbyname(s, req_str, resp_buf, &respbuf_len, + NULL, 0); + printf("%s: str=%s, r=%d, errno=%d, len=%d\n", + __func__, + req_str, + r, + errno, + (int) respbuf_len); +#else + /* Do a sysctl */ + int oida[2]; + oida[0] = 1; + oida[1] = 6; + r = u_sysctl(s, oida, 2, resp_buf, &respbuf_len, + NULL, 0); + printf("%s: str=%s, r=%d, errno=%d, len=%d\n", + __func__, + req_str, + r, + errno, + (int) respbuf_len); +#endif + + /* Done */ + if (req_str) + free(req_str); + +done: + + /* Done with socket */ + close(s); + + exit(0); +} From 6c76d011708afc742d6527193f76c30da210b011 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Apr 2014 20:34:01 +0000 Subject: [PATCH 024/148] Try to handle the case where no oldp buffer is provided, but we do want the resulting length. 
--- bin/passive/sysctl_api.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index b3cd14a..e0b7b6d 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -242,11 +242,15 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) goto finish; } wbuf_len = nvlist_get_number(nvl, "sysctl_respbuf_len"); - wbuf = calloc(1, wbuf_len); - if (wbuf == NULL) { - fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, ns); - retval = 0; - goto finish; + if (wbuf_len == 0) { + wbuf = NULL; + } else { + wbuf = calloc(1, wbuf_len); + if (wbuf == NULL) { + fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, ns); + retval = 0; + goto finish; + } } /* sysctl_reqbuf */ @@ -259,11 +263,14 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) /* Issue sysctl */ fprintf(stderr, - "%s: fd %d: sysctl oid src_len=%d, dst_len=%d\n", + "%s: fd %d: sysctl oid oidlen=%d oldp=%p, oldplen=%d, newp=%p, newplen=%d\n", __func__, ns, - (int) sbuf_len, - (int) wbuf_len); + (int) (req_oid_len / sizeof(int)), + wbuf, + (int) wbuf_len, + sbuf, + (int) sbuf_len); /* XXX typecasting sbuf and req_oid sucks */ error = uinet_sysctl((int *) req_oid, req_oid_len / sizeof(int), @@ -279,12 +286,20 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) (unsigned long long) wbuf_len, (unsigned long long) rval); + + /* + * We only copy the data back if wbuf is not NULL. + * + * The undocumented size lookup in sysctl is done by + * doing a sysctl fetch on the given OID but with oldplen=0 and + * oldp=NULL, oldplen gets updated with the storage size. + */ /* * XXX Validate the response back from uinet_sysctl() * is within bounds for the response back to the * client. 
*/ - if (error == 0 && rval >= wbuf_len) { + if (wbuf != NULL && error == 0 && rval >= wbuf_len) { fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", __func__, ns, @@ -303,9 +318,10 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) } nvlist_add_number(nvl_resp, "sysctl_errno", error); - if (error == 0) { + if (error == 0 && wbuf != NULL) { nvlist_add_binary(nvl_resp, "sysctl_respbuf", wbuf, rval); } + nvlist_add_number(nvl_resp, "sysctl_respbuf_len", rval); if (nvlist_send(ns, nvl_resp) < 0) { fprintf(stderr, "%s: fd %d: nvlist_send failed; errno=%d\n", From 91fff7d01ca591bcc772aca4e0637759cf148866 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Apr 2014 20:45:39 +0000 Subject: [PATCH 025/148] Initialise rval = 0 so it doesn't return bogus values if it's not modified by sysctl*() calls. I need to refactor out the common code between the str and oid paths, this is ridiculous. --- bin/passive/sysctl_api.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index e0b7b6d..659ca8c 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -66,7 +66,7 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl) char *req_str = NULL; const char *sbuf; int error; - size_t rval; + size_t rval = 0; /* Validate fields are here */ if (! 
nvlist_exists_string(nvl, "sysctl_str")) { @@ -87,9 +87,10 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl) goto finish; } if (nvlist_get_number(nvl, "sysctl_respbuf_len") > SYSCTL_MAX_REQ_BUF_LEN) { - fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is too big!\n", + fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is too big (%llu)!\n", __func__, - ns); + ns, + (unsigned long long) nvlist_get_number(nvl, "sysctl_respbuf_len")); retval = 0; goto finish; } @@ -203,7 +204,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) const int *req_oid = NULL; const char *sbuf; int error; - size_t rval; + size_t rval = 0; size_t req_oid_len; /* Validate fields are here */ @@ -235,9 +236,10 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) goto finish; } if (nvlist_get_number(nvl, "sysctl_respbuf_len") > SYSCTL_MAX_REQ_BUF_LEN) { - fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is too big!\n", + fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is too big! (%llu)\n", __func__, - ns); + ns, + (unsigned long long) nvlist_get_number(nvl, "sysctl_respbuf_len")); retval = 0; goto finish; } From c64bcc90f9ac1c38f8f5a3b09e5610a2b4168b33 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Apr 2014 22:14:53 +0000 Subject: [PATCH 026/148] Toss this; patrick committed a real fix. --- lib/libuinet/uinet_kern_mutex.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/libuinet/uinet_kern_mutex.c b/lib/libuinet/uinet_kern_mutex.c index 9092dc4..3210a59 100644 --- a/lib/libuinet/uinet_kern_mutex.c +++ b/lib/libuinet/uinet_kern_mutex.c @@ -106,8 +106,6 @@ mutex_init(void) { mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); - /* XXX? */ - sx_init(&allproc_lock, "allproc_lock"); } void From 0b1c48e111cc39094b1adc21912fe59b0a3588ca Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 18 Apr 2014 20:11:32 +0000 Subject: [PATCH 027/148] unlink before creating the socket. 
--- bin/passive/sysctl_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 659ca8c..4fb95b4 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -353,6 +353,8 @@ passive_sysctl_listener(void *arg) uinet_initialize_thread(); + (void) unlink("/tmp/sysctl.sock"); + bzero(&sun, sizeof(sun)); strcpy(sun.sun_path, "/tmp/sysctl.sock"); sun.sun_len = 0; From 51abb37519fdcc91b01a188691e4482d77205bb5 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 18 Apr 2014 22:23:19 +0000 Subject: [PATCH 028/148] NULL out the nvlist pointers so we don't free an invalid pointer if we hit an error. --- bin/sysctl/u_sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/sysctl/u_sysctl.c b/bin/sysctl/u_sysctl.c index 7231acd..68cf9bf 100644 --- a/bin/sysctl/u_sysctl.c +++ b/bin/sysctl/u_sysctl.c @@ -23,7 +23,7 @@ u_sysctlbyname(int ns, const void *newp, size_t newlen) { - nvlist_t *nvl, *nvl_resp; + nvlist_t *nvl = NULL, *nvl_resp = NULL; int retval = 0; const char *rbuf; size_t r_len; @@ -96,7 +96,7 @@ u_sysctl(int ns, const void *newp, size_t newlen) { - nvlist_t *nvl, *nvl_resp; + nvlist_t *nvl = NULL, *nvl_resp = NULL; int retval = 0; const char *rbuf; size_t r_len; From b7fd67bacd664de2bd0756c58177478499ed266d Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 18 Apr 2014 23:41:32 +0000 Subject: [PATCH 029/148] Return -1 if errno is set. 
--- bin/sysctl/u_sysctl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bin/sysctl/u_sysctl.c b/bin/sysctl/u_sysctl.c index 68cf9bf..ccaed5a 100644 --- a/bin/sysctl/u_sysctl.c +++ b/bin/sysctl/u_sysctl.c @@ -159,9 +159,12 @@ u_sysctl(int ns, } *oldlenp = r_len; - retval = 0; - /* XXX */ - errno = r_errno; + if (r_errno == 0) { + retval = 0; + } else { + retval = -1; + errno = r_errno; + } done: if (nvl) From 936d8998828208115af6fd11f5df02368874c43e Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 21 Apr 2014 19:41:27 +0000 Subject: [PATCH 030/148] * Refactor out the common code from the byname and byoid paths into a new function. * Handle a NULL oldlenp - this is the case for sysctl writes. This (with the upcoming libuinet changes) allows for sysctl writing to occur. --- bin/sysctl/u_sysctl.c | 123 ++++++++++++++++++++---------------------- 1 file changed, 57 insertions(+), 66 deletions(-) diff --git a/bin/sysctl/u_sysctl.c b/bin/sysctl/u_sysctl.c index ccaed5a..14b62cf 100644 --- a/bin/sysctl/u_sysctl.c +++ b/bin/sysctl/u_sysctl.c @@ -15,32 +15,28 @@ #include "sysctl_api.h" #include "nv.h" -int -u_sysctlbyname(int ns, - const char *name, +static int +u_sysctl_do_sysctl(struct nvlist *nvl, int ns, void *oldp, size_t *oldlenp, const void *newp, size_t newlen) { - nvlist_t *nvl = NULL, *nvl_resp = NULL; + nvlist_t *nvl_resp = NULL; int retval = 0; + int r_errno; const char *rbuf; size_t r_len; - int r_errno; - /* Create nvlist to populate the request into */ - nvl = nvlist_create(0); - if (nvl == NULL) { - warn("nvlist_create"); - retval = -1; - goto done; - } + /* Setup request and response buffer information */ + + /* + * Writing a value may pass in a NULL oldlenp, so only conditionally + * send it. 
+ */ + if (oldlenp != NULL) + nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp); - /* Create nvlist for a sysctl_str request */ - nvlist_add_string(nvl, "type", "sysctl_str"); - nvlist_add_string(nvl, "sysctl_str", name); - nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp); if (newlen > 0) { nvlist_add_binary(nvl, "sysctl_reqbuf", newp, newlen); } @@ -70,23 +66,60 @@ u_sysctlbyname(int ns, if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); memcpy(oldp, rbuf, r_len); - *oldlenp = r_len; + } else if (nvlist_exists_number(nvl_resp, "sysctl_respbuf_len")) { + r_len = nvlist_get_number(nvl_resp, "sysctl_respbuf_len"); } else { r_len = 0; } - retval = 0; - /* XXX */ - errno = r_errno; + if (oldlenp != NULL) + *oldlenp = r_len; + + if (r_errno == 0) { + retval = 0; + } else { + retval = -1; + errno = r_errno; + } done: - if (nvl) - nvlist_destroy(nvl); if (nvl_resp) nvlist_destroy(nvl_resp); return (retval); } +int +u_sysctlbyname(int ns, + const char *name, + void *oldp, + size_t *oldlenp, + const void *newp, + size_t newlen) +{ + nvlist_t *nvl = NULL; + int retval = 0; + + /* Create nvlist to populate the request into */ + nvl = nvlist_create(0); + if (nvl == NULL) { + warn("nvlist_create"); + retval = -1; + goto done; + } + + /* Create nvlist for a sysctl_str request */ + nvlist_add_string(nvl, "type", "sysctl_str"); + nvlist_add_string(nvl, "sysctl_str", name); + + /* XXX this sets errno as appropriate */ + retval = u_sysctl_do_sysctl(nvl, ns, oldp, oldlenp, newp, newlen); + +done: + if (nvl) + nvlist_destroy(nvl); + return (retval); +} + int u_sysctl(int ns, int *oid, @@ -122,55 +155,13 @@ u_sysctl(int ns, /* Create nvlist for a sysctl_oid request */ nvlist_add_string(nvl, "type", "sysctl_oid"); nvlist_add_binary(nvl, "sysctl_oid", oid, namelen * sizeof(int)); - nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp); - if (newlen > 0) { - nvlist_add_binary(nvl, "sysctl_reqbuf", newp, 
newlen); - } - - /* Send command */ - if (nvlist_send(ns, nvl) < 0) { - warn("nvlist_send"); - retval = -1; - goto done; - } - - /* Read response */ - nvl_resp = nvlist_recv(ns); - if (nvl_resp == NULL) { - warn("nvlist_recv"); - retval = -1; - goto done; - } - if (! nvlist_exists_number(nvl_resp, "sysctl_errno")) { - fprintf(stderr, "response: no errno?\n"); - goto done; - } - r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno"); - - /* XXX validate r_len versus oldlenp */ - if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { - rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); - memcpy(oldp, rbuf, r_len); - } else if (nvlist_exists_number(nvl_resp, "sysctl_respbuf_len")) { - r_len = nvlist_get_number(nvl_resp, "sysctl_respbuf_len"); - } else { - r_len = 0; - } - *oldlenp = r_len; - - if (r_errno == 0) { - retval = 0; - } else { - retval = -1; - errno = r_errno; - } + /* XXX this sets errno as appropriate */ + retval = u_sysctl_do_sysctl(nvl, ns, oldp, oldlenp, newp, newlen); done: if (nvl) nvlist_destroy(nvl); - if (nvl_resp) - nvlist_destroy(nvl_resp); return (retval); } From aa23ad045a6d715545c82414b087d61b0a239f5e Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 21 Apr 2014 19:42:05 +0000 Subject: [PATCH 031/148] (note: grr, should be using a generic C RPC thing already. This hand rolling crap is annoying.) * Disable the sysctlbyname path until I've refactored out the common code. * Handle the no-respbuflen case - sysctl writes will call sysctl() with oldlenp=NULL. This allows sysctl writes to occur. 
--- bin/passive/sysctl_api.c | 47 +++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 4fb95b4..23a9b1e 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -57,6 +57,7 @@ static int passive_sysctl_reqtype_str(int ns, nvlist_t *nvl) { +#if 0 struct sysctl_req_hdr *hdr; nvlist_t *nvl_resp = NULL; int retval = 0; @@ -181,6 +182,8 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl) if (nvl_resp != NULL) nvlist_destroy(nvl_resp); return (retval); +#endif + return (-1); } /* @@ -227,23 +230,28 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) goto finish; } - /* sysctl_respbuf_len */ - if (! nvlist_exists_number(nvl, "sysctl_respbuf_len")) { - fprintf(stderr, "%s: fd %d: missing sysctl_respbuf_len\n", - __func__, - ns); - retval = 0; - goto finish; - } - if (nvlist_get_number(nvl, "sysctl_respbuf_len") > SYSCTL_MAX_REQ_BUF_LEN) { - fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is too big! (%llu)\n", - __func__, - ns, - (unsigned long long) nvlist_get_number(nvl, "sysctl_respbuf_len")); - retval = 0; - goto finish; + /* + * We may not have a response buffer length provided. + * This is done when writing a sysctl value. + */ + if (nvlist_exists_number(nvl, "sysctl_respbuf_len")) { + if (nvlist_get_number(nvl, "sysctl_respbuf_len") > + SYSCTL_MAX_REQ_BUF_LEN) { + fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is " + "too big! 
(%llu)\n", + __func__, + ns, + (unsigned long long) nvlist_get_number(nvl, + "sysctl_respbuf_len")); + retval = 0; + goto finish; + } + wbuf_len = nvlist_get_number(nvl, "sysctl_respbuf_len"); + } else { + wbuf_len = 0; } - wbuf_len = nvlist_get_number(nvl, "sysctl_respbuf_len"); + + /* If wbuf_len is 0, then pass in a NULL wbuf */ if (wbuf_len == 0) { wbuf = NULL; } else { @@ -275,8 +283,13 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) (int) sbuf_len); /* XXX typecasting sbuf and req_oid sucks */ + /* + * Pass in a NULL wbuf_len if wbuf is NULL. sysctl writing + * passes in a NULL buffer and NULL oidlenp. + */ error = uinet_sysctl((int *) req_oid, req_oid_len / sizeof(int), - wbuf, &wbuf_len, + wbuf, + wbuf == NULL ? NULL : &wbuf_len, (char *) sbuf, sbuf_len, &rval, 0); From b0caebd6c7be5ef576585c139b34e34f0ea42810 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Apr 2014 19:14:46 +0000 Subject: [PATCH 032/148] Fix merge. --- bin/passive/passive.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/bin/passive/passive.c b/bin/passive/passive.c index a54f13b..81903bd 100644 --- a/bin/passive/passive.c +++ b/bin/passive/passive.c @@ -879,14 +879,11 @@ int main (int argc, char **argv) interface_thread_start, &interfaces[i]); } -<<<<<<< HEAD error = pthread_create(&sysctl_thr, NULL, passive_sysctl_listener, NULL); if (error != 0) { printf("Failed to bring up sysctl thread: %d\n", errno); } -======= ->>>>>>> 7c493ffa2b42b5f64b9d5923063a507d8e89a614 for (i = 0; i < num_interfaces; i++) { if (0 == interfaces[i].thread_create_result) pthread_join(interfaces[i].thread, NULL); From b010aeef0e5d072b7be01096ec9902f49f709da7 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Apr 2014 19:15:02 +0000 Subject: [PATCH 033/148] Make the sysctl debugging configurable. 
--- bin/passive/sysctl_api.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 23a9b1e..200cf18 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -48,6 +48,8 @@ #define SYSCTL_MAX_STR_LEN 1024 #define SYSCTL_MAX_REQ_BUF_LEN 1048576 +#define UINET_SYSCTL_DEBUG + /* * Handle sysctl string type requests. * @@ -212,20 +214,24 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) /* Validate fields are here */ if (! nvlist_exists_binary(nvl, "sysctl_oid")) { +#ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: missing sysctl_oid\n", __func__, ns); +#endif retval = 0; goto finish; } req_oid = (const int *) nvlist_get_binary(nvl, "sysctl_oid", &req_oid_len); if (req_oid_len % sizeof(int) != 0) { +#ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: req_oid_len (%llu) is not a multiple of %d\n", __func__, ns, (unsigned long long) req_oid_len, (int) sizeof(int)); +#endif retval = 0; goto finish; } @@ -237,12 +243,14 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) if (nvlist_exists_number(nvl, "sysctl_respbuf_len")) { if (nvlist_get_number(nvl, "sysctl_respbuf_len") > SYSCTL_MAX_REQ_BUF_LEN) { +#ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is " "too big! 
(%llu)\n", __func__, ns, (unsigned long long) nvlist_get_number(nvl, "sysctl_respbuf_len")); +#endif retval = 0; goto finish; } @@ -257,7 +265,9 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) } else { wbuf = calloc(1, wbuf_len); if (wbuf == NULL) { +#ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, ns); +#endif retval = 0; goto finish; } @@ -272,6 +282,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) } /* Issue sysctl */ +#ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: sysctl oid oidlen=%d oldp=%p, oldplen=%d, newp=%p, newplen=%d\n", __func__, @@ -281,6 +292,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) (int) wbuf_len, sbuf, (int) sbuf_len); +#endif /* XXX typecasting sbuf and req_oid sucks */ /* @@ -294,13 +306,14 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) &rval, 0); +#ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%llu, rval=%llu\n", __func__, ns, (int) error, (unsigned long long) wbuf_len, (unsigned long long) rval); - +#endif /* * We only copy the data back if wbuf is not NULL. @@ -315,11 +328,13 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) * client. */ if (wbuf != NULL && error == 0 && rval >= wbuf_len) { +#ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", __func__, ns, (unsigned long long) rval, (unsigned long long) wbuf_len); +#endif retval = 0; goto finish; } @@ -422,10 +437,12 @@ passive_sysctl_listener(void *arg) } type = nvlist_get_string(nvl, "type"); +#ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: type=%s\n", __func__, ns, type); +#endif /* Dispatch as appropriate */ if (strncmp(type, "sysctl_str", 10) == 0) { From 3ba27f1c12eda59e594a7e7bb353482661810a52 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Apr 2014 20:13:25 +0000 Subject: [PATCH 034/148] Migrate the single define that we're currently using into the .h so it can be used by the userland code. 
--- bin/passive/sysctl_api.c | 5 +++-- bin/passive/sysctl_api.h | 37 +++---------------------------------- 2 files changed, 6 insertions(+), 36 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 200cf18..9ad7ec3 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -43,10 +43,11 @@ #include "nv.h" #include "sysctl_api.h" +#if 0 #define SYSCTL_BUF_LEN 131072 #define SYSCTL_MAX_BUF_LEN 1048576 #define SYSCTL_MAX_STR_LEN 1024 -#define SYSCTL_MAX_REQ_BUF_LEN 1048576 +#endif #define UINET_SYSCTL_DEBUG @@ -242,7 +243,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) */ if (nvlist_exists_number(nvl, "sysctl_respbuf_len")) { if (nvlist_get_number(nvl, "sysctl_respbuf_len") > - SYSCTL_MAX_REQ_BUF_LEN) { + U_SYSCTL_MAX_REQ_BUF_LEN) { #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is " "too big! (%llu)\n", diff --git a/bin/passive/sysctl_api.h b/bin/passive/sysctl_api.h index 4b77654..ff311e0 100644 --- a/bin/passive/sysctl_api.h +++ b/bin/passive/sysctl_api.h @@ -1,41 +1,10 @@ #ifndef __SYSCTL_API_H__ #define __SYSCTL_API_H__ -typedef enum { - SYSCTL_REQ_NONE = 0, - SYSCTL_REQ_STR = 1, - SYSCTL_REQ_OID = 2, -} sysctl_req_type_t; - -#if 0 -struct sysctl_req_hdr { - uint32_t sysctl_req_len; /* length of the whole payload */ - uint32_t sysctl_req_type; /* Type of the message */ - uint32_t sysctl_req_flags; /* Message flags */ - - /* This is the sysctl specific stuff */ - uint32_t sysctl_str_len; - uint32_t sysctl_dst_len; /* result (new) */ - uint32_t sysctl_src_len; /* request (old) */ - - /* sysctl string follows, non-NUL terminated */ - - /* srcbuf follows, if srclen != 0 */ -}; - -struct sysctl_resp_hdr { - uint32_t sysctl_resp_len; - uint32_t sysctl_resp_type; - uint32_t sysctl_resp_flags; - - /* This is the sysctl specific stuff */ - uint32_t sysctl_dst_len; /* response buffer length */ - uint32_t sysctl_dst_errno; /* sysctl errno value */ - - /* Response follows, if sysctl_dst_len != 0 */ 
-}; -#endif +/* XXX this is a public definition */ +#define U_SYSCTL_MAX_REQ_BUF_LEN 1048576 +/* XXX this is a private definition */ extern void * passive_sysctl_listener(void *arg); #endif From 0c1df2f2bfa4f5a98d28021f230a91c20bdc9a7e Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Apr 2014 21:29:45 +0000 Subject: [PATCH 035/148] Add in the client-side support for using POSIX shm for the response buffer. It's still memory inefficient (as there's now two copies of the incoming data sitting in the client side) but it's bloating out the client side, _not_ the server-side memory requirements. --- bin/sysctl/u_sysctl.c | 74 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/bin/sysctl/u_sysctl.c b/bin/sysctl/u_sysctl.c index 14b62cf..7dbe0da 100644 --- a/bin/sysctl/u_sysctl.c +++ b/bin/sysctl/u_sysctl.c @@ -6,15 +6,32 @@ #include #include #include +#include #include #include #include #include +#include /* for round_page() */ + +#include #include "sysctl_api.h" #include "nv.h" +/* + * XXX TODO: + * + * + the sysctl shm stuff should be a transaction based thing + * + the API should be modified so it returns the buffer, and then + * has a "finish" function that frees it if appropriate - that + * way for shm buffers we don't need to double allocate things. + * + .. we shouldn't be doing all the mmap / munmap stuff - it + * will cause IPI shootdowns as the memory map in the libuinet + * using code has its memory map change. I'll solve that + * later. 
+ */ + static int u_sysctl_do_sysctl(struct nvlist *nvl, int ns, void *oldp, @@ -28,8 +45,56 @@ u_sysctl_do_sysctl(struct nvlist *nvl, int ns, const char *rbuf; size_t r_len; + /* XXX Eventually this should be in a sysctl transaction struct */ + int shm_fd = -1; + char *shm_mem = NULL; + size_t shm_len = 0; + char shm_path[128]; + /* Setup request and response buffer information */ + /* + * If the requested size is provided and it's greater than the + * maximum size allowed, we'll flip to using shm + */ + if (oldlenp != NULL && *oldlenp >= U_SYSCTL_MAX_REQ_BUF_LEN) { + /* Construct a shm path */ + /* XXX should make this less guessable */ + snprintf(shm_path, 128, "/sysctl.%ld", (long) arc4random()); + + /* Open it */ + shm_fd = shm_open(shm_path, O_CREAT | O_RDWR, 0640); + if (shm_fd < 0) { + warn("shm_open (%s)", shm_path); + retval = -1; + goto done; + } + + /* + * Calculate a mmap size that's a multiple of + * the system page length. + */ + shm_len = round_page(*oldlenp); + + /* make it that big! */ + if (ftruncate(shm_fd, shm_len) < 0) { + warn("ftruncate"); + goto done; + } + + /* mmap it */ + shm_mem = mmap(NULL, shm_len, PROT_READ | PROT_WRITE, + 0, shm_fd, 0); + if (shm_mem == NULL) { + warn("mmap"); + goto done; + } + + /* add the shm path to the outbound request */ + nvlist_add_string(nvl, "sysctl_respbuf_shm_path", shm_path); + nvlist_add_number(nvl, "sysctl_respbuf_shm_len", shm_len); + } + /* * Writing a value may pass in a NULL oldlenp, so only conditionally * send it. 
@@ -66,6 +131,9 @@ u_sysctl_do_sysctl(struct nvlist *nvl, int ns, if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) { rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len); memcpy(oldp, rbuf, r_len); + } else if (shm_mem != NULL) { + memcpy(oldp, shm_mem, r_len); + r_len = nvlist_get_number(nvl_resp, "sysctl_respbuf_shm_len"); } else if (nvlist_exists_number(nvl_resp, "sysctl_respbuf_len")) { r_len = nvlist_get_number(nvl_resp, "sysctl_respbuf_len"); } else { @@ -83,6 +151,12 @@ u_sysctl_do_sysctl(struct nvlist *nvl, int ns, } done: + if (shm_mem != NULL) + munmap(shm_mem, shm_len); + if (shm_fd != -1) { + close(shm_fd); + shm_unlink(shm_path); + } if (nvl_resp) nvlist_destroy(nvl_resp); return (retval); From 219638ee13116cba1bfe21439a2bf96c030def0a Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Apr 2014 21:30:24 +0000 Subject: [PATCH 036/148] This is the first cut of the server side SHM sysctl response stuff. This allows the response buffer to be passed in via POSIX SHM. It avoids having to allocate a temporary buffer to write a large response (potentially > 50 megabytes in the case of the pcbinfo responses) and serialise it out via a socket - and then have the receiver have to also allocate a large temporary buffer.
--- bin/passive/sysctl_api.c | 121 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 5 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 9ad7ec3..801a3a6 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -31,12 +31,16 @@ #include #include #include +#include +#include #include #include #include #include #include +#include +#include #include "uinet_api.h" #include "uinet_config.h" @@ -212,8 +216,20 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) int error; size_t rval = 0; size_t req_oid_len; + char *oldp = NULL; - /* Validate fields are here */ + /* + * This is the posix shm state + */ + int shm_fd = -1; + char *shm_mem = NULL; + size_t shm_len = 0; + const char *shm_path; + + /* + * We absolutely require there to be a sysctl_oid field. + * Ensure it's here. + */ if (! nvlist_exists_binary(nvl, "sysctl_oid")) { #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: missing sysctl_oid\n", @@ -237,13 +253,71 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) goto finish; } + /* + * If the shm stuff is provided, grab it. + * + * XXX Validate that it is indeed a valid path somehow? + */ + if (nvlist_exists_string(nvl, "sysctl_respbuf_shm_path")) { + shm_path = nvlist_get_string(nvl, "sysctl_respbuf_shm_path"); + if (! nvlist_exists_number(nvl, "sysctl_respbuf_shm_len")) { +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, "%s: shm_path provided but not shm_len\n", + __func__); +#endif + retval = 0; + goto finish; + } + + /* + * If we have an shm_path, then we absolutely require + * a respbuf_len field. + */ + if (! 
nvlist_exists_number(nvl, "sysctl_respbuf_len")) { +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, + "%s: shm_path provided but no shm_respbuf_len!\n", + __func__); +#endif + retval = 0; + goto finish; + } + + shm_len = nvlist_get_number(nvl, "sysctl_respbuf_shm_len"); + + shm_fd = shm_open(shm_path, O_RDWR, 0644); + if (shm_fd < 0) { +#ifdef UINET_SYSCTL_DEBUG + warn("%s: shm_open (%s)", __func__, shm_path); +#endif + retval = 0; + goto finish; + } + + /* mmap it */ + shm_mem = mmap(NULL, shm_len, PROT_READ, 0, shm_fd, 0); + if (shm_mem == NULL) { +#ifdef UINET_SYSCTL_DEBUG + warn("%s: mmap (%s)", __func__, shm_path); +#endif + retval = 0; + goto finish; + } + } + /* * We may not have a response buffer length provided. * This is done when writing a sysctl value. */ if (nvlist_exists_number(nvl, "sysctl_respbuf_len")) { - if (nvlist_get_number(nvl, "sysctl_respbuf_len") > - U_SYSCTL_MAX_REQ_BUF_LEN) { + + /* + * Only validate length here if we don't have a shm. + * We enforce a maximum size requirement on non-SHM + * requests. + */ + if (shm_mem == NULL && nvlist_get_number(nvl, + "sysctl_respbuf_len") > U_SYSCTL_MAX_REQ_BUF_LEN) { #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is " "too big! (%llu)\n", @@ -260,9 +334,39 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) wbuf_len = 0; } + /* + * If we have a shm, ensure respbuf_len <= shm_len. + */ + if (shm_mem != NULL) { + if (wbuf_len > shm_len) { +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, "%s: fd %d: respbuf_len %d > shm_len %d\n", + __func__, + ns, + (int) wbuf_len, + (int) shm_len); +#endif + retval = 0; + goto finish; + } + } + + /* + * If we have a shm_buf, pass that in. + * + * Otherwise, if wbuf_len is 0, pass in a NULL wbuf. + * + * Otherwise, allocate a wbuf. 
+ */ + /* If wbuf_len is 0, then pass in a NULL wbuf */ + if (shm_mem != NULL) { + wbuf = NULL; + oldp = shm_mem; + } if (wbuf_len == 0) { wbuf = NULL; + oldp = NULL; } else { wbuf = calloc(1, wbuf_len); if (wbuf == NULL) { @@ -272,6 +376,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) retval = 0; goto finish; } + oldp = wbuf; } /* sysctl_reqbuf */ @@ -301,8 +406,8 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) * passes in a NULL buffer and NULL oidlenp. */ error = uinet_sysctl((int *) req_oid, req_oid_len / sizeof(int), - wbuf, - wbuf == NULL ? NULL : &wbuf_len, + oldp, + oldp == NULL ? NULL : &wbuf_len, (char *) sbuf, sbuf_len, &rval, 0); @@ -349,6 +454,8 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) } nvlist_add_number(nvl_resp, "sysctl_errno", error); + + /* wbuf is NULL if we have a shm response */ if (error == 0 && wbuf != NULL) { nvlist_add_binary(nvl_resp, "sysctl_respbuf", wbuf, rval); } @@ -369,6 +476,10 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) finish: if (wbuf != NULL) free(wbuf); + if (shm_mem != NULL) + munmap(shm_mem, shm_len); + if (shm_fd != -1) + close(shm_fd); if (nvl_resp != NULL) nvlist_destroy(nvl_resp); return (retval); From 84d9d157c415f5b484d0a2368a5d42fa05aa50e3 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 23 Apr 2014 08:19:03 -0700 Subject: [PATCH 037/148] Add missing break. 
--- bin/passive/passive.c | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/passive/passive.c b/bin/passive/passive.c index 81903bd..e1cbd65 100644 --- a/bin/passive/passive.c +++ b/bin/passive/passive.c @@ -713,6 +713,7 @@ int main (int argc, char **argv) } else { interfaces[num_interfaces - 1].promisc = 1; } + break; case 'p': if (0 == interface_server_count) { printf("No listen address specified\n"); From e9a952dd592b8663203bea5e923c35d982cff96f Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 25 Apr 2014 09:15:11 -0700 Subject: [PATCH 038/148] Preparation work for unifying the sysctl and sysctlbyname routines * remove the #if 0'ed out code from sysctlbyname * pull out the state from the sysctl oid routine into a struct, so it can be passed into some worker functions. --- bin/passive/sysctl_api.c | 339 +++++++++++++-------------------------- 1 file changed, 113 insertions(+), 226 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 801a3a6..0427c0f 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -55,6 +55,29 @@ #define UINET_SYSCTL_DEBUG +struct u_sysctl_state_t { + nvlist_t *nvl_resp; + + char *wbuf; + size_t wbuf_len; + size_t sbuf_len; + const int *req_oid; + const char *sbuf; + int error; + size_t rval; + size_t req_oid_len; + char *oldp; + + /* + * This is the posix shm state + */ + int shm_fd; + char *shm_mem; + size_t shm_len; + const char *shm_path; + int retval; +}; + /* * Handle sysctl string type requests. * @@ -62,134 +85,9 @@ * not. */ static int -passive_sysctl_reqtype_str(int ns, nvlist_t *nvl) +passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) { -#if 0 - struct sysctl_req_hdr *hdr; - nvlist_t *nvl_resp = NULL; - int retval = 0; - char *wbuf = NULL; - size_t wbuf_len = 0; - size_t sbuf_len = 0; - char *req_str = NULL; - const char *sbuf; - int error; - size_t rval = 0; - - /* Validate fields are here */ - if (! 
nvlist_exists_string(nvl, "sysctl_str")) { - fprintf(stderr, "%s: fd %d: missing sysctl_str\n", - __func__, - ns); - retval = 0; - goto finish; - } - req_str = strdup(nvlist_get_string(nvl, "sysctl_str")); - - /* sysctl_respbuf_len */ - if (! nvlist_exists_number(nvl, "sysctl_respbuf_len")) { - fprintf(stderr, "%s: fd %d: missing sysctl_respbuf_len\n", - __func__, - ns); - retval = 0; - goto finish; - } - if (nvlist_get_number(nvl, "sysctl_respbuf_len") > SYSCTL_MAX_REQ_BUF_LEN) { - fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is too big (%llu)!\n", - __func__, - ns, - (unsigned long long) nvlist_get_number(nvl, "sysctl_respbuf_len")); - retval = 0; - goto finish; - } - wbuf_len = nvlist_get_number(nvl, "sysctl_respbuf_len"); - wbuf = calloc(1, wbuf_len); - if (wbuf == NULL) { - fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, ns); - retval = 0; - goto finish; - } - - /* sysctl_reqbuf */ - if (nvlist_exists_binary(nvl, "sysctl_reqbuf")) { - sbuf = nvlist_get_binary(nvl, "sysctl_reqbuf", &sbuf_len); - } else { - sbuf = NULL; - sbuf_len = 0; - } - - /* Issue sysctl */ - fprintf(stderr, - "%s: fd %d: sysctl '%s' src_len=%d, dst_len=%d\n", - __func__, - ns, - req_str, - (int) sbuf_len, - (int) wbuf_len); - - /* XXX typecasting sbuf sucks */ - error = uinet_sysctlbyname(req_str, - wbuf, &wbuf_len, - (char *) sbuf, sbuf_len, - &rval, - 0); - - fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%llu, rval=%llu\n", - __func__, - ns, - (int) error, - (unsigned long long) wbuf_len, - (unsigned long long) rval); - - /* - * XXX Validate the response back from uinet_sysctl() - * is within bounds for the response back to the - * client. 
- */ - if (error == 0 && rval >= wbuf_len) { - fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", - __func__, - ns, - (unsigned long long) rval, - (unsigned long long) wbuf_len); - retval = 0; - goto finish; - } - - /* Construct our response */ - nvl_resp = nvlist_create(0); - if (nvl_resp == NULL) { - fprintf(stderr, "%s: fd %d: nvlist_create failed\n", __func__, ns); - retval = 0; - goto finish; - } - - nvlist_add_number(nvl_resp, "sysctl_errno", error); - if (error == 0) { - nvlist_add_binary(nvl_resp, "sysctl_respbuf", wbuf, rval); - } - if (nvlist_send(ns, nvl_resp) < 0) { - fprintf(stderr, "%s: fd %d: nvlist_send failed; errno=%d\n", - __func__, - ns, - errno); - retval = 1; - goto finish; - } - - /* Done! */ - retval = 1; - -finish: - if (req_str != NULL) - free(req_str); - if (wbuf != NULL) - free(wbuf); - if (nvl_resp != NULL) - nvlist_destroy(nvl_resp); - return (retval); -#endif return (-1); } @@ -203,28 +101,11 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl) * I'm just passing in sysctl_oid as a binary array. Ew. */ static int -passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) +passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) { - struct sysctl_req_hdr *hdr; - nvlist_t *nvl_resp = NULL; - int retval = 0; - char *wbuf = NULL; - size_t wbuf_len = 0; - size_t sbuf_len = 0; - const int *req_oid = NULL; - const char *sbuf; - int error; - size_t rval = 0; - size_t req_oid_len; - char *oldp = NULL; - - /* - * This is the posix shm state - */ - int shm_fd = -1; - char *shm_mem = NULL; - size_t shm_len = 0; - const char *shm_path; + /* Initial state */ + us->shm_fd = -1; + us->retval = 0; /* * We absolutely require there to be a sysctl_oid field. 
@@ -236,20 +117,20 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) __func__, ns); #endif - retval = 0; + us->retval = 0; goto finish; } - req_oid = (const int *) nvlist_get_binary(nvl, "sysctl_oid", - &req_oid_len); - if (req_oid_len % sizeof(int) != 0) { + us->req_oid = (const int *) nvlist_get_binary(nvl, "sysctl_oid", + &us->req_oid_len); + if (us->req_oid_len % sizeof(int) != 0) { #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: req_oid_len (%llu) is not a multiple of %d\n", __func__, ns, - (unsigned long long) req_oid_len, + (unsigned long long) us->req_oid_len, (int) sizeof(int)); #endif - retval = 0; + us->retval = 0; goto finish; } @@ -259,13 +140,14 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) * XXX Validate that it is indeed a valid path somehow? */ if (nvlist_exists_string(nvl, "sysctl_respbuf_shm_path")) { - shm_path = nvlist_get_string(nvl, "sysctl_respbuf_shm_path"); + /* XXX strdup, then free as appropriate */ + us->shm_path = nvlist_get_string(nvl, "sysctl_respbuf_shm_path"); if (! 
nvlist_exists_number(nvl, "sysctl_respbuf_shm_len")) { #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: shm_path provided but not shm_len\n", __func__); #endif - retval = 0; + us->retval = 0; goto finish; } @@ -279,28 +161,28 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) "%s: shm_path provided but no shm_respbuf_len!\n", __func__); #endif - retval = 0; + us->retval = 0; goto finish; } - shm_len = nvlist_get_number(nvl, "sysctl_respbuf_shm_len"); + us->shm_len = nvlist_get_number(nvl, "sysctl_respbuf_shm_len"); - shm_fd = shm_open(shm_path, O_RDWR, 0644); - if (shm_fd < 0) { + us->shm_fd = shm_open(us->shm_path, O_RDWR, 0644); + if (us->shm_fd < 0) { #ifdef UINET_SYSCTL_DEBUG - warn("%s: shm_open (%s)", __func__, shm_path); + warn("%s: shm_open (%s)", __func__, us->shm_path); #endif - retval = 0; + us->retval = 0; goto finish; } /* mmap it */ - shm_mem = mmap(NULL, shm_len, PROT_READ, 0, shm_fd, 0); - if (shm_mem == NULL) { + us->shm_mem = mmap(NULL, us->shm_len, PROT_READ, 0, us->shm_fd, 0); + if (us->shm_mem == NULL) { #ifdef UINET_SYSCTL_DEBUG - warn("%s: mmap (%s)", __func__, shm_path); + warn("%s: mmap (%s)", __func__, us->shm_path); #endif - retval = 0; + us->retval = 0; goto finish; } } @@ -316,7 +198,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) * We enforce a maximum size requirement on non-SHM * requests. */ - if (shm_mem == NULL && nvlist_get_number(nvl, + if (us->shm_mem == NULL && nvlist_get_number(nvl, "sysctl_respbuf_len") > U_SYSCTL_MAX_REQ_BUF_LEN) { #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is " @@ -326,27 +208,27 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) (unsigned long long) nvlist_get_number(nvl, "sysctl_respbuf_len")); #endif - retval = 0; + us->retval = 0; goto finish; } - wbuf_len = nvlist_get_number(nvl, "sysctl_respbuf_len"); + us->wbuf_len = nvlist_get_number(nvl, "sysctl_respbuf_len"); } else { - wbuf_len = 0; + us->wbuf_len = 0; } /* * If we have a shm, ensure respbuf_len <= shm_len. 
*/ - if (shm_mem != NULL) { - if (wbuf_len > shm_len) { + if (us->shm_mem != NULL) { + if (us->wbuf_len > us->shm_len) { #ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: respbuf_len %d > shm_len %d\n", + fprintf(stderr, "%s: fd %d: respbuf_len %lld > shm_len %lld\n", __func__, ns, - (int) wbuf_len, - (int) shm_len); + (long long) us->wbuf_len, + (long long) us->shm_len); #endif - retval = 0; + us->retval = 0; goto finish; } } @@ -360,31 +242,31 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) */ /* If wbuf_len is 0, then pass in a NULL wbuf */ - if (shm_mem != NULL) { - wbuf = NULL; - oldp = shm_mem; + if (us->shm_mem != NULL) { + us->wbuf = NULL; + us->oldp = us->shm_mem; } - if (wbuf_len == 0) { - wbuf = NULL; - oldp = NULL; + if (us->wbuf_len == 0) { + us->wbuf = NULL; + us->oldp = NULL; } else { - wbuf = calloc(1, wbuf_len); - if (wbuf == NULL) { + us->wbuf = calloc(1, us->wbuf_len); + if (us->wbuf == NULL) { #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, ns); #endif - retval = 0; + us->retval = 0; goto finish; } - oldp = wbuf; + us->oldp = us->wbuf; } /* sysctl_reqbuf */ if (nvlist_exists_binary(nvl, "sysctl_reqbuf")) { - sbuf = nvlist_get_binary(nvl, "sysctl_reqbuf", &sbuf_len); + us->sbuf = nvlist_get_binary(nvl, "sysctl_reqbuf", &us->sbuf_len); } else { - sbuf = NULL; - sbuf_len = 0; + us->sbuf = NULL; + us->sbuf_len = 0; } /* Issue sysctl */ @@ -393,11 +275,11 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) "%s: fd %d: sysctl oid oidlen=%d oldp=%p, oldplen=%d, newp=%p, newplen=%d\n", __func__, ns, - (int) (req_oid_len / sizeof(int)), - wbuf, - (int) wbuf_len, - sbuf, - (int) sbuf_len); + (int) (us->req_oid_len / sizeof(int)), + us->wbuf, + (int) us->wbuf_len, + us->sbuf, + (int) us->sbuf_len); #endif /* XXX typecasting sbuf and req_oid sucks */ @@ -405,20 +287,22 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) * Pass in a NULL wbuf_len if wbuf is NULL. 
sysctl writing * passes in a NULL buffer and NULL oidlenp. */ - error = uinet_sysctl((int *) req_oid, req_oid_len / sizeof(int), - oldp, - oldp == NULL ? NULL : &wbuf_len, - (char *) sbuf, sbuf_len, - &rval, + us->error = uinet_sysctl((int *) us->req_oid, + us->req_oid_len / sizeof(int), + us->oldp, + us->oldp == NULL ? NULL : &us->wbuf_len, + (char *) us->sbuf, + us->sbuf_len, + &us->rval, 0); #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%llu, rval=%llu\n", __func__, ns, - (int) error, - (unsigned long long) wbuf_len, - (unsigned long long) rval); + (int) us->error, + (unsigned long long) us->wbuf_len, + (unsigned long long) us->rval); #endif /* @@ -433,56 +317,56 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl) * is within bounds for the response back to the * client. */ - if (wbuf != NULL && error == 0 && rval >= wbuf_len) { + if (us->wbuf != NULL && us->error == 0 && us->rval >= us->wbuf_len) { #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", __func__, ns, - (unsigned long long) rval, - (unsigned long long) wbuf_len); + (unsigned long long) us->rval, + (unsigned long long) us->wbuf_len); #endif - retval = 0; + us->retval = 0; goto finish; } /* Construct our response */ - nvl_resp = nvlist_create(0); - if (nvl_resp == NULL) { + us->nvl_resp = nvlist_create(0); + if (us->nvl_resp == NULL) { fprintf(stderr, "%s: fd %d: nvlist_create failed\n", __func__, ns); - retval = 0; + us->retval = 0; goto finish; } - nvlist_add_number(nvl_resp, "sysctl_errno", error); + nvlist_add_number(us->nvl_resp, "sysctl_errno", us->error); /* wbuf is NULL if we have a shm response */ - if (error == 0 && wbuf != NULL) { - nvlist_add_binary(nvl_resp, "sysctl_respbuf", wbuf, rval); + if (us->error == 0 && us->wbuf != NULL) { + nvlist_add_binary(us->nvl_resp, "sysctl_respbuf", us->wbuf, us->rval); } - nvlist_add_number(nvl_resp, "sysctl_respbuf_len", rval); + nvlist_add_number(us->nvl_resp, "sysctl_respbuf_len", 
us->rval); - if (nvlist_send(ns, nvl_resp) < 0) { + if (nvlist_send(ns, us->nvl_resp) < 0) { fprintf(stderr, "%s: fd %d: nvlist_send failed; errno=%d\n", __func__, ns, errno); - retval = 1; + us->retval = 1; goto finish; } /* Done! */ - retval = 1; + us->retval = 1; finish: - if (wbuf != NULL) - free(wbuf); - if (shm_mem != NULL) - munmap(shm_mem, shm_len); - if (shm_fd != -1) - close(shm_fd); - if (nvl_resp != NULL) - nvlist_destroy(nvl_resp); - return (retval); + if (us->wbuf != NULL) + free(us->wbuf); + if (us->shm_mem != NULL) + munmap(us->shm_mem, us->shm_len); + if (us->shm_fd != -1) + close(us->shm_fd); + if (us->nvl_resp != NULL) + nvlist_destroy(us->nvl_resp); + return (us->retval); } void * @@ -537,6 +421,8 @@ passive_sysctl_listener(void *arg) } for (;;) { + struct u_sysctl_state_t us; + nvl = nvlist_recv(ns); if (nvl == NULL) break; @@ -557,10 +443,11 @@ passive_sysctl_listener(void *arg) #endif /* Dispatch as appropriate */ + bzero(&us, sizeof(us)); if (strncmp(type, "sysctl_str", 10) == 0) { - ret = passive_sysctl_reqtype_str(ns, nvl); + ret = passive_sysctl_reqtype_str(ns, nvl, &us); } else if (strncmp(type, "sysctl_oid", 10) == 0) { - ret = passive_sysctl_reqtype_oid(ns, nvl); + ret = passive_sysctl_reqtype_oid(ns, nvl, &us); } else { fprintf(stderr, "%s: fd %d: unknown type=%s\n", __func__, From d0d0d18afb28986d516fe836667e04ec6b52fa71 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 25 Apr 2014 09:29:12 -0700 Subject: [PATCH 039/148] Refactor out the init and completion code into worker functions. 
--- bin/passive/sysctl_api.c | 147 ++++++++++++++++++++++----------------- 1 file changed, 85 insertions(+), 62 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 0427c0f..a253772 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -58,6 +58,7 @@ struct u_sysctl_state_t { nvlist_t *nvl_resp; + int ns; char *wbuf; size_t wbuf_len; size_t sbuf_len; @@ -78,6 +79,29 @@ struct u_sysctl_state_t { int retval; }; +static void +passive_sysctl_state_init(struct u_sysctl_state_t *us, int ns) +{ + bzero(us, sizeof(*us)); + + us->shm_fd = -1; + us->ns = ns; +} + +static void +passive_sysctl_state_clean(struct u_sysctl_state_t *us) +{ + + if (us->wbuf != NULL) + free(us->wbuf); + if (us->shm_mem != NULL) + munmap(us->shm_mem, us->shm_len); + if (us->shm_fd != -1) + close(us->shm_fd); + if (us->nvl_resp != NULL) + nvlist_destroy(us->nvl_resp); +} + /* * Handle sysctl string type requests. * @@ -91,6 +115,63 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) return (-1); } +static void +passive_sysctl_handle_resp(struct u_sysctl_state_t *us) +{ + + /* + * We only copy the data back if wbuf is not NULL. + * + * The undocumented size lookup in sysctl is done by + * doing a sysctl fetch on the given OID but with oldplen=0 and + * oldp=NULL, oldplen gets updated with the storage size. + */ + /* + * XXX Validate the response back from uinet_sysctl() + * is within bounds for the response back to the + * client. 
+ */ + if (us->wbuf != NULL && us->error == 0 && us->rval >= us->wbuf_len) { +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", + __func__, + us->ns, + (unsigned long long) us->rval, + (unsigned long long) us->wbuf_len); +#endif + us->retval = 0; + return; + } + + /* Construct our response */ + us->nvl_resp = nvlist_create(0); + if (us->nvl_resp == NULL) { + fprintf(stderr, "%s: fd %d: nvlist_create failed\n", __func__, us->ns); + us->retval = 0; + return; + } + + nvlist_add_number(us->nvl_resp, "sysctl_errno", us->error); + + /* wbuf is NULL if we have a shm response */ + if (us->error == 0 && us->wbuf != NULL) { + nvlist_add_binary(us->nvl_resp, "sysctl_respbuf", us->wbuf, us->rval); + } + nvlist_add_number(us->nvl_resp, "sysctl_respbuf_len", us->rval); + + if (nvlist_send(us->ns, us->nvl_resp) < 0) { + fprintf(stderr, "%s: fd %d: nvlist_send failed; errno=%d\n", + __func__, + us->ns, + errno); + us->retval = 0; + return; + } + + /* Done! */ + us->retval = 1; +} + /* * Handle sysctl oid type requests. * @@ -103,9 +184,8 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) static int passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) { - /* Initial state */ - us->shm_fd = -1; - us->retval = 0; + /* Setup! */ + passive_sysctl_state_init(us, ns); /* * We absolutely require there to be a sysctl_oid field. @@ -305,67 +385,10 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) (unsigned long long) us->rval); #endif - /* - * We only copy the data back if wbuf is not NULL. - * - * The undocumented size lookup in sysctl is done by - * doing a sysctl fetch on the given OID but with oldplen=0 and - * oldp=NULL, oldplen gets updated with the storage size. - */ - /* - * XXX Validate the response back from uinet_sysctl() - * is within bounds for the response back to the - * client. 
- */ - if (us->wbuf != NULL && us->error == 0 && us->rval >= us->wbuf_len) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", - __func__, - ns, - (unsigned long long) us->rval, - (unsigned long long) us->wbuf_len); -#endif - us->retval = 0; - goto finish; - } - - /* Construct our response */ - us->nvl_resp = nvlist_create(0); - if (us->nvl_resp == NULL) { - fprintf(stderr, "%s: fd %d: nvlist_create failed\n", __func__, ns); - us->retval = 0; - goto finish; - } - - nvlist_add_number(us->nvl_resp, "sysctl_errno", us->error); - - /* wbuf is NULL if we have a shm response */ - if (us->error == 0 && us->wbuf != NULL) { - nvlist_add_binary(us->nvl_resp, "sysctl_respbuf", us->wbuf, us->rval); - } - nvlist_add_number(us->nvl_resp, "sysctl_respbuf_len", us->rval); - - if (nvlist_send(ns, us->nvl_resp) < 0) { - fprintf(stderr, "%s: fd %d: nvlist_send failed; errno=%d\n", - __func__, - ns, - errno); - us->retval = 1; - goto finish; - } - - /* Done! */ - us->retval = 1; + passive_sysctl_handle_resp(us); finish: - if (us->wbuf != NULL) - free(us->wbuf); - if (us->shm_mem != NULL) - munmap(us->shm_mem, us->shm_len); - if (us->shm_fd != -1) - close(us->shm_fd); - if (us->nvl_resp != NULL) - nvlist_destroy(us->nvl_resp); + passive_sysctl_state_clean(us); return (us->retval); } From 00cbbd4e79ff69559690c89369433b53fe44b5f0 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 25 Apr 2014 10:01:02 -0700 Subject: [PATCH 040/148] Unused. 
--- bin/passive/sysctl_api.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index a253772..9ce7745 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -47,12 +47,6 @@ #include "nv.h" #include "sysctl_api.h" -#if 0 -#define SYSCTL_BUF_LEN 131072 -#define SYSCTL_MAX_BUF_LEN 1048576 -#define SYSCTL_MAX_STR_LEN 1024 -#endif - #define UINET_SYSCTL_DEBUG struct u_sysctl_state_t { From 0f3966b3052d64c9c28c517c9a3e7f70be19db2a Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 25 Apr 2014 10:07:36 -0700 Subject: [PATCH 041/148] Refactor out the setup code. --- bin/passive/sysctl_api.c | 225 +++++++++++++++++++++------------------ 1 file changed, 121 insertions(+), 104 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 9ce7745..e805830 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -109,104 +109,12 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) return (-1); } -static void -passive_sysctl_handle_resp(struct u_sysctl_state_t *us) -{ - - /* - * We only copy the data back if wbuf is not NULL. - * - * The undocumented size lookup in sysctl is done by - * doing a sysctl fetch on the given OID but with oldplen=0 and - * oldp=NULL, oldplen gets updated with the storage size. - */ - /* - * XXX Validate the response back from uinet_sysctl() - * is within bounds for the response back to the - * client. 
- */ - if (us->wbuf != NULL && us->error == 0 && us->rval >= us->wbuf_len) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", - __func__, - us->ns, - (unsigned long long) us->rval, - (unsigned long long) us->wbuf_len); -#endif - us->retval = 0; - return; - } - - /* Construct our response */ - us->nvl_resp = nvlist_create(0); - if (us->nvl_resp == NULL) { - fprintf(stderr, "%s: fd %d: nvlist_create failed\n", __func__, us->ns); - us->retval = 0; - return; - } - - nvlist_add_number(us->nvl_resp, "sysctl_errno", us->error); - - /* wbuf is NULL if we have a shm response */ - if (us->error == 0 && us->wbuf != NULL) { - nvlist_add_binary(us->nvl_resp, "sysctl_respbuf", us->wbuf, us->rval); - } - nvlist_add_number(us->nvl_resp, "sysctl_respbuf_len", us->rval); - - if (nvlist_send(us->ns, us->nvl_resp) < 0) { - fprintf(stderr, "%s: fd %d: nvlist_send failed; errno=%d\n", - __func__, - us->ns, - errno); - us->retval = 0; - return; - } - - /* Done! */ - us->retval = 1; -} - /* - * Handle sysctl oid type requests. - * - * Returns 1 if the connection should stay open; 0 if - * not. - * - * XXX this is definitely not endian-clean. - * I'm just passing in sysctl_oid as a binary array. Ew. + * Return 1 if things are ok, 0 if somehow things failed. */ static int -passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) +passive_sysctl_handle_req(struct u_sysctl_state_t *us, nvlist_t *nvl) { - /* Setup! */ - passive_sysctl_state_init(us, ns); - - /* - * We absolutely require there to be a sysctl_oid field. - * Ensure it's here. - */ - if (! 
nvlist_exists_binary(nvl, "sysctl_oid")) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: missing sysctl_oid\n", - __func__, - ns); -#endif - us->retval = 0; - goto finish; - } - us->req_oid = (const int *) nvlist_get_binary(nvl, "sysctl_oid", - &us->req_oid_len); - if (us->req_oid_len % sizeof(int) != 0) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: req_oid_len (%llu) is not a multiple of %d\n", - __func__, - ns, - (unsigned long long) us->req_oid_len, - (int) sizeof(int)); -#endif - us->retval = 0; - goto finish; - } /* * If the shm stuff is provided, grab it. @@ -222,7 +130,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) __func__); #endif us->retval = 0; - goto finish; + return (0); } /* @@ -236,7 +144,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) __func__); #endif us->retval = 0; - goto finish; + return (0); } us->shm_len = nvlist_get_number(nvl, "sysctl_respbuf_shm_len"); @@ -247,7 +155,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) warn("%s: shm_open (%s)", __func__, us->shm_path); #endif us->retval = 0; - goto finish; + return (0); } /* mmap it */ @@ -257,7 +165,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) warn("%s: mmap (%s)", __func__, us->shm_path); #endif us->retval = 0; - goto finish; + return (0); } } @@ -278,12 +186,12 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is " "too big! 
(%llu)\n", __func__, - ns, + us->ns, (unsigned long long) nvlist_get_number(nvl, "sysctl_respbuf_len")); #endif us->retval = 0; - goto finish; + return (0); } us->wbuf_len = nvlist_get_number(nvl, "sysctl_respbuf_len"); } else { @@ -298,12 +206,12 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: respbuf_len %lld > shm_len %lld\n", __func__, - ns, + us->ns, (long long) us->wbuf_len, (long long) us->shm_len); #endif us->retval = 0; - goto finish; + return (0); } } @@ -327,10 +235,10 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) us->wbuf = calloc(1, us->wbuf_len); if (us->wbuf == NULL) { #ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, ns); + fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, us->ns); #endif us->retval = 0; - goto finish; + return (0); } us->oldp = us->wbuf; } @@ -343,6 +251,115 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) us->sbuf_len = 0; } + return (1); +} + + +static void +passive_sysctl_handle_resp(struct u_sysctl_state_t *us) +{ + + /* + * We only copy the data back if wbuf is not NULL. + * + * The undocumented size lookup in sysctl is done by + * doing a sysctl fetch on the given OID but with oldplen=0 and + * oldp=NULL, oldplen gets updated with the storage size. + */ + /* + * XXX Validate the response back from uinet_sysctl() + * is within bounds for the response back to the + * client. 
+ */ + if (us->wbuf != NULL && us->error == 0 && us->rval >= us->wbuf_len) { +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", + __func__, + us->ns, + (unsigned long long) us->rval, + (unsigned long long) us->wbuf_len); +#endif + us->retval = 0; + return; + } + + /* Construct our response */ + us->nvl_resp = nvlist_create(0); + if (us->nvl_resp == NULL) { + fprintf(stderr, "%s: fd %d: nvlist_create failed\n", __func__, us->ns); + us->retval = 0; + return; + } + + nvlist_add_number(us->nvl_resp, "sysctl_errno", us->error); + + /* wbuf is NULL if we have a shm response */ + if (us->error == 0 && us->wbuf != NULL) { + nvlist_add_binary(us->nvl_resp, "sysctl_respbuf", us->wbuf, us->rval); + } + nvlist_add_number(us->nvl_resp, "sysctl_respbuf_len", us->rval); + + if (nvlist_send(us->ns, us->nvl_resp) < 0) { + fprintf(stderr, "%s: fd %d: nvlist_send failed; errno=%d\n", + __func__, + us->ns, + errno); + us->retval = 0; + return; + } + + /* Done! */ + us->retval = 1; +} + +/* + * Handle sysctl oid type requests. + * + * Returns 1 if the connection should stay open; 0 if + * not. + * + * XXX this is definitely not endian-clean. + * I'm just passing in sysctl_oid as a binary array. Ew. + */ +static int +passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) +{ + /* Setup! */ + passive_sysctl_state_init(us, ns); + + /* Parse initial bits */ + + /* + * We absolutely require there to be a sysctl_oid field. + * Ensure it's here. + */ + if (! 
nvlist_exists_binary(nvl, "sysctl_oid")) { +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, "%s: fd %d: missing sysctl_oid\n", + __func__, + ns); +#endif + us->retval = 0; + goto finish; + } + us->req_oid = (const int *) nvlist_get_binary(nvl, "sysctl_oid", + &us->req_oid_len); + if (us->req_oid_len % sizeof(int) != 0) { +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, "%s: fd %d: req_oid_len (%llu) is not a multiple of %d\n", + __func__, + ns, + (unsigned long long) us->req_oid_len, + (int) sizeof(int)); +#endif + us->retval = 0; + goto finish; + } + + /* parse shared bits */ + if (! passive_sysctl_handle_req(us, nvl)) + goto finish; + /* Issue sysctl */ #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, From 352766007e81820c24d05ed21eae1ac3abc24fee Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 25 Apr 2014 10:37:34 -0700 Subject: [PATCH 042/148] The req_oid and req_oid_len fields are private to the OID sysctl routine. Move them out. --- bin/passive/sysctl_api.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index e805830..e52ef13 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -56,11 +56,9 @@ struct u_sysctl_state_t { char *wbuf; size_t wbuf_len; size_t sbuf_len; - const int *req_oid; const char *sbuf; int error; size_t rval; - size_t req_oid_len; char *oldp; /* @@ -324,6 +322,9 @@ passive_sysctl_handle_resp(struct u_sysctl_state_t *us) static int passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) { + const int *req_oid; + size_t req_oid_len; + /* Setup! 
*/ passive_sysctl_state_init(us, ns); @@ -342,14 +343,14 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) us->retval = 0; goto finish; } - us->req_oid = (const int *) nvlist_get_binary(nvl, "sysctl_oid", - &us->req_oid_len); - if (us->req_oid_len % sizeof(int) != 0) { + req_oid = (const int *) nvlist_get_binary(nvl, "sysctl_oid", + &req_oid_len); + if (req_oid_len % sizeof(int) != 0) { #ifdef UINET_SYSCTL_DEBUG fprintf(stderr, "%s: fd %d: req_oid_len (%llu) is not a multiple of %d\n", __func__, ns, - (unsigned long long) us->req_oid_len, + (unsigned long long) req_oid_len, (int) sizeof(int)); #endif us->retval = 0; @@ -366,7 +367,7 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) "%s: fd %d: sysctl oid oidlen=%d oldp=%p, oldplen=%d, newp=%p, newplen=%d\n", __func__, ns, - (int) (us->req_oid_len / sizeof(int)), + (int) (req_oid_len / sizeof(int)), us->wbuf, (int) us->wbuf_len, us->sbuf, @@ -378,8 +379,8 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) * Pass in a NULL wbuf_len if wbuf is NULL. sysctl writing * passes in a NULL buffer and NULL oidlenp. */ - us->error = uinet_sysctl((int *) us->req_oid, - us->req_oid_len / sizeof(int), + us->error = uinet_sysctl((int *) req_oid, + req_oid_len / sizeof(int), us->oldp, us->oldp == NULL ? NULL : &us->wbuf_len, (char *) us->sbuf, From 76bc0bfcbb6f28e42491bffb76b702ba92323b1f Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 25 Apr 2014 10:41:13 -0700 Subject: [PATCH 043/148] Re-implement sysctlbyname. 
--- bin/passive/sysctl_api.c | 91 ++++++++++++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 13 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index e52ef13..1d5e8e0 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -94,19 +94,6 @@ passive_sysctl_state_clean(struct u_sysctl_state_t *us) nvlist_destroy(us->nvl_resp); } -/* - * Handle sysctl string type requests. - * - * Returns 1 if the connection should stay open; 0 if - * not. - */ -static int -passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) -{ - - return (-1); -} - /* * Return 1 if things are ok, 0 if somehow things failed. */ @@ -310,6 +297,84 @@ passive_sysctl_handle_resp(struct u_sysctl_state_t *us) us->retval = 1; } +/* + * Handle sysctl string type requests. + * + * Returns 1 if the connection should stay open; 0 if + * not. + */ +static int +passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) +{ + const char *req_str; + + /* Setup! */ + passive_sysctl_state_init(us, ns); + + /* Parse initial bits */ + + /* + * We absolutely require there to be a sysctl_str field. + * Ensure it's here. + */ + if (! nvlist_exists_string(nvl, "sysctl_str")) { +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, "%s: fd %d: missing sysctl_str\n", + __func__, + ns); +#endif + us->retval = 0; + goto finish; + } + req_str = nvlist_get_string(nvl, "sysctl_str"); + + /* XXX enforce maximum string length */ + + /* parse shared bits */ + if (! passive_sysctl_handle_req(us, nvl)) + goto finish; + + /* Issue sysctl */ +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, + "%s: fd %d: sysctl str=%s, oldp=%p, oldplen=%d, newp=%p, newplen=%d\n", + __func__, + ns, + req_str, + us->wbuf, + (int) us->wbuf_len, + us->sbuf, + (int) us->sbuf_len); +#endif + + /* + * Pass in a NULL wbuf_len if wbuf is NULL. sysctl writing + * passes in a NULL buffer and NULL oidlenp. 
+ */ + us->error = uinet_sysctlbyname((char *) req_str, + us->oldp, + us->oldp == NULL ? NULL : &us->wbuf_len, + (char *) us->sbuf, + us->sbuf_len, + &us->rval, + 0); + +#ifdef UINET_SYSCTL_DEBUG + fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%llu, rval=%llu\n", + __func__, + ns, + (int) us->error, + (unsigned long long) us->wbuf_len, + (unsigned long long) us->rval); +#endif + + passive_sysctl_handle_resp(us); + +finish: + passive_sysctl_state_clean(us); + return (us->retval); +} + /* * Handle sysctl oid type requests. * From 08f8a503165800b9aa32c1775b617fdfa087aad5 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 25 Apr 2014 10:47:27 -0700 Subject: [PATCH 044/148] Tidy up how the debugging printing is enabled or not. --- bin/passive/sysctl_api.c | 74 +++++++++++++++------------------------- 1 file changed, 28 insertions(+), 46 deletions(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 1d5e8e0..4a2470d 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -49,6 +49,12 @@ #define UINET_SYSCTL_DEBUG +#ifdef UINET_SYSCTL_DEBUG +#define UINET_SYSCTL_DPRINTF(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__) +#else +#define UINET_SYSCTL_DPRINTF(fmt, ...) +#endif + struct u_sysctl_state_t { nvlist_t *nvl_resp; @@ -110,10 +116,8 @@ passive_sysctl_handle_req(struct u_sysctl_state_t *us, nvlist_t *nvl) /* XXX strdup, then free as appropriate */ us->shm_path = nvlist_get_string(nvl, "sysctl_respbuf_shm_path"); if (! nvlist_exists_number(nvl, "sysctl_respbuf_shm_len")) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: shm_path provided but not shm_len\n", - __func__); -#endif + UINET_SYSCTL_DPRINTF("%s: shm_path provided but not shm_len\n", + __func__); us->retval = 0; return (0); } @@ -123,11 +127,8 @@ passive_sysctl_handle_req(struct u_sysctl_state_t *us, nvlist_t *nvl) * a respbuf_len field. */ if (! 
nvlist_exists_number(nvl, "sysctl_respbuf_len")) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, - "%s: shm_path provided but no shm_respbuf_len!\n", + UINET_SYSCTL_DPRINTF("%s: shm_path provided but no shm_respbuf_len!\n", __func__); -#endif us->retval = 0; return (0); } @@ -167,14 +168,12 @@ passive_sysctl_handle_req(struct u_sysctl_state_t *us, nvlist_t *nvl) */ if (us->shm_mem == NULL && nvlist_get_number(nvl, "sysctl_respbuf_len") > U_SYSCTL_MAX_REQ_BUF_LEN) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: sysctl_respbuf_len is " + UINET_SYSCTL_DPRINTF("%s: fd %d: sysctl_respbuf_len is " "too big! (%llu)\n", __func__, us->ns, (unsigned long long) nvlist_get_number(nvl, "sysctl_respbuf_len")); -#endif us->retval = 0; return (0); } @@ -188,13 +187,11 @@ passive_sysctl_handle_req(struct u_sysctl_state_t *us, nvlist_t *nvl) */ if (us->shm_mem != NULL) { if (us->wbuf_len > us->shm_len) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: respbuf_len %lld > shm_len %lld\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: respbuf_len %lld > shm_len %lld\n", __func__, us->ns, (long long) us->wbuf_len, (long long) us->shm_len); -#endif us->retval = 0; return (0); } @@ -219,9 +216,9 @@ passive_sysctl_handle_req(struct u_sysctl_state_t *us, nvlist_t *nvl) } else { us->wbuf = calloc(1, us->wbuf_len); if (us->wbuf == NULL) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: malloc failed\n", __func__, us->ns); -#endif + UINET_SYSCTL_DPRINTF("%s: fd %d: malloc failed\n", + __func__, + us->ns); us->retval = 0; return (0); } @@ -257,13 +254,11 @@ passive_sysctl_handle_resp(struct u_sysctl_state_t *us) * client. 
*/ if (us->wbuf != NULL && us->error == 0 && us->rval >= us->wbuf_len) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", __func__, us->ns, (unsigned long long) us->rval, (unsigned long long) us->wbuf_len); -#endif us->retval = 0; return; } @@ -318,11 +313,9 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) * Ensure it's here. */ if (! nvlist_exists_string(nvl, "sysctl_str")) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: missing sysctl_str\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: missing sysctl_str\n", __func__, ns); -#endif us->retval = 0; goto finish; } @@ -335,9 +328,8 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) goto finish; /* Issue sysctl */ -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, - "%s: fd %d: sysctl str=%s, oldp=%p, oldplen=%d, newp=%p, newplen=%d\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: sysctl str=%s, oldp=%p, " + "oldplen=%d, newp=%p, newplen=%d\n", __func__, ns, req_str, @@ -345,7 +337,6 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) (int) us->wbuf_len, us->sbuf, (int) us->sbuf_len); -#endif /* * Pass in a NULL wbuf_len if wbuf is NULL. sysctl writing @@ -359,14 +350,13 @@ passive_sysctl_reqtype_str(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) &us->rval, 0); -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%llu, rval=%llu\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: sysctl error=%d, wbuf_len=%llu, " + "rval=%llu\n", __func__, ns, (int) us->error, (unsigned long long) us->wbuf_len, (unsigned long long) us->rval); -#endif passive_sysctl_handle_resp(us); @@ -400,24 +390,21 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) * Ensure it's here. */ if (! 
nvlist_exists_binary(nvl, "sysctl_oid")) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: missing sysctl_oid\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: missing sysctl_oid\n", __func__, ns); -#endif us->retval = 0; goto finish; } req_oid = (const int *) nvlist_get_binary(nvl, "sysctl_oid", &req_oid_len); if (req_oid_len % sizeof(int) != 0) { -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: req_oid_len (%llu) is not a multiple of %d\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: req_oid_len (%llu) " + "is not a multiple of %d\n", __func__, ns, (unsigned long long) req_oid_len, (int) sizeof(int)); -#endif us->retval = 0; goto finish; } @@ -427,9 +414,8 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) goto finish; /* Issue sysctl */ -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, - "%s: fd %d: sysctl oid oidlen=%d oldp=%p, oldplen=%d, newp=%p, newplen=%d\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: sysctl oid oidlen=%d oldp=%p, " + "oldplen=%d, newp=%p, newplen=%d\n", __func__, ns, (int) (req_oid_len / sizeof(int)), @@ -437,7 +423,6 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) (int) us->wbuf_len, us->sbuf, (int) us->sbuf_len); -#endif /* XXX typecasting sbuf and req_oid sucks */ /* @@ -453,14 +438,13 @@ passive_sysctl_reqtype_oid(int ns, nvlist_t *nvl, struct u_sysctl_state_t *us) &us->rval, 0); -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: sysctl error=%d, wbuf_len=%llu, rval=%llu\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: sysctl error=%d, " + "wbuf_len=%llu, rval=%llu\n", __func__, ns, (int) us->error, (unsigned long long) us->wbuf_len, (unsigned long long) us->rval); -#endif passive_sysctl_handle_resp(us); @@ -535,12 +519,10 @@ passive_sysctl_listener(void *arg) } type = nvlist_get_string(nvl, "type"); -#ifdef UINET_SYSCTL_DEBUG - fprintf(stderr, "%s: fd %d: type=%s\n", + UINET_SYSCTL_DPRINTF("%s: fd %d: type=%s\n", __func__, ns, type); -#endif /* Dispatch as appropriate */ bzero(&us, 
sizeof(us)); From d37b4fc521ebcc2ba5c1fe25ce2d8cbcb99226a5 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 28 Apr 2014 18:10:56 +0000 Subject: [PATCH 045/148] Modify the comment - the code below actually does the check. --- bin/passive/sysctl_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index 4a2470d..cd33745 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -248,8 +248,9 @@ passive_sysctl_handle_resp(struct u_sysctl_state_t *us) * doing a sysctl fetch on the given OID but with oldplen=0 and * oldp=NULL, oldplen gets updated with the storage size. */ + /* - * XXX Validate the response back from uinet_sysctl() + * Validate the response back from uinet_sysctl() * is within bounds for the response back to the * client. */ From 8086fbd41a10565507065dc2b134a5f79e6de612 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 28 Apr 2014 18:15:18 +0000 Subject: [PATCH 046/148] * Break out the sysctl api bits into a public and private include file. * Add copyright. 
--- bin/passive/passive.c | 1 + bin/passive/sysctl_api.c | 1 + bin/passive/sysctl_api.h | 29 +++++++++++++++++++++++++---- bin/passive/sysctl_api_priv.h | 31 +++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 bin/passive/sysctl_api_priv.h diff --git a/bin/passive/passive.c b/bin/passive/passive.c index 01f187d..4e302b1 100644 --- a/bin/passive/passive.c +++ b/bin/passive/passive.c @@ -36,6 +36,7 @@ #include "uinet_api.h" #include "sysctl_api.h" +#include "sysctl_api_priv.h" #define EV_STANDALONE 1 #define EV_UINET_ENABLE 1 diff --git a/bin/passive/sysctl_api.c b/bin/passive/sysctl_api.c index cd33745..2b4ad2a 100644 --- a/bin/passive/sysctl_api.c +++ b/bin/passive/sysctl_api.c @@ -46,6 +46,7 @@ #include "uinet_config.h" #include "nv.h" #include "sysctl_api.h" +#include "sysctl_api_priv.h" #define UINET_SYSCTL_DEBUG diff --git a/bin/passive/sysctl_api.h b/bin/passive/sysctl_api.h index ff311e0..f04baa5 100644 --- a/bin/passive/sysctl_api.h +++ b/bin/passive/sysctl_api.h @@ -1,10 +1,31 @@ +/* + * Copyright (c) 2014 Adrian Chadd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + #ifndef __SYSCTL_API_H__ #define __SYSCTL_API_H__ -/* XXX this is a public definition */ #define U_SYSCTL_MAX_REQ_BUF_LEN 1048576 -/* XXX this is a private definition */ -extern void * passive_sysctl_listener(void *arg); - #endif diff --git a/bin/passive/sysctl_api_priv.h b/bin/passive/sysctl_api_priv.h new file mode 100644 index 0000000..db7ffaf --- /dev/null +++ b/bin/passive/sysctl_api_priv.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2014 Adrian Chadd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef __SYSCTL_API_PRIV_H__ +#define __SYSCTL_API_PRIV_H__ + +extern void * passive_sysctl_listener(void *arg); + +#endif From d93d53bb8f522e0728389334ba6cd42bfa7b48a1 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 28 Apr 2014 18:16:42 +0000 Subject: [PATCH 047/148] Document libnv. --- README | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README b/README index fb23d99..03f8106 100644 --- a/README +++ b/README @@ -21,6 +21,12 @@ the latter interface is relatively new and untested). Building libuinet ============================================================================= +The sysctl code (and likely more over time) uses libnv from FreeBSD-HEAD +to serialise data between processes. + +cd lib/libnv +make + cd lib/libuinet gmake From c095796eae781dfce1fa0a1c9f5e48019a467308 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 28 Apr 2014 19:05:19 +0000 Subject: [PATCH 048/148] Don't build .so. 
--- lib/libnv/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/libnv/Makefile b/lib/libnv/Makefile index 7c24ba1..c43f4f5 100644 --- a/lib/libnv/Makefile +++ b/lib/libnv/Makefile @@ -1,8 +1,8 @@ # $FreeBSD: head/lib/libnv/Makefile 258065 2013-11-12 19:39:14Z pjd $ LIB= nv -SHLIBDIR?= /lib -SHLIB_MAJOR= 0 +#SHLIBDIR?= /lib +#SHLIB_MAJOR= 0 SRCS= dnvlist.c SRCS+= msgio.c From 2c31e1600d4ee9d2e50155402cb5189f2bf8e065 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 28 Apr 2014 19:07:57 +0000 Subject: [PATCH 049/148] Migrate all of the sysctl api stuff into libuinet. --- bin/passive/Makefile | 6 +++--- bin/passive/passive.c | 7 ++++--- lib/libuinet/Makefile | 5 +++-- .../libuinet/api_include/uinet_host_sysctl_api.h | 0 .../libuinet/api_include/uinet_host_sysctl_api_priv.h | 2 +- lib/libuinet/uinet_api.symlist | 1 + .../sysctl_api.c => lib/libuinet/uinet_host_sysctl_api.c | 6 +++--- 7 files changed, 15 insertions(+), 12 deletions(-) rename bin/passive/sysctl_api.h => lib/libuinet/api_include/uinet_host_sysctl_api.h (100%) rename bin/passive/sysctl_api_priv.h => lib/libuinet/api_include/uinet_host_sysctl_api_priv.h (95%) rename bin/passive/sysctl_api.c => lib/libuinet/uinet_host_sysctl_api.c (99%) diff --git a/bin/passive/Makefile b/bin/passive/Makefile index bd371c2..020b331 100644 --- a/bin/passive/Makefile +++ b/bin/passive/Makefile @@ -2,12 +2,12 @@ TOPDIR?=${CURDIR}/../.. 
PROG=passive -SRCS=passive.c sysctl_api.c +SRCS=passive.c -UINET_LIBS=uinet +UINET_LIBS=uinet nv CFLAGS= -I${TOPDIR}/lib/libev -I${TOPDIR}/lib/libnv -LDADD= ${TOPDIR}/lib/libev/.libs/libev.a ${TOPDIR}/lib/libnv/libnv.a -lm -lpcap +LDADD= ${TOPDIR}/lib/libev/.libs/libev.a -lm -lpcap DEBUG_FLAGS=-g -O diff --git a/bin/passive/passive.c b/bin/passive/passive.c index 4e302b1..9dfda65 100644 --- a/bin/passive/passive.c +++ b/bin/passive/passive.c @@ -35,8 +35,8 @@ #include #include "uinet_api.h" -#include "sysctl_api.h" -#include "sysctl_api_priv.h" +#include "uinet_host_sysctl_api.h" +#include "uinet_host_sysctl_api_priv.h" #define EV_STANDALONE 1 #define EV_UINET_ENABLE 1 @@ -916,7 +916,8 @@ int main (int argc, char **argv) interface_thread_start, &interfaces[i]); } - error = pthread_create(&sysctl_thr, NULL, passive_sysctl_listener, NULL); + error = pthread_create(&sysctl_thr, NULL, + uinet_host_sysctl_listener_thread, NULL); if (error != 0) { printf("Failed to bring up sysctl thread: %d\n", errno); } diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 6fc3d97..0070a22 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -19,7 +19,7 @@ INCLUDES+= -I./machine_include INCLUDES+= -I./opt # Include search path for files that only include host OS headers -HOST_INCLUDES= -I. -I${API_INCLUDES_ROOT} +HOST_INCLUDES= -I. 
-I${API_INCLUDES_ROOT} -I../libnv ifdef NETMAP_INCLUDES HOST_INCLUDES+= -I${NETMAP_INCLUDES} @@ -155,7 +155,8 @@ UINET_HOST_SRCS+= \ uinet_arc4random.c \ uinet_host_interface.c \ uinet_if_pcap_host.c \ - uinet_kern_shutdown.c + uinet_kern_shutdown.c \ + uinet_host_sysctl_api.c ifneq (${HOST_OS},Darwin) UINET_HOST_SRCS+= uinet_if_netmap_host.c diff --git a/bin/passive/sysctl_api.h b/lib/libuinet/api_include/uinet_host_sysctl_api.h similarity index 100% rename from bin/passive/sysctl_api.h rename to lib/libuinet/api_include/uinet_host_sysctl_api.h diff --git a/bin/passive/sysctl_api_priv.h b/lib/libuinet/api_include/uinet_host_sysctl_api_priv.h similarity index 95% rename from bin/passive/sysctl_api_priv.h rename to lib/libuinet/api_include/uinet_host_sysctl_api_priv.h index db7ffaf..a5ab2cc 100644 --- a/bin/passive/sysctl_api_priv.h +++ b/lib/libuinet/api_include/uinet_host_sysctl_api_priv.h @@ -26,6 +26,6 @@ #ifndef __SYSCTL_API_PRIV_H__ #define __SYSCTL_API_PRIV_H__ -extern void * passive_sysctl_listener(void *arg); +extern void * uinet_host_sysctl_listener_thread(void *arg); #endif diff --git a/lib/libuinet/uinet_api.symlist b/lib/libuinet/uinet_api.symlist index e47a720..a18d223 100644 --- a/lib/libuinet/uinet_api.symlist +++ b/lib/libuinet/uinet_api.symlist @@ -71,3 +71,4 @@ uinet_synfilter_getl2info uinet_synfilter_install uinet_sysctl uinet_sysctlbyname +uinet_host_sysctl_listener_thread diff --git a/bin/passive/sysctl_api.c b/lib/libuinet/uinet_host_sysctl_api.c similarity index 99% rename from bin/passive/sysctl_api.c rename to lib/libuinet/uinet_host_sysctl_api.c index 2b4ad2a..01fb160 100644 --- a/bin/passive/sysctl_api.c +++ b/lib/libuinet/uinet_host_sysctl_api.c @@ -45,8 +45,8 @@ #include "uinet_api.h" #include "uinet_config.h" #include "nv.h" -#include "sysctl_api.h" -#include "sysctl_api_priv.h" +#include "uinet_host_sysctl_api.h" +#include "uinet_host_sysctl_api_priv.h" #define UINET_SYSCTL_DEBUG @@ -456,7 +456,7 @@ passive_sysctl_reqtype_oid(int ns, 
nvlist_t *nvl, struct u_sysctl_state_t *us) } void * -passive_sysctl_listener(void *arg) +uinet_host_sysctl_listener_thread(void *arg) { int s, r; struct sockaddr_un sun; From d0dc3945b893f100b54cfd13fc6b08001eaca589 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 2 May 2014 08:17:48 -0700 Subject: [PATCH 050/148] Shuffle all of the assert() things that use strings over to use UINET_ASSERT(). Trying to do this hack in clang just resorts in a lot of compiler warnings. --- lib/libev/ev.c | 73 +++++++++++++++++++++++---------------------- lib/libev/ev.h | 3 ++ lib/libev/ev_poll.c | 2 +- 3 files changed, 42 insertions(+), 36 deletions(-) diff --git a/lib/libev/ev.c b/lib/libev/ev.c index d93dd85..39f4ca2 100644 --- a/lib/libev/ev.c +++ b/lib/libev/ev.c @@ -729,6 +729,8 @@ struct signalfd_siginfo #define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE #endif +#include + /*****************************************************************************/ #if __cplusplus @@ -1800,7 +1802,7 @@ fd_reify (EV_P) { unsigned long arg; - assert (("libev: only socket fds supported in this configuration", ioctlsocket (handle, FIONREAD, &arg) == 0)); + UINET_ASSERT("libev: only socket fds supported in this configuration", ioctlsocket (handle, FIONREAD, &arg) == 0); /* handle changed, but fd didn't - we need to do it in two steps */ backend_modify (EV_A_ fd, anfd->events, 0); @@ -2511,7 +2513,8 @@ uinet_process_pending_list (EV_P_ ev_async *w_async, int revents) ev_uinet_ctx *soctx; ev_uinet *w; - assert(("libev: uinet_prev_pend list not empty", UINET_LIST_EMPTY (&uinet_prev_pend_head))); + UINET_ASSERT("libev: uinet_prev_pend list not empty", + UINET_LIST_EMPTY(&uinet_prev_pend_head)); pthread_mutex_lock (&uinet_pend_lock); UINET_LIST_SWAP (&uinet_pend_head, &uinet_prev_pend_head, ev_uinet_ctx, pend_list); @@ -3013,10 +3016,10 @@ ev_loop_new (unsigned int flags) EV_THROW static void noinline ecb_cold verify_watcher (EV_P_ W w) { - assert (("libev: watcher has invalid priority", ABSPRI 
(w) >= 0 && ABSPRI (w) < NUMPRI)); + UINET_ASSERT("libev: watcher has invalid priority", ABSPRI (w) >= 0 && ABSPRI (w) < NUMPRI); if (w->pending) - assert (("libev: pending watcher not on pending queue", pendings [ABSPRI (w)][w->pending - 1].w == w)); + UINET_ASSERT("libev: pending watcher not on pending queue", pendings [ABSPRI (w)][w->pending - 1].w == w); } static void noinline ecb_cold @@ -3026,9 +3029,9 @@ verify_heap (EV_P_ ANHE *heap, int N) for (i = HEAP0; i < N + HEAP0; ++i) { - assert (("libev: active index mismatch in heap", ev_active (ANHE_w (heap [i])) == i)); - assert (("libev: heap condition violated", i == HEAP0 || ANHE_at (heap [HPARENT (i)]) <= ANHE_at (heap [i]))); - assert (("libev: heap at cache mismatch", ANHE_at (heap [i]) == ev_at (ANHE_w (heap [i])))); + UINET_ASSERT("libev: active index mismatch in heap", ev_active (ANHE_w (heap [i])) == i); + UINET_ASSERT("libev: heap condition violated", i == HEAP0 || ANHE_at (heap [HPARENT (i)]) <= ANHE_at (heap [i])); + UINET_ASSERT("libev: heap at cache mismatch", ANHE_at (heap [i]) == ev_at (ANHE_w (heap [i]))); verify_watcher (EV_A_ (W)ANHE_w (heap [i])); } @@ -3039,7 +3042,7 @@ array_verify (EV_P_ W *ws, int cnt) { while (cnt--) { - assert (("libev: active index mismatch", ev_active (ws [cnt]) == cnt + 1)); + UINET_ASSERT("libev: active index mismatch", ev_active (ws [cnt]) == cnt + 1); verify_watcher (EV_A_ ws [cnt]); } } @@ -3057,7 +3060,7 @@ ev_verify (EV_P) EV_THROW assert (fdchangemax >= fdchangecnt); for (i = 0; i < fdchangecnt; ++i) - assert (("libev: negative fd in fdchanges", fdchanges [i] >= 0)); + UINET_ASSERT("libev: negative fd in fdchanges", fdchanges [i] >= 0); assert (anfdmax >= 0); for (i = 0; i < anfdmax; ++i) @@ -3070,12 +3073,12 @@ ev_verify (EV_P) EV_THROW if (j++ & 1) { - assert (("libev: io watcher list contains a loop", w != w2)); + UINET_ASSERT("libev: io watcher list contains a loop", w != w2); w2 = w2->next; } - assert (("libev: inactive fd watcher on anfd list", 
ev_active (w) == 1)); - assert (("libev: fd mismatch between watcher and anfd", ((ev_io *)w)->fd == i)); + UINET_ASSERT("libev: inactive fd watcher on anfd list", ev_active (w) == 1); + UINET_ASSERT("libev: fd mismatch between watcher and anfd", ((ev_io *)w)->fd == i); } } @@ -3257,7 +3260,7 @@ timers_reify (EV_P) if (ev_at (w) < mn_now) ev_at (w) = mn_now; - assert (("libev: negative ev_timer repeat value found while processing timers", w->repeat > 0.)); + UINET_ASSERT("libev: negative ev_timer repeat value found while processing timers", w->repeat > 0.); ANHE_at_cache (timers [HEAP0]); downheap (timers, timercnt, HEAP0); @@ -3319,7 +3322,7 @@ periodics_reify (EV_P) { ev_at (w) = w->reschedule_cb (w, ev_rt_now); - assert (("libev: ev_periodic reschedule callback returned time in the past", ev_at (w) >= ev_rt_now)); + UINET_ASSERT("libev: ev_periodic reschedule callback returned time in the past", ev_at (w) >= ev_rt_now); ANHE_at_cache (periodics [HEAP0]); downheap (periodics, periodiccnt, HEAP0); @@ -3458,7 +3461,7 @@ ev_run (EV_P_ int flags) ++loop_depth; #endif - assert (("libev: ev_loop recursion during release detected", loop_done != EVBREAK_RECURSE)); + UINET_ASSERT("libev: ev_loop recursion during release detected", loop_done != EVBREAK_RECURSE); loop_done = EVBREAK_CANCEL; @@ -3579,7 +3582,7 @@ ev_run (EV_P_ int flags) ECB_MEMORY_FENCE_ACQUIRE; if (pipe_write_skipped) { - assert (("libev: pipe_w not active, but pipe not written", ev_is_active (&pipe_w))); + UINET_ASSERT("libev: pipe_w not active, but pipe not written", ev_is_active (&pipe_w)); ev_feed_event (EV_A_ &pipe_w, EV_CUSTOM); } @@ -3753,8 +3756,8 @@ ev_io_start (EV_P_ ev_io *w) EV_THROW if (expect_false (ev_is_active (w))) return; - assert (("libev: ev_io_start called with negative fd", fd >= 0)); - assert (("libev: ev_io_start called with illegal event mask", !(w->events & ~(EV__IOFDSET | EV_READ | EV_WRITE)))); + UINET_ASSERT("libev: ev_io_start called with negative fd", fd >= 0); + 
UINET_ASSERT("libev: ev_io_start called with illegal event mask", !(w->events & ~(EV__IOFDSET | EV_READ | EV_WRITE))); EV_FREQUENT_CHECK; @@ -3763,7 +3766,7 @@ ev_io_start (EV_P_ ev_io *w) EV_THROW wlist_add (&anfds[fd].head, (WL)w); /* common bug, apparently */ - assert (("libev: ev_io_start called with corrupted watcher", ((WL)w)->next != (WL)w)); + UINET_ASSERT("libev: ev_io_start called with corrupted watcher", ((WL)w)->next != (WL)w); fd_change (EV_A_ fd, w->events & EV__IOFDSET | EV_ANFD_REIFY); w->events &= ~EV__IOFDSET; @@ -3778,7 +3781,7 @@ ev_io_stop (EV_P_ ev_io *w) EV_THROW if (expect_false (!ev_is_active (w))) return; - assert (("libev: ev_io_stop called with illegal fd (must stay constant after start!)", w->fd >= 0 && w->fd < anfdmax)); + UINET_ASSERT("libev: ev_io_stop called with illegal fd (must stay constant after start!)", w->fd >= 0 && w->fd < anfdmax); EV_FREQUENT_CHECK; @@ -3798,7 +3801,7 @@ ev_timer_start (EV_P_ ev_timer *w) EV_THROW ev_at (w) += mn_now; - assert (("libev: ev_timer_start called with negative timer repeat value", w->repeat >= 0.)); + UINET_ASSERT("libev: ev_timer_start called with negative timer repeat value", w->repeat >= 0.); EV_FREQUENT_CHECK; @@ -3826,7 +3829,7 @@ ev_timer_stop (EV_P_ ev_timer *w) EV_THROW { int active = ev_active (w); - assert (("libev: internal timer heap corruption", ANHE_w (timers [active]) == (WT)w)); + UINET_ASSERT("libev: internal timer heap corruption", ANHE_w (timers [active]) == (WT)w); --timercnt; @@ -3888,7 +3891,7 @@ ev_periodic_start (EV_P_ ev_periodic *w) EV_THROW ev_at (w) = w->reschedule_cb (w, ev_rt_now); else if (w->interval) { - assert (("libev: ev_periodic_start called with negative interval value", w->interval >= 0.)); + UINET_ASSERT("libev: ev_periodic_start called with negative interval value", w->interval >= 0.); periodic_recalc (EV_A_ w); } else @@ -3920,7 +3923,7 @@ ev_periodic_stop (EV_P_ ev_periodic *w) EV_THROW { int active = ev_active (w); - assert (("libev: internal periodic 
heap corruption", ANHE_w (periodics [active]) == (WT)w)); + UINET_ASSERT("libev: internal periodic heap corruption", ANHE_w (periodics [active]) == (WT)w); --periodiccnt; @@ -3957,11 +3960,11 @@ ev_signal_start (EV_P_ ev_signal *w) EV_THROW if (expect_false (ev_is_active (w))) return; - assert (("libev: ev_signal_start called with illegal signal number", w->signum > 0 && w->signum < EV_NSIG)); + UINET_ASSERT("libev: ev_signal_start called with illegal signal number", w->signum > 0 && w->signum < EV_NSIG); #if EV_MULTIPLICITY - assert (("libev: a signal must not be attached to two different loops", - !signals [w->signum - 1].loop || signals [w->signum - 1].loop == loop)); + UINET_ASSERT("libev: a signal must not be attached to two different loops", + !signals [w->signum - 1].loop || signals [w->signum - 1].loop == loop); signals [w->signum - 1].loop = EV_A; ECB_MEMORY_FENCE_RELEASE; @@ -4078,7 +4081,7 @@ void ev_child_start (EV_P_ ev_child *w) EV_THROW { #if EV_MULTIPLICITY - assert (("libev: child watchers are only supported in the default loop", loop == ev_default_loop_ptr)); + UINET_ASSERT("libev: child watchers are only supported in the default loop", loop == ev_default_loop_ptr); #endif if (expect_false (ev_is_active (w))) return; @@ -4657,7 +4660,7 @@ ev_embed_start (EV_P_ ev_embed *w) EV_THROW { EV_P = w->other; - assert (("libev: loop to be embedded is not embeddable", backend & ev_embeddable_backends ())); + UINET_ASSERT("libev: loop to be embedded is not embeddable", backend & ev_embeddable_backends ()); ev_io_init (&w->io, embed_io_cb, backend_fd, EV_READ); } @@ -4857,7 +4860,7 @@ ev_uinet_attach (struct uinet_socket *so) void ev_uinet_detach (struct ev_uinet_ctx *ctx) { - assert (("libev: detaching uinet ctx that is still in use", ctx->head == NULL)); + UINET_ASSERT("libev: detaching uinet ctx that is still in use", ctx->head == NULL); ev_free (ctx); } @@ -4873,9 +4876,9 @@ ev_uinet_start (EV_P_ ev_uinet *w) EV_THROW if (expect_false (ev_is_active (w))) 
return; - assert (("libev: ev_uinet_start called with NULL socket", NULL != so)); - assert (("libev: ev_uinet_start called with illegal event mask", !(w->events & ~(EV_READ | EV_WRITE)))); - assert (("libev: ev_uinet_start called with empty event mask", w->events & (EV_READ | EV_WRITE))); + UINET_ASSERT("libev: ev_uinet_start called with NULL socket", NULL != so); + UINET_ASSERT("libev: ev_uinet_start called with illegal event mask", !(w->events & ~(EV_READ | EV_WRITE))); + UINET_ASSERT("libev: ev_uinet_start called with empty event mask", w->events & (EV_READ | EV_WRITE)); EV_FREQUENT_CHECK; @@ -4980,8 +4983,8 @@ ev_uinet_stop (EV_P_ ev_uinet *w) EV_THROW */ UINET_LIST_REMOVE (soctx, pend_list); } else { - assert (("libev: uinet context neither pending nor inhibited in ev_uinet_stop", - soctx->pend_flags & EV_UINET_PENDING)); + UINET_ASSERT("libev: uinet context neither pending nor inhibited in ev_uinet_stop", + soctx->pend_flags & EV_UINET_PENDING); pthread_mutex_lock (&uinet_pend_lock); UINET_LIST_REMOVE (soctx, pend_list); diff --git a/lib/libev/ev.h b/lib/libev/ev.h index 60d67da..697603d 100644 --- a/lib/libev/ev.h +++ b/lib/libev/ev.h @@ -890,6 +890,9 @@ EV_API_DECL void ev_uinet_stop (EV_P_ ev_uinet *w) EV_THROW; #endif +/* XXX doesn't belong here */ +#define UINET_ASSERT(msg, expr) assert(expr) + EV_CPP(}) #endif diff --git a/lib/libev/ev_poll.c b/lib/libev/ev_poll.c index 4832351..1359631 100644 --- a/lib/libev/ev_poll.c +++ b/lib/libev/ev_poll.c @@ -107,7 +107,7 @@ poll_poll (EV_P_ ev_tstamp timeout) else for (p = polls; res; ++p) { - assert (("libev: poll() returned illegal result, broken BSD kernel?", p < polls + pollcnt)); + UINET_ASSERT("libev: poll() returned illegal result, broken BSD kernel?", p < polls + pollcnt); if (expect_false (p->revents)) /* this expect is debatable */ { From 016f36d0cc0526b802e9b2d60d0f4d76b1c6c1a7 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 7 May 2014 19:47:40 +0000 Subject: [PATCH 051/148] gmake-ify these 
things. --- lib/libhttp_parser/Makefile | 2 + lib/libnv/Makefile | 165 +++--------------------------------- 2 files changed, 13 insertions(+), 154 deletions(-) diff --git a/lib/libhttp_parser/Makefile b/lib/libhttp_parser/Makefile index 4cd25db..3094f00 100644 --- a/lib/libhttp_parser/Makefile +++ b/lib/libhttp_parser/Makefile @@ -96,6 +96,8 @@ parsertrace_g: http_parser_g.o contrib/parsertrace.c tags: http_parser.c http_parser.h test.c ctags $^ +all: library package + clean: rm -f *.o *.a tags test test_fast test_g \ http_parser.tar libhttp_parser.so* \ diff --git a/lib/libnv/Makefile b/lib/libnv/Makefile index c43f4f5..0af1827 100644 --- a/lib/libnv/Makefile +++ b/lib/libnv/Makefile @@ -1,161 +1,18 @@ -# $FreeBSD: head/lib/libnv/Makefile 258065 2013-11-12 19:39:14Z pjd $ -LIB= nv -#SHLIBDIR?= /lib -#SHLIB_MAJOR= 0 +TOPDIR?=${CURDIR}/../.. +include ${TOPDIR}/lib.mk -SRCS= dnvlist.c -SRCS+= msgio.c -SRCS+= nvlist.c -SRCS+= nvpair.c +LIB= nv -INCS= dnv.h -INCS+= nv.h +SRCS= dnvlist.c msgio.c nvlist.c nvpair.c +OBJS= dnvlist.o msgio.o nvlist.o nvpair.o +INCS= dnv.h nv.h -MAN+= nv.3 +all: libnv.a -MLINKS+=nv.3 libnv.3 \ - nv.3 nvlist.3 -MLINKS+=nv.3 nvlist_create.3 \ - nv.3 nvlist_destroy.3 \ - nv.3 nvlist_error.3 \ - nv.3 nvlist_empty.3 \ - nv.3 nvlist_clone.3 \ - nv.3 nvlist_dump.3 \ - nv.3 nvlist_fdump.3 \ - nv.3 nvlist_size.3 \ - nv.3 nvlist_pack.3 \ - nv.3 nvlist_unpack.3 \ - nv.3 nvlist_send.3 \ - nv.3 nvlist_recv.3 \ - nv.3 nvlist_xfer.3 \ - nv.3 nvlist_next.3 \ - nv.3 nvlist_exists.3 \ - nv.3 nvlist_exists_type.3 \ - nv.3 nvlist_exists_null.3 \ - nv.3 nvlist_exists_bool.3 \ - nv.3 nvlist_exists_number.3 \ - nv.3 nvlist_exists_string.3 \ - nv.3 nvlist_exists_nvlist.3 \ - nv.3 nvlist_exists_descriptor.3 \ - nv.3 nvlist_exists_binary.3 \ - nv.3 nvlist_add_null.3 \ - nv.3 nvlist_add_bool.3 \ - nv.3 nvlist_add_number.3 \ - nv.3 nvlist_add_string.3 \ - nv.3 nvlist_add_stringf.3 \ - nv.3 nvlist_add_stringv.3 \ - nv.3 nvlist_add_nvlist.3 \ - nv.3 
nvlist_add_descriptor.3 \ - nv.3 nvlist_add_binary.3 \ - nv.3 nvlist_move_string.3 \ - nv.3 nvlist_move_nvlist.3 \ - nv.3 nvlist_move_descriptor.3 \ - nv.3 nvlist_move_binary.3 \ - nv.3 nvlist_get_bool.3 \ - nv.3 nvlist_get_number.3 \ - nv.3 nvlist_get_string.3 \ - nv.3 nvlist_get_nvlist.3 \ - nv.3 nvlist_get_descriptor.3 \ - nv.3 nvlist_get_binary.3 \ - nv.3 nvlist_take_bool.3 \ - nv.3 nvlist_take_number.3 \ - nv.3 nvlist_take_string.3 \ - nv.3 nvlist_take_nvlist.3 \ - nv.3 nvlist_take_descriptor.3 \ - nv.3 nvlist_take_binary.3 \ - nv.3 nvlist_free.3 \ - nv.3 nvlist_free_type.3 \ - nv.3 nvlist_free_null.3 \ - nv.3 nvlist_free_bool.3 \ - nv.3 nvlist_free_number.3 \ - nv.3 nvlist_free_string.3 \ - nv.3 nvlist_free_nvlist.3 \ - nv.3 nvlist_free_descriptor.3 \ - nv.3 nvlist_free_binary.3 -MLINKS+=nv.3 nvlist_existsf.3 \ - nv.3 nvlist_existsf_type.3 \ - nv.3 nvlist_existsf_null.3 \ - nv.3 nvlist_existsf_bool.3 \ - nv.3 nvlist_existsf_number.3 \ - nv.3 nvlist_existsf_string.3 \ - nv.3 nvlist_existsf_nvlist.3 \ - nv.3 nvlist_existsf_descriptor.3 \ - nv.3 nvlist_existsf_binary.3 \ - nv.3 nvlist_addf_null.3 \ - nv.3 nvlist_addf_bool.3 \ - nv.3 nvlist_addf_number.3 \ - nv.3 nvlist_addf_string.3 \ - nv.3 nvlist_addf_nvlist.3 \ - nv.3 nvlist_addf_descriptor.3 \ - nv.3 nvlist_addf_binary.3 \ - nv.3 nvlist_movef_string.3 \ - nv.3 nvlist_movef_nvlist.3 \ - nv.3 nvlist_movef_descriptor.3 \ - nv.3 nvlist_movef_binary.3 \ - nv.3 nvlist_getf_bool.3 \ - nv.3 nvlist_getf_number.3 \ - nv.3 nvlist_getf_string.3 \ - nv.3 nvlist_getf_nvlist.3 \ - nv.3 nvlist_getf_descriptor.3 \ - nv.3 nvlist_getf_binary.3 \ - nv.3 nvlist_takef_bool.3 \ - nv.3 nvlist_takef_number.3 \ - nv.3 nvlist_takef_string.3 \ - nv.3 nvlist_takef_nvlist.3 \ - nv.3 nvlist_takef_descriptor.3 \ - nv.3 nvlist_takef_binary.3 \ - nv.3 nvlist_freef.3 \ - nv.3 nvlist_freef_type.3 \ - nv.3 nvlist_freef_null.3 \ - nv.3 nvlist_freef_bool.3 \ - nv.3 nvlist_freef_number.3 \ - nv.3 nvlist_freef_string.3 \ - nv.3 
nvlist_freef_nvlist.3 \ - nv.3 nvlist_freef_descriptor.3 \ - nv.3 nvlist_freef_binary.3 -MLINKS+=nv.3 nvlist_existsv.3 \ - nv.3 nvlist_existsv_type.3 \ - nv.3 nvlist_existsv_null.3 \ - nv.3 nvlist_existsv_bool.3 \ - nv.3 nvlist_existsv_number.3 \ - nv.3 nvlist_existsv_string.3 \ - nv.3 nvlist_existsv_nvlist.3 \ - nv.3 nvlist_existsv_descriptor.3 \ - nv.3 nvlist_existsv_binary.3 \ - nv.3 nvlist_addv_null.3 \ - nv.3 nvlist_addv_bool.3 \ - nv.3 nvlist_addv_number.3 \ - nv.3 nvlist_addv_string.3 \ - nv.3 nvlist_addv_nvlist.3 \ - nv.3 nvlist_addv_descriptor.3 \ - nv.3 nvlist_addv_binary.3 \ - nv.3 nvlist_movev_string.3 \ - nv.3 nvlist_movev_nvlist.3 \ - nv.3 nvlist_movev_descriptor.3 \ - nv.3 nvlist_movev_binary.3 \ - nv.3 nvlist_getv_bool.3 \ - nv.3 nvlist_getv_number.3 \ - nv.3 nvlist_getv_string.3 \ - nv.3 nvlist_getv_nvlist.3 \ - nv.3 nvlist_getv_descriptor.3 \ - nv.3 nvlist_getv_binary.3 \ - nv.3 nvlist_takev_bool.3 \ - nv.3 nvlist_takev_number.3 \ - nv.3 nvlist_takev_string.3 \ - nv.3 nvlist_takev_nvlist.3 \ - nv.3 nvlist_takev_descriptor.3 \ - nv.3 nvlist_takev_binary.3 \ - nv.3 nvlist_freef.3 \ - nv.3 nvlist_freev_type.3 \ - nv.3 nvlist_freev_null.3 \ - nv.3 nvlist_freev_bool.3 \ - nv.3 nvlist_freev_number.3 \ - nv.3 nvlist_freev_string.3 \ - nv.3 nvlist_freev_nvlist.3 \ - nv.3 nvlist_freev_descriptor.3 \ - nv.3 nvlist_freev_binary.3 +libnv.a: $(OBJS) + $(AR) -c -r libnv.a $(OBJS) -WARNS?= 6 +clean: + $(RM) $(OBJS) libnv.a -.include From 2e02209269f2ece8037086d2d085cde3a82ba075 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 7 May 2014 19:47:52 +0000 Subject: [PATCH 052/148] Add a very basic wrapper makefile. It doesn't run configure in libev yet; that has to be done. 
--- lib/Makefile | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 lib/Makefile diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 0000000..177464a --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,12 @@ + +SUBDIRS=libuinet libev libnv libhttp_parser + +all: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done + +clean: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) clean ) ; done + +install: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) install ) ; done + From afab3b2463f7189fd90c86a7292ac1b1f883b216 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 7 May 2014 19:53:18 +0000 Subject: [PATCH 053/148] Add subdirectory targeting Makefiles with the relevant dumb hackery to get this stuff to build. Add 'default' and 'all' targets as appropriate. Now this stuff builds by 'make clean config all' Next up, an install target and some unified includes so things like destination directory and CFLAGS stuff can be configured at build time. --- Makefile | 15 +++++++++++++++ bin/Makefile | 15 +++++++++++++++ lib/Makefile | 4 ++++ lib/libuinet/Makefile | 2 ++ mk/prog.mk | 4 ++++ 5 files changed, 40 insertions(+) create mode 100644 Makefile create mode 100644 bin/Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a4a9bb1 --- /dev/null +++ b/Makefile @@ -0,0 +1,15 @@ + +SUBDIRS=lib bin + +config: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) config ) ; done + +all: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done + +clean: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) clean ) ; done + +install: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) install ) ; done + diff --git a/bin/Makefile b/bin/Makefile new file mode 100644 index 0000000..0ed9df5 --- /dev/null +++ b/bin/Makefile @@ -0,0 +1,15 @@ + +SUBDIRS=passive + +config: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) config ) ; done + +all: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done + +clean: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) clean ) ; done + 
+install: + for d in $(SUBDIRS); do ( cd $$d; $(MAKE) install ) ; done + diff --git a/lib/Makefile b/lib/Makefile index 177464a..2308ade 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,6 +1,9 @@ SUBDIRS=libuinet libev libnv libhttp_parser +config: + (cd libev ; ./configure --with-uinet=../libuinet/api_include ) + all: for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done @@ -10,3 +13,4 @@ clean: install: for d in $(SUBDIRS); do ( cd $$d; $(MAKE) install ) ; done +default: config all diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 0070a22..2ee9c5b 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -299,6 +299,8 @@ HOST_SRCS = ${UINET_HOST_SRCS} OBJS+= $(patsubst %.c,%.o,${SRCS}) HOST_OBJS+= $(patsubst %.c,%.o,${HOST_SRCS}) +all: libuinet.a + # # The library is built by first incrementally linking all the object # to resolve internal references. Then, all symbols are made local. diff --git a/mk/prog.mk b/mk/prog.mk index 93d147a..99b172b 100644 --- a/mk/prog.mk +++ b/mk/prog.mk @@ -61,3 +61,7 @@ endif clean: @rm -f ${PROG} ${OBJS} +all: ${PROG} + + +config: From ab18429ff2714589f30d3adff921b674e42409a6 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 7 May 2014 07:44:42 -0700 Subject: [PATCH 054/148] Add cflags.mk which has the debug flags for building. tie it into various places so it's consistently used. --- bin/passive/Makefile | 3 ++- cflags.mk | 1 + lib/Makefile | 7 ++++++- lib/libhttp_parser/Makefile | 12 +++++++++--- lib/libnv/Makefile | 3 +++ lib/libuinet/Makefile | 2 +- 6 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 cflags.mk diff --git a/bin/passive/Makefile b/bin/passive/Makefile index a0a475d..117e654 100644 --- a/bin/passive/Makefile +++ b/bin/passive/Makefile @@ -1,4 +1,5 @@ TOPDIR?=${CURDIR}/../.. 
+include ${TOPDIR}/cflags.mk PROG=passive @@ -14,6 +15,6 @@ CFLAGS+= -I${TOPDIR}/lib/libhttp_parser -DENABLE_EXTRACT LDADD+= -L${TOPDIR}/lib/libhttp_parser -lhttp_parser -lz endif -DEBUG_FLAGS=-g -O0 +DEBUG_FLAGS=${DEBUG} include ${TOPDIR}/mk/prog.mk diff --git a/cflags.mk b/cflags.mk new file mode 100644 index 0000000..a2a5710 --- /dev/null +++ b/cflags.mk @@ -0,0 +1 @@ +DEBUG ?= -O -gdwarf-2 diff --git a/lib/Makefile b/lib/Makefile index 2308ade..e3b9ade 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,8 +1,13 @@ SUBDIRS=libuinet libev libnv libhttp_parser +TOPDIR?=${CURDIR}/.. +include ${TOPDIR}/cflags.mk + +default: all + config: - (cd libev ; ./configure --with-uinet=../libuinet/api_include ) + (cd libev ; env CFLAGS="${DEBUG}" ./configure --with-uinet=../libuinet/api_include ) all: for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done diff --git a/lib/libhttp_parser/Makefile b/lib/libhttp_parser/Makefile index 3094f00..03adb31 100644 --- a/lib/libhttp_parser/Makefile +++ b/lib/libhttp_parser/Makefile @@ -22,19 +22,23 @@ PLATFORM ?= $(shell sh -c 'uname -s | tr "[A-Z]" "[a-z]"') SONAME ?= libhttp_parser.so.2.3 SONAME_GENERIC ?= libhttp_parser.so +TOPDIR?=$(CURDIR)/../.. +include $(TOPDIR)/cflags.mk + CC?=gcc AR?=ar CPPFLAGS += -I. 
+ CPPFLAGS_DEBUG = $(CPPFLAGS) -DHTTP_PARSER_STRICT=1 CPPFLAGS_DEBUG += $(CPPFLAGS_DEBUG_EXTRA) CPPFLAGS_FAST = $(CPPFLAGS) -DHTTP_PARSER_STRICT=0 CPPFLAGS_FAST += $(CPPFLAGS_FAST_EXTRA) -CFLAGS += -Wall -Wextra -Werror +CFLAGS += -Wall -Wextra -Werror $(DEBUG) CFLAGS_DEBUG = $(CFLAGS) -O0 -g $(CFLAGS_DEBUG_EXTRA) CFLAGS_FAST = $(CFLAGS) -O3 $(CFLAGS_FAST_EXTRA) -CFLAGS_LIB = $(CFLAGS_FAST) -fPIC +CFLAGS_LIB = $(CFLAGS) -fPIC LDFLAGS_LIB = $(LDFLAGS) -shared @@ -72,7 +76,7 @@ test-valgrind: test_g valgrind ./test_g libhttp_parser.o: http_parser.c http_parser.h Makefile - $(CC) $(CPPFLAGS_FAST) $(CFLAGS_LIB) -c http_parser.c -o libhttp_parser.o + $(CC) $(CPPFLAGS) $(CFLAGS_LIB) -c http_parser.c -o libhttp_parser.o library: libhttp_parser.o $(CC) $(LDFLAGS_LIB) -o $(SONAME) $< @@ -96,6 +100,8 @@ parsertrace_g: http_parser_g.o contrib/parsertrace.c tags: http_parser.c http_parser.h test.c ctags $^ +default: all + all: library package clean: diff --git a/lib/libnv/Makefile b/lib/libnv/Makefile index 0af1827..37d4bb8 100644 --- a/lib/libnv/Makefile +++ b/lib/libnv/Makefile @@ -1,6 +1,9 @@ TOPDIR?=${CURDIR}/../.. include ${TOPDIR}/lib.mk +include ${TOPDIR}/cflags.mk + +CFLAGS+= ${DEBUG} LIB= nv diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 2ee9c5b..49d1b66 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -2,7 +2,6 @@ # Derived from FreeBSD auto-generated kernel Makefile and # machine-specific Makefile templates # -DEBUG=-gdwarf-2 -O TOPDIR?=${CURDIR}/../.. S=${TOPDIR}/sys @@ -12,6 +11,7 @@ OVERRIDE_INCLUDES_ROOT:=${CURDIR}/override_include X86_INCLUDES=0 include ${TOPDIR}/mk/kern.pre.mk +include ${TOPDIR}/cflags.mk KERNPREINCLUDES:= ${INCLUDES} INCLUDES= -I${OVERRIDE_INCLUDES_ROOT} -I${API_INCLUDES_ROOT} ${KERNPREINCLUDES} From a05c4dd467eae733f7cd5f937fb73c7a92bc7a47 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 7 May 2014 07:48:51 -0700 Subject: [PATCH 055/148] Don't use DEBUG, use DEBUG_FLAGS. Defining DEBUG is .. 
different. --- bin/passive/Makefile | 2 -- cflags.mk | 2 +- lib/Makefile | 2 +- lib/libhttp_parser/Makefile | 2 +- lib/libnv/Makefile | 2 +- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/bin/passive/Makefile b/bin/passive/Makefile index 117e654..4acc298 100644 --- a/bin/passive/Makefile +++ b/bin/passive/Makefile @@ -15,6 +15,4 @@ CFLAGS+= -I${TOPDIR}/lib/libhttp_parser -DENABLE_EXTRACT LDADD+= -L${TOPDIR}/lib/libhttp_parser -lhttp_parser -lz endif -DEBUG_FLAGS=${DEBUG} - include ${TOPDIR}/mk/prog.mk diff --git a/cflags.mk b/cflags.mk index a2a5710..0bdc9c0 100644 --- a/cflags.mk +++ b/cflags.mk @@ -1 +1 @@ -DEBUG ?= -O -gdwarf-2 +DEBUG_FLAGS ?= -O -gdwarf-2 diff --git a/lib/Makefile b/lib/Makefile index e3b9ade..4ea78e9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -7,7 +7,7 @@ include ${TOPDIR}/cflags.mk default: all config: - (cd libev ; env CFLAGS="${DEBUG}" ./configure --with-uinet=../libuinet/api_include ) + (cd libev ; env CFLAGS="${DEBUG_FLAGS}" ./configure --with-uinet=../libuinet/api_include ) all: for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done diff --git a/lib/libhttp_parser/Makefile b/lib/libhttp_parser/Makefile index 03adb31..5c26ce9 100644 --- a/lib/libhttp_parser/Makefile +++ b/lib/libhttp_parser/Makefile @@ -35,7 +35,7 @@ CPPFLAGS_DEBUG += $(CPPFLAGS_DEBUG_EXTRA) CPPFLAGS_FAST = $(CPPFLAGS) -DHTTP_PARSER_STRICT=0 CPPFLAGS_FAST += $(CPPFLAGS_FAST_EXTRA) -CFLAGS += -Wall -Wextra -Werror $(DEBUG) +CFLAGS += -Wall -Wextra -Werror $(DEBUG_FLAGS) CFLAGS_DEBUG = $(CFLAGS) -O0 -g $(CFLAGS_DEBUG_EXTRA) CFLAGS_FAST = $(CFLAGS) -O3 $(CFLAGS_FAST_EXTRA) CFLAGS_LIB = $(CFLAGS) -fPIC diff --git a/lib/libnv/Makefile b/lib/libnv/Makefile index 37d4bb8..57fb0bb 100644 --- a/lib/libnv/Makefile +++ b/lib/libnv/Makefile @@ -3,7 +3,7 @@ TOPDIR?=${CURDIR}/../.. 
include ${TOPDIR}/lib.mk include ${TOPDIR}/cflags.mk -CFLAGS+= ${DEBUG} +CFLAGS+= ${DEBUG_FLAGS} LIB= nv From b53880756e10c54d5d4981c3357d9f579f487b42 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 7 May 2014 07:49:25 -0700 Subject: [PATCH 056/148] DEBUG needs to be defined as part of the kernel build to get debug flags. --- lib/libuinet/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 49d1b66..94e5d6e 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -13,6 +13,8 @@ X86_INCLUDES=0 include ${TOPDIR}/mk/kern.pre.mk include ${TOPDIR}/cflags.mk +DEBUG=${DEBUG_FLAGS} + KERNPREINCLUDES:= ${INCLUDES} INCLUDES= -I${OVERRIDE_INCLUDES_ROOT} -I${API_INCLUDES_ROOT} ${KERNPREINCLUDES} INCLUDES+= -I./machine_include From 1432a11248d8ff4e0f671a8e692f77aea7afb2d8 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 7 May 2014 08:16:21 -0700 Subject: [PATCH 057/148] add some install targets everywhere. The default is /usr/local/ but setting UINET_DESTDIR in the make environment will override this. 
--- cflags.mk | 8 ++++++++ lib/Makefile | 2 +- lib/libhttp_parser/Makefile | 6 ++++++ lib/libnv/Makefile | 6 ++++++ lib/libuinet/Makefile | 12 ++++++++++-- mk/prog.mk | 3 +++ 6 files changed, 34 insertions(+), 3 deletions(-) diff --git a/cflags.mk b/cflags.mk index 0bdc9c0..71d417b 100644 --- a/cflags.mk +++ b/cflags.mk @@ -1 +1,9 @@ DEBUG_FLAGS ?= -O -gdwarf-2 + +UINET_DESTDIR ?= /usr/local/ + +UINET_INSTALL ?= install +UINET_INSTALL_DIR ?= $(UINET_INSTALL) -m 0755 +UINET_INSTALL_LIB ?= $(UINET_INSTALL) -m 0644 +UINET_INSTALL_INC ?= $(UINET_INSTALL) -m 0644 +UINET_INSTALL_BIN ?= $(UINET_INSTALL) -m 0755 diff --git a/lib/Makefile b/lib/Makefile index 4ea78e9..5c5d344 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -7,7 +7,7 @@ include ${TOPDIR}/cflags.mk default: all config: - (cd libev ; env CFLAGS="${DEBUG_FLAGS}" ./configure --with-uinet=../libuinet/api_include ) + (cd libev ; env CFLAGS="${DEBUG_FLAGS}" ./configure --with-uinet=../libuinet/api_include --prefix="${UINET_DESTDIR}" --includedir="${UINET_DESTDIR}/include/libev" ) all: for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done diff --git a/lib/libhttp_parser/Makefile b/lib/libhttp_parser/Makefile index 5c26ce9..8aac1fe 100644 --- a/lib/libhttp_parser/Makefile +++ b/lib/libhttp_parser/Makefile @@ -109,6 +109,12 @@ clean: http_parser.tar libhttp_parser.so* \ url_parser url_parser_g parsertrace parsertrace_g +install: + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/lib + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/include/http_parser + ${UINET_INSTALL_LIB} libhttp_parser.a ${UINET_DESTDIR}/lib + ${UINET_INSTALL_INC} http_parser.h ${UINET_DESTDIR}/include/http_parser + contrib/url_parser.c: http_parser.h contrib/parsertrace.c: http_parser.h diff --git a/lib/libnv/Makefile b/lib/libnv/Makefile index 57fb0bb..1d6421b 100644 --- a/lib/libnv/Makefile +++ b/lib/libnv/Makefile @@ -19,3 +19,9 @@ libnv.a: $(OBJS) clean: $(RM) $(OBJS) libnv.a +install: + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/lib + ${UINET_INSTALL_DIR} -d 
${UINET_DESTDIR}/include/nv + ${UINET_INSTALL_LIB} libnv.a ${UINET_DESTDIR}/lib + ${UINET_INSTALL_INC} dnv.h ${UINET_DESTDIR}/include/nv + ${UINET_INSTALL_INC} nv.h ${UINET_DESTDIR}/include/nv diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 94e5d6e..fe09491 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -357,7 +357,16 @@ machine_includes: cp -r $S/x86/include/* ${MACHINE_INCLUDES_ROOT}/x86; \ fi - +install: + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/lib + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/include/libuinet/ + ${UINET_INSTALL_LIB} libuinet.a ${UINET_DESTDIR}/lib + ${UINET_INSTALL_INC} api_include/uinet_api.h ${UINET_DESTDIR}/include/libuinet/ + ${UINET_INSTALL_INC} api_include/uinet_api_types.h ${UINET_DESTDIR}/include/libuinet/ + ${UINET_INSTALL_INC} api_include/uinet_api_errno.h ${UINET_DESTDIR}/include/libuinet/ + ${UINET_INSTALL_INC} api_include/uinet_config.h ${UINET_DESTDIR}/include/libuinet/ + ${UINET_INSTALL_INC} api_include/uinet_host_sysctl_api.h ${UINET_DESTDIR}/include/libuinet/ + ${UINET_INSTALL_INC} api_include/uinet_queue.h ${UINET_DESTDIR}/include/libuinet/ # # Distilled from FreeBSD src/sys/conf/kern.post.mk @@ -371,6 +380,5 @@ vnode_if_newproto.h: vnode_if_typedef.h: ${AWK} -f $S/tools/vnode_if.awk $S/kern/vnode_if.src -q - include ${TOPDIR}/mk/kern.mk diff --git a/mk/prog.mk b/mk/prog.mk index 99b172b..5b9d095 100644 --- a/mk/prog.mk +++ b/mk/prog.mk @@ -63,5 +63,8 @@ clean: all: ${PROG} +install: + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/bin + ${UINET_INSTALL_BIN} ${PROG} ${UINET_DESTDIR}/bin config: From 7266fc11e56a761344035c7649ad354cbd1cf862 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 8 May 2014 05:27:27 -0700 Subject: [PATCH 058/148] Yes, we need this. 
--- lib/libuinet/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index fe09491..e1a006b 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -366,6 +366,7 @@ install: ${UINET_INSTALL_INC} api_include/uinet_api_errno.h ${UINET_DESTDIR}/include/libuinet/ ${UINET_INSTALL_INC} api_include/uinet_config.h ${UINET_DESTDIR}/include/libuinet/ ${UINET_INSTALL_INC} api_include/uinet_host_sysctl_api.h ${UINET_DESTDIR}/include/libuinet/ + ${UINET_INSTALL_INC} api_include/uinet_host_sysctl_api_priv.h ${UINET_DESTDIR}/include/libuinet/ ${UINET_INSTALL_INC} api_include/uinet_queue.h ${UINET_DESTDIR}/include/libuinet/ # From 0b43c5466eab6357e9f37849c680afc0e37f34ec Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 8 May 2014 06:26:55 -0700 Subject: [PATCH 059/148] Migrate all of the public libnv stuff to be called libuinetnv and uinet_nv*.h. This is so it doesn't clash with the libnv stuff on -HEAD. This doesn't rename all the symbols in the library, so linking both libnv and libuinetnv will cause drama. 
--- bin/passive/Makefile | 2 +- lib/Makefile | 2 +- lib/libnv/Makefile | 27 --------------------- lib/libuinet/Makefile | 2 +- lib/libuinet/uinet_host_sysctl_api.c | 2 +- lib/libuinetnv/Makefile | 25 +++++++++++++++++++ lib/{libnv => libuinetnv}/common_impl.h | 0 lib/{libnv => libuinetnv}/dnvlist.c | 5 ++-- lib/{libnv => libuinetnv}/msgio.c | 0 lib/{libnv => libuinetnv}/msgio.h | 0 lib/{libnv => libuinetnv}/nv.3 | 0 lib/{libnv => libuinetnv}/nv_impl.h | 0 lib/{libnv => libuinetnv}/nvlist.c | 2 +- lib/{libnv => libuinetnv}/nvlist_impl.h | 2 +- lib/{libnv => libuinetnv}/nvpair.c | 2 +- lib/{libnv => libuinetnv}/nvpair_impl.h | 2 +- lib/{libnv/dnv.h => libuinetnv/uinet_dnv.h} | 10 ++++---- lib/{libnv/nv.h => libuinetnv/uinet_nv.h} | 8 +++--- 18 files changed, 44 insertions(+), 47 deletions(-) delete mode 100644 lib/libnv/Makefile create mode 100644 lib/libuinetnv/Makefile rename lib/{libnv => libuinetnv}/common_impl.h (100%) rename lib/{libnv => libuinetnv}/dnvlist.c (99%) rename lib/{libnv => libuinetnv}/msgio.c (100%) rename lib/{libnv => libuinetnv}/msgio.h (100%) rename lib/{libnv => libuinetnv}/nv.3 (100%) rename lib/{libnv => libuinetnv}/nv_impl.h (100%) rename lib/{libnv => libuinetnv}/nvlist.c (99%) rename lib/{libnv => libuinetnv}/nvlist_impl.h (98%) rename lib/{libnv => libuinetnv}/nvpair.c (99%) rename lib/{libnv => libuinetnv}/nvpair_impl.h (99%) rename lib/{libnv/dnv.h => libuinetnv/uinet_dnv.h} (97%) rename lib/{libnv/nv.h => libuinetnv/uinet_nv.h} (99%) diff --git a/bin/passive/Makefile b/bin/passive/Makefile index 4acc298..2a940fe 100644 --- a/bin/passive/Makefile +++ b/bin/passive/Makefile @@ -5,7 +5,7 @@ PROG=passive SRCS=passive.c -UINET_LIBS=uinet nv +UINET_LIBS=uinet uinetnv CFLAGS= -I${TOPDIR}/lib/libev -I${TOPDIR}/lib/libnv LDADD= ${TOPDIR}/lib/libev/.libs/libev.a -lm -lpcap diff --git a/lib/Makefile b/lib/Makefile index 5c5d344..c970f2d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,5 +1,5 @@ -SUBDIRS=libuinet libev libnv libhttp_parser 
+SUBDIRS=libuinet libev libuinetnv libhttp_parser TOPDIR?=${CURDIR}/.. include ${TOPDIR}/cflags.mk diff --git a/lib/libnv/Makefile b/lib/libnv/Makefile deleted file mode 100644 index 1d6421b..0000000 --- a/lib/libnv/Makefile +++ /dev/null @@ -1,27 +0,0 @@ - -TOPDIR?=${CURDIR}/../.. -include ${TOPDIR}/lib.mk -include ${TOPDIR}/cflags.mk - -CFLAGS+= ${DEBUG_FLAGS} - -LIB= nv - -SRCS= dnvlist.c msgio.c nvlist.c nvpair.c -OBJS= dnvlist.o msgio.o nvlist.o nvpair.o -INCS= dnv.h nv.h - -all: libnv.a - -libnv.a: $(OBJS) - $(AR) -c -r libnv.a $(OBJS) - -clean: - $(RM) $(OBJS) libnv.a - -install: - ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/lib - ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/include/nv - ${UINET_INSTALL_LIB} libnv.a ${UINET_DESTDIR}/lib - ${UINET_INSTALL_INC} dnv.h ${UINET_DESTDIR}/include/nv - ${UINET_INSTALL_INC} nv.h ${UINET_DESTDIR}/include/nv diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index e1a006b..693166f 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -21,7 +21,7 @@ INCLUDES+= -I./machine_include INCLUDES+= -I./opt # Include search path for files that only include host OS headers -HOST_INCLUDES= -I. -I${API_INCLUDES_ROOT} -I../libnv +HOST_INCLUDES= -I. -I${API_INCLUDES_ROOT} -I../libuinetnv ifdef NETMAP_INCLUDES HOST_INCLUDES+= -I${NETMAP_INCLUDES} diff --git a/lib/libuinet/uinet_host_sysctl_api.c b/lib/libuinet/uinet_host_sysctl_api.c index 01fb160..eabcf62 100644 --- a/lib/libuinet/uinet_host_sysctl_api.c +++ b/lib/libuinet/uinet_host_sysctl_api.c @@ -44,7 +44,7 @@ #include "uinet_api.h" #include "uinet_config.h" -#include "nv.h" +#include "uinet_nv.h" #include "uinet_host_sysctl_api.h" #include "uinet_host_sysctl_api_priv.h" diff --git a/lib/libuinetnv/Makefile b/lib/libuinetnv/Makefile new file mode 100644 index 0000000..f8074d6 --- /dev/null +++ b/lib/libuinetnv/Makefile @@ -0,0 +1,25 @@ + +TOPDIR?=${CURDIR}/../.. 
+include ${TOPDIR}/lib.mk +include ${TOPDIR}/cflags.mk + +CFLAGS+= ${DEBUG_FLAGS} + +SRCS= dnvlist.c msgio.c nvlist.c nvpair.c +OBJS= dnvlist.o msgio.o nvlist.o nvpair.o +INCS= uinet_dnv.h uinet_nv.h + +all: libuinetnv.a + +libuinetnv.a: $(OBJS) + $(AR) -c -r libuinetnv.a $(OBJS) + +clean: + $(RM) $(OBJS) libuinetnv.a + +install: + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/lib + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/include/nv + ${UINET_INSTALL_LIB} libuinetnv.a ${UINET_DESTDIR}/lib + ${UINET_INSTALL_INC} uinet_dnv.h ${UINET_DESTDIR}/include/nv + ${UINET_INSTALL_INC} uinet_nv.h ${UINET_DESTDIR}/include/nv diff --git a/lib/libnv/common_impl.h b/lib/libuinetnv/common_impl.h similarity index 100% rename from lib/libnv/common_impl.h rename to lib/libuinetnv/common_impl.h diff --git a/lib/libnv/dnvlist.c b/lib/libuinetnv/dnvlist.c similarity index 99% rename from lib/libnv/dnvlist.c rename to lib/libuinetnv/dnvlist.c index 97d02ee..dfa4073 100644 --- a/lib/libnv/dnvlist.c +++ b/lib/libuinetnv/dnvlist.c @@ -34,10 +34,9 @@ __FBSDID("$FreeBSD: head/lib/libnv/dnvlist.c 258065 2013-11-12 19:39:14Z pjd $") #include #include -#include "nv.h" +#include "uinet_nv.h" #include "nv_impl.h" - -#include "dnv.h" +#include "uinet_dnv.h" #define DNVLIST_GET(ftype, type) \ ftype \ diff --git a/lib/libnv/msgio.c b/lib/libuinetnv/msgio.c similarity index 100% rename from lib/libnv/msgio.c rename to lib/libuinetnv/msgio.c diff --git a/lib/libnv/msgio.h b/lib/libuinetnv/msgio.h similarity index 100% rename from lib/libnv/msgio.h rename to lib/libuinetnv/msgio.h diff --git a/lib/libnv/nv.3 b/lib/libuinetnv/nv.3 similarity index 100% rename from lib/libnv/nv.3 rename to lib/libuinetnv/nv.3 diff --git a/lib/libnv/nv_impl.h b/lib/libuinetnv/nv_impl.h similarity index 100% rename from lib/libnv/nv_impl.h rename to lib/libuinetnv/nv_impl.h diff --git a/lib/libnv/nvlist.c b/lib/libuinetnv/nvlist.c similarity index 99% rename from lib/libnv/nvlist.c rename to lib/libuinetnv/nvlist.c index 
f4e1d6f..32544ab 100644 --- a/lib/libnv/nvlist.c +++ b/lib/libuinetnv/nvlist.c @@ -50,7 +50,7 @@ __FBSDID("$FreeBSD: head/lib/libnv/nvlist.c 264021 2014-04-01 21:30:54Z jilles $ #endif #include "msgio.h" -#include "nv.h" +#include "uinet_nv.h" #include "nv_impl.h" #include "nvlist_impl.h" #include "nvpair_impl.h" diff --git a/lib/libnv/nvlist_impl.h b/lib/libuinetnv/nvlist_impl.h similarity index 98% rename from lib/libnv/nvlist_impl.h rename to lib/libuinetnv/nvlist_impl.h index 693b71c..30b2809 100644 --- a/lib/libnv/nvlist_impl.h +++ b/lib/libuinetnv/nvlist_impl.h @@ -34,7 +34,7 @@ #include -#include "nv.h" +#include "uinet_nv.h" void *nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep); nvlist_t *nvlist_xunpack(const void *buf, size_t size, const int *fds, diff --git a/lib/libnv/nvpair.c b/lib/libuinetnv/nvpair.c similarity index 99% rename from lib/libnv/nvpair.c rename to lib/libuinetnv/nvpair.c index 8415e8b..d7df165 100644 --- a/lib/libnv/nvpair.c +++ b/lib/libuinetnv/nvpair.c @@ -48,7 +48,7 @@ __FBSDID("$FreeBSD: head/lib/libnv/nvpair.c 258594 2013-11-25 20:45:30Z pjd $"); #endif #include "common_impl.h" -#include "nv.h" +#include "uinet_nv.h" #include "nv_impl.h" #include "nvlist_impl.h" #include "nvpair_impl.h" diff --git a/lib/libnv/nvpair_impl.h b/lib/libuinetnv/nvpair_impl.h similarity index 99% rename from lib/libnv/nvpair_impl.h rename to lib/libuinetnv/nvpair_impl.h index 64fcfde..d8d5d79 100644 --- a/lib/libnv/nvpair_impl.h +++ b/lib/libuinetnv/nvpair_impl.h @@ -36,7 +36,7 @@ #include -#include "nv.h" +#include "uinet_nv.h" TAILQ_HEAD(nvl_head, nvpair); diff --git a/lib/libnv/dnv.h b/lib/libuinetnv/uinet_dnv.h similarity index 97% rename from lib/libnv/dnv.h rename to lib/libuinetnv/uinet_dnv.h index c4ba65b..264d916 100644 --- a/lib/libnv/dnv.h +++ b/lib/libuinetnv/uinet_dnv.h @@ -29,8 +29,8 @@ * $FreeBSD: head/lib/libnv/dnv.h 258065 2013-11-12 19:39:14Z pjd $ */ -#ifndef _DNV_H_ -#define _DNV_H_ +#ifndef _UINET_DNV_H_ +#define 
_UINET_DNV_H_ #include @@ -38,8 +38,8 @@ #include #include -#ifndef _NVLIST_T_DECLARED -#define _NVLIST_T_DECLARED +#ifndef _UINET_NVLIST_T_DECLARED +#define _UINET_NVLIST_T_DECLARED struct nvlist; typedef struct nvlist nvlist_t; @@ -103,4 +103,4 @@ nvlist_t *dnvlist_takev_nvlist(nvlist_t *nvl, nvlist_t *defval, const char *name int dnvlist_takev_descriptor(nvlist_t *nvl, int defval, const char *namefmt, va_list nameap) __printflike(3, 0); void *dnvlist_takev_binary(nvlist_t *nvl, size_t *sizep, void *defval, size_t defsize, const char *namefmt, va_list nameap) __printflike(5, 0); -#endif /* !_DNV_H_ */ +#endif /* !_UINET_DNV_H_ */ diff --git a/lib/libnv/nv.h b/lib/libuinetnv/uinet_nv.h similarity index 99% rename from lib/libnv/nv.h rename to lib/libuinetnv/uinet_nv.h index 85718c5..ad09429 100644 --- a/lib/libnv/nv.h +++ b/lib/libuinetnv/uinet_nv.h @@ -29,8 +29,8 @@ * $FreeBSD: head/lib/libnv/nv.h 258065 2013-11-12 19:39:14Z pjd $ */ -#ifndef _NV_H_ -#define _NV_H_ +#ifndef _UINET_NV_H_ +#define _UINET_NV_H_ #include @@ -39,8 +39,8 @@ #include #include -#ifndef _NVLIST_T_DECLARED -#define _NVLIST_T_DECLARED +#ifndef _UINET_NVLIST_T_DECLARED +#define _UINET_NVLIST_T_DECLARED struct nvlist; typedef struct nvlist nvlist_t; From 8c99292e69c59428b290a569ff1623216636e208 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 8 May 2014 06:46:31 -0700 Subject: [PATCH 060/148] Don't build the shared library for libev. 
--- lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index c970f2d..ac91792 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -7,7 +7,7 @@ include ${TOPDIR}/cflags.mk default: all config: - (cd libev ; env CFLAGS="${DEBUG_FLAGS}" ./configure --with-uinet=../libuinet/api_include --prefix="${UINET_DESTDIR}" --includedir="${UINET_DESTDIR}/include/libev" ) + (cd libev ; env CFLAGS="${DEBUG_FLAGS}" ./configure --with-uinet=../libuinet/api_include --prefix="${UINET_DESTDIR}" --includedir="${UINET_DESTDIR}/include/libev" --enable-shared=no ) all: for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done From e3db2dd8c981ed85e220d36f60347dc1ab6055e4 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 9 May 2014 08:02:45 -0700 Subject: [PATCH 061/148] Install the shared library too. --- lib/libhttp_parser/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/libhttp_parser/Makefile b/lib/libhttp_parser/Makefile index 8aac1fe..427e939 100644 --- a/lib/libhttp_parser/Makefile +++ b/lib/libhttp_parser/Makefile @@ -113,6 +113,7 @@ install: ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/lib ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/include/http_parser ${UINET_INSTALL_LIB} libhttp_parser.a ${UINET_DESTDIR}/lib + ${UINET_INSTALL_LIB} libhttp_parser.so.2.3 ${UINET_DESTDIR}/lib ${UINET_INSTALL_INC} http_parser.h ${UINET_DESTDIR}/include/http_parser contrib/url_parser.c: http_parser.h From d541f3a9ae1f7343e61abec6d25087502a0dbeb4 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 13 May 2014 09:27:51 -0700 Subject: [PATCH 062/148] Add some more printf kernel calls, so they're used by the kernel code rather than the libc code. This is required for ether_sprintf(), as this uses %xD which is a FreeBSD extension. 
--- lib/libuinet/uinet_subr_prf.c | 59 +++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/lib/libuinet/uinet_subr_prf.c b/lib/libuinet/uinet_subr_prf.c index d29e58d..66a6fc9 100644 --- a/lib/libuinet/uinet_subr_prf.c +++ b/lib/libuinet/uinet_subr_prf.c @@ -572,3 +572,62 @@ vprintf(const char *fmt, va_list ap) return (retval); } + +/* + * Scaled down version of sprintf(3). + */ +int +sprintf(char *buf, const char *cfmt, ...) +{ + int retval; + va_list ap; + + va_start(ap, cfmt); + retval = kvprintf(cfmt, NULL, (void *)buf, 10, ap); + buf[retval] = '\0'; + va_end(ap); + return (retval); +} + +static void +snprintf_func(int ch, void *arg) +{ + struct snprintf_arg *const info = arg; + + if (info->remain >= 2) { + *info->str++ = ch; + info->remain--; + } +} + +/* + * Scaled down version of vsnprintf(3). + */ +int +vsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + struct snprintf_arg info; + int retval; + + info.str = str; + info.remain = size; + retval = kvprintf(format, snprintf_func, &info, 10, ap); + if (info.remain >= 1) + *info.str++ = '\0'; + return (retval); +} + +/* + * Scaled down version of snprintf(3). + */ +int +snprintf(char *str, size_t size, const char *format, ...) +{ + int retval; + va_list ap; + + va_start(ap, format); + retval = vsnprintf(str, size, format, ap); + va_end(ap); + return(retval); +} From ec7ed0d2def028f35968567aa53dabe1a7a58fbd Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 13 May 2014 09:51:30 -0700 Subject: [PATCH 063/148] Revert this; Patrick fixed it in his tree. --- lib/libuinet/uinet_subr_prf.c | 59 ----------------------------------- 1 file changed, 59 deletions(-) diff --git a/lib/libuinet/uinet_subr_prf.c b/lib/libuinet/uinet_subr_prf.c index 66a6fc9..d29e58d 100644 --- a/lib/libuinet/uinet_subr_prf.c +++ b/lib/libuinet/uinet_subr_prf.c @@ -572,62 +572,3 @@ vprintf(const char *fmt, va_list ap) return (retval); } - -/* - * Scaled down version of sprintf(3). 
- */ -int -sprintf(char *buf, const char *cfmt, ...) -{ - int retval; - va_list ap; - - va_start(ap, cfmt); - retval = kvprintf(cfmt, NULL, (void *)buf, 10, ap); - buf[retval] = '\0'; - va_end(ap); - return (retval); -} - -static void -snprintf_func(int ch, void *arg) -{ - struct snprintf_arg *const info = arg; - - if (info->remain >= 2) { - *info->str++ = ch; - info->remain--; - } -} - -/* - * Scaled down version of vsnprintf(3). - */ -int -vsnprintf(char *str, size_t size, const char *format, va_list ap) -{ - struct snprintf_arg info; - int retval; - - info.str = str; - info.remain = size; - retval = kvprintf(format, snprintf_func, &info, 10, ap); - if (info.remain >= 1) - *info.str++ = '\0'; - return (retval); -} - -/* - * Scaled down version of snprintf(3). - */ -int -snprintf(char *str, size_t size, const char *format, ...) -{ - int retval; - va_list ap; - - va_start(ap, format); - retval = vsnprintf(str, size, format, ap); - va_end(ap); - return(retval); -} From 9a1ec318d0e6ecc82c5249c19de799cef051e42e Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 13 May 2014 12:01:57 -0700 Subject: [PATCH 064/148] Add an evil, evil hack to register a userland hook via pfil on AF_INET. pfil is the freebsd kernel packet filter API. It ties into a few locations and allows for link-layer, ipv4 and ipv6 packet interception and manipulation. Here I'm using it as a poor mans bpf in order to sneak a peek at ip frames. So: * add an opaque type (uinet_mbuf) that userland sees as the mbuf; * and a couple of hacky accessor macros; * then add a function to register an ipv4 pfil hook; * .. said hook also exposes the L2 information. It's totally dirty but good enough for what I need at work. 
--- lib/libuinet/api_include/uinet_api.h | 3 + lib/libuinet/api_include/uinet_api_types.h | 4 + lib/libuinet/uinet_api.c | 112 +++++++++++++++++++++ lib/libuinet/uinet_api.symlist | 3 + 4 files changed, 122 insertions(+) diff --git a/lib/libuinet/api_include/uinet_api.h b/lib/libuinet/api_include/uinet_api.h index 6638436..a295c9c 100644 --- a/lib/libuinet/api_include/uinet_api.h +++ b/lib/libuinet/api_include/uinet_api.h @@ -122,7 +122,10 @@ uinet_synf_deferral_t uinet_synfilter_deferral_alloc(struct uinet_socket *so, ui int uinet_synfilter_deferral_deliver(struct uinet_socket *so, uinet_synf_deferral_t deferral, int decision); void uinet_synfilter_deferral_free(uinet_synf_deferral_t deferral); uinet_api_synfilter_cookie_t uinet_synfilter_deferral_get_cookie(uinet_synf_deferral_t deferral); +int uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg); +const char * uinet_mbuf_data(const struct uinet_mbuf *); +size_t uinet_mbuf_len(const struct uinet_mbuf *); #ifdef __cplusplus } diff --git a/lib/libuinet/api_include/uinet_api_types.h b/lib/libuinet/api_include/uinet_api_types.h index d0ee3ac..b8a204f 100644 --- a/lib/libuinet/api_include/uinet_api_types.h +++ b/lib/libuinet/api_include/uinet_api_types.h @@ -30,9 +30,13 @@ #define UINET_IF_NAMESIZE 16 struct uinet_socket; +struct uinet_mbuf; +struct uinet_in_l2info; typedef void * uinet_api_synfilter_cookie_t; +typedef void (*uinet_pfil_cb_t)(const struct uinet_mbuf *m, struct uinet_in_l2info *l2i); + typedef int (*uinet_api_synfilter_callback_t)(struct uinet_socket *, void *, uinet_api_synfilter_cookie_t); struct uinet_api_synfilter_ctx { diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index 37c2c38..ee44ee7 100644 --- a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -39,9 +39,11 @@ #include #include +#include #include #include #include +#include #include "uinet_api.h" #include "uinet_config_internal.h" @@ -1217,5 +1219,115 @@ uinet_sysctl(int *name, u_int namelen, void *oldp, size_t 
*oldplen, return (error); } +/* + * XXX static callback sucks, but it's what I have to go on. + */ +static uinet_pfil_cb_t g_uinet_pfil_cb = NULL; +static void * g_uinet_pfil_cbdata = NULL; + +/* + * Hook for processing IPv4 frames. + */ +static int +uinet_pfil_in_hook_v4(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, + struct inpcb *inp) +{ + struct ifl2info *l2i_tag; + struct uinet_in_l2info uinet_l2i; + + /* + * No hook? Turf out. + */ + if (g_uinet_pfil_cb == NULL) + return (0); + + /* + * See if there's L2 information for this frame. + */ + l2i_tag = (struct ifl2info *)m_tag_locate(*m, + MTAG_PROMISCINET, + MTAG_PROMISCINET_L2INFO, + NULL); + +#if 0 + if (l2i_tag == NULL) { + printf("%s: no L2 information\n", + __func__); + } else { + printf("%s: src=%s", + __func__, + ether_sprintf(l2i_tag->ifl2i_info.inl2i_local_addr)); + printf(" dst=%s\n", + ether_sprintf(l2i_tag->ifl2i_info.inl2i_foreign_addr)); + } +#endif + + /* + * Populate the libuinet L2 header type + * + * XXX this should be a method! + */ + memcpy(&uinet_l2i, &l2i_tag->ifl2i_info, sizeof(uinet_l2i)); + /* + * Call our callback to process the frame + */ + g_uinet_pfil_cb((const struct uinet_mbuf *) *m, &uinet_l2i); + /* Pass all for now */ + return (0); +} + +/* + * XXX test hack to play with pfil + */ +int +uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg) +{ + int error; + VNET_ITERATOR_DECL(vnet_iter); + struct pfil_head *pfh; + + if (g_uinet_pfil_cb != NULL) { + printf("%s: callback already registered!\n", __func__); + return (-1); + } + + g_uinet_pfil_cb = cb; + g_uinet_pfil_cbdata = arg; + + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + /* XXX TODO: ipv6 */ + pfh = pfil_head_get(PFIL_TYPE_AF, AF_INET); + error = pfil_add_hook(uinet_pfil_in_hook_v4, NULL, + PFIL_IN | PFIL_WAITOK, pfh); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); + return (0); +} + +/* + * Get a pointer to the given mbuf data. 
+ * + * This only grabs the pointer to this first mbuf; not the whole + * chain worth of data. That's a different API (which likely should + * be implemented at some point.) + */ +const char * +uinet_mbuf_data(const struct uinet_mbuf *m) +{ + const struct mbuf *mb = (const struct mbuf *) m; + + return mtod(mb, const char *); +} + +size_t +uinet_mbuf_len(const struct uinet_mbuf *m) +{ + const struct mbuf *mb = (const struct mbuf *) m; + + return (mb->m_len); +} diff --git a/lib/libuinet/uinet_api.symlist b/lib/libuinet/uinet_api.symlist index a18d223..bff5d0d 100644 --- a/lib/libuinet/uinet_api.symlist +++ b/lib/libuinet/uinet_api.symlist @@ -72,3 +72,6 @@ uinet_synfilter_install uinet_sysctl uinet_sysctlbyname uinet_host_sysctl_listener_thread +uinet_register_pfil_in +uinet_mbuf_data +uinet_mbuf_len From 00cd13a3fbdb67abfb7f63c3550545f0006f0349 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 13 May 2014 12:06:21 -0700 Subject: [PATCH 065/148] .. and whilst I'm here, don't provide l2 information if it's not present. It's only present for promisc sniffed frames. (and don't deref the NULL pointer either.) --- lib/libuinet/uinet_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index ee44ee7..64f6cb9 100644 --- a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -1267,12 +1267,14 @@ uinet_pfil_in_hook_v4(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * * XXX this should be a method! */ - memcpy(&uinet_l2i, &l2i_tag->ifl2i_info, sizeof(uinet_l2i)); + if (l2i_tag != NULL) + memcpy(&uinet_l2i, &l2i_tag->ifl2i_info, sizeof(uinet_l2i)); /* * Call our callback to process the frame */ - g_uinet_pfil_cb((const struct uinet_mbuf *) *m, &uinet_l2i); + g_uinet_pfil_cb((const struct uinet_mbuf *) *m, + l2i_tag != NULL ?
&uinet_l2i : NULL); /* Pass all for now */ return (0); From b63489d9f00954e52c16f203114b241ca5d11b1b Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 29 May 2014 10:14:48 -0700 Subject: [PATCH 066/148] Flesh out the very basic framework for a libuinet 2-interface bridge. --- lib/libuinet/Makefile | 3 +- lib/libuinet/api_include/uinet_config.h | 3 +- lib/libuinet/uinet_config.c | 7 ++ lib/libuinet/uinet_if_bridge.c | 154 ++++++++++++++++++++++++ lib/libuinet/uinet_if_bridge.h | 7 ++ 5 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 lib/libuinet/uinet_if_bridge.c create mode 100644 lib/libuinet/uinet_if_bridge.h diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 693166f..2021eb1 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -145,7 +145,8 @@ UINET_SRCS+= \ uinet_vm_glue.c \ uinet_vm_kern.c \ uinet_vm_meter.c \ - uinet_vm_object.c + uinet_vm_object.c \ + uinet_if_bridge.c ifneq (${HOST_OS},Darwin) UINET_SRCS+= uinet_if_netmap.c diff --git a/lib/libuinet/api_include/uinet_config.h b/lib/libuinet/api_include/uinet_config.h index a9d4554..2196376 100644 --- a/lib/libuinet/api_include/uinet_config.h +++ b/lib/libuinet/api_include/uinet_config.h @@ -43,7 +43,8 @@ typedef enum { typedef enum { UINET_IFTYPE_LOOPBACK, UINET_IFTYPE_NETMAP, - UINET_IFTYPE_PCAP + UINET_IFTYPE_PCAP, + UINET_IFTYPE_BRIDGE } uinet_iftype_t; diff --git a/lib/libuinet/uinet_config.c b/lib/libuinet/uinet_config.c index d8aeb37..9b4def7 100644 --- a/lib/libuinet/uinet_config.c +++ b/lib/libuinet/uinet_config.c @@ -34,6 +34,7 @@ #include "uinet_config_internal.h" #include "uinet_if_netmap.h" #include "uinet_if_pcap.h" +#include "uinet_if_bridge.h" static TAILQ_HEAD(config_head, uinet_config_if) if_conf = TAILQ_HEAD_INITIALIZER(if_conf); @@ -136,6 +137,9 @@ uinet_ifcreate(uinet_iftype_t type, const char *configstr, const char *alias, case UINET_IFTYPE_PCAP: error = if_pcap_attach(cfg); break; + case UINET_IFTYPE_BRIDGE: + error = 
if_bridge_attach(cfg); + break; default: printf("Error attaching interface with config %s: unknown interface type %d\n", cfg->configstr, cfg->type); error = ENXIO; @@ -179,6 +183,9 @@ uinet_ifdestroy(uinet_ifcookie_t cookie) case UINET_IFTYPE_PCAP: error = if_pcap_detach(cfg); break; + case UINET_IFTYPE_BRIDGE: + error = if_bridge_detach(cfg); + break; default: printf("Error detaching interface %s: unknown interface type %d\n", cfg->name, cfg->type); error = ENXIO; diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c new file mode 100644 index 0000000..40d9076 --- /dev/null +++ b/lib/libuinet/uinet_if_bridge.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2014 Adrian Chadd, Norse Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "uinet_config_internal.h" +#include "uinet_host_interface.h" +#include "uinet_if_bridge.h" + +/* + * This implements a two-port, transparent bridge interface. + * + * It's designed to just be a dumb conduit between two other + * physical interfaces. It's not supposed to be a fully fledged + * bridge. + */ + +extern struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *); +extern int (*bridge_output_p)(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); + +struct if_bridge_softc { + struct ifnet *ifp; + const struct uinet_config_if *cfg; + uint8_t addr[ETHER_ADDR_LEN]; + + /* XXX TODO: more useful state? */ +}; + +/* + * Process an incoming frame. This gets called + * from the child device ether_input path. 
+ */ +static struct mbuf * +if_bridge_input(struct ifnet *ifp, struct mbuf *m) +{ + + /* XXX for now, consume */ + m_freem(m); + return (NULL); +} + +/* + */ +static int +if_bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, + struct rtentry *rt) +{ + + /* For now, we consume the frame */ + m_freem(m); + return (0); +} + +int +if_bridge_attach(struct uinet_config_if *cfg) +{ + struct if_bridge_softc *sc = NULL; + int error = 0; + + if (NULL == cfg->configstr) { + error = EINVAL; + goto fail; + } + + printf("%s: configstr=%s\n", __func__, cfg->configstr); + + /* + * The ethernet path has a bunch of hard-coded + * bridge function pointers for whatever implements + * bridging. It's a hack, but in order to get + * frames and link status changes, we need to + * also do the same. + */ + bridge_input_p = if_bridge_input; + bridge_output_p = if_bridge_output; + + /* + * Setup local MAC address from configuration. + */ + + /* + * Allocate netif context. + */ + + /* + * Setup netif methods. + */ + + /* + * Add the given child interfaces to the bridge. + */ + + /* + * Link uinet cfg state back to the newly setup ifnet. + */ + + return (0); + +fail: + /* XXX TODO: deregister child interfaces */ + if (sc) + free(sc, M_DEVBUF); + return (error); + +} + +int +if_bridge_detach(struct uinet_config_if *cfg) +{ + /* XXX TODO */ + return (0); +} diff --git a/lib/libuinet/uinet_if_bridge.h b/lib/libuinet/uinet_if_bridge.h new file mode 100644 index 0000000..9755730 --- /dev/null +++ b/lib/libuinet/uinet_if_bridge.h @@ -0,0 +1,7 @@ +#ifndef __UINET_IF_BRIDGE_H__ +#define __UINET_IF_BRIDGE_H__ + +extern int if_bridge_attach(struct uinet_config_if *cfg); +extern int if_bridge_detach(struct uinet_config_if *cfg); + +#endif /* __UINET_IF_BRIDGE_H__ */ From f2cca7e061ac25bd515ba558afd60aa8c120d00a Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 30 May 2014 07:40:41 -0700 Subject: [PATCH 067/148] Start fleshing out the rest of the bridge interface framework. 
Still untested! --- lib/libuinet/uinet_if_bridge.c | 102 ++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index 40d9076..bcf572d 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -61,13 +61,15 @@ extern int (*bridge_output_p)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); struct if_bridge_softc { - struct ifnet *ifp; + struct ifnet *sc_ifp; const struct uinet_config_if *cfg; uint8_t addr[ETHER_ADDR_LEN]; /* XXX TODO: more useful state? */ }; +static int bridge_if_count = 0; + /* * Process an incoming frame. This gets called * from the child device ether_input path. @@ -93,6 +95,59 @@ if_bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, return (0); } +static void +if_bridge_init(void *arg) +{ + struct if_bridge_softc *sc = arg; + struct ifnet *ifp = sc->sc_ifp; + + ifp->if_drv_flags = IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; +} + +static void +if_bridge_stop(struct if_bridge_softc *sc) +{ + struct ifnet *ifp = sc->sc_ifp; + + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING|IFF_DRV_OACTIVE); +} + +static int +if_bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + int error = 0; + struct if_bridge_softc *sc = ifp->if_softc; + + switch (cmd) { + case SIOCSIFFLAGS: + if (ifp->if_flags & IFF_UP) + if_bridge_init(sc); + else if (ifp->if_drv_flags & IFF_DRV_RUNNING) + if_bridge_stop(sc); + break; + default: + error = ether_ioctl(ifp, cmd, data); + break; + } + return (error); +} + +static int +if_bridge_transmit(struct ifnet *ifp, struct mbuf *m) +{ + + /* XXX for now, free */ + m_freem(m); + return (ENOBUFS); +} + +static void +if_bridge_qflush(struct ifnet *ifp) +{ + +} + int if_bridge_attach(struct uinet_config_if *cfg) { @@ -106,6 +161,19 @@ if_bridge_attach(struct uinet_config_if *cfg) printf("%s: configstr=%s\n", __func__, cfg->configstr); + sc = 
malloc(sizeof(struct if_bridge_softc), M_DEVBUF, M_WAITOK); + if (sc == NULL) { + printf("%s: malloc failed\n", __func__); + error = ENOMEM; + goto fail; + } + + /* Set the interface name */ + snprintf(cfg->name, sizeof(cfg->name), "bridge%u", bridge_if_count); + bridge_if_count++; + + sc->cfg = cfg; + /* * The ethernet path has a bunch of hard-coded * bridge function pointers for whatever implements @@ -118,15 +186,45 @@ if_bridge_attach(struct uinet_config_if *cfg) /* * Setup local MAC address from configuration. + * XXX TODO */ + sc->addr[0] = 0x62; + sc->addr[1] = 0x73; + sc->addr[2] = 0x64; + sc->addr[3] = arc4random(); + sc->addr[4] = arc4random(); + sc->addr[5] = arc4random(); /* * Allocate netif context. */ + sc->sc_ifp = if_alloc(IFT_ETHER); + if (sc->sc_ifp == NULL) { + printf("%s: if_alloc failed", __func__); + error = ENOMEM; + goto fail; + } + sc->sc_ifp->if_softc = sc; + + /* + * Setup basic flags and such. + */ + if_initname(sc->sc_ifp, sc->cfg->name, IF_DUNIT_NONE); + sc->sc_ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; /* * Setup netif methods. */ + sc->sc_ifp->if_init = if_bridge_init; + sc->sc_ifp->if_ioctl = if_bridge_ioctl; + sc->sc_ifp->if_transmit = if_bridge_transmit; + sc->sc_ifp->if_qflush = if_bridge_qflush; + + sc->sc_ifp->if_fib = sc->cfg->cdom; + + /* Set local MAC now */ + ether_ifattach(sc->sc_ifp, sc->addr); + sc->sc_ifp->if_capabilities = sc->sc_ifp->if_capenable = 0; /* * Add the given child interfaces to the bridge. @@ -140,6 +238,8 @@ if_bridge_attach(struct uinet_config_if *cfg) fail: /* XXX TODO: deregister child interfaces */ + if (sc && sc->sc_ifp) + if_free(sc->sc_ifp); if (sc) free(sc, M_DEVBUF); return (error); From 0f169d14f9748977861aea5b4d07d62eccf22dc9 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 30 May 2014 23:56:56 +0000 Subject: [PATCH 068/148] Flesh out enough code to add things to the bridge list. Also print out when things are received. 
This doesn't yet forward the frame upward - that's next. --- lib/libuinet/uinet_if_bridge.c | 117 +++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index bcf572d..d97b64d 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -60,11 +60,21 @@ extern struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *); extern int (*bridge_output_p)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); +struct if_bridge_member; + +struct if_bridge_member { + LIST_ENTRY(if_bridge_member) bif_next; + struct ifnet *ifp; +}; + struct if_bridge_softc { struct ifnet *sc_ifp; const struct uinet_config_if *cfg; uint8_t addr[ETHER_ADDR_LEN]; + struct mtx sc_mtx; + LIST_HEAD(, if_bridge_member) sc_iflist; /* member interface list */ + /* XXX TODO: more useful state? */ }; @@ -77,6 +87,11 @@ static int bridge_if_count = 0; static struct mbuf * if_bridge_input(struct ifnet *ifp, struct mbuf *m) { + struct if_bridge_softc *sc; + + sc = ifp->if_bridge; + + printf("%s: m=%p: called\n", __func__, m); /* XXX for now, consume */ m_freem(m); @@ -148,6 +163,96 @@ if_bridge_qflush(struct ifnet *ifp) } +static int +if_bridge_existsm_locked(struct if_bridge_softc *sc, struct ifnet *nifp) +{ + struct if_bridge_member *bif; + + /* XXX assert locked */ + LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { + if (bif->ifp == nifp) + return (1); + } + return (0); +} + +static int +if_bridge_addm(struct if_bridge_softc *sc, const char *ifname) +{ + struct ifnet *nifp = NULL; + struct if_bridge_member *bif; + int error = 0; + + /* Do lookup */ + nifp = ifunit_ref(ifname); + if (nifp == NULL) { + printf("%s: '%s' not found\n", + __func__, + ifname); + return (ENOENT); + } + + mtx_lock(&sc->sc_mtx); + + /* See if this exists. 
Don't double-add */ + if (if_bridge_existsm_locked(sc, nifp)) { + printf("%s: '%s' already is in this bridge\n", + __func__, + ifname); + error = EINVAL; + goto fail; + } + + /* Is it a member of ANY bridge? */ + if (nifp->if_bridge != NULL) { + printf("%s: '%s' is already in _a_ bridge\n", + __func__, + ifname); + error = EBUSY; + goto fail; + } + + /* Allocate bridge-member entry, add to list */ + bif = malloc(sizeof(struct if_bridge_member), M_DEVBUF, M_NOWAIT); + if (bif == NULL) { + printf("%s: failed to malloc", __func__); + error = ENOMEM; + goto fail; + } + + /* Add to list; link back from the ifnet to the parent bridge */ + bif->ifp = nifp; + LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next); + nifp->if_bridge = sc; + + mtx_unlock(&sc->sc_mtx); + + /* Make promisc */ + error = ifpromisc(nifp, 1); + if (error != 0) { + mtx_lock(&sc->sc_mtx); + /* XXX methodize */ + LIST_REMOVE(bif, bif_next); + mtx_unlock(&sc->sc_mtx); + free(bif, M_DEVBUF); + printf("%s: '%s' couldn't make it promisc!\n", __func__, ifname); + error = EINVAL; + goto fail; + } + + printf("%s: added '%s' to bridge\n", __func__, ifname); + + /* Done! */ + return (0); +fail: + mtx_unlock(&sc->sc_mtx); + /* Free reference */ + if (nifp) + if_rele(nifp); + + return (error); +} + int if_bridge_attach(struct uinet_config_if *cfg) { @@ -211,6 +316,7 @@ if_bridge_attach(struct uinet_config_if *cfg) */ if_initname(sc->sc_ifp, sc->cfg->name, IF_DUNIT_NONE); sc->sc_ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + sc->sc_ifp->if_mtu = 1500; /* XXX verify! */ /* * Setup netif methods. @@ -226,13 +332,24 @@ if_bridge_attach(struct uinet_config_if *cfg) ether_ifattach(sc->sc_ifp, sc->addr); sc->sc_ifp->if_capabilities = sc->sc_ifp->if_capenable = 0; + /* Mutex protecting the bridge list */ + mtx_init(&sc->sc_mtx, "if_bridge", NULL, MTX_DEF); + + /* This is our list of child interfaces */ + LIST_INIT(&sc->sc_iflist); + /* * Add the given child interfaces to the bridge. 
+ * (whilst also putting them into promisc mode.) */ + (void) if_bridge_addm(sc, "netmap0"); + (void) if_bridge_addm(sc, "netmap1"); /* * Link uinet cfg state back to the newly setup ifnet. */ + cfg->ifindex = sc->sc_ifp->if_index; + cfg->ifdata = sc; return (0); From 23c1fb819156885f1b8b27b29ba2a6e9de77f805 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sat, 31 May 2014 00:05:48 +0000 Subject: [PATCH 069/148] This is a totally untested path to push the intercepted packet up to the upper layer for processing. It's not yet duplicated and pushed out the other port just yet. I'll do that next. --- lib/libuinet/uinet_if_bridge.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index d97b64d..ca612a3 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -88,14 +88,34 @@ static struct mbuf * if_bridge_input(struct ifnet *ifp, struct mbuf *m) { struct if_bridge_softc *sc; + struct ifnet *bifp; + struct mbuf *mc2; sc = ifp->if_bridge; + bifp = sc->sc_ifp; printf("%s: m=%p: called\n", __func__, m); - /* XXX for now, consume */ - m_freem(m); - return (NULL); + /* + * XXX todo: hook in a packet lookup function to + * let bits of code decide if the packet should be + * bridged. + */ + + /* Duplicate; pass up to the stack */ + mc2 = m_dup(m, M_DONTWAIT); + if (mc2 != NULL) { + /* Keep the layer3 header aligned */ + int i = min(mc2->m_pkthdr.len, max_protohdr); + mc2 = m_copyup(mc2, i, ETHER_ALIGN); + } + if (mc2 != NULL) { + mc2->m_pkthdr.rcvif = bifp; + (*bifp->if_input)(bifp, mc2); + } + + /* Return the original packet for local processing. */ + return (m); } /* From 2502ca7500df67e2262ead8a9d8568e64256112a Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sat, 31 May 2014 02:19:21 +0000 Subject: [PATCH 070/148] Untested hack to at least duplicate the mbuf and send it on its way to the other port(s). 
It's totally untested and totally inefficient but it'll do for prototyping. --- lib/libuinet/uinet_if_bridge.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index ca612a3..8a2c9d8 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -88,6 +88,7 @@ static struct mbuf * if_bridge_input(struct ifnet *ifp, struct mbuf *m) { struct if_bridge_softc *sc; + struct if_bridge_member *bif; struct ifnet *bifp; struct mbuf *mc2; @@ -102,8 +103,35 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) * bridged. */ + /* Duplicate; pass to the other port */ + /* + * XXX so I'm not grabbing a reference when transmitting; + * I'm just hoping that the interface isn't removed whilst + * actively transmitting. + */ + + /* + * XXX TODO: don't hold the lock across sending to the two + * (or more) ports - it's highly inefficient and effectively + * serialises transmit. We'll have to use the bridge XLOCK/ + * LOCK2REF/etc stuff to do this without holding a lock. 
+ */ + mtx_lock(&sc->sc_mtx); + LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { + if (bif->ifp == ifp) + continue; + mc2 = m_dup(m, M_DONTWAIT); + /* XXX count failure */ + if (mc2 == NULL) + continue; + /* XXX count failure */ + (void) bif->ifp->if_transmit(bif->ifp, mc2); + } + mtx_unlock(&sc->sc_mtx); + /* Duplicate; pass up to the stack */ mc2 = m_dup(m, M_DONTWAIT); + /* XXX count failure */ if (mc2 != NULL) { /* Keep the layer3 header aligned */ int i = min(mc2->m_pkthdr.len, max_protohdr); @@ -125,6 +153,7 @@ if_bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, struct rtentry *rt) { + printf("%s: m=%p; called\n", __func__, m); /* For now, we consume the frame */ m_freem(m); return (0); @@ -172,6 +201,8 @@ static int if_bridge_transmit(struct ifnet *ifp, struct mbuf *m) { + printf("%s: m=%p; called\n", __func__, m); + /* XXX for now, free */ m_freem(m); return (ENOBUFS); From 291532aec100b4962c541456e42842870cb933d1 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sat, 31 May 2014 04:38:38 +0000 Subject: [PATCH 071/148] Now that this is working, we don't need it. --- lib/libuinet/uinet_if_bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index 8a2c9d8..1f49d3c 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -95,7 +95,7 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) sc = ifp->if_bridge; bifp = sc->sc_ifp; - printf("%s: m=%p: called\n", __func__, m); +// printf("%s: m=%p: called\n", __func__, m); /* * XXX todo: hook in a packet lookup function to From f75c1cc1a5a02d5bc52c60cb566b9a75079f2575 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sat, 31 May 2014 04:38:53 +0000 Subject: [PATCH 072/148] now that I'm tinkering with bridging, the pfil hook ends up doing IP interception on both the incoming physical interface (netmap0/netmap1 depending upon direction) and bridge0. 
Now, to add annoyance to annoyance, IFF_PROMISCINET is only set on the interface we are doing promiscuous passive ip interception on. In this instance it's bridge0. So the frames from netmapX don't have l2 information. So, extend the API a little more so I can tell it which ifnet to sniff for. Later on I'll re-extend it to pass everything up and filter on the userland code side. Right now though it's hard to do as the ifnet doesn't have access to the cfg bits in any generic way - ifnet->ifp_softc points to the interface specific type, not the generic config section. Anyway. This'll do for now. --- lib/libuinet/api_include/uinet_api.h | 2 +- lib/libuinet/uinet_api.c | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/lib/libuinet/api_include/uinet_api.h b/lib/libuinet/api_include/uinet_api.h index a295c9c..26fd6a1 100644 --- a/lib/libuinet/api_include/uinet_api.h +++ b/lib/libuinet/api_include/uinet_api.h @@ -122,7 +122,7 @@ uinet_synf_deferral_t uinet_synfilter_deferral_alloc(struct uinet_socket *so, ui int uinet_synfilter_deferral_deliver(struct uinet_socket *so, uinet_synf_deferral_t deferral, int decision); void uinet_synfilter_deferral_free(uinet_synf_deferral_t deferral); uinet_api_synfilter_cookie_t uinet_synfilter_deferral_get_cookie(uinet_synf_deferral_t deferral); -int uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg); +int uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg, const char *ifname); const char * uinet_mbuf_data(const struct uinet_mbuf *); size_t uinet_mbuf_len(const struct uinet_mbuf *); diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index 64f6cb9..6be758d 100644 --- a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -1224,6 +1224,7 @@ uinet_sysctl(int *name, u_int namelen, void *oldp, size_t *oldplen, */ static uinet_pfil_cb_t g_uinet_pfil_cb = NULL; static void * g_uinet_pfil_cbdata = NULL; +static struct ifnet *g_uinet_pfil_ifp = NULL; /* * Hook for processing IPv4 frames.
@@ -1241,6 +1242,16 @@ uinet_pfil_in_hook_v4(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, if (g_uinet_pfil_cb == NULL) return (0); + /* + * Check if the ifp matches the ifp name we're interested in. + * When doing bridging we will see incoming frames for the + * physical incoming interface (eg netmap0, netmap1) and + * the bridge interface (bridge0). We may actually not want + * that. + */ + if (g_uinet_pfil_ifp && (g_uinet_pfil_ifp != ifp)) + return (0); + /* * See if there's L2 information for this frame. */ @@ -1284,7 +1295,7 @@ uinet_pfil_in_hook_v4(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, * XXX test hack to play with pfil */ int -uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg) +uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg, const char *ifname) { int error; VNET_ITERATOR_DECL(vnet_iter); @@ -1298,6 +1309,11 @@ uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg) g_uinet_pfil_cb = cb; g_uinet_pfil_cbdata = arg; + /* Take a reference to the ifnet if we're interested in it */ + if (ifname != NULL) { + g_uinet_pfil_ifp = ifunit_ref(ifname); + } + VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); From 6eb63da1c753be903327354fd9f907390fceea08 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sat, 31 May 2014 06:13:09 +0000 Subject: [PATCH 073/148] Add in a basic config string parser. It handles the child interfaces and the MAC address in the config string from libuinet. 
--- lib/libuinet/uinet_if_bridge.c | 95 +++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 19 deletions(-) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index 1f49d3c..9cfb155 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -70,7 +70,7 @@ struct if_bridge_member { struct if_bridge_softc { struct ifnet *sc_ifp; const struct uinet_config_if *cfg; - uint8_t addr[ETHER_ADDR_LEN]; + struct ether_addr sc_addr; struct mtx sc_mtx; LIST_HEAD(, if_bridge_member) sc_iflist; /* member interface list */ @@ -304,11 +304,31 @@ if_bridge_addm(struct if_bridge_softc *sc, const char *ifname) return (error); } + +static struct ether_addr * +i_ether_aton_r(const char *a, struct ether_addr *e) +{ + int i; + unsigned int o0, o1, o2, o3, o4, o5; + + i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5); + if (i != 6) + return (NULL); + e->octet[0]=o0; + e->octet[1]=o1; + e->octet[2]=o2; + e->octet[3]=o3; + e->octet[4]=o4; + e->octet[5]=o5; + return (e); +} + int if_bridge_attach(struct uinet_config_if *cfg) { struct if_bridge_softc *sc = NULL; int error = 0; + char *cstr = NULL, *s; if (NULL == cfg->configstr) { error = EINVAL; @@ -316,6 +336,12 @@ if_bridge_attach(struct uinet_config_if *cfg) } printf("%s: configstr=%s\n", __func__, cfg->configstr); + cstr = strdup(cfg->configstr, M_TEMP); + if (cstr == NULL) { + printf("%s: strdup failed\n", __func__); + error = ENOMEM; + goto fail; + } sc = malloc(sizeof(struct if_bridge_softc), M_DEVBUF, M_WAITOK); if (sc == NULL) { @@ -341,15 +367,14 @@ if_bridge_attach(struct uinet_config_if *cfg) bridge_output_p = if_bridge_output; /* - * Setup local MAC address from configuration. - * XXX TODO + * Setup initial local MAC address - random. 
*/ - sc->addr[0] = 0x62; - sc->addr[1] = 0x73; - sc->addr[2] = 0x64; - sc->addr[3] = arc4random(); - sc->addr[4] = arc4random(); - sc->addr[5] = arc4random(); + sc->sc_addr.octet[0] = 0x62; + sc->sc_addr.octet[1] = 0x73; + sc->sc_addr.octet[2] = 0x64; + sc->sc_addr.octet[3] = arc4random(); + sc->sc_addr.octet[4] = arc4random(); + sc->sc_addr.octet[5] = arc4random(); /* * Allocate netif context. @@ -379,22 +404,52 @@ if_bridge_attach(struct uinet_config_if *cfg) sc->sc_ifp->if_fib = sc->cfg->cdom; - /* Set local MAC now */ - ether_ifattach(sc->sc_ifp, sc->addr); - sc->sc_ifp->if_capabilities = sc->sc_ifp->if_capenable = 0; - /* Mutex protecting the bridge list */ mtx_init(&sc->sc_mtx, "if_bridge", NULL, MTX_DEF); /* This is our list of child interfaces */ LIST_INIT(&sc->sc_iflist); - /* - * Add the given child interfaces to the bridge. - * (whilst also putting them into promisc mode.) - */ - (void) if_bridge_addm(sc, "netmap0"); - (void) if_bridge_addm(sc, "netmap1"); + /* Parse the config string */ + while ( (s = strsep(&cstr, ",")) != NULL) { + char *ss, *a, *v; + struct ether_addr ea; + + ss = strdup(s, M_TEMP); + if (ss == NULL) { + printf("%s: strdup: failed\n", __func__); + error = ENOMEM; + goto fail; + } + a = strsep(&ss, "="); + v = strsep(&ss, "="); + if (a == NULL || v == NULL) { + printf("%s: invalid config chunk '%s'\n", __func__, s); + error = ENOMEM; + free(ss, M_TEMP); + goto fail; + } + + /* Now, handle the various options */ + if (strcmp(a, "if") == 0) { + /* XXX error check */ + (void) if_bridge_addm(sc, v); + } else if (strcmp(a, "mac") == 0) { + /* XXX TODO: no ether_aton_r() in the kernel */ + if (i_ether_aton_r(v, &ea) != NULL) { + sc->sc_addr = ea; + } + } else { + printf("%5s; unknown config option '%s'\n", __func__, a); + free(ss, M_TEMP); + goto fail; + } + free(ss, M_TEMP); + } + + /* Set local MAC now */ + ether_ifattach(sc->sc_ifp, sc->sc_addr.octet); + sc->sc_ifp->if_capabilities = sc->sc_ifp->if_capenable = 0; /* * Link uinet cfg 
state back to the newly setup ifnet. @@ -405,6 +460,8 @@ if_bridge_attach(struct uinet_config_if *cfg) return (0); fail: + if (cstr) + free(cstr, M_TEMP); /* XXX TODO: deregister child interfaces */ if (sc && sc->sc_ifp) if_free(sc->sc_ifp); From 1498a3cc536c170ab5dda82a5fe99cceb05d37ae Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sat, 31 May 2014 06:28:01 +0000 Subject: [PATCH 074/148] Remove this; it's not required for our platforms and it's just fragmenting the mbufs. Ugh. --- lib/libuinet/uinet_if_bridge.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index 9cfb155..f293f34 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -132,11 +132,6 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) /* Duplicate; pass up to the stack */ mc2 = m_dup(m, M_DONTWAIT); /* XXX count failure */ - if (mc2 != NULL) { - /* Keep the layer3 header aligned */ - int i = min(mc2->m_pkthdr.len, max_protohdr); - mc2 = m_copyup(mc2, i, ETHER_ALIGN); - } if (mc2 != NULL) { mc2->m_pkthdr.rcvif = bifp; (*bifp->if_input)(bifp, mc2); From dfcd95867c6df665f417e1692ade611a1ff2d33f Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 2 Jun 2014 19:59:00 +0000 Subject: [PATCH 075/148] Add the ifnet to uinet_config_if. That way it can be used by other code wishing to do things like directly inject packets. 
--- lib/libuinet/uinet_config_internal.h | 3 ++- lib/libuinet/uinet_if_bridge.c | 1 + lib/libuinet/uinet_if_netmap.c | 1 + lib/libuinet/uinet_if_pcap.c | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_config_internal.h b/lib/libuinet/uinet_config_internal.h index d5eb89d..1d54faf 100644 --- a/lib/libuinet/uinet_config_internal.h +++ b/lib/libuinet/uinet_config_internal.h @@ -44,7 +44,8 @@ struct uinet_config_if { int cpu; unsigned int cdom; unsigned int ifindex; - void *ifdata; + void *ifdata; /* softc */ + void *ifp; /* ifnet */ }; diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index f293f34..b1ae018 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -451,6 +451,7 @@ if_bridge_attach(struct uinet_config_if *cfg) */ cfg->ifindex = sc->sc_ifp->if_index; cfg->ifdata = sc; + cfg->ifp = sc->sc_ifp; return (0); diff --git a/lib/libuinet/uinet_if_netmap.c b/lib/libuinet/uinet_if_netmap.c index 09a89a9..13b0005 100644 --- a/lib/libuinet/uinet_if_netmap.c +++ b/lib/libuinet/uinet_if_netmap.c @@ -387,6 +387,7 @@ if_netmap_attach(struct uinet_config_if *cfg) cfg->ifindex = sc->ifp->if_index; cfg->ifdata = sc; + cfg->ifp = sc->ifp; return (0); diff --git a/lib/libuinet/uinet_if_pcap.c b/lib/libuinet/uinet_if_pcap.c index cf9e3ea..acd55c9 100644 --- a/lib/libuinet/uinet_if_pcap.c +++ b/lib/libuinet/uinet_if_pcap.c @@ -160,6 +160,7 @@ if_pcap_attach(struct uinet_config_if *cfg) cfg->ifindex = sc->ifp->if_index; cfg->ifdata = sc; + cfg->ifp = sc->ifp; return (0); From 25fd466fa539a2605468e83a8cee52b1423451f4 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 2 Jun 2014 19:59:19 +0000 Subject: [PATCH 076/148] Implement a hacky and not-even-yet-tested transmit method. 
--- lib/libuinet/api_include/uinet_api.h | 3 +++ lib/libuinet/uinet_api.c | 32 ++++++++++++++++++++++++++++ lib/libuinet/uinet_api.symlist | 1 + 3 files changed, 36 insertions(+) diff --git a/lib/libuinet/api_include/uinet_api.h b/lib/libuinet/api_include/uinet_api.h index 26fd6a1..d7995f2 100644 --- a/lib/libuinet/api_include/uinet_api.h +++ b/lib/libuinet/api_include/uinet_api.h @@ -126,6 +126,9 @@ int uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg, const char *ifname); const char * uinet_mbuf_data(const struct uinet_mbuf *); size_t uinet_mbuf_len(const struct uinet_mbuf *); +/* XXX ew */ +struct uinet_config_if; +int uinet_if_xmit(struct uinet_config_if *cif, const char *buf, int len); #ifdef __cplusplus } diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index 6be758d..72c32fd 100644 --- a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -1349,3 +1349,35 @@ uinet_mbuf_len(const struct uinet_mbuf *m) return (mb->m_len); } + +/* + * Queue this buffer for transmit. + * + * The transmit path will take a copy of the data; it won't reference it. + * + * Returns 0 on OK, non-zero on error. + * + * Note: this reaches into kernel code, so you need to have set up all + * the possible transmit threads as uinet threads, or this call will + * fail. + */ +int +uinet_if_xmit(struct uinet_config_if *cif, const char *buf, int len) +{ + struct mbuf *m; + struct ifnet *ifp; + + /* Create mbuf; populate it with the given buffer */ + m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); + if (m == NULL) + return (ENOBUFS); + + if (! 
m_append(m, (size_t) len, (void *) buf)) { + m_freem(m); + return (ENOMEM); + } + + /* Call if_transmit() on the given interface */ + ifp = cif->ifp; + return ((ifp->if_transmit)(ifp, m)); +} diff --git a/lib/libuinet/uinet_api.symlist b/lib/libuinet/uinet_api.symlist index bff5d0d..81deafb 100644 --- a/lib/libuinet/uinet_api.symlist +++ b/lib/libuinet/uinet_api.symlist @@ -75,3 +75,4 @@ uinet_host_sysctl_listener_thread uinet_register_pfil_in uinet_mbuf_data uinet_mbuf_len +uinet_if_xmit From 216bb2a543be50f5d9a1a93b0be186bfa9c41559 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 2 Jun 2014 21:32:07 +0000 Subject: [PATCH 077/148] Modify the API to have an inside and outside interface tag. This lets us (eventually) ensure we're matching packets in the correct direction. --- lib/libuinet/uinet_if_bridge.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index b1ae018..9a4df95 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -65,6 +65,8 @@ struct if_bridge_member; struct if_bridge_member { LIST_ENTRY(if_bridge_member) bif_next; struct ifnet *ifp; + int is_inside; + int is_outside; }; struct if_bridge_softc { @@ -110,13 +112,18 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) * actively transmitting. */ + /* + * XXX TODO: send packets out the right interface(s). + * ie, from is_input? send to only is_output. + */ + /* * XXX TODO: don't hold the lock across sending to the two * (or more) ports - it's highly inefficient and effectively * serialises transmit. We'll have to use the bridge XLOCK/ * LOCK2REF/etc stuff to do this without holding a lock. 
*/ - mtx_lock(&sc->sc_mtx); + mtx_lock(&sc->sc_mtx); LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->ifp == ifp) continue; @@ -223,7 +230,7 @@ if_bridge_existsm_locked(struct if_bridge_softc *sc, struct ifnet *nifp) } static int -if_bridge_addm(struct if_bridge_softc *sc, const char *ifname) +if_bridge_addm(struct if_bridge_softc *sc, const char *ifname, int isin) { struct ifnet *nifp = NULL; struct if_bridge_member *bif; @@ -286,7 +293,12 @@ if_bridge_addm(struct if_bridge_softc *sc, const char *ifname) goto fail; } - printf("%s: added '%s' to bridge\n", __func__, ifname); + printf("%s: added '%s' to bridge (dir %s)\n", __func__, ifname, + (isin ? "input" : "output")); + if (isin) + bif->is_inside = 1; + else + bif->is_outside = 1; /* Done! */ return (0); @@ -426,9 +438,12 @@ if_bridge_attach(struct uinet_config_if *cfg) } /* Now, handle the various options */ - if (strcmp(a, "if") == 0) { + if (strcmp(a, "ifin") == 0) { + /* XXX error check */ + (void) if_bridge_addm(sc, v, 1); + } else if (strcmp(a, "ifout") == 0) { /* XXX error check */ - (void) if_bridge_addm(sc, v); + (void) if_bridge_addm(sc, v, 0); } else if (strcmp(a, "mac") == 0) { /* XXX TODO: no ether_aton_r() in the kernel */ if (i_ether_aton_r(v, &ea) != NULL) { From ff7bcc708194d60cd78d1f7ec642ba099658e6c1 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 9 Jun 2014 02:21:17 +0000 Subject: [PATCH 078/148] Turns out that we don't get access to the uinet_config_if from outside of the libuinet code itself - so just use the cookie provided by uinet_ifcreate(). Still mostly untested. 
--- lib/libuinet/api_include/uinet_api.h | 4 +--- lib/libuinet/uinet_api.c | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/libuinet/api_include/uinet_api.h b/lib/libuinet/api_include/uinet_api.h index d7995f2..569d430 100644 --- a/lib/libuinet/api_include/uinet_api.h +++ b/lib/libuinet/api_include/uinet_api.h @@ -126,9 +126,7 @@ int uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg, const char *ifname); const char * uinet_mbuf_data(const struct uinet_mbuf *); size_t uinet_mbuf_len(const struct uinet_mbuf *); -/* XXX ew */ -struct uinet_config_if; -int uinet_if_xmit(struct uinet_config_if *cif, const char *buf, int len); +int uinet_if_xmit(void *cookie, const char *buf, int len); #ifdef __cplusplus } diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index 72c32fd..2bd9bf8 100644 --- a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -1362,8 +1362,9 @@ uinet_mbuf_len(const struct uinet_mbuf *m) * fail. */ int -uinet_if_xmit(struct uinet_config_if *cif, const char *buf, int len) +uinet_if_xmit(void *cookie, const char *buf, int len) { + struct uinet_config_if *cif = cookie; struct mbuf *m; struct ifnet *ifp; From c2334cc24e11a46832845029a246560b1092bdd0 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 4 Jul 2014 15:15:17 -0700 Subject: [PATCH 079/148] Force this to make. 
--- bin/passive/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/passive/Makefile b/bin/passive/Makefile index 2a940fe..92345dd 100644 --- a/bin/passive/Makefile +++ b/bin/passive/Makefile @@ -8,7 +8,7 @@ SRCS=passive.c UINET_LIBS=uinet uinetnv CFLAGS= -I${TOPDIR}/lib/libev -I${TOPDIR}/lib/libnv -LDADD= ${TOPDIR}/lib/libev/.libs/libev.a -lm -lpcap +LDADD= -L${UINET_DESTDIR}/lib/ ${TOPDIR}/lib/libev/.libs/libev.a -lm -lpcap ifndef NO_EXTRACT CFLAGS+= -I${TOPDIR}/lib/libhttp_parser -DENABLE_EXTRACT From 8b6a4ab41b1c6b3b35dde6c51158b4abcc799174 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 4 Jul 2014 15:24:50 -0700 Subject: [PATCH 080/148] Add an initial hack to implement multi-span support. This interface implements a very simple aggregator. --- lib/libuinet/Makefile | 3 +- lib/libuinet/api_include/uinet_config.h | 3 +- lib/libuinet/uinet_config.c | 4 + lib/libuinet/uinet_if_span.c | 444 ++++++++++++++++++++++++ lib/libuinet/uinet_if_span.h | 7 + 5 files changed, 459 insertions(+), 2 deletions(-) create mode 100644 lib/libuinet/uinet_if_span.c create mode 100644 lib/libuinet/uinet_if_span.h diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index ce8cc89..37838cf 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -147,7 +147,8 @@ UINET_SRCS+= \ uinet_vm_kern.c \ uinet_vm_meter.c \ uinet_vm_object.c \ - uinet_if_bridge.c + uinet_if_bridge.c \ + uinet_if_span.c ifneq (${HOST_OS},Darwin) UINET_SRCS+= uinet_if_netmap.c diff --git a/lib/libuinet/api_include/uinet_config.h b/lib/libuinet/api_include/uinet_config.h index 2196376..7b7612b 100644 --- a/lib/libuinet/api_include/uinet_config.h +++ b/lib/libuinet/api_include/uinet_config.h @@ -44,7 +44,8 @@ typedef enum { UINET_IFTYPE_LOOPBACK, UINET_IFTYPE_NETMAP, UINET_IFTYPE_PCAP, - UINET_IFTYPE_BRIDGE + UINET_IFTYPE_BRIDGE, + UINET_IFTYPE_SPAN } uinet_iftype_t; diff --git a/lib/libuinet/uinet_config.c b/lib/libuinet/uinet_config.c index 67e45b1..0d9298b 100644 --- 
a/lib/libuinet/uinet_config.c +++ b/lib/libuinet/uinet_config.c @@ -35,6 +35,7 @@ #include "uinet_if_netmap.h" #include "uinet_if_pcap.h" #include "uinet_if_bridge.h" +#include "uinet_if_span.h" static TAILQ_HEAD(config_head, uinet_config_if) if_conf = TAILQ_HEAD_INITIALIZER(if_conf); @@ -140,6 +141,9 @@ uinet_ifcreate(uinet_iftype_t type, const char *configstr, const char *alias, case UINET_IFTYPE_BRIDGE: error = if_bridge_attach(cfg); break; + case UINET_IFTYPE_SPAN: + error = if_span_attach(cfg); + break; default: printf("Error attaching interface with config %s: unknown interface type %d\n", cfg->configstr, cfg->type); error = ENXIO; diff --git a/lib/libuinet/uinet_if_span.c b/lib/libuinet/uinet_if_span.c new file mode 100644 index 0000000..9ceb498 --- /dev/null +++ b/lib/libuinet/uinet_if_span.c @@ -0,0 +1,444 @@ +/* + * Copyright (c) 2014 Adrian Chadd, Norse Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "uinet_config_internal.h" +#include "uinet_host_interface.h" +#include "uinet_if_span.h" + +/* + * This implements a multi-port span interface. + * + * It's design to be a read-only span interface where multiple ports + * are seeing different parts of the traffic. So this combines all of + * the traffic from various interfaces. + */ + +extern struct mbuf *(*span_input_p)(struct ifnet *, struct mbuf *); +extern int (*span_output_p)(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); + +struct if_span_member; + +struct if_span_member { + LIST_ENTRY(if_span_member) bif_next; + struct ifnet *ifp; +}; + +struct if_span_softc { + struct ifnet *sc_ifp; + const struct uinet_config_if *cfg; + struct ether_addr sc_addr; + + struct mtx sc_mtx; + LIST_HEAD(, if_span_member) sc_iflist; /* member interface list */ + + /* XXX TODO: more useful state? */ +}; + +static int span_if_count = 0; + +/* + * Process an incoming frame. This gets called + * from the child device ether_input path. 
+ */ +static struct mbuf * +if_span_input(struct ifnet *ifp, struct mbuf *m) +{ + struct if_span_softc *sc; + struct ifnet *bifp; + struct mbuf *mc2; + + sc = ifp->if_bridge; + bifp = sc->sc_ifp; + +// printf("%s: m=%p: called\n", __func__, m); + + /* Duplicate; pass up to the stack */ + mc2 = m_dup(m, M_DONTWAIT); + /* XXX count failure */ + if (mc2 != NULL) { + mc2->m_pkthdr.rcvif = bifp; + (*bifp->if_input)(bifp, mc2); + } + + /* Return the original packet for local processing. */ + return (m); +} + +/* + */ +static int +if_span_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, + struct rtentry *rt) +{ + + printf("%s: m=%p; called\n", __func__, m); + /* For now, we consume the frame */ + m_freem(m); + return (0); +} + +static void +if_span_init(void *arg) +{ + struct if_span_softc *sc = arg; + struct ifnet *ifp = sc->sc_ifp; + + ifp->if_drv_flags = IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; +} + +static void +if_span_stop(struct if_span_softc *sc) +{ + struct ifnet *ifp = sc->sc_ifp; + + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING|IFF_DRV_OACTIVE); +} + +static int +if_span_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + int error = 0; + struct if_span_softc *sc = ifp->if_softc; + + switch (cmd) { + case SIOCSIFFLAGS: + if (ifp->if_flags & IFF_UP) + if_span_init(sc); + else if (ifp->if_drv_flags & IFF_DRV_RUNNING) + if_span_stop(sc); + break; + default: + error = ether_ioctl(ifp, cmd, data); + break; + } + return (error); +} + +static int +if_span_transmit(struct ifnet *ifp, struct mbuf *m) +{ + + printf("%s: m=%p; called\n", __func__, m); + + /* XXX for now, free */ + m_freem(m); + return (ENOBUFS); +} + +static void +if_span_qflush(struct ifnet *ifp) +{ + +} + +static int +if_span_existsm_locked(struct if_span_softc *sc, struct ifnet *nifp) +{ + struct if_span_member *bif; + + /* XXX assert locked */ + LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { + if (bif->ifp == nifp) + return (1); + } + return (0); +} + +static int 
+if_span_addm(struct if_span_softc *sc, const char *ifname) +{ + struct ifnet *nifp = NULL; + struct if_span_member *bif; + int error = 0; + + /* Do lookup */ + nifp = ifunit_ref(ifname); + if (nifp == NULL) { + printf("%s: '%s' not found\n", + __func__, + ifname); + return (ENOENT); + } + + mtx_lock(&sc->sc_mtx); + + /* See if this exists. Don't double-add */ + if (if_span_existsm_locked(sc, nifp)) { + printf("%s: '%s' already is in this span\n", + __func__, + ifname); + error = EINVAL; + goto fail; + } + + /* Is it a member of ANY span/bridge? */ + if (nifp->if_bridge != NULL) { + printf("%s: '%s' is already in _a_ span\n", + __func__, + ifname); + error = EBUSY; + goto fail; + } + + /* Allocate span-member entry, add to list */ + bif = malloc(sizeof(struct if_span_member), M_DEVBUF, M_NOWAIT); + if (bif == NULL) { + printf("%s: failed to malloc", __func__); + error = ENOMEM; + goto fail; + } + + /* Add to list; link back from the ifnet to the parent span */ + bif->ifp = nifp; + LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next); + nifp->if_bridge = sc; + + mtx_unlock(&sc->sc_mtx); + + /* Make promisc */ + error = ifpromisc(nifp, 1); + if (error != 0) { + mtx_lock(&sc->sc_mtx); + /* XXX methodize */ + LIST_REMOVE(bif, bif_next); + mtx_unlock(&sc->sc_mtx); + free(bif, M_DEVBUF); + printf("%s: '%s' couldn't make it promisc!\n", __func__, ifname); + error = EINVAL; + goto fail; + } + + printf("%s: added '%s' to span\n", + __func__, + ifname); + + /* Done! 
*/ + return (0); +fail: + mtx_unlock(&sc->sc_mtx); + /* Free reference */ + if (nifp) + if_rele(nifp); + + return (error); +} + + +static struct ether_addr * +i_ether_aton_r(const char *a, struct ether_addr *e) +{ + int i; + unsigned int o0, o1, o2, o3, o4, o5; + + i = sscanf(a, "%x:%x:%x:%x:%x:%x", &o0, &o1, &o2, &o3, &o4, &o5); + if (i != 6) + return (NULL); + e->octet[0]=o0; + e->octet[1]=o1; + e->octet[2]=o2; + e->octet[3]=o3; + e->octet[4]=o4; + e->octet[5]=o5; + return (e); +} + +int +if_span_attach(struct uinet_config_if *cfg) +{ + struct if_span_softc *sc = NULL; + int error = 0; + char *cstr = NULL, *s; + + if (NULL == cfg->configstr) { + error = EINVAL; + goto fail; + } + + printf("%s: configstr=%s\n", __func__, cfg->configstr); + cstr = strdup(cfg->configstr, M_TEMP); + if (cstr == NULL) { + printf("%s: strdup failed\n", __func__); + error = ENOMEM; + goto fail; + } + + sc = malloc(sizeof(struct if_span_softc), M_DEVBUF, M_WAITOK); + if (sc == NULL) { + printf("%s: malloc failed\n", __func__); + error = ENOMEM; + goto fail; + } + + /* Set the interface name */ + snprintf(cfg->name, sizeof(cfg->name), "span%u", span_if_count); + span_if_count++; + + sc->cfg = cfg; + + /* + * The ethernet path has a bunch of hard-coded + * span function pointers for whatever implements + * bridging. It's a hack, but in order to get + * frames and link status changes, we need to + * also do the same. + */ + brige_input_p = if_span_input; + bridge_output_p = if_span_output; + + /* + * Setup initial local MAC address - random. + */ + sc->sc_addr.octet[0] = 0x62; + sc->sc_addr.octet[1] = 0x73; + sc->sc_addr.octet[2] = 0x64; + sc->sc_addr.octet[3] = arc4random(); + sc->sc_addr.octet[4] = arc4random(); + sc->sc_addr.octet[5] = arc4random(); + + /* + * Allocate netif context. 
+ */ + sc->sc_ifp = if_alloc(IFT_ETHER); + if (sc->sc_ifp == NULL) { + printf("%s: if_alloc failed", __func__); + error = ENOMEM; + goto fail; + } + sc->sc_ifp->if_softc = sc; + + /* + * Setup basic flags and such. + */ + if_initname(sc->sc_ifp, sc->cfg->name, IF_DUNIT_NONE); + sc->sc_ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + sc->sc_ifp->if_mtu = 1500; /* XXX verify! */ + + /* + * Setup netif methods. + */ + sc->sc_ifp->if_init = if_span_init; + sc->sc_ifp->if_ioctl = if_span_ioctl; + sc->sc_ifp->if_transmit = if_span_transmit; + sc->sc_ifp->if_qflush = if_span_qflush; + + sc->sc_ifp->if_fib = sc->cfg->cdom; + + /* Mutex protecting the span list */ + mtx_init(&sc->sc_mtx, "if_span", NULL, MTX_DEF); + + /* This is our list of child interfaces */ + LIST_INIT(&sc->sc_iflist); + + /* Parse the config string */ + while ( (s = strsep(&cstr, ",")) != NULL) { + char *ss, *a, *v; + struct ether_addr ea; + + ss = strdup(s, M_TEMP); + if (ss == NULL) { + printf("%s: strdup: failed\n", __func__); + error = ENOMEM; + goto fail; + } + a = strsep(&ss, "="); + v = strsep(&ss, "="); + if (a == NULL || v == NULL) { + printf("%s: invalid config chunk '%s'\n", __func__, s); + error = ENOMEM; + free(ss, M_TEMP); + goto fail; + } + + /* Now, handle the various options */ + if (strcmp(a, "if") == 0) { + /* XXX error check */ + (void) if_span_addm(sc, v); + } else if (strcmp(a, "mac") == 0) { + /* XXX TODO: no ether_aton_r() in the kernel */ + if (i_ether_aton_r(v, &ea) != NULL) { + sc->sc_addr = ea; + } + } else { + printf("%5s; unknown config option '%s'\n", __func__, a); + free(ss, M_TEMP); + goto fail; + } + free(ss, M_TEMP); + } + + /* Set local MAC now */ + ether_ifattach(sc->sc_ifp, sc->sc_addr.octet); + sc->sc_ifp->if_capabilities = sc->sc_ifp->if_capenable = 0; + + /* + * Link uinet cfg state back to the newly setup ifnet. 
+ */ + cfg->ifindex = sc->sc_ifp->if_index; + cfg->ifdata = sc; + cfg->ifp = sc->sc_ifp; + + return (0); + +fail: + if (cstr) + free(cstr, M_TEMP); + /* XXX TODO: deregister child interfaces */ + if (sc && sc->sc_ifp) + if_free(sc->sc_ifp); + if (sc) + free(sc, M_DEVBUF); + return (error); + +} + +int +if_span_detach(struct uinet_config_if *cfg) +{ + /* XXX TODO */ + return (0); +} diff --git a/lib/libuinet/uinet_if_span.h b/lib/libuinet/uinet_if_span.h new file mode 100644 index 0000000..46d53d1 --- /dev/null +++ b/lib/libuinet/uinet_if_span.h @@ -0,0 +1,7 @@ +#ifndef __UINET_IF_SPAN_H__ +#define __UINET_IF_SPAN_H__ + +extern int if_span_attach(struct uinet_config_if *cfg); +extern int if_span_detach(struct uinet_config_if *cfg); + +#endif /* __UINET_IF_SPAN_H__ */ From bab05ca99d97a7e3efa54abd861a82e7cae350d1 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 4 Jul 2014 15:32:02 -0700 Subject: [PATCH 081/148] Oops - make it compile. --- lib/libuinet/uinet_if_span.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/libuinet/uinet_if_span.c b/lib/libuinet/uinet_if_span.c index 9ceb498..c2cb60d 100644 --- a/lib/libuinet/uinet_if_span.c +++ b/lib/libuinet/uinet_if_span.c @@ -56,8 +56,8 @@ * the traffic from various interfaces. */ -extern struct mbuf *(*span_input_p)(struct ifnet *, struct mbuf *); -extern int (*span_output_p)(struct ifnet *, struct mbuf *, +extern struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *); +extern int (*bridge_output_p)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); struct if_span_member; @@ -327,7 +327,7 @@ if_span_attach(struct uinet_config_if *cfg) * frames and link status changes, we need to * also do the same. 
*/ - brige_input_p = if_span_input; + bridge_input_p = if_span_input; bridge_output_p = if_span_output; /* From 3e2e79df329fb4e82302b61e72994c43b84cdf9a Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Jul 2014 05:33:36 -0700 Subject: [PATCH 082/148] Disable zero-copy receive for now. --- lib/libuinet/uinet_if_netmap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_if_netmap.c b/lib/libuinet/uinet_if_netmap.c index c6dc0c5..1426a0a 100644 --- a/lib/libuinet/uinet_if_netmap.c +++ b/lib/libuinet/uinet_if_netmap.c @@ -74,7 +74,9 @@ * Setting IF_NETMAP_RXRING_ZCOPY_FRAC_NUM to zero will disable zero copy * receive. */ -#define IF_NETMAP_RXRING_ZCOPY_FRAC_NUM 1 +//#define IF_NETMAP_RXRING_ZCOPY_FRAC_NUM 1 +/* Disable zero-copy for now */ +#define IF_NETMAP_RXRING_ZCOPY_FRAC_NUM 0 #define IF_NETMAP_RXRING_ZCOPY_FRAC_DEN 2 #define IF_NETMAP_THREAD_STOP_CHECK_MS 200 From af5ba6ea50603c8151aa58dc1e37445b7c009a8f Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Jul 2014 08:42:36 -0700 Subject: [PATCH 083/148] Make compile! 
--- bin/sysctl/Makefile | 4 ++-- bin/sysctl/u_sysctl.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/sysctl/Makefile b/bin/sysctl/Makefile index 43aefeb..b06f101 100644 --- a/bin/sysctl/Makefile +++ b/bin/sysctl/Makefile @@ -6,8 +6,8 @@ SRCS=sysctl.c u_sysctl.c UINET_LIBS=uinet -CFLAGS= -I${TOPDIR}/lib/libev -I../passive/ -I${TOPDIR}/lib/libnv -LDADD= ${TOPDIR}/lib/libev/.libs/libev.a ${TOPDIR}/lib/libnv/libnv.a -lm -lpcap +CFLAGS= -I${TOPDIR}/lib/libev -I../passive/ -I${TOPDIR}/lib/libuinetnv +LDADD= ${TOPDIR}/lib/libev/.libs/libev.a ${TOPDIR}/lib/libuinetnv/libuinetnv.a -lm -lpcap DEBUG_FLAGS=-g -O0 diff --git a/bin/sysctl/u_sysctl.c b/bin/sysctl/u_sysctl.c index 7dbe0da..645c534 100644 --- a/bin/sysctl/u_sysctl.c +++ b/bin/sysctl/u_sysctl.c @@ -16,8 +16,8 @@ #include -#include "sysctl_api.h" -#include "nv.h" +#include "uinet_host_sysctl_api.h" +#include "uinet_nv.h" /* * XXX TODO: From b79fcf8bbbcbe1e06fb42b339399d98e2211b023 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Jul 2014 08:46:19 -0700 Subject: [PATCH 084/148] Add libmemstat from freebsd-9 into the tree: * comment out kvm code, we aren't using it * use the local sysctl code for uma query, malloc stats will come next. 
--- lib/Makefile | 2 +- lib/libuinet_memstat/Makefile | 25 + lib/libuinet_memstat/Makefile.orig | 30 ++ lib/libuinet_memstat/l_uma.h | 642 ++++++++++++++++++++++++ lib/libuinet_memstat/l_uma_dbg.h | 55 ++ lib/libuinet_memstat/l_uma_int.h | 451 +++++++++++++++++ lib/libuinet_memstat/libmemstat.3 | 492 ++++++++++++++++++ lib/libuinet_memstat/memstat.c | 434 ++++++++++++++++ lib/libuinet_memstat/memstat.h | 168 +++++++ lib/libuinet_memstat/memstat_all.c | 58 +++ lib/libuinet_memstat/memstat_internal.h | 126 +++++ lib/libuinet_memstat/memstat_malloc.c | 405 +++++++++++++++ lib/libuinet_memstat/memstat_uma.c | 477 ++++++++++++++++++ lib/libuinet_memstat/u_sysctl.c | 267 ++++++++++ lib/libuinet_memstat/u_sysctl.h | 12 + 15 files changed, 3643 insertions(+), 1 deletion(-) create mode 100644 lib/libuinet_memstat/Makefile create mode 100644 lib/libuinet_memstat/Makefile.orig create mode 100644 lib/libuinet_memstat/l_uma.h create mode 100644 lib/libuinet_memstat/l_uma_dbg.h create mode 100644 lib/libuinet_memstat/l_uma_int.h create mode 100644 lib/libuinet_memstat/libmemstat.3 create mode 100644 lib/libuinet_memstat/memstat.c create mode 100644 lib/libuinet_memstat/memstat.h create mode 100644 lib/libuinet_memstat/memstat_all.c create mode 100644 lib/libuinet_memstat/memstat_internal.h create mode 100644 lib/libuinet_memstat/memstat_malloc.c create mode 100644 lib/libuinet_memstat/memstat_uma.c create mode 100644 lib/libuinet_memstat/u_sysctl.c create mode 100644 lib/libuinet_memstat/u_sysctl.h diff --git a/lib/Makefile b/lib/Makefile index ac91792..8fc62a2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,5 +1,5 @@ -SUBDIRS=libuinet libev libuinetnv libhttp_parser +SUBDIRS=libuinet libev libuinetnv libuinet_memstat libhttp_parser TOPDIR?=${CURDIR}/.. 
include ${TOPDIR}/cflags.mk diff --git a/lib/libuinet_memstat/Makefile b/lib/libuinet_memstat/Makefile new file mode 100644 index 0000000..2149058 --- /dev/null +++ b/lib/libuinet_memstat/Makefile @@ -0,0 +1,25 @@ + +TOPDIR?=${CURDIR}/../.. +include ${TOPDIR}/lib.mk +include ${TOPDIR}/cflags.mk + +CFLAGS+= ${DEBUG_FLAGS} +CFLAGS+= -I${TOPDIR}/lib/libuinet/api_include/ -I${TOPDIR}/lib/libuinetnv/ + +SRCS= memstat.c memstat_all.c memstat_malloc.c memstat_uma.c u_sysctl.c +OBJS= memstat.o memstat_all.o memstat_malloc.o memstat_uma.o u_sysctl.o +INCS= memstat.h + +all: libuinet_memstat.a + +libuinet_memstat.a: $(OBJS) + $(AR) -c -r libuinet_memstat.a $(OBJS) + +clean: + $(RM) $(OBJS) libuinet_memstat.a + +install: + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/lib + ${UINET_INSTALL_DIR} -d ${UINET_DESTDIR}/include/memstat + ${UINET_INSTALL_LIB} libuinet_memstat.a ${UINET_DESTDIR}/lib + ${UINET_INSTALL_INC} memstat.h ${UINET_DESTDIR}/include/memstat diff --git a/lib/libuinet_memstat/Makefile.orig b/lib/libuinet_memstat/Makefile.orig new file mode 100644 index 0000000..89c52a8 --- /dev/null +++ b/lib/libuinet_memstat/Makefile.orig @@ -0,0 +1,30 @@ +# $FreeBSD: stable/9/lib/libmemstat/Makefile 195767 2009-07-19 17:25:24Z kensmith $ + +WARNS?= 3 +LIB= memstat +SHLIB_MAJOR= 3 +DPADD= ${LIBKVM} +LDADD= -lkvm +SRCS+= memstat.c +SRCS+= memstat_all.c +SRCS+= memstat_malloc.c +SRCS+= memstat_uma.c +INCS= memstat.h + +MAN= libmemstat.3 + +MLINKS+= libmemstat.3 memstat_mtl_alloc.3 +MLINKS+= libmemstat.3 memstat_mtl_first.3 +MLINKS+= libmemstat.3 memstat_mtl_next.3 +MLINKS+= libmemstat.3 memstat_mtl_find.3 +MLINKS+= libmemstat.3 memstat_mtl_free.3 +MLINKS+= libmemstat.3 memstat_mtl_geterror.3 +MLINKS+= libmemstat.3 memstat_strerror.3 +MLINKS+= libmemstat.3 memstat_sysctl_all.3 +MLINKS+= libmemstat.3 memstat_sysctl_malloc.3 +MLINKS+= libmemstat.3 memstat_sysctl_uma.3 +MLINKS+= libmemstat.3 memstat_kvm_all.3 +MLINKS+= libmemstat.3 memstat_kvm_malloc.3 +MLINKS+= libmemstat.3 
memstat_kvm_uma.3 + +.include diff --git a/lib/libuinet_memstat/l_uma.h b/lib/libuinet_memstat/l_uma.h new file mode 100644 index 0000000..8196eda --- /dev/null +++ b/lib/libuinet_memstat/l_uma.h @@ -0,0 +1,642 @@ +/*- + * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson + * Copyright (c) 2004, 2005 Bosko Milekic + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD: stable/9/sys/vm/uma.h 242365 2012-10-30 17:05:12Z mdf $ + * + */ + +/* + * uma.h - External definitions for the Universal Memory Allocator + * +*/ + +#ifndef VM_UMA_H +#define VM_UMA_H + +#include /* For NULL */ +#include /* For M_* */ + +/* User visible parameters */ +#define UMA_SMALLEST_UNIT (PAGE_SIZE / 256) /* Smallest item allocated */ + +/* Types and type defs */ + +struct uma_zone; +/* Opaque type used as a handle to the zone */ +typedef struct uma_zone * uma_zone_t; + +void zone_drain(uma_zone_t); + +/* + * Item constructor + * + * Arguments: + * item A pointer to the memory which has been allocated. + * arg The arg field passed to uma_zalloc_arg + * size The size of the allocated item + * flags See zalloc flags + * + * Returns: + * 0 on success + * errno on failure + * + * Discussion: + * The constructor is called just before the memory is returned + * to the user. It may block if necessary. + */ +typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags); + +/* + * Item destructor + * + * Arguments: + * item A pointer to the memory which has been allocated. + * size The size of the item being destructed. + * arg Argument passed through uma_zfree_arg + * + * Returns: + * Nothing + * + * Discussion: + * The destructor may perform operations that differ from those performed + * by the initializer, but it must leave the object in the same state. + * This IS type stable storage. This is called after EVERY zfree call. + */ +typedef void (*uma_dtor)(void *mem, int size, void *arg); + +/* + * Item initializer + * + * Arguments: + * item A pointer to the memory which has been allocated. + * size The size of the item being initialized. + * flags See zalloc flags + * + * Returns: + * 0 on success + * errno on failure + * + * Discussion: + * The initializer is called when the memory is cached in the uma zone. + * The initializer and the destructor should leave the object in the same + * state. 
+ */ +typedef int (*uma_init)(void *mem, int size, int flags); + +/* + * Item discard function + * + * Arguments: + * item A pointer to memory which has been 'freed' but has not left the + * zone's cache. + * size The size of the item being discarded. + * + * Returns: + * Nothing + * + * Discussion: + * This routine is called when memory leaves a zone and is returned to the + * system for other uses. It is the counter-part to the init function. + */ +typedef void (*uma_fini)(void *mem, int size); + +/* + * What's the difference between initializing and constructing? + * + * The item is initialized when it is cached, and this is the state that the + * object should be in when returned to the allocator. The purpose of this is + * to remove some code which would otherwise be called on each allocation by + * utilizing a known, stable state. This differs from the constructor which + * will be called on EVERY allocation. + * + * For example, in the initializer you may want to initialize embedded locks, + * NULL list pointers, set up initial states, magic numbers, etc. This way if + * the object is held in the allocator and re-used it won't be necessary to + * re-initialize it. + * + * The constructor may be used to lock a data structure, link it on to lists, + * bump reference counts or total counts of outstanding structures, etc. + * + */ + + +/* Function proto types */ + +/* + * Create a new uma zone + * + * Arguments: + * name The text name of the zone for debugging and stats. This memory + * should not be freed until the zone has been deallocated. + * size The size of the object that is being created. + * ctor The constructor that is called when the object is allocated. + * dtor The destructor that is called when the object is freed. + * init An initializer that sets up the initial state of the memory. + * fini A discard function that undoes initialization done by init. + * ctor/dtor/init/fini may all be null, see notes above. 
+ * align A bitmask that corresponds to the requested alignment + * eg 4 would be 0x3 + * flags A set of parameters that control the behavior of the zone. + * + * Returns: + * A pointer to a structure which is intended to be opaque to users of + * the interface. The value may be null if the wait flag is not set. + */ +uma_zone_t uma_zcreate(const char *name, size_t size, uma_ctor ctor, + uma_dtor dtor, uma_init uminit, uma_fini fini, + int align, u_int32_t flags); + +/* + * Create a secondary uma zone + * + * Arguments: + * name The text name of the zone for debugging and stats. This memory + * should not be freed until the zone has been deallocated. + * ctor The constructor that is called when the object is allocated. + * dtor The destructor that is called when the object is freed. + * zinit An initializer that sets up the initial state of the memory + * as the object passes from the Keg's slab to the Zone's cache. + * zfini A discard function that undoes initialization done by init + * as the object passes from the Zone's cache to the Keg's slab. + * + * ctor/dtor/zinit/zfini may all be null, see notes above. + * Note that the zinit and zfini specified here are NOT + * exactly the same as the init/fini specified to uma_zcreate() + * when creating a master zone. These zinit/zfini are called + * on the TRANSITION from keg to zone (and vice-versa). Once + * these are set, the primary zone may alter its init/fini + * (which are called when the object passes from VM to keg) + * using uma_zone_set_init/fini()) as well as its own + * zinit/zfini (unset by default for master zone) with + * uma_zone_set_zinit/zfini() (note subtle 'z' prefix). + * + * master A reference to this zone's Master Zone (Primary Zone), + * which contains the backing Keg for the Secondary Zone + * being added. + * + * Returns: + * A pointer to a structure which is intended to be opaque to users of + * the interface. The value may be null if the wait flag is not set. 
+ */ +uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, + uma_init zinit, uma_fini zfini, uma_zone_t master); + +/* + * Add a second master to a secondary zone. This provides multiple data + * backends for objects with the same size. Both masters must have + * compatible allocation flags. Presently, UMA_ZONE_MALLOC type zones are + * the only ones supported. + * + * Returns: + * Error on failure, 0 on success. + */ +int uma_zsecond_add(uma_zone_t zone, uma_zone_t master); + +/* + * Definitions for uma_zcreate flags + * + * These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to + * overlap when adding new features. 0xf0000000 is in use by uma_int.h. + */ +#define UMA_ZONE_PAGEABLE 0x0001 /* Return items not fully backed by + physical memory XXX Not yet */ +#define UMA_ZONE_ZINIT 0x0002 /* Initialize with zeros */ +#define UMA_ZONE_STATIC 0x0004 /* Statically sized zone */ +#define UMA_ZONE_OFFPAGE 0x0008 /* Force the slab structure allocation + off of the real memory */ +#define UMA_ZONE_MALLOC 0x0010 /* For use by malloc(9) only! */ +#define UMA_ZONE_NOFREE 0x0020 /* Do not free slabs of this type! */ +#define UMA_ZONE_MTXCLASS 0x0040 /* Create a new lock class */ +#define UMA_ZONE_VM 0x0080 /* + * Used for internal vm datastructures + * only. + */ +#define UMA_ZONE_HASH 0x0100 /* + * Use a hash table instead of caching + * information in the vm_page. + */ +#define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */ +#define UMA_ZONE_REFCNT 0x0400 /* Allocate refcnts in slabs */ +#define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */ +#define UMA_ZONE_CACHESPREAD 0x1000 /* + * Spread memory start locations across + * all possible cache lines. May + * require many virtually contiguous + * backend pages and can fail early. + */ +#define UMA_ZONE_VTOSLAB 0x2000 /* Zone uses vtoslab for lookup. */ +#define UMA_ZONE_NODUMP 0x4000 /* + * Zone's pages will not be included in + * mini-dumps.
+ */ + +/* + * These flags are shared between the keg and zone. In zones wishing to add + * new kegs these flags must be compatible. Some are determined based on + * physical parameters of the request and may not be provided by the consumer. + */ +#define UMA_ZONE_INHERIT \ + (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_HASH | \ + UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB) + +/* Definitions for align */ +#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ +#define UMA_ALIGN_LONG (sizeof(long) - 1) /* "" long */ +#define UMA_ALIGN_INT (sizeof(int) - 1) /* "" int */ +#define UMA_ALIGN_SHORT (sizeof(short) - 1) /* "" short */ +#define UMA_ALIGN_CHAR (sizeof(char) - 1) /* "" char */ +#define UMA_ALIGN_CACHE (0 - 1) /* Cache line size align */ + +/* + * Destroys an empty uma zone. If the zone is not empty uma complains loudly. + * + * Arguments: + * zone The zone we want to destroy. + * + */ +void uma_zdestroy(uma_zone_t zone); + +/* + * Allocates an item out of a zone + * + * Arguments: + * zone The zone we are allocating from + * arg This data is passed to the ctor function + * flags See sys/malloc.h for available flags. + * + * Returns: + * A non-null pointer to an initialized element from the zone is + * guaranteed if the wait flag is M_WAITOK. Otherwise a null pointer + * may be returned if the zone is empty or the ctor failed. + */ + +void *uma_zalloc_arg(uma_zone_t zone, void *arg, int flags); + +/* + * Allocates an item out of a zone without supplying an argument + * + * This is just a wrapper for uma_zalloc_arg for convenience. + * + */ +static __inline void *uma_zalloc(uma_zone_t zone, int flags); + +static __inline void * +uma_zalloc(uma_zone_t zone, int flags) +{ + return uma_zalloc_arg(zone, NULL, flags); +} + +/* + * Frees an item back into the specified zone. + * + * Arguments: + * zone The zone the item was originally allocated out of. + * item The memory to be freed. + * arg Argument passed to the destructor + * + * Returns: + * Nothing. 
+ */ + +void uma_zfree_arg(uma_zone_t zone, void *item, void *arg); + +/* + * Frees an item back to a zone without supplying an argument + * + * This is just a wrapper for uma_zfree_arg for convenience. + * + */ +static __inline void uma_zfree(uma_zone_t zone, void *item); + +static __inline void +uma_zfree(uma_zone_t zone, void *item) +{ + uma_zfree_arg(zone, item, NULL); +} + +/* + * XXX The rest of the prototypes in this header are h0h0 magic for the VM. + * If you think you need to use it for a normal zone you're probably incorrect. + */ + +/* + * Backend page supplier routines + * + * Arguments: + * zone The zone that is requesting pages. + * size The number of bytes being requested. + * pflag Flags for these memory pages, see below. + * wait Indicates our willingness to block. + * + * Returns: + * A pointer to the allocated memory or NULL on failure. + */ + +typedef void *(*uma_alloc)(uma_zone_t zone, int size, u_int8_t *pflag, int wait); + +/* + * Backend page free routines + * + * Arguments: + * item A pointer to the previously allocated pages. + * size The original size of the allocation. + * pflag The flags for the slab. See UMA_SLAB_* below. + * + * Returns: + * None + */ +typedef void (*uma_free)(void *item, int size, u_int8_t pflag); + + + +/* + * Sets up the uma allocator. (Called by vm_mem_init) + * + * Arguments: + * bootmem A pointer to memory used to bootstrap the system. + * + * Returns: + * Nothing + * + * Discussion: + * This memory is used for zones which allocate things before the + * backend page supplier can give us pages. It should be + * UMA_SLAB_SIZE * boot_pages bytes. (see uma_int.h) + * + */ + +void uma_startup(void *bootmem, int boot_pages); + +/* + * Finishes starting up the allocator. This should + * be called when kva is ready for normal allocs. + * + * Arguments: + * None + * + * Returns: + * Nothing + * + * Discussion: + * uma_startup2 is called by kmeminit() to enable use of uma for malloc.
+ */ + +void uma_startup2(void); + +/* + * Reclaims unused memory for all zones + * + * Arguments: + * None + * Returns: + * None + * + * This should only be called by the page out daemon. + */ + +void uma_reclaim(void); + +/* + * Sets the alignment mask to be used for all zones requesting cache + * alignment. Should be called by MD boot code prior to starting VM/UMA. + * + * Arguments: + * align The alignment mask + * + * Returns: + * Nothing + */ +void uma_set_align(int align); + +/* + * Switches the backing object of a zone + * + * Arguments: + * zone The zone to update. + * obj The VM object to use for future allocations. + * size The size of the object to allocate. + * + * Returns: + * 0 if kva space can not be allocated + * 1 if successful + * + * Discussion: + * A NULL object can be used and uma will allocate one for you. Setting + * the size will limit the amount of memory allocated to this zone. + * + */ +struct vm_object; +int uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int size); + +/* + * Sets a high limit on the number of items allowed in a zone + * + * Arguments: + * zone The zone to limit + * nitems The requested upper limit on the number of items allowed + * + * Returns: + * int The effective value of nitems after rounding up based on page size + */ +int uma_zone_set_max(uma_zone_t zone, int nitems); + +/* + * Obtains the effective limit on the number of items in a zone + * + * Arguments: + * zone The zone to obtain the effective limit from + * + * Return: + * 0 No limit + * int The effective limit of the zone + */ +int uma_zone_get_max(uma_zone_t zone); + +/* + * Obtains the approximate current number of items allocated from a zone + * + * Arguments: + * zone The zone to obtain the current allocation count from + * + * Return: + * int The approximate current number of items allocated from the zone + */ +int uma_zone_get_cur(uma_zone_t zone); + +/* + * The following two routines (uma_zone_set_init/fini) + * are used to set the backend 
init/fini pair which acts on an + * object as it becomes allocated and is placed in a slab within + * the specified zone's backing keg. These should probably not + * be changed once allocations have already begun, but only be set + * immediately upon zone creation. + */ +void uma_zone_set_init(uma_zone_t zone, uma_init uminit); +void uma_zone_set_fini(uma_zone_t zone, uma_fini fini); + +/* + * The following two routines (uma_zone_set_zinit/zfini) are + * used to set the zinit/zfini pair which acts on an object as + * it passes from the backing Keg's slab cache to the + * specified Zone's bucket cache. These should probably not + * be changed once allocations have already begun, but only be set + * immediately upon zone creation. + */ +void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit); +void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini); + +/* + * Replaces the standard page_alloc or obj_alloc functions for this zone + * + * Arguments: + * zone The zone whose backend allocator is being changed. + * allocf A pointer to the allocation function + * + * Returns: + * Nothing + * + * Discussion: + * This could be used to implement pageable allocation, or perhaps + * even DMA allocators if used in conjunction with the OFFPAGE + * zone flag. + */ + +void uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf); + +/* + * Used for freeing memory provided by the allocf above + * + * Arguments: + * zone The zone that intends to use this free routine. + * freef The page freeing routine. + * + * Returns: + * Nothing + */ + +void uma_zone_set_freef(uma_zone_t zone, uma_free freef); + +/* + * These flags are settable in the allocf and visible in the freef.
+ */ +#define UMA_SLAB_BOOT 0x01 /* Slab alloced from boot pages */ +#define UMA_SLAB_KMEM 0x02 /* Slab alloced from kmem_map */ +#define UMA_SLAB_KERNEL 0x04 /* Slab alloced from kernel_map */ +#define UMA_SLAB_PRIV 0x08 /* Slab alloced from priv allocator */ +#define UMA_SLAB_OFFP 0x10 /* Slab is managed separately */ +#define UMA_SLAB_MALLOC 0x20 /* Slab is a large malloc slab */ +/* 0x40 and 0x80 are available */ + +/* + * Used to pre-fill a zone with some number of items + * + * Arguments: + * zone The zone to fill + * itemcnt The number of items to reserve + * + * Returns: + * Nothing + * + * NOTE: This is blocking and should only be done at startup + */ +void uma_prealloc(uma_zone_t zone, int itemcnt); + +/* + * Used to lookup the reference counter allocated for an item + * from a UMA_ZONE_REFCNT zone. For UMA_ZONE_REFCNT zones, + * reference counters are allocated for items and stored in + * the underlying slab header. + * + * Arguments: + * zone The UMA_ZONE_REFCNT zone to which the item belongs. + * item The address of the item for which we want a refcnt. + * + * Returns: + * A pointer to a u_int32_t reference counter. + */ +u_int32_t *uma_find_refcnt(uma_zone_t zone, void *item); + +/* + * Used to determine if a fixed-size zone is exhausted. + * + * Arguments: + * zone The zone to check + * + * Returns: + * Non-zero if zone is exhausted. + */ +int uma_zone_exhausted(uma_zone_t zone); +int uma_zone_exhausted_nolock(uma_zone_t zone); + +/* + * Exported statistics structures to be used by user space monitoring tools. + * Statistics stream consists of a uma_stream_header, followed by a series of + * alternative uma_type_header and uma_type_stat structures. + */ +#define UMA_STREAM_VERSION 0x00000001 +struct uma_stream_header { + u_int32_t ush_version; /* Stream format version. */ + u_int32_t ush_maxcpus; /* Value of MAXCPU for stream. */ + u_int32_t ush_count; /* Number of records. */ + u_int32_t _ush_pad; /* Pad/reserved field. 
*/ +}; + +#define UTH_MAX_NAME 32 +#define UTH_ZONE_SECONDARY 0x00000001 +struct uma_type_header { + /* + * Static per-zone data, some extracted from the supporting keg. + */ + char uth_name[UTH_MAX_NAME]; + u_int32_t uth_align; /* Keg: alignment. */ + u_int32_t uth_size; /* Keg: requested size of item. */ + u_int32_t uth_rsize; /* Keg: real size of item. */ + u_int32_t uth_maxpages; /* Keg: maximum number of pages. */ + u_int32_t uth_limit; /* Keg: max items to allocate. */ + + /* + * Current dynamic zone/keg-derived statistics. + */ + u_int32_t uth_pages; /* Keg: pages allocated. */ + u_int32_t uth_keg_free; /* Keg: items free. */ + u_int32_t uth_zone_free; /* Zone: items free. */ + u_int32_t uth_bucketsize; /* Zone: desired bucket size. */ + u_int32_t uth_zone_flags; /* Zone: flags. */ + u_int64_t uth_allocs; /* Zone: number of allocations. */ + u_int64_t uth_frees; /* Zone: number of frees. */ + u_int64_t uth_fails; /* Zone: number of alloc failures. */ + u_int64_t uth_sleeps; /* Zone: number of alloc sleeps. */ + u_int64_t _uth_reserved1[2]; /* Reserved. */ +}; + +struct uma_percpu_stat { + u_int64_t ups_allocs; /* Cache: number of allocations. */ + u_int64_t ups_frees; /* Cache: number of frees. */ + u_int64_t ups_cache_free; /* Cache: free items in cache. */ + u_int64_t _ups_reserved[5]; /* Reserved. */ +}; + +#endif diff --git a/lib/libuinet_memstat/l_uma_dbg.h b/lib/libuinet_memstat/l_uma_dbg.h new file mode 100644 index 0000000..aa4ea2d --- /dev/null +++ b/lib/libuinet_memstat/l_uma_dbg.h @@ -0,0 +1,55 @@ +/*- + * Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson + * Copyright (c) 2004, 2005 Bosko Milekic + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: stable/9/sys/vm/uma_dbg.h 148078 2005-07-16 09:51:52Z rwatson $ + * + */ + +/* + * + * This file includes definitions, structures, prototypes, and inlines used + * when debugging users of the UMA interface. 
+ * + */ + +#ifndef VM_UMA_DBG_H +#define VM_UMA_DBG_H + +int trash_ctor(void *mem, int size, void *arg, int flags); +void trash_dtor(void *mem, int size, void *arg); +int trash_init(void *mem, int size, int flags); +void trash_fini(void *mem, int size); + +/* For use only by malloc */ +int mtrash_ctor(void *mem, int size, void *arg, int flags); +void mtrash_dtor(void *mem, int size, void *arg); +int mtrash_init(void *mem, int size, int flags); +void mtrash_fini(void *mem, int size); + +void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item); +void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item); + +#endif /* VM_UMA_DBG_H */ diff --git a/lib/libuinet_memstat/l_uma_int.h b/lib/libuinet_memstat/l_uma_int.h new file mode 100644 index 0000000..621ec3b --- /dev/null +++ b/lib/libuinet_memstat/l_uma_int.h @@ -0,0 +1,451 @@ +/*- + * Copyright (c) 2002-2005, 2009 Jeffrey Roberson + * Copyright (c) 2004, 2005 Bosko Milekic + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: stable/9/sys/vm/uma_int.h 242365 2012-10-30 17:05:12Z mdf $ + * + */ + +/* + * This file includes definitions, structures, prototypes, and inlines that + * should not be used outside of the actual implementation of UMA. + */ + +/* + * Here's a quick description of the relationship between the objects: + * + * Kegs contain lists of slabs which are stored in either the full bin, empty + * bin, or partially allocated bin, to reduce fragmentation. They also contain + * the user supplied value for size, which is adjusted for alignment purposes + * and rsize is the result of that. The Keg also stores information for + * managing a hash of page addresses that maps pages to uma_slab_t structures + * for pages that don't have embedded uma_slab_t's. + * + * The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may + * be allocated off the page from a special slab zone. The free list within a + * slab is managed with a linked list of indices, which are 8 bit values. If + * UMA_SLAB_SIZE is defined to be too large I will have to switch to 16bit + * values. Currently on alpha you can get 250 or so 32 byte items and on x86 + * you can get 250 or so 16byte items. For item sizes that would yield more + * than 10% memory waste we potentially allocate a separate uma_slab_t if this + * will improve the number of items per slab that will fit. 
+ * + * Other potential space optimizations are storing the 8bit of linkage in space + * wasted between items due to alignment problems. This may yield a much better + * memory footprint for certain sizes of objects. Another alternative is to + * increase the UMA_SLAB_SIZE, or allow for dynamic slab sizes. I prefer + * dynamic slab sizes because we could stick with 8 bit indices and only use + * large slab sizes for zones with a lot of waste per slab. This may create + * inefficiencies in the vm subsystem due to fragmentation in the address space. + * + * The only really gross cases, with regards to memory waste, are for those + * items that are just over half the page size. You can get nearly 50% waste, + * so you fall back to the memory footprint of the power of two allocator. I + * have looked at memory allocation sizes on many of the machines available to + * me, and there does not seem to be an abundance of allocations at this range + * so at this time it may not make sense to optimize for it. This can, of + * course, be solved with dynamic slab sizes. + * + * Kegs may serve multiple Zones but by far most of the time they only serve + * one. When a Zone is created, a Keg is allocated and setup for it. While + * the backing Keg stores slabs, the Zone caches Buckets of items allocated + * from the slabs. Each Zone is equipped with an init/fini and ctor/dtor + * pair, as well as with its own set of small per-CPU caches, layered above + * the Zone's general Bucket cache. + * + * The PCPU caches are protected by critical sections, and may be accessed + * safely only from their associated CPU, while the Zones backed by the same + * Keg all share a common Keg lock (to coalesce contention on the backing + * slabs). The backing Keg typically only serves one Zone but in the case of + * multiple Zones, one of the Zones is considered the Master Zone and all + * Zone-related stats from the Keg are done in the Master Zone. 
For an + * example of a Multi-Zone setup, refer to the Mbuf allocation code. + */ + +/* + * This is the representation for normal (Non OFFPAGE slab) + * + * i == item + * s == slab pointer + * + * <---------------- Page (UMA_SLAB_SIZE) ------------------> + * ___________________________________________________________ + * | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ___________ | + * ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| |slab header|| + * ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| |___________|| + * |___________________________________________________________| + * + * + * This is an OFFPAGE slab. These can be larger than UMA_SLAB_SIZE. + * + * ___________________________________________________________ + * | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | + * ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| | + * ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| | + * |___________________________________________________________| + * ___________ ^ + * |slab header| | + * |___________|---* + * + */ + +#ifndef VM_UMA_INT_H +#define VM_UMA_INT_H + +#define UMA_SLAB_SIZE PAGE_SIZE /* How big are our slabs? */ +#define UMA_SLAB_MASK (PAGE_SIZE - 1) /* Mask to get back to the page */ +#define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits PAGE_MASK */ + +#define UMA_BOOT_PAGES 64 /* Pages allocated for startup */ + +/* Max waste before going to off page slab management */ +#define UMA_MAX_WASTE (UMA_SLAB_SIZE / 10) + +/* + * I doubt there will be many cases where this is exceeded. This is the initial + * size of the hash table for uma_slabs that are managed off page. This hash + * does expand by powers of two. Currently it doesn't get smaller. + */ +#define UMA_HASH_SIZE_INIT 32 + +/* + * I should investigate other hashing algorithms. This should yield a low + * number of collisions if the pages are relatively contiguous. + * + * This is the same algorithm that most processor caches use. + * + * I'm shifting and masking instead of % because it should be faster. 
+ */ + +#define UMA_HASH(h, s) ((((unsigned long)s) >> UMA_SLAB_SHIFT) & \ + (h)->uh_hashmask) + +#define UMA_HASH_INSERT(h, s, mem) \ + SLIST_INSERT_HEAD(&(h)->uh_slab_hash[UMA_HASH((h), \ + (mem))], (s), us_hlink) +#define UMA_HASH_REMOVE(h, s, mem) \ + SLIST_REMOVE(&(h)->uh_slab_hash[UMA_HASH((h), \ + (mem))], (s), uma_slab, us_hlink) + +/* Hash table for freed address -> slab translation */ + +SLIST_HEAD(slabhead, uma_slab); + +struct uma_hash { + struct slabhead *uh_slab_hash; /* Hash table for slabs */ + int uh_hashsize; /* Current size of the hash table */ + int uh_hashmask; /* Mask used during hashing */ +}; + +/* + * align field or structure to cache line + */ +#if defined(__amd64__) +#define UMA_ALIGN __aligned(CACHE_LINE_SIZE) +#else +#define UMA_ALIGN +#endif + +/* + * Structures for per cpu queues. + */ + +struct uma_bucket { + LIST_ENTRY(uma_bucket) ub_link; /* Link into the zone */ + int16_t ub_cnt; /* Count of free items. */ + int16_t ub_entries; /* Max items. */ + void *ub_bucket[]; /* actual allocation storage */ +}; + +typedef struct uma_bucket * uma_bucket_t; + +struct uma_cache { + uma_bucket_t uc_freebucket; /* Bucket we're freeing to */ + uma_bucket_t uc_allocbucket; /* Bucket to allocate from */ + u_int64_t uc_allocs; /* Count of allocations */ + u_int64_t uc_frees; /* Count of frees */ +} UMA_ALIGN; + +typedef struct uma_cache * uma_cache_t; + +/* + * Keg management structure + * + * TODO: Optimize for cache line size + * + */ +struct uma_keg { + LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */ + + struct mtx uk_lock; /* Lock for the keg */ + struct uma_hash uk_hash; + + const char *uk_name; /* Name of creating zone. 
*/ + LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */ + LIST_HEAD(,uma_slab) uk_part_slab; /* partially allocated slabs */ + LIST_HEAD(,uma_slab) uk_free_slab; /* empty slab list */ + LIST_HEAD(,uma_slab) uk_full_slab; /* full slabs */ + + u_int32_t uk_recurse; /* Allocation recursion count */ + u_int32_t uk_align; /* Alignment mask */ + u_int32_t uk_pages; /* Total page count */ + u_int32_t uk_free; /* Count of items free in slabs */ + u_int32_t uk_size; /* Requested size of each item */ + u_int32_t uk_rsize; /* Real size of each item */ + u_int32_t uk_maxpages; /* Maximum number of pages to alloc */ + + uma_init uk_init; /* Keg's init routine */ + uma_fini uk_fini; /* Keg's fini routine */ + uma_alloc uk_allocf; /* Allocation function */ + uma_free uk_freef; /* Free routine */ + + struct vm_object *uk_obj; /* Zone specific object */ + vm_offset_t uk_kva; /* Base kva for zones with objs */ + uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */ + + u_int16_t uk_pgoff; /* Offset to uma_slab struct */ + u_int16_t uk_ppera; /* pages per allocation from backend */ + u_int16_t uk_ipers; /* Items per slab */ + u_int32_t uk_flags; /* Internal flags */ +}; +typedef struct uma_keg * uma_keg_t; + +/* Page management structure */ + +/* Sorry for the union, but space efficiency is important */ +struct uma_slab_head { + uma_keg_t us_keg; /* Keg we live in */ + union { + LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */ + unsigned long _us_size; /* Size of allocation */ + } us_type; + SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */ + u_int8_t *us_data; /* First item */ + u_int8_t us_flags; /* Page flags see uma.h */ + u_int8_t us_freecount; /* How many are free? 
*/ + u_int8_t us_firstfree; /* First free item index */ +}; + +/* The standard slab structure */ +struct uma_slab { + struct uma_slab_head us_head; /* slab header data */ + struct { + u_int8_t us_item; + } us_freelist[1]; /* actual number bigger */ +}; + +/* + * The slab structure for UMA_ZONE_REFCNT zones, for whose items we + * maintain reference counters in the slab. + */ +struct uma_slab_refcnt { + struct uma_slab_head us_head; /* slab header data */ + struct { + u_int8_t us_item; + u_int32_t us_refcnt; + } us_freelist[1]; /* actual number bigger */ +}; + +#define us_keg us_head.us_keg +#define us_link us_head.us_type._us_link +#define us_size us_head.us_type._us_size +#define us_hlink us_head.us_hlink +#define us_data us_head.us_data +#define us_flags us_head.us_flags +#define us_freecount us_head.us_freecount +#define us_firstfree us_head.us_firstfree + +typedef struct uma_slab * uma_slab_t; +typedef struct uma_slab_refcnt * uma_slabrefcnt_t; +typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int); + + +/* + * These give us the size of one free item reference within our corresponding + * uma_slab structures, so that our calculations during zone setup are correct + * regardless of what the compiler decides to do with padding the structure + * arrays within uma_slab.
+ */ +#define UMA_FRITM_SZ (sizeof(struct uma_slab) - sizeof(struct uma_slab_head)) +#define UMA_FRITMREF_SZ (sizeof(struct uma_slab_refcnt) - \ + sizeof(struct uma_slab_head)) + +struct uma_klink { + LIST_ENTRY(uma_klink) kl_link; + uma_keg_t kl_keg; +}; +typedef struct uma_klink *uma_klink_t; + +/* + * Zone management structure + * + * TODO: Optimize for cache line size + * + */ +struct uma_zone { + const char *uz_name; /* Text name of the zone */ + struct mtx *uz_lock; /* Lock for the zone (keg's lock) */ + + LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */ + LIST_HEAD(,uma_bucket) uz_full_bucket; /* full buckets */ + LIST_HEAD(,uma_bucket) uz_free_bucket; /* Buckets for frees */ + + LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */ + struct uma_klink uz_klink; /* klink for first keg. */ + + uma_slaballoc uz_slab; /* Allocate a slab from the backend. */ + uma_ctor uz_ctor; /* Constructor for each allocation */ + uma_dtor uz_dtor; /* Destructor */ + uma_init uz_init; /* Initializer for each item */ + uma_fini uz_fini; /* Discards memory */ + + u_int32_t uz_flags; /* Flags inherited from kegs */ + u_int32_t uz_size; /* Size inherited from kegs */ + + u_int64_t uz_allocs UMA_ALIGN; /* Total number of allocations */ + u_int64_t uz_frees; /* Total number of frees */ + u_int64_t uz_fails; /* Total number of alloc failures */ + u_int64_t uz_sleeps; /* Total number of alloc sleeps */ + uint16_t uz_fills; /* Outstanding bucket fills */ + uint16_t uz_count; /* Highest value ub_ptr can have */ + + /* + * This HAS to be the last item because we adjust the zone size + * based on NCPU and then allocate the space for the zones. + */ + struct uma_cache uz_cpu[1]; /* Per cpu caches */ +}; + +/* + * These flags must not overlap with the UMA_ZONE flags specified in uma.h. + */ +#define UMA_ZFLAG_BUCKET 0x02000000 /* Bucket zone. */ +#define UMA_ZFLAG_MULTI 0x04000000 /* Multiple kegs in the zone. */ +#define UMA_ZFLAG_DRAINING 0x08000000 /* Running zone_drain. 
*/ +#define UMA_ZFLAG_PRIVALLOC 0x10000000 /* Use uk_allocf. */ +#define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */ +#define UMA_ZFLAG_FULL 0x40000000 /* Reached uk_maxpages */ +#define UMA_ZFLAG_CACHEONLY 0x80000000 /* Don't ask VM for buckets. */ + +#define UMA_ZFLAG_INHERIT (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | \ + UMA_ZFLAG_BUCKET) + +#undef UMA_ALIGN + +#ifdef _KERNEL +/* Internal prototypes */ +static __inline uma_slab_t hash_sfind(struct uma_hash *hash, u_int8_t *data); +void *uma_large_malloc(int size, int wait); +void uma_large_free(uma_slab_t slab); + +/* Lock Macros */ + +#define KEG_LOCK_INIT(k, lc) \ + do { \ + if ((lc)) \ + mtx_init(&(k)->uk_lock, (k)->uk_name, \ + (k)->uk_name, MTX_DEF | MTX_DUPOK); \ + else \ + mtx_init(&(k)->uk_lock, (k)->uk_name, \ + "UMA zone", MTX_DEF | MTX_DUPOK); \ + } while (0) + +#define KEG_LOCK_FINI(k) mtx_destroy(&(k)->uk_lock) +#define KEG_LOCK(k) mtx_lock(&(k)->uk_lock) +#define KEG_UNLOCK(k) mtx_unlock(&(k)->uk_lock) +#define ZONE_LOCK(z) mtx_lock((z)->uz_lock) +#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lock) + +/* + * Find a slab within a hash table. This is used for OFFPAGE zones to lookup + * the slab structure. + * + * Arguments: + * hash The hash table to search. + * data The base page of the item. + * + * Returns: + * A pointer to a slab if successful, else NULL.
+ */ +static __inline uma_slab_t +hash_sfind(struct uma_hash *hash, u_int8_t *data) +{ + uma_slab_t slab; + int hval; + + hval = UMA_HASH(hash, data); + + SLIST_FOREACH(slab, &hash->uh_slab_hash[hval], us_hlink) { + if ((u_int8_t *)slab->us_data == data) + return (slab); + } + return (NULL); +} + +static __inline uma_slab_t +vtoslab(vm_offset_t va) +{ + vm_page_t p; + uma_slab_t slab; + + p = PHYS_TO_VM_PAGE(pmap_kextract(va)); + slab = (uma_slab_t )p->object; + + if (p->flags & PG_SLAB) + return (slab); + else + return (NULL); +} + +static __inline void +vsetslab(vm_offset_t va, uma_slab_t slab) +{ + vm_page_t p; + + p = PHYS_TO_VM_PAGE(pmap_kextract(va)); + p->object = (vm_object_t)slab; + p->flags |= PG_SLAB; +} + +static __inline void +vsetobj(vm_offset_t va, vm_object_t obj) +{ + vm_page_t p; + + p = PHYS_TO_VM_PAGE(pmap_kextract(va)); + p->object = obj; + p->flags &= ~PG_SLAB; +} + +/* + * The following two functions may be defined by architecture specific code + * if they can provide more efficient allocation functions. This is useful + * for using direct mapped addresses. + */ +void *uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait); +void uma_small_free(void *mem, int size, u_int8_t flags); +#endif /* _KERNEL */ + +#endif /* VM_UMA_INT_H */ diff --git a/lib/libuinet_memstat/libmemstat.3 b/lib/libuinet_memstat/libmemstat.3 new file mode 100644 index 0000000..f6f6518 --- /dev/null +++ b/lib/libuinet_memstat/libmemstat.3 @@ -0,0 +1,492 @@ +.\" Copyright (c) 2005 Robert N. M. Watson +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2.
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD: stable/9/lib/libmemstat/libmemstat.3 234717 2012-04-26 20:11:57Z gjb $ +.\" +.Dd February 25, 2012 +.Dt LIBMEMSTAT 3 +.Os +.Sh NAME +.Nm libmemstat +.Nd "library interface to retrieve kernel memory allocator statistics" +.Sh LIBRARY +.Lb libmemstat +.Sh SYNOPSIS +.In sys/types.h +.In memstat.h +.Ss General Functions +.Ft "const char *" +.Fn memstat_strerror "int error" +.Ss Memory Type List Management Functions +.Ft "struct memory_type_list *" +.Fn memstat_mtl_alloc "void" +.Ft "struct memory_type *" +.Fn memstat_mtl_first "struct memory_type_list *list" +.Ft "struct memory_type *" +.Fn memstat_mtl_next "struct memory_type *mtp" +.Ft "struct memory_type *" +.Fo memstat_mtl_find +.Fa "struct memory_type_list *list" "int allocator" "const char *name" +.Fc +.Ft void +.Fn memstat_mtl_free "struct memory_type_list *list" +.Ft int +.Fn memstat_mtl_geterror "struct memory_type_list *list" +.Ss Allocator Query Functions +.Ft int +.Fn memstat_kvm_all "struct memory_type_list *list" "void *kvm_handle" +.Ft int +.Fn memstat_kvm_malloc "struct memory_type_list *list" "void *kvm_handle" +.Ft int +.Fn memstat_kvm_uma "struct memory_type_list *list" "void *kvm_handle" +.Ft int +.Fn memstat_sysctl_all "struct memory_type_list *list" "int flags" +.Ft int +.Fn memstat_sysctl_malloc "struct memory_type_list *list" "int flags" +.Ft int +.Fn memstat_sysctl_uma "struct memory_type_list *list" "int flags" +.Ss Memory Type Accessor Methods +.Ft "const char *" +.Fn memstat_get_name "const struct memory_type *mtp" +.Ft int +.Fn memstat_get_allocator "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_countlimit "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_byteslimit "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_sizemask "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_size "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_memalloced "const struct memory_type *mtp" +.Ft uint64_t +.Fn 
memstat_get_memfreed "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_numallocs "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_numfrees "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_bytes "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_count "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_free "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_failures "const struct memory_type *mtp" +.Ft "void *" +.Fn memstat_get_caller_pointer "const struct memory_type *mtp" "int index" +.Ft void +.Fo memstat_set_caller_pointer +.Fa "struct memory_type *mtp" "int index" "void *value" +.Fc +.Ft uint64_t +.Fn memstat_get_caller_uint64 "const struct memory_type *mtp" "int index" +.Ft void +.Fo memstat_set_caller_uint64 +.Fa "struct memory_type *mtp" "int index" "uint64_t value" +.Fc +.Ft uint64_t +.Fn memstat_get_zonefree "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_kegfree "const struct memory_type *mtp" +.Ft uint64_t +.Fn memstat_get_percpu_memalloced "const struct memory_type *mtp" "int cpu" +.Ft uint64_t +.Fn memstat_get_percpu_memfreed "const struct memory_type *mtp" "int cpu" +.Ft uint64_t +.Fn memstat_get_percpu_numallocs "const struct memory_type *mtp" "int cpu" +.Ft uint64_t +.Fn memstat_get_percpu_numfrees "const struct memory_type *mtp" "int cpu" +.Ft uint64_t +.Fn memstat_get_percpu_sizemask "const struct memory_type *mtp" "int cpu" +.Ft "void *" +.Fo memstat_get_percpu_caller_pointer +.Fa "const struct memory_type *mtp" "int cpu" "int index" +.Fc +.Ft void +.Fo memstat_set_percpu_caller_pointer +.Fa "struct memory_type *mtp" "int cpu" "int index" "void *value" +.Fc +.Ft uint64_t +.Fo memstat_get_percpu_caller_uint64 +.Fa "const struct memory_type *mtp" "int cpu" "int index" +.Fc +.Ft void +.Fo memstat_set_percpu_caller_uint64 +.Fa "struct memory_type *mtp" "int cpu" "int index" "uint64_t value" +.Fc +.Ft uint64_t +.Fn memstat_get_percpu_free "const struct 
memory_type *mtp" "int cpu" +.Sh DESCRIPTION +.Nm +provides an interface to retrieve kernel memory allocator statistics, for +the purposes of debugging and system monitoring, insulating applications +from implementation details of the allocators, and allowing a tool to +transparently support multiple allocators. +.Nm +supports both retrieving a single statistics snapshot, as well as +incrementally updating statistics for long-term monitoring. +.Pp +.Nm +describes each memory type using a +.Vt "struct memory_type" , +an opaque memory type accessed by the application using accessor functions +in the library. +.Nm +returns and updates chains of +.Vt "struct memory_type" +via a +.Vt "struct memory_type_list" , +which will be allocated by calling +.Fn memstat_mtl_alloc , +and freed on completion using +.Fn memstat_mtl_free . +Lists of memory types are populated via calls that query the kernel for +statistics information; currently: +.Fn memstat_kvm_all , +.Fn memstat_kvm_malloc , +.Fn memstat_kvm_uma , +.Fn memstat_sysctl_all , +.Fn memstat_sysctl_uma , +and +.Fn memstat_sysctl_malloc . +Repeated calls will incrementally update the list of memory types, permitting +tracking over time without recreating all list state. +If an error is detected during a query call, error condition information may +be retrieved using +.Fn memstat_mtl_geterror , +and converted to a user-readable string using +.Fn memstat_strerror . +.Pp +Freeing the list will free all memory type data in the list, and so +invalidates any outstanding pointers to entries in the list. +.Vt "struct memory_type" +entries in the list may be iterated over using +.Fn memstat_mtl_first +and +.Fn memstat_mtl_next , +which respectively return the first entry in a list, and the next entry in a +list. +.Fn memstat_mtl_find +will return a pointer to the first entry matching the passed +parameters.
+.Pp +A series of accessor methods is provided to access fields of the structure, +including retrieving statistics and properties, as well as setting of caller +owned fields. +Direct application access to the data structure fields is not supported. +.Ss Library Vt memory_type Ss Fields +Each +.Vt "struct memory_type" +holds a description of the memory type, including its name and the allocator +it is managed by, as well as current statistics on use. +Some statistics are directly measured, others are derived from directly +measured statistics. +Certain high level statistics are present across all available allocators, +such as the number of allocation and free operations; other measurements, +such as the quantity of free items in per-CPU caches, or administrative +limit on the number of allocations, are available only for specific +allocators. +.Ss Caller Vt memory_type Ss Fields +.Vt "struct memory_type" +includes fields to allow the application to store data, in the form of +pointers and 64-bit integers, with memory types. +For example, the application author might make use of one of the caller +pointers to reference a more complex data structure tracking long-term +behavior of the memory type, or a window system object that is used to +render the state of the memory type. +General and per-CPU storage is provided with each +.Vt "struct memory_type" +in the form of an array of pointers and integers. +The array entries are accessed via the +.Fa index +argument to the get and set accessor methods. +Possible values of +.Fa index +range between +0 +and +.Dv MEMSTAT_MAXCALLER . +.Pp +Caller-owned fields are initialized to +0 +or +.Dv NULL +when a new +.Vt "struct memory_type" +is allocated and attached to a memory type list; these fields retain their +values across queries that update library-owned fields. +.Ss Allocator Types +Currently, +.Nm +supports two kernel allocators: +.Dv ALLOCATOR_UMA +for +.Xr uma 9 , +and +.Dv ALLOCATOR_MALLOC +for +.Xr malloc 9 .
+These values may be passed to +.Fn memstat_mtl_find , +and will be returned by +.Fn memstat_get_allocator . +Two additional constants in the allocator name space are defined: +.Dv ALLOCATOR_UNKNOWN , +which will only be returned as a result of a library error, and +.Dv ALLOCATOR_ANY , +which can be used to specify that returning types matching any allocator is +permissible from +.Fn memstat_mtl_find . +.Ss Access Method List +The following accessor methods are defined, of which some will be valid for +a given memory type: +.Bl -tag -width indent +.It Fn memstat_get_name +Return a pointer to the name of the memory type. +Memory for the name is owned by +.Nm +and will be valid through a call to +.Fn memstat_mtl_free . +Note that names will be unique with respect to a single allocator, but that +the same name might be used by different memory types owned by different +memory allocators. +.It Fn memstat_get_allocator +Return an integer identifier for the memory allocator that owns the memory +type. +.It Fn memstat_get_countlimit +If the memory type has an administrative limit on the number of simultaneous +allocations, return it. +.It Fn memstat_get_byteslimit +If the memory type has an administrative limit on the number of bytes of +memory that may be simultaneously allocated for the memory type, return it. +.It Fn memstat_get_sizemask +If the memory type supports variable allocation sizes, return a bitmask of +sizes allocated for the memory type. +.It Fn memstat_get_size +If the memory type supports a fixed allocation size, return that size. +.It Fn memstat_get_memalloced +Return the total number of bytes allocated for the memory type over its +lifetime. +.It Fn memstat_get_memfreed +Return the total number of bytes freed for the memory type over its lifetime. +.It Fn memstat_get_numallocs +Return the total number of allocations for the memory type over its lifetime.
+.It Fn memstat_get_numfrees +Return the total number of frees for the memory type over its lifetime. +.It Fn memstat_get_bytes +Return the current number of bytes allocated to the memory type. +.It Fn memstat_get_count +Return the current number of allocations for the memory type. +.It Fn memstat_get_free +If the memory allocator supports a cache, return the number of items in the +cache. +.It Fn memstat_get_failures +If the memory allocator and type permit allocation failures, return the +number of allocation failures measured. +.It Fn memstat_get_caller_pointer +Return a caller-owned pointer for the memory type. +.It Fn memstat_set_caller_pointer +Set a caller-owned pointer for the memory type. +.It Fn memstat_get_caller_uint64 +Return a caller-owned integer for the memory type. +.It Fn memstat_set_caller_uint64 +Set a caller-owned integer for the memory type. +.It Fn memstat_get_zonefree +If the memory allocator supports a multi-level allocation structure, return +the number of cached items in the zone. +These items will be in a fully constructed state available for immediate +use. +.It Fn memstat_get_kegfree +If the memory allocator supports a multi-level allocation structure, return +the number of cached items in the keg. +These items may be in a partially constructed state, and may require further +processing before they can be made available for use. +.It Fn memstat_get_percpu_memalloced +If the memory allocator supports per-CPU statistics, return the number of +bytes of memory allocated for the memory type on the CPU over its lifetime. +.It Fn memstat_get_percpu_memfreed +If the memory allocator supports per-CPU statistics, return the number of +bytes of memory freed from the memory type on the CPU over its lifetime. +.It Fn memstat_get_percpu_numallocs +If the memory allocator supports per-CPU statistics, return the number of +allocations for the memory type on the CPU over its lifetime. 
+.It Fn memstat_get_percpu_numfrees +If the memory allocator supports per-CPU statistics, return the number of +frees for the memory type on the CPU over its lifetime. +.It Fn memstat_get_percpu_sizemask +If the memory allocator supports variable size memory allocation and per-CPU +statistics, return the size bitmask for the memory type on the CPU. +.It Fn memstat_get_percpu_caller_pointer +Return a caller-owned per-CPU pointer for the memory type. +.It Fn memstat_set_percpu_caller_pointer +Set a caller-owned per-CPU pointer for the memory type. +.It Fn memstat_get_percpu_caller_uint64 +Return a caller-owned per-CPU integer for the memory type. +.It Fn memstat_set_percpu_caller_uint64 +Set a caller-owned per-CPU integer for the memory type. +.It Fn memstat_get_percpu_free +If the memory allocator supports a per-CPU cache, return the number of free +items in the per-CPU cache of the designated CPU. +.El +.Sh RETURN VALUES +.Nm +functions fall into three categories: functions returning a pointer to an +object, functions returning an integer return value, and functions +implementing accessor methods returning data from a +.Vt "struct memory_type" . +.Pp +Functions returning a pointer to an object will generally return +.Dv NULL +on failure. +.Fn memstat_mtl_alloc +will return an error value via +.Va errno , +which will consist of the value +.Er ENOMEM . +Functions +.Fn memstat_mtl_first , +.Fn memstat_mtl_next , +and +.Fn memstat_mtl_find +will return +.Dv NULL +when there is no entry or match in the list; however, this is not considered +a failure mode and no error value is available. +.Pp +Functions returning an integer success value will return +0 +on success, or +\-1 +on failure. +If a failure is returned, the list error access method, +.Fn memstat_mtl_geterror , +may be used to retrieve the error state. +The string representation of the error may be retrieved using +.Fn memstat_strerror . 
+Possible error values are: +.Bl -tag -width ".Dv MEMSTAT_ERROR_KVM_SHORTREAD" +.It Dv MEMSTAT_ERROR_UNDEFINED +Undefined error. +Occurs if +.Fn memstat_mtl_geterror +is called on a list before an error associated with the list has occurred. +.It Dv MEMSTAT_ERROR_NOMEMORY +Insufficient memory. +Occurs if library calls to +.Xr malloc 3 +fail, or if a system call to retrieve kernel statistics fails with +.Er ENOMEM . +.It Dv MEMSTAT_ERROR_VERSION +Returned if the current version of +.Nm +is unable to interpret the statistics data returned by the kernel due to an +explicit version mismatch, or to differences in data structures that cannot +be reconciled. +.It Dv MEMSTAT_ERROR_PERMISSION +Returned if a statistics source returns +.Va errno +values of +.Er EACCES +or +.Er EPERM . +.It Dv MEMSTAT_ERROR_DATAERROR +Returned if +.Nm +is unable to interpret statistics data returned by the data source, even +though there does not appear to be a version problem. +.It Dv MEMSTAT_ERROR_KVM +Returned if +.Nm +experiences an error while using +.Xr kvm 3 +interfaces to query statistics data. +Use +.Xr kvm_geterr 3 +to retrieve the error. +.It Dv MEMSTAT_ERROR_KVM_NOSYMBOL +Returned if +.Nm +is unable to read a required symbol from the kernel being operated on. +.It Dv MEMSTAT_ERROR_KVM_SHORTREAD +Returned if +.Nm +attempts to read data from a live memory image or kernel core dump and +insufficient data is returned. +.El +.Pp +Finally, functions returning data from a +.Vt "struct memory_type" +pointer are not permitted to fail, and directly return either a statistic +or pointer to a string. +.Sh EXAMPLES +Create a memory type list, query the +.Xr uma 9 +memory allocator for available statistics, and print out the number of +allocations performed by the +.Dv mbuf +zone. 
+.Bd -literal -offset indent +struct memory_type_list *mtlp; +struct memory_type *mtp; +uint64_t mbuf_count; + +mtlp = memstat_mtl_alloc(); +if (mtlp == NULL) + err(-1, "memstat_mtl_alloc"); +if (memstat_sysctl_uma(mtlp, 0) < 0) + err(-1, "memstat_sysctl_uma"); +mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, "mbuf"); +if (mtp == NULL) + errx(-1, "memstat_mtl_find: mbuf not found"); +mbuf_count = memstat_get_count(mtp); +memstat_mtl_free(mtlp); + +printf("mbufs: %llu\en", (unsigned long long)mbuf_count); +.Ed +.Sh SEE ALSO +.Xr malloc 9 , +.Xr uma 9 +.Sh HISTORY +The +.Nm +library appeared in +.Fx 6.0 . +.Sh AUTHORS +The kernel memory allocator changes necessary to support a general purpose +monitoring library, along with the library, were written by +.An Robert Watson Aq rwatson@FreeBSD.org . +.Sh BUGS +There are memory allocators in the kernel, such as the VM page allocator +and +.Nm sf_buf +allocator, which are not currently supported by +.Nm . +.Pp +Once a memory type is present on a memory type list, it will not be removed +even if the kernel no longer presents information on the type via its +monitoring interfaces. +In order to flush removed memory types, it is necessary to free the entire +list and allocate a new one. diff --git a/lib/libuinet_memstat/memstat.c b/lib/libuinet_memstat/memstat.c new file mode 100644 index 0000000..cd875bd --- /dev/null +++ b/lib/libuinet_memstat/memstat.c @@ -0,0 +1,434 @@ +/*- + * Copyright (c) 2005 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: stable/9/lib/libmemstat/memstat.c 224569 2011-08-01 09:43:35Z pluknet $ + */ + +#include +#include +//#include + +#include +#include +#include +#include +#include + +#include "memstat.h" +#include "memstat_internal.h" + +const char * +memstat_strerror(int error) +{ + + switch (error) { + case MEMSTAT_ERROR_NOMEMORY: + return ("Cannot allocate memory"); + case MEMSTAT_ERROR_VERSION: + return ("Version mismatch"); + case MEMSTAT_ERROR_PERMISSION: + return ("Permission denied"); + case MEMSTAT_ERROR_DATAERROR: + return ("Data format error"); + case MEMSTAT_ERROR_KVM: + return ("KVM error"); + case MEMSTAT_ERROR_KVM_NOSYMBOL: + return ("KVM unable to find symbol"); + case MEMSTAT_ERROR_KVM_SHORTREAD: + return ("KVM short read"); + case MEMSTAT_ERROR_UNDEFINED: + default: + return ("Unknown error"); + } +} + +struct memory_type_list * +memstat_mtl_alloc(void) +{ + struct memory_type_list *mtlp; + + mtlp = malloc(sizeof(*mtlp)); + if (mtlp == NULL) + return (NULL); + + 
LIST_INIT(&mtlp->mtl_list); + mtlp->mtl_error = MEMSTAT_ERROR_UNDEFINED; + return (mtlp); +} + +struct memory_type * +memstat_mtl_first(struct memory_type_list *list) +{ + + return (LIST_FIRST(&list->mtl_list)); +} + +struct memory_type * +memstat_mtl_next(struct memory_type *mtp) +{ + + return (LIST_NEXT(mtp, mt_list)); +} + +void +_memstat_mtl_empty(struct memory_type_list *list) +{ + struct memory_type *mtp; + + while ((mtp = LIST_FIRST(&list->mtl_list))) { + free(mtp->mt_percpu_alloc); + free(mtp->mt_percpu_cache); + LIST_REMOVE(mtp, mt_list); + free(mtp); + } +} + +void +memstat_mtl_free(struct memory_type_list *list) +{ + + _memstat_mtl_empty(list); + free(list); +} + +int +memstat_mtl_geterror(struct memory_type_list *list) +{ + + return (list->mtl_error); +} + +/* + * Look for an existing memory_type entry in a memory_type list, based on the + * allocator and name of the type. If not found, return NULL. No errno or + * memstat error. + */ +struct memory_type * +memstat_mtl_find(struct memory_type_list *list, int allocator, + const char *name) +{ + struct memory_type *mtp; + + LIST_FOREACH(mtp, &list->mtl_list, mt_list) { + if ((mtp->mt_allocator == allocator || + allocator == ALLOCATOR_ANY) && + strcmp(mtp->mt_name, name) == 0) + return (mtp); + } + return (NULL); +} + +/* + * Allocate a new memory_type with the specified allocator type and name, + * then insert into the list. The structure will be zero'd. + * + * libmemstat(3) internal function.
+ */ +struct memory_type * +_memstat_mt_allocate(struct memory_type_list *list, int allocator, + const char *name, int maxcpus) +{ + struct memory_type *mtp; + + mtp = malloc(sizeof(*mtp)); + if (mtp == NULL) + return (NULL); + + bzero(mtp, sizeof(*mtp)); + + mtp->mt_allocator = allocator; + mtp->mt_percpu_alloc = malloc(sizeof(struct mt_percpu_alloc_s) * + maxcpus); + mtp->mt_percpu_cache = malloc(sizeof(struct mt_percpu_cache_s) * + maxcpus); + strlcpy(mtp->mt_name, name, MEMTYPE_MAXNAME); + LIST_INSERT_HEAD(&list->mtl_list, mtp, mt_list); + return (mtp); +} + +/* + * Reset any libmemstat(3)-owned statistics in a memory_type record so that + * it can be reused without incremental addition problems. Caller-owned + * memory is left "as-is", and must be updated by the caller if desired. + * + * libmemstat(3) internal function. + */ +void +_memstat_mt_reset_stats(struct memory_type *mtp, int maxcpus) +{ + int i; + + mtp->mt_countlimit = 0; + mtp->mt_byteslimit = 0; + mtp->mt_sizemask = 0; + mtp->mt_size = 0; + + mtp->mt_memalloced = 0; + mtp->mt_memfreed = 0; + mtp->mt_numallocs = 0; + mtp->mt_numfrees = 0; + mtp->mt_bytes = 0; + mtp->mt_count = 0; + mtp->mt_free = 0; + mtp->mt_failures = 0; + mtp->mt_sleeps = 0; + + mtp->mt_zonefree = 0; + mtp->mt_kegfree = 0; + + for (i = 0; i < maxcpus; i++) { + mtp->mt_percpu_alloc[i].mtp_memalloced = 0; + mtp->mt_percpu_alloc[i].mtp_memfreed = 0; + mtp->mt_percpu_alloc[i].mtp_numallocs = 0; + mtp->mt_percpu_alloc[i].mtp_numfrees = 0; + mtp->mt_percpu_alloc[i].mtp_sizemask = 0; + mtp->mt_percpu_cache[i].mtp_free = 0; + } +} + +/* + * Accessor methods for struct memory_type. Avoids encoding the structure + * ABI into the application. 
+ */ +const char * +memstat_get_name(const struct memory_type *mtp) +{ + + return (mtp->mt_name); +} + +int +memstat_get_allocator(const struct memory_type *mtp) +{ + + return (mtp->mt_allocator); +} + +uint64_t +memstat_get_countlimit(const struct memory_type *mtp) +{ + + return (mtp->mt_countlimit); +} + +uint64_t +memstat_get_byteslimit(const struct memory_type *mtp) +{ + + return (mtp->mt_byteslimit); +} + +uint64_t +memstat_get_sizemask(const struct memory_type *mtp) +{ + + return (mtp->mt_sizemask); +} + +uint64_t +memstat_get_size(const struct memory_type *mtp) +{ + + return (mtp->mt_size); +} + +uint64_t +memstat_get_memalloced(const struct memory_type *mtp) +{ + + return (mtp->mt_memalloced); +} + +uint64_t +memstat_get_memfreed(const struct memory_type *mtp) +{ + + return (mtp->mt_memfreed); +} + +uint64_t +memstat_get_numallocs(const struct memory_type *mtp) +{ + + return (mtp->mt_numallocs); +} + +uint64_t +memstat_get_numfrees(const struct memory_type *mtp) +{ + + return (mtp->mt_numfrees); +} + +uint64_t +memstat_get_bytes(const struct memory_type *mtp) +{ + + return (mtp->mt_bytes); +} + +uint64_t +memstat_get_count(const struct memory_type *mtp) +{ + + return (mtp->mt_count); +} + +uint64_t +memstat_get_free(const struct memory_type *mtp) +{ + + return (mtp->mt_free); +} + +uint64_t +memstat_get_failures(const struct memory_type *mtp) +{ + + return (mtp->mt_failures); +} + +uint64_t +memstat_get_sleeps(const struct memory_type *mtp) +{ + + return (mtp->mt_sleeps); +} + +void * +memstat_get_caller_pointer(const struct memory_type *mtp, int index) +{ + + return (mtp->mt_caller_pointer[index]); +} + +void +memstat_set_caller_pointer(struct memory_type *mtp, int index, void *value) +{ + + mtp->mt_caller_pointer[index] = value; +} + +uint64_t +memstat_get_caller_uint64(const struct memory_type *mtp, int index) +{ + + return (mtp->mt_caller_uint64[index]); +} + +void +memstat_set_caller_uint64(struct memory_type *mtp, int index, uint64_t value) +{ + + 
mtp->mt_caller_uint64[index] = value; +} + +uint64_t +memstat_get_zonefree(const struct memory_type *mtp) +{ + + return (mtp->mt_zonefree); +} + +uint64_t +memstat_get_kegfree(const struct memory_type *mtp) +{ + + return (mtp->mt_kegfree); +} + +uint64_t +memstat_get_percpu_memalloced(const struct memory_type *mtp, int cpu) +{ + + return (mtp->mt_percpu_alloc[cpu].mtp_memalloced); +} + +uint64_t +memstat_get_percpu_memfreed(const struct memory_type *mtp, int cpu) +{ + + return (mtp->mt_percpu_alloc[cpu].mtp_memfreed); +} + +uint64_t +memstat_get_percpu_numallocs(const struct memory_type *mtp, int cpu) +{ + + return (mtp->mt_percpu_alloc[cpu].mtp_numallocs); +} + +uint64_t +memstat_get_percpu_numfrees(const struct memory_type *mtp, int cpu) +{ + + return (mtp->mt_percpu_alloc[cpu].mtp_numfrees); +} + +uint64_t +memstat_get_percpu_sizemask(const struct memory_type *mtp, int cpu) +{ + + return (mtp->mt_percpu_alloc[cpu].mtp_sizemask); +} + +void * +memstat_get_percpu_caller_pointer(const struct memory_type *mtp, int cpu, + int index) +{ + + return (mtp->mt_percpu_alloc[cpu].mtp_caller_pointer[index]); +} + +void +memstat_set_percpu_caller_pointer(struct memory_type *mtp, int cpu, + int index, void *value) +{ + + mtp->mt_percpu_alloc[cpu].mtp_caller_pointer[index] = value; +} + +uint64_t +memstat_get_percpu_caller_uint64(const struct memory_type *mtp, int cpu, + int index) +{ + + return (mtp->mt_percpu_alloc[cpu].mtp_caller_uint64[index]); +} + +void +memstat_set_percpu_caller_uint64(struct memory_type *mtp, int cpu, int index, + uint64_t value) +{ + + mtp->mt_percpu_alloc[cpu].mtp_caller_uint64[index] = value; +} + +uint64_t +memstat_get_percpu_free(const struct memory_type *mtp, int cpu) +{ + + return (mtp->mt_percpu_cache[cpu].mtp_free); +} diff --git a/lib/libuinet_memstat/memstat.h b/lib/libuinet_memstat/memstat.h new file mode 100644 index 0000000..0f88005 --- /dev/null +++ b/lib/libuinet_memstat/memstat.h @@ -0,0 +1,168 @@ +/*- + * Copyright (c) 2005 Robert N. 
M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: stable/9/lib/libmemstat/memstat.h 224569 2011-08-01 09:43:35Z pluknet $ + */ + +#ifndef _MEMSTAT_H_ +#define _MEMSTAT_H_ + +/* + * Amount of caller data to maintain for each caller data slot. Applications + * must not request more than this number of caller save data, or risk + * corrupting internal libmemstat(3) data structures. A compile time check + * in the application is probably appropriate. 
+ */ +#define MEMSTAT_MAXCALLER 16 + +/* + * libmemstat(3) is able to extract memory data from different allocators; + * when it does so, it tags which allocator it got the data from so that + * consumers can determine which fields are usable, as data returned varies + * some. + */ +#define ALLOCATOR_UNKNOWN 0 +#define ALLOCATOR_MALLOC 1 +#define ALLOCATOR_UMA 2 +#define ALLOCATOR_ANY 255 + +/* + * Library maximum type name. Should be max(set of name maximums over + * various allocators). + */ +#define MEMTYPE_MAXNAME 32 + +/* + * Library error conditions, mostly from the underlying data sources. On + * failure, functions typically return (-1) or (NULL); on success, (0) or a + * valid data pointer. The error from the last operation is stored in + * struct memory_type_list, and accessed via memstat_get_error(list). + */ +#define MEMSTAT_ERROR_UNDEFINED 0 /* Initialization value. */ +#define MEMSTAT_ERROR_NOMEMORY 1 /* Out of memory. */ +#define MEMSTAT_ERROR_VERSION 2 /* Unsupported version. */ +#define MEMSTAT_ERROR_PERMISSION 3 /* Permission denied. */ +#define MEMSTAT_ERROR_DATAERROR 5 /* Error in stat data. */ +#define MEMSTAT_ERROR_KVM 6 /* See kvm_geterr() for err. */ +#define MEMSTAT_ERROR_KVM_NOSYMBOL 7 /* Symbol not available. */ +#define MEMSTAT_ERROR_KVM_SHORTREAD 8 /* Short kvm_read return. */ + +/* + * Forward declare struct memory_type, which holds per-type properties and + * statistics. This is an opaque type, to be frobbed only from within the + * library, in order to avoid building ABI assumptions into the application. + * Accessor methods should be used to get and sometimes set the fields from + * consumers of the library. + */ +struct memory_type; + +/* + * struct memory_type_list is the head of a list of memory types and + * statistics. + */ +struct memory_type_list; + +__BEGIN_DECLS +/* + * Functions that operate without memory type or memory type list context. 
+ */ +const char *memstat_strerror(int error); + +/* + * Functions for managing memory type and statistics data. + */ +struct memory_type_list *memstat_mtl_alloc(void); +struct memory_type *memstat_mtl_first(struct memory_type_list *list); +struct memory_type *memstat_mtl_next(struct memory_type *mtp); +struct memory_type *memstat_mtl_find(struct memory_type_list *list, + int allocator, const char *name); +void memstat_mtl_free(struct memory_type_list *list); +int memstat_mtl_geterror(struct memory_type_list *list); + +/* + * Functions to retrieve data from a live kernel using sysctl. + */ +int memstat_sysctl_all(struct memory_type_list *list, int flags); +int memstat_sysctl_malloc(struct memory_type_list *list, int flags); +int memstat_sysctl_uma(struct memory_type_list *list, int flags); + +/* + * Functions to retrieve data from a kernel core (or /dev/kmem). + */ +int memstat_kvm_all(struct memory_type_list *list, void *kvm_handle); +int memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle); +int memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle); + +/* + * Accessor methods for struct memory_type. 
+ */ +const char *memstat_get_name(const struct memory_type *mtp); +int memstat_get_allocator(const struct memory_type *mtp); +uint64_t memstat_get_countlimit(const struct memory_type *mtp); +uint64_t memstat_get_byteslimit(const struct memory_type *mtp); +uint64_t memstat_get_sizemask(const struct memory_type *mtp); +uint64_t memstat_get_size(const struct memory_type *mtp); +uint64_t memstat_get_memalloced(const struct memory_type *mtp); +uint64_t memstat_get_memfreed(const struct memory_type *mtp); +uint64_t memstat_get_numallocs(const struct memory_type *mtp); +uint64_t memstat_get_numfrees(const struct memory_type *mtp); +uint64_t memstat_get_bytes(const struct memory_type *mtp); +uint64_t memstat_get_count(const struct memory_type *mtp); +uint64_t memstat_get_free(const struct memory_type *mtp); +uint64_t memstat_get_failures(const struct memory_type *mtp); +uint64_t memstat_get_sleeps(const struct memory_type *mtp); +void *memstat_get_caller_pointer(const struct memory_type *mtp, + int index); +void memstat_set_caller_pointer(struct memory_type *mtp, + int index, void *value); +uint64_t memstat_get_caller_uint64(const struct memory_type *mtp, + int index); +void memstat_set_caller_uint64(struct memory_type *mtp, int index, + uint64_t value); +uint64_t memstat_get_zonefree(const struct memory_type *mtp); +uint64_t memstat_get_kegfree(const struct memory_type *mtp); +uint64_t memstat_get_percpu_memalloced(const struct memory_type *mtp, + int cpu); +uint64_t memstat_get_percpu_memfreed(const struct memory_type *mtp, + int cpu); +uint64_t memstat_get_percpu_numallocs(const struct memory_type *mtp, + int cpu); +uint64_t memstat_get_percpu_numfrees(const struct memory_type *mtp, + int cpu); +uint64_t memstat_get_percpu_sizemask(const struct memory_type *mtp, + int cpu); +void *memstat_get_percpu_caller_pointer( + const struct memory_type *mtp, int cpu, int index); +void memstat_set_percpu_caller_pointer(struct memory_type *mtp, + int cpu, int index, void *value); 
+uint64_t memstat_get_percpu_caller_uint64( + const struct memory_type *mtp, int cpu, int index); +void memstat_set_percpu_caller_uint64(struct memory_type *mtp, + int cpu, int index, uint64_t value); +uint64_t memstat_get_percpu_free(const struct memory_type *mtp, + int cpu); +__END_DECLS + +#endif /* !_MEMSTAT_H_ */ diff --git a/lib/libuinet_memstat/memstat_all.c b/lib/libuinet_memstat/memstat_all.c new file mode 100644 index 0000000..ef5f475 --- /dev/null +++ b/lib/libuinet_memstat/memstat_all.c @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2005 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: stable/9/lib/libmemstat/memstat_all.c 148789 2005-08-06 13:54:03Z rwatson $ + */ + +#include +#include + +#include "memstat.h" + +/* + * Query all available memory allocator sources. Currently this consists of + * malloc(9) and UMA(9). + */ +int +memstat_sysctl_all(struct memory_type_list *mtlp, int flags) +{ + + if (memstat_sysctl_malloc(mtlp, flags) < 0) + return (-1); + if (memstat_sysctl_uma(mtlp, flags) < 0) + return (-1); + return (0); +} + +int +memstat_kvm_all(struct memory_type_list *mtlp, void *kvm_handle) +{ + + if (memstat_kvm_malloc(mtlp, kvm_handle) < 0) + return (-1); + if (memstat_kvm_uma(mtlp, kvm_handle) < 0) + return (-1); + return (0); +} diff --git a/lib/libuinet_memstat/memstat_internal.h b/lib/libuinet_memstat/memstat_internal.h new file mode 100644 index 0000000..8053c9b --- /dev/null +++ b/lib/libuinet_memstat/memstat_internal.h @@ -0,0 +1,126 @@ +/*- + * Copyright (c) 2005 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: stable/9/lib/libmemstat/memstat_internal.h 224569 2011-08-01 09:43:35Z pluknet $ + */ + +#ifndef _MEMSTAT_INTERNAL_H_ +#define _MEMSTAT_INTERNAL_H_ + +/* + * memstat maintains its own internal notion of statistics on each memory + * type, common across UMA and kernel malloc. Some fields are straight from + * the allocator statistics, others are derived when extracted from the + * kernel. A struct memory_type will describe each type supported by an + * allocator. memory_type structures can be chained into lists. + */ +struct memory_type { + /* + * Static properties of type. + */ + int mt_allocator; /* malloc(9), uma(9), etc. */ + char mt_name[MEMTYPE_MAXNAME]; /* name of memory type. */ + + /* + * (Relatively) static zone settings, that don't uniquely identify + * the zone, but also don't change much. + */ + uint64_t mt_countlimit; /* 0, or maximum allocations. */ + uint64_t mt_byteslimit; /* 0, or maximum bytes. */ + uint64_t mt_sizemask; /* malloc: allocated size bitmask. */ + uint64_t mt_size; /* uma: size of objects. */ + + /* + * Zone or type information that includes all caches and any central + * zone state. Depending on the allocator, this may be synthesized + * from several sources, or directly measured. + */ + uint64_t mt_memalloced; /* Bytes allocated over life time. */ + uint64_t mt_memfreed; /* Bytes freed over life time. */ + uint64_t mt_numallocs; /* Allocations over life time. 
*/ + uint64_t mt_numfrees; /* Frees over life time. */ + uint64_t mt_bytes; /* Bytes currently allocated. */ + uint64_t mt_count; /* Number of current allocations. */ + uint64_t mt_free; /* Number of cached free items. */ + uint64_t mt_failures; /* Number of allocation failures. */ + uint64_t mt_sleeps; /* Number of allocation sleeps. */ + + /* + * Caller-owned memory. + */ + void *mt_caller_pointer[MEMSTAT_MAXCALLER]; /* Pointers. */ + uint64_t mt_caller_uint64[MEMSTAT_MAXCALLER]; /* Integers. */ + + /* + * For allocators making use of per-CPU caches, we also provide raw + * statistics from the central allocator and each per-CPU cache, + * which (combined) sometimes make up the above general statistics. + * + * First, central zone/type state, all numbers excluding any items + * cached in per-CPU caches. + * + * XXXRW: Might be desirable to separately expose allocation stats + * from zone, which should (combined with per-cpu) add up to the + * global stats above. + */ + uint64_t mt_zonefree; /* Free items in zone. */ + uint64_t mt_kegfree; /* Free items in keg. */ + + /* + * Per-CPU measurements fall into two categories: per-CPU allocation, + * and per-CPU cache state. + */ + struct mt_percpu_alloc_s { + uint64_t mtp_memalloced;/* Per-CPU mt_memalloced. */ + uint64_t mtp_memfreed; /* Per-CPU mt_memfreed. */ + uint64_t mtp_numallocs; /* Per-CPU mt_numallocs. */ + uint64_t mtp_numfrees; /* Per-CPU mt_numfrees. */ + uint64_t mtp_sizemask; /* Per-CPU mt_sizemask. */ + void *mtp_caller_pointer[MEMSTAT_MAXCALLER]; + uint64_t mtp_caller_uint64[MEMSTAT_MAXCALLER]; + } *mt_percpu_alloc; + + struct mt_percpu_cache_s { + uint64_t mtp_free; /* Per-CPU cache free items. */ + } *mt_percpu_cache; + + LIST_ENTRY(memory_type) mt_list; /* List of types. */ +}; + +/* + * Description of struct memory_type_list is in memstat.h. 
+ */ +struct memory_type_list { + LIST_HEAD(, memory_type) mtl_list; + int mtl_error; +}; + +void _memstat_mtl_empty(struct memory_type_list *list); +struct memory_type *_memstat_mt_allocate(struct memory_type_list *list, + int allocator, const char *name, int maxcpus); +void _memstat_mt_reset_stats(struct memory_type *mtp, + int maxcpus); + +#endif /* !_MEMSTAT_INTERNAL_H_ */ diff --git a/lib/libuinet_memstat/memstat_malloc.c b/lib/libuinet_memstat/memstat_malloc.c new file mode 100644 index 0000000..d02d40c --- /dev/null +++ b/lib/libuinet_memstat/memstat_malloc.c @@ -0,0 +1,405 @@ +/*- + * Copyright (c) 2005 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD: stable/9/lib/libmemstat/memstat_malloc.c 224569 2011-08-01 09:43:35Z pluknet $ + */ + +#include +#include +#include +#include + +#include +#include +//#include +#include +#include +#include +#include + +#include "memstat.h" +#include "memstat_internal.h" + +static struct nlist namelist[] = { +#define X_KMEMSTATISTICS 0 + { .n_name = "_kmemstatistics" }, +#define X_MP_MAXCPUS 1 + { .n_name = "_mp_maxcpus" }, + { .n_name = "" }, +}; + +/* + * Extract malloc(9) statistics from the running kernel, and store all memory + * type information in the passed list. For each type, check the list for an + * existing entry with the right name/allocator -- if present, update that + * entry. Otherwise, add a new entry. On error, the entire list will be + * cleared, as entries will be in an inconsistent state. + * + * To reduce the level of work for a list that starts empty, we keep around a + * hint as to whether it was empty when we began, so we can avoid searching + * the list for entries to update. Updates are O(n^2) due to searching for + * each entry before adding it. + */ +int +memstat_sysctl_malloc(struct memory_type_list *list, int flags) +{ + struct malloc_type_stream_header *mtshp; + struct malloc_type_header *mthp; + struct malloc_type_stats *mtsp; + struct memory_type *mtp; + int count, hint_dontsearch, i, j, maxcpus; + char *buffer, *p; + size_t size; + + hint_dontsearch = LIST_EMPTY(&list->mtl_list); + + /* + * Query the number of CPUs, number of malloc types so that we can + * guess an initial buffer size. We loop until we succeed or really + * fail. Note that the value of maxcpus we query using sysctl is not + * the version we use when processing the real data -- that is read + * from the header. 
+ */ +retry: + size = sizeof(maxcpus); + if (sysctlbyname("kern.smp.maxcpus", &maxcpus, &size, NULL, 0) < 0) { + if (errno == EACCES || errno == EPERM) + list->mtl_error = MEMSTAT_ERROR_PERMISSION; + else + list->mtl_error = MEMSTAT_ERROR_DATAERROR; + return (-1); + } + if (size != sizeof(maxcpus)) { + list->mtl_error = MEMSTAT_ERROR_DATAERROR; + return (-1); + } + + size = sizeof(count); + if (sysctlbyname("kern.malloc_count", &count, &size, NULL, 0) < 0) { + if (errno == EACCES || errno == EPERM) + list->mtl_error = MEMSTAT_ERROR_PERMISSION; + else + list->mtl_error = MEMSTAT_ERROR_VERSION; + return (-1); + } + if (size != sizeof(count)) { + list->mtl_error = MEMSTAT_ERROR_DATAERROR; + return (-1); + } + + size = sizeof(*mthp) + count * (sizeof(*mthp) + sizeof(*mtsp) * + maxcpus); + + buffer = malloc(size); + if (buffer == NULL) { + list->mtl_error = MEMSTAT_ERROR_NOMEMORY; + return (-1); + } + + if (sysctlbyname("kern.malloc_stats", buffer, &size, NULL, 0) < 0) { + /* + * XXXRW: ENOMEM is an ambiguous return, we should bound the + * number of loops, perhaps. + */ + if (errno == ENOMEM) { + free(buffer); + goto retry; + } + if (errno == EACCES || errno == EPERM) + list->mtl_error = MEMSTAT_ERROR_PERMISSION; + else + list->mtl_error = MEMSTAT_ERROR_VERSION; + free(buffer); + return (-1); + } + + if (size == 0) { + free(buffer); + return (0); + } + + if (size < sizeof(*mtshp)) { + list->mtl_error = MEMSTAT_ERROR_VERSION; + free(buffer); + return (-1); + } + p = buffer; + mtshp = (struct malloc_type_stream_header *)p; + p += sizeof(*mtshp); + + if (mtshp->mtsh_version != MALLOC_TYPE_STREAM_VERSION) { + list->mtl_error = MEMSTAT_ERROR_VERSION; + free(buffer); + return (-1); + } + + /* + * For the remainder of this function, we are quite trusting about + * the layout of structures and sizes, since we've determined we have + * a matching version and acceptable CPU count. 
+ */ + maxcpus = mtshp->mtsh_maxcpus; + count = mtshp->mtsh_count; + for (i = 0; i < count; i++) { + mthp = (struct malloc_type_header *)p; + p += sizeof(*mthp); + + if (hint_dontsearch == 0) { + mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC, + mthp->mth_name); + } else + mtp = NULL; + if (mtp == NULL) + mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC, + mthp->mth_name, maxcpus); + if (mtp == NULL) { + _memstat_mtl_empty(list); + free(buffer); + list->mtl_error = MEMSTAT_ERROR_NOMEMORY; + return (-1); + } + + /* + * Reset the statistics on a current node. + */ + _memstat_mt_reset_stats(mtp, maxcpus); + + for (j = 0; j < maxcpus; j++) { + mtsp = (struct malloc_type_stats *)p; + p += sizeof(*mtsp); + + /* + * Summarize raw statistics across CPUs into coalesced + * statistics. + */ + mtp->mt_memalloced += mtsp->mts_memalloced; + mtp->mt_memfreed += mtsp->mts_memfreed; + mtp->mt_numallocs += mtsp->mts_numallocs; + mtp->mt_numfrees += mtsp->mts_numfrees; + mtp->mt_sizemask |= mtsp->mts_size; + + /* + * Copies of per-CPU statistics. + */ + mtp->mt_percpu_alloc[j].mtp_memalloced = + mtsp->mts_memalloced; + mtp->mt_percpu_alloc[j].mtp_memfreed = + mtsp->mts_memfreed; + mtp->mt_percpu_alloc[j].mtp_numallocs = + mtsp->mts_numallocs; + mtp->mt_percpu_alloc[j].mtp_numfrees = + mtsp->mts_numfrees; + mtp->mt_percpu_alloc[j].mtp_sizemask = + mtsp->mts_size; + } + + /* + * Derived cross-CPU statistics.
+ */ + mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; + mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; + } + + free(buffer); + + return (0); +} + +#if 0 +static int +kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, + size_t offset) +{ + ssize_t ret; + + ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, + size); + if (ret < 0) + return (MEMSTAT_ERROR_KVM); + if ((size_t)ret != size) + return (MEMSTAT_ERROR_KVM_SHORTREAD); + return (0); +} + +static int +kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) +{ + ssize_t ret; + int i; + + for (i = 0; i < buflen; i++) { + ret = kvm_read(kvm, __DECONST(unsigned long, kvm_pointer) + + i, &(buffer[i]), sizeof(char)); + if (ret < 0) + return (MEMSTAT_ERROR_KVM); + if ((size_t)ret != sizeof(char)) + return (MEMSTAT_ERROR_KVM_SHORTREAD); + if (buffer[i] == '\0') + return (0); + } + /* Truncate. */ + buffer[i-1] = '\0'; + return (0); +} + +static int +kread_symbol(kvm_t *kvm, int index, void *address, size_t size, + size_t offset) +{ + ssize_t ret; + + ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); + if (ret < 0) + return (MEMSTAT_ERROR_KVM); + if ((size_t)ret != size) + return (MEMSTAT_ERROR_KVM_SHORTREAD); + return (0); +} + +int +memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle) +{ + struct memory_type *mtp; + void *kmemstatistics; + int hint_dontsearch, j, mp_maxcpus, ret; + char name[MEMTYPE_MAXNAME]; + struct malloc_type_stats *mts, *mtsp; + struct malloc_type_internal *mtip; + struct malloc_type type, *typep; + kvm_t *kvm; + + kvm = (kvm_t *)kvm_handle; + + hint_dontsearch = LIST_EMPTY(&list->mtl_list); + + if (kvm_nlist(kvm, namelist) != 0) { + list->mtl_error = MEMSTAT_ERROR_KVM; + return (-1); + } + + if (namelist[X_KMEMSTATISTICS].n_type == 0 || + namelist[X_KMEMSTATISTICS].n_value == 0) { + list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; + return (-1); + } + + ret = kread_symbol(kvm, X_MP_MAXCPUS, 
&mp_maxcpus, + sizeof(mp_maxcpus), 0); + if (ret != 0) { + list->mtl_error = ret; + return (-1); + } + + ret = kread_symbol(kvm, X_KMEMSTATISTICS, &kmemstatistics, + sizeof(kmemstatistics), 0); + if (ret != 0) { + list->mtl_error = ret; + return (-1); + } + + mts = malloc(sizeof(struct malloc_type_stats) * mp_maxcpus); + if (mts == NULL) { + list->mtl_error = MEMSTAT_ERROR_NOMEMORY; + return (-1); + } + + for (typep = kmemstatistics; typep != NULL; typep = type.ks_next) { + ret = kread(kvm, typep, &type, sizeof(type), 0); + if (ret != 0) { + _memstat_mtl_empty(list); + free(mts); + list->mtl_error = ret; + return (-1); + } + ret = kread_string(kvm, (void *)type.ks_shortdesc, name, + MEMTYPE_MAXNAME); + if (ret != 0) { + _memstat_mtl_empty(list); + free(mts); + list->mtl_error = ret; + return (-1); + } + + /* + * Since our compile-time value for MAXCPU may differ from the + * kernel's, we populate our own array. + */ + mtip = type.ks_handle; + ret = kread(kvm, mtip->mti_stats, mts, mp_maxcpus * + sizeof(struct malloc_type_stats), 0); + if (ret != 0) { + _memstat_mtl_empty(list); + free(mts); + list->mtl_error = ret; + return (-1); + } + + if (hint_dontsearch == 0) { + mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC, name); + } else + mtp = NULL; + if (mtp == NULL) + mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC, + name, mp_maxcpus); + if (mtp == NULL) { + _memstat_mtl_empty(list); + free(mts); + list->mtl_error = MEMSTAT_ERROR_NOMEMORY; + return (-1); + } + + /* + * This logic is replicated from kern_malloc.c, and should + * be kept in sync. 
+ */ + _memstat_mt_reset_stats(mtp, mp_maxcpus); + for (j = 0; j < mp_maxcpus; j++) { + mtsp = &mts[j]; + mtp->mt_memalloced += mtsp->mts_memalloced; + mtp->mt_memfreed += mtsp->mts_memfreed; + mtp->mt_numallocs += mtsp->mts_numallocs; + mtp->mt_numfrees += mtsp->mts_numfrees; + mtp->mt_sizemask |= mtsp->mts_size; + + mtp->mt_percpu_alloc[j].mtp_memalloced = + mtsp->mts_memalloced; + mtp->mt_percpu_alloc[j].mtp_memfreed = + mtsp->mts_memfreed; + mtp->mt_percpu_alloc[j].mtp_numallocs = + mtsp->mts_numallocs; + mtp->mt_percpu_alloc[j].mtp_numfrees = + mtsp->mts_numfrees; + mtp->mt_percpu_alloc[j].mtp_sizemask = + mtsp->mts_size; + } + + mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; + mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; + } + + return (0); +} + +#endif diff --git a/lib/libuinet_memstat/memstat_uma.c b/lib/libuinet_memstat/memstat_uma.c new file mode 100644 index 0000000..06e1074 --- /dev/null +++ b/lib/libuinet_memstat/memstat_uma.c @@ -0,0 +1,477 @@ +/*- + * Copyright (c) 2005-2006 Robert N. M. Watson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: stable/9/lib/libmemstat/memstat_uma.c 242365 2012-10-30 17:05:12Z mdf $ + */ + +#include +#include +//#include + +//#include +//#include + +//#include +//#include + +#include "l_uma.h" +#include "l_uma_int.h" +#include "u_sysctl.h" + +#include +#include +//#include +#include +#include +#include +#include +#include +#include + +#include "memstat.h" +#include "memstat_internal.h" + +static struct nlist namelist[] = { +#define X_UMA_KEGS 0 + { .n_name = "_uma_kegs" }, +#define X_MP_MAXID 1 + { .n_name = "_mp_maxid" }, +#define X_ALL_CPUS 2 + { .n_name = "_all_cpus" }, + { .n_name = "" }, +}; + +/* + * Extract uma(9) statistics from the running kernel, and store all memory + * type information in the passed list. For each type, check the list for an + * existing entry with the right name/allocator -- if present, update that + * entry. Otherwise, add a new entry. On error, the entire list will be + * cleared, as entries will be in an inconsistent state. + * + * To reduce the level of work for a list that starts empty, we keep around a + * hint as to whether it was empty when we began, so we can avoid searching + * the list for entries to update. Updates are O(n^2) due to searching for + * each entry before adding it. 
+ */ +int +memstat_sysctl_uma(struct memory_type_list *list, int flags) +{ + struct uma_stream_header *ushp; + struct uma_type_header *uthp; + struct uma_percpu_stat *upsp; + struct memory_type *mtp; + int count, hint_dontsearch, i, j, maxcpus, maxid; + char *buffer, *p; + size_t size; + int fd; + + fd = u_sysctl_open(); + if (fd < 0) { + fprintf(stderr, "%s: u_sysctl_open() failed\n", __func__); + return (-1); + } + + hint_dontsearch = LIST_EMPTY(&list->mtl_list); + + /* + * Query the highest CPU id and the number of UMA zones so that we can + * guess an initial buffer size. We loop until we succeed or really + * fail. Note that the CPU count we derive from sysctl (maxid+1) is not + * the one we use when walking the per-CPU records -- that is read + * from the stream header. + */ +retry: + size = sizeof(maxid); + if (u_sysctlbyname(fd, "kern.smp.maxid", &maxid, &size, NULL, 0) < 0) { + if (errno == EACCES || errno == EPERM) + list->mtl_error = MEMSTAT_ERROR_PERMISSION; + else + list->mtl_error = MEMSTAT_ERROR_DATAERROR; + return (-1); + } + if (size != sizeof(maxid)) { + list->mtl_error = MEMSTAT_ERROR_DATAERROR; + return (-1); + } + + size = sizeof(count); + if (u_sysctlbyname(fd, "vm.zone_count", &count, &size, NULL, 0) < 0) { + if (errno == EACCES || errno == EPERM) + list->mtl_error = MEMSTAT_ERROR_PERMISSION; + else + list->mtl_error = MEMSTAT_ERROR_VERSION; + return (-1); + } + if (size != sizeof(count)) { + list->mtl_error = MEMSTAT_ERROR_DATAERROR; + return (-1); + } + + size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) * + (maxid + 1)); + + buffer = malloc(size); + if (buffer == NULL) { + list->mtl_error = MEMSTAT_ERROR_NOMEMORY; + return (-1); + } + + if (u_sysctlbyname(fd, "vm.zone_stats", buffer, &size, NULL, 0) < 0) { + /* + * XXXRW: ENOMEM is an ambiguous return, we should bound the + * number of loops, perhaps.
+ */ + if (errno == ENOMEM) { + free(buffer); + goto retry; + } + if (errno == EACCES || errno == EPERM) + list->mtl_error = MEMSTAT_ERROR_PERMISSION; + else + list->mtl_error = MEMSTAT_ERROR_VERSION; + free(buffer); + return (-1); + } + + if (size == 0) { + free(buffer); + return (0); + } + + if (size < sizeof(*ushp)) { + list->mtl_error = MEMSTAT_ERROR_VERSION; + free(buffer); + return (-1); + } + p = buffer; + ushp = (struct uma_stream_header *)p; + p += sizeof(*ushp); + + if (ushp->ush_version != UMA_STREAM_VERSION) { + list->mtl_error = MEMSTAT_ERROR_VERSION; + free(buffer); + return (-1); + } + + /* + * For the remainder of this function, we are quite trusting about + * the layout of structures and sizes, since we've determined we have + * a matching version and acceptable CPU count. + */ + maxcpus = ushp->ush_maxcpus; + count = ushp->ush_count; + for (i = 0; i < count; i++) { + uthp = (struct uma_type_header *)p; + p += sizeof(*uthp); + + if (hint_dontsearch == 0) { + mtp = memstat_mtl_find(list, ALLOCATOR_UMA, + uthp->uth_name); + } else + mtp = NULL; + if (mtp == NULL) + mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, + uthp->uth_name, maxid + 1); + if (mtp == NULL) { + _memstat_mtl_empty(list); + free(buffer); + list->mtl_error = MEMSTAT_ERROR_NOMEMORY; + return (-1); + } + + /* + * Reset the statistics on a current node. 
+ */ + _memstat_mt_reset_stats(mtp, maxid + 1); + + mtp->mt_numallocs = uthp->uth_allocs; + mtp->mt_numfrees = uthp->uth_frees; + mtp->mt_failures = uthp->uth_fails; + mtp->mt_sleeps = uthp->uth_sleeps; + + for (j = 0; j < maxcpus; j++) { + upsp = (struct uma_percpu_stat *)p; + p += sizeof(*upsp); + + mtp->mt_percpu_cache[j].mtp_free = + upsp->ups_cache_free; + mtp->mt_free += upsp->ups_cache_free; + mtp->mt_numallocs += upsp->ups_allocs; + mtp->mt_numfrees += upsp->ups_frees; + } + + mtp->mt_size = uthp->uth_size; + mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size; + mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size; + mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; + mtp->mt_countlimit = uthp->uth_limit; + mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size; + + mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; + mtp->mt_zonefree = uthp->uth_zone_free; + + /* + * UMA secondary zones share a keg with the primary zone. To + * avoid double-reporting of free items, report keg free + * items only in the primary zone. 
+ */ + if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) { + mtp->mt_kegfree = uthp->uth_keg_free; + mtp->mt_free += mtp->mt_kegfree; + } + mtp->mt_free += mtp->mt_zonefree; + } + + free(buffer); + + return (0); +} + +#if 0 +static int +kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size, + size_t offset) +{ + ssize_t ret; + + ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address, + size); + if (ret < 0) + return (MEMSTAT_ERROR_KVM); + if ((size_t)ret != size) + return (MEMSTAT_ERROR_KVM_SHORTREAD); + return (0); +} + +static int +kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen) +{ + ssize_t ret; + int i; + + for (i = 0; i < buflen; i++) { + ret = kvm_read(kvm, (unsigned long)kvm_pointer + i, + &(buffer[i]), sizeof(char)); + if (ret < 0) + return (MEMSTAT_ERROR_KVM); + if ((size_t)ret != sizeof(char)) + return (MEMSTAT_ERROR_KVM_SHORTREAD); + if (buffer[i] == '\0') + return (0); + } + /* Truncate. */ + buffer[i-1] = '\0'; + return (0); +} + +static int +kread_symbol(kvm_t *kvm, int index, void *address, size_t size, + size_t offset) +{ + ssize_t ret; + + ret = kvm_read(kvm, namelist[index].n_value + offset, address, size); + if (ret < 0) + return (MEMSTAT_ERROR_KVM); + if ((size_t)ret != size) + return (MEMSTAT_ERROR_KVM_SHORTREAD); + return (0); +} + +/* + * memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts + * UMA(9) statistics from a kernel core/memory file. 
+ */ +int +memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) +{ + LIST_HEAD(, uma_keg) uma_kegs; + struct memory_type *mtp; + struct uma_bucket *ubp, ub; + struct uma_cache *ucp, *ucp_array; + struct uma_zone *uzp, uz; + struct uma_keg *kzp, kz; + int hint_dontsearch, i, mp_maxid, ret; + char name[MEMTYPE_MAXNAME]; + cpuset_t all_cpus; + long cpusetsize; + kvm_t *kvm; + + kvm = (kvm_t *)kvm_handle; + hint_dontsearch = LIST_EMPTY(&list->mtl_list); + if (kvm_nlist(kvm, namelist) != 0) { + list->mtl_error = MEMSTAT_ERROR_KVM; + return (-1); + } + if (namelist[X_UMA_KEGS].n_type == 0 || + namelist[X_UMA_KEGS].n_value == 0) { + list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; + return (-1); + } + ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0); + if (ret != 0) { + list->mtl_error = ret; + return (-1); + } + ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0); + if (ret != 0) { + list->mtl_error = ret; + return (-1); + } + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { + list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; + return (-1); + } + CPU_ZERO(&all_cpus); + ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); + if (ret != 0) { + list->mtl_error = ret; + return (-1); + } + ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1)); + if (ucp_array == NULL) { + list->mtl_error = MEMSTAT_ERROR_NOMEMORY; + return (-1); + } + for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp = + LIST_NEXT(&kz, uk_link)) { + ret = kread(kvm, kzp, &kz, sizeof(kz), 0); + if (ret != 0) { + free(ucp_array); + _memstat_mtl_empty(list); + list->mtl_error = ret; + return (-1); + } + for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp = + LIST_NEXT(&uz, uz_link)) { + ret = kread(kvm, uzp, &uz, sizeof(uz), 0); + if (ret != 0) { + free(ucp_array); + _memstat_mtl_empty(list); + list->mtl_error = ret; + return (-1); + } + ret = kread(kvm, uzp, ucp_array, + sizeof(struct uma_cache) * 
(mp_maxid + 1), + offsetof(struct uma_zone, uz_cpu[0])); + if (ret != 0) { + free(ucp_array); + _memstat_mtl_empty(list); + list->mtl_error = ret; + return (-1); + } + ret = kread_string(kvm, uz.uz_name, name, + MEMTYPE_MAXNAME); + if (ret != 0) { + free(ucp_array); + _memstat_mtl_empty(list); + list->mtl_error = ret; + return (-1); + } + if (hint_dontsearch == 0) { + mtp = memstat_mtl_find(list, ALLOCATOR_UMA, + name); + } else + mtp = NULL; + if (mtp == NULL) + mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA, + name, mp_maxid + 1); + if (mtp == NULL) { + free(ucp_array); + _memstat_mtl_empty(list); + list->mtl_error = MEMSTAT_ERROR_NOMEMORY; + return (-1); + } + /* + * Reset the statistics on a current node. + */ + _memstat_mt_reset_stats(mtp, mp_maxid + 1); + mtp->mt_numallocs = uz.uz_allocs; + mtp->mt_numfrees = uz.uz_frees; + mtp->mt_failures = uz.uz_fails; + mtp->mt_sleeps = uz.uz_sleeps; + if (kz.uk_flags & UMA_ZFLAG_INTERNAL) + goto skip_percpu; + for (i = 0; i < mp_maxid + 1; i++) { + if (!CPU_ISSET(i, &all_cpus)) + continue; + ucp = &ucp_array[i]; + mtp->mt_numallocs += ucp->uc_allocs; + mtp->mt_numfrees += ucp->uc_frees; + + if (ucp->uc_allocbucket != NULL) { + ret = kread(kvm, ucp->uc_allocbucket, + &ub, sizeof(ub), 0); + if (ret != 0) { + free(ucp_array); + _memstat_mtl_empty(list); + list->mtl_error = ret; + return (-1); + } + mtp->mt_free += ub.ub_cnt; + } + if (ucp->uc_freebucket != NULL) { + ret = kread(kvm, ucp->uc_freebucket, + &ub, sizeof(ub), 0); + if (ret != 0) { + free(ucp_array); + _memstat_mtl_empty(list); + list->mtl_error = ret; + return (-1); + } + mtp->mt_free += ub.ub_cnt; + } + } +skip_percpu: + mtp->mt_size = kz.uk_size; + mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size; + mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size; + mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed; + if (kz.uk_ppera > 1) + mtp->mt_countlimit = kz.uk_maxpages / + kz.uk_ipers; + else + mtp->mt_countlimit = kz.uk_maxpages * + kz.uk_ipers; + 
mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; + mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; + for (ubp = LIST_FIRST(&uz.uz_full_bucket); ubp != + NULL; ubp = LIST_NEXT(&ub, ub_link)) { + ret = kread(kvm, ubp, &ub, sizeof(ub), 0); + mtp->mt_zonefree += ub.ub_cnt; + } + if (!((kz.uk_flags & UMA_ZONE_SECONDARY) && + LIST_FIRST(&kz.uk_zones) != uzp)) { + mtp->mt_kegfree = kz.uk_free; + mtp->mt_free += mtp->mt_kegfree; + } + mtp->mt_free += mtp->mt_zonefree; + } + } + free(ucp_array); + return (0); +} +#endif diff --git a/lib/libuinet_memstat/u_sysctl.c b/lib/libuinet_memstat/u_sysctl.c new file mode 100644 index 0000000..645c534 --- /dev/null +++ b/lib/libuinet_memstat/u_sysctl.c @@ -0,0 +1,267 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include /* for round_page() */ + +#include + +#include "uinet_host_sysctl_api.h" +#include "uinet_nv.h" + +/* + * XXX TODO: + * + * + the sysctl shm stuff should be a transaction based thing + * + the API should be modified so it returns the buffer, and then + * has a "finish" function that frees it if appropriate - that + * way for shm buffers we don't need to double allocate things. + * + .. we shouldn't be doing all the mmap / munmap stuff - it + * will cause IPI shootdowns as the memory map in the libuinet + * using code has its memory map change. I'll solve that + * later. 
+ */
+
+/*
+ * Complete the request in "nvl", send it over the socket "ns" and decode the
+ * reply.
+ *
+ * oldp/oldlenp describe the caller's result buffer and newp/newlen the value
+ * to write, as for sysctl(3).  When *oldlenp is at least
+ * U_SYSCTL_MAX_REQ_BUF_LEN a temporary POSIX shared memory object is created
+ * and its path and length are added to the request, so that the reply data
+ * can come back through it instead of inline.
+ *
+ * On return *oldlenp (if given) holds the length reported by the reply.  At
+ * most the original *oldlenp bytes are copied into oldp; a longer reply is
+ * truncated and reported as ENOMEM.
+ *
+ * Returns 0 when the reply carries sysctl_errno == 0.  Otherwise returns -1
+ * with errno set to the reply's sysctl_errno, to the errno of the local call
+ * that failed, or to EIO when the reply has no sysctl_errno at all.
+ */
+static int
+u_sysctl_do_sysctl(struct nvlist *nvl, int ns,
+    void *oldp,
+    size_t *oldlenp,
+    const void *newp,
+    size_t newlen)
+{
+	nvlist_t *nvl_resp = NULL;
+	int retval = -1;
+	int r_errno = 0;
+	const char *rbuf = NULL;
+	size_t r_len = 0;
+	size_t capacity, copy_len;
+
+	/* XXX Eventually this should be in a sysctl transaction struct */
+	int shm_fd = -1;
+	char *shm_mem = NULL;
+	size_t shm_len = 0;
+	char shm_path[128];
+
+	/* Remember the buffer size now; *oldlenp is overwritten below. */
+	capacity = (oldlenp != NULL) ? *oldlenp : 0;
+
+	/* Setup request and response buffer information */
+
+	/*
+	 * If the requested size is provided and it's greater than the
+	 * maximum size allowed, we'll flip to using shm
+	 */
+	if (oldlenp != NULL && *oldlenp >= U_SYSCTL_MAX_REQ_BUF_LEN) {
+		/* Construct a shm path */
+		/* XXX should make this less guessable */
+		snprintf(shm_path, sizeof(shm_path), "/sysctl.%ld",
+		    (long) arc4random());
+
+		/*
+		 * Open it.  O_EXCL makes us fail rather than adopt an object
+		 * that somebody else already created under this name.
+		 */
+		shm_fd = shm_open(shm_path, O_CREAT | O_EXCL | O_RDWR, 0640);
+		if (shm_fd < 0) {
+			r_errno = errno;
+			warn("shm_open (%s)", shm_path);
+			goto done;
+		}
+
+		/*
+		 * Calculate a mmap size that's a multiple of
+		 * the system page length.
+		 */
+		shm_len = round_page(*oldlenp);
+
+		/* make it that big! */
+		if (ftruncate(shm_fd, shm_len) < 0) {
+			r_errno = errno;
+			warn("ftruncate");
+			goto done;
+		}
+
+		/*
+		 * mmap it.  The mapping has to be MAP_SHARED for the peer's
+		 * writes to be visible here, and failure is reported as
+		 * MAP_FAILED, not NULL.
+		 */
+		shm_mem = mmap(NULL, shm_len, PROT_READ | PROT_WRITE,
+		    MAP_SHARED, shm_fd, 0);
+		if (shm_mem == MAP_FAILED) {
+			r_errno = errno;
+			shm_mem = NULL;	/* nothing to munmap at "done" */
+			warn("mmap");
+			goto done;
+		}
+
+		/* add the shm path to the outbound request */
+		nvlist_add_string(nvl, "sysctl_respbuf_shm_path", shm_path);
+		nvlist_add_number(nvl, "sysctl_respbuf_shm_len", shm_len);
+	}
+
+	/*
+	 * Writing a value may pass in a NULL oldlenp, so only conditionally
+	 * send it.
+	 */
+	if (oldlenp != NULL)
+		nvlist_add_number(nvl, "sysctl_respbuf_len", *oldlenp);
+
+	if (newlen > 0) {
+		nvlist_add_binary(nvl, "sysctl_reqbuf", newp, newlen);
+	}
+
+	/* Send command */
+	if (nvlist_send(ns, nvl) < 0) {
+		r_errno = errno;
+		warn("nvlist_send");
+		goto done;
+	}
+
+	/* Read response */
+	nvl_resp = nvlist_recv(ns);
+	if (nvl_resp == NULL) {
+		r_errno = errno;
+		warn("nvlist_recv");
+		goto done;
+	}
+
+	if (! nvlist_exists_number(nvl_resp, "sysctl_errno")) {
+		fprintf(stderr, "response: no errno?\n");
+		/* A reply without a status is a failure, not a success. */
+		r_errno = EIO;
+		goto done;
+	}
+	r_errno = (int) nvlist_get_number(nvl_resp, "sysctl_errno");
+
+	/*
+	 * Locate the reply data: inline, in the shm segment, or length only.
+	 * The shm length is only read when the reply actually carries it.
+	 */
+	if (nvlist_exists_binary(nvl_resp, "sysctl_respbuf")) {
+		rbuf = nvlist_get_binary(nvl_resp, "sysctl_respbuf", &r_len);
+	} else if (shm_mem != NULL &&
+	    nvlist_exists_number(nvl_resp, "sysctl_respbuf_shm_len")) {
+		r_len = nvlist_get_number(nvl_resp, "sysctl_respbuf_shm_len");
+		rbuf = shm_mem;
+	} else if (nvlist_exists_number(nvl_resp, "sysctl_respbuf_len")) {
+		r_len = nvlist_get_number(nvl_resp, "sysctl_respbuf_len");
+	} else {
+		r_len = 0;
+	}
+
+	/*
+	 * Never copy more than the caller's buffer holds.  capacity is also
+	 * no larger than shm_len, so the shm read stays inside the mapping.
+	 */
+	if (oldp != NULL && rbuf != NULL && r_len > 0) {
+		copy_len = r_len;
+		if (copy_len > capacity) {
+			copy_len = capacity;
+			if (r_errno == 0)
+				r_errno = ENOMEM;
+		}
+		memcpy(oldp, rbuf, copy_len);
+	}
+
+	if (oldlenp != NULL)
+		*oldlenp = r_len;
+
+	if (r_errno == 0)
+		retval = 0;
+
+done:
+	if (shm_mem != NULL)
+		munmap(shm_mem, shm_len);
+	if (shm_fd != -1) {
+		close(shm_fd);
+		shm_unlink(shm_path);
+	}
+	if (nvl_resp)
+		nvlist_destroy(nvl_resp);
+	/* Set errno last so that the cleanup calls above cannot clobber it. */
+	if (retval != 0 && r_errno != 0)
+		errno = r_errno;
+	return (retval);
+}
+
+/*
+ * sysctlbyname(3) equivalent that sends a "sysctl_str" request for "name"
+ * over the socket "ns".  Returns 0 on success and -1 on failure.
+ */
+int
+u_sysctlbyname(int ns,
+    const char *name,
+    void *oldp,
+    size_t *oldlenp,
+    const void *newp,
+    size_t newlen)
+{
+	nvlist_t *nvl = NULL;
+	int retval = 0;
+
+	/* Create nvlist to populate the request into */
+	nvl = nvlist_create(0);
+	if (nvl == NULL) {
+		warn("nvlist_create");
+		retval = -1;
+		goto done;
+	}
+
+	/* Create nvlist for a sysctl_str request */
+	nvlist_add_string(nvl, "type", "sysctl_str");
+	nvlist_add_string(nvl, "sysctl_str", name);
+
+	/* XXX this sets errno as appropriate */
+	retval = u_sysctl_do_sysctl(nvl, ns, oldp, oldlenp, newp, newlen);
+
+done:
+	if (nvl)
+		nvlist_destroy(nvl);
+	return (retval);
+}
+
+/*
+ * sysctl(3) equivalent that sends a "sysctl_oid" request carrying the
+ * "namelen" integers at "oid" over the socket "ns".  Returns 0 on success
+ * and -1 on failure.
+ */
+int
+u_sysctl(int ns,
+    int *oid,
+    u_int namelen,
+    void *oldp,
+    size_t *oldlenp,
+    const void *newp,
+    size_t newlen)
+{
+	nvlist_t *nvl = NULL;
+	int retval = 0;
+
+#if 0
+	printf("sysctl: nl=%d, oldp=%p, oldlen=%d, newp=%p, newlen=%d\n",
+	    namelen,
+	    oldp,
+	    (int) *oldlenp,
+	    newp,
+	    (int) newlen);
+#endif
+
+	/* Create nvlist to populate the request into */
+	nvl = nvlist_create(0);
+	if (nvl == NULL) {
+		warn("nvlist_create");
+		retval = -1;
+		goto done;
+	}
+
+	/* Create nvlist for a sysctl_oid request */
+	nvlist_add_string(nvl, "type", "sysctl_oid");
+	nvlist_add_binary(nvl, "sysctl_oid", oid, namelen * sizeof(int));
+
+	/* XXX this sets errno as appropriate */
+	retval = u_sysctl_do_sysctl(nvl, ns, oldp, oldlenp, newp, newlen);
+
+done:
+	if (nvl)
+		nvlist_destroy(nvl);
+	return (retval);
+}
+
+/*
+ * Connect to the sysctl listener on the UNIX socket /tmp/sysctl.sock.
+ *
+ * Returns the connected descriptor, or -1 with errno set after printing a
+ * warning.  Failure is returned rather than exiting the process so that
+ * callers, which already test for a negative result, decide what to do.
+ */
+int
+u_sysctl_open(void)
+{
+	int s;
+	struct sockaddr_un sun;
+	int r;
+	int saved_errno;
+
+	/* Connect to the destination socket */
+	bzero(&sun, sizeof(sun));
+
+	strcpy(sun.sun_path, "/tmp/sysctl.sock");
+	sun.sun_len = 0;
+	sun.sun_family = AF_UNIX;
+
+	s = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (s < 0) {
+		saved_errno = errno;
+		warn("socket");
+		errno = saved_errno;
+		return (-1);
+	}
+
+	r = connect(s, (struct sockaddr *) &sun, sizeof(struct sockaddr_un));
+	if (r < 0) {
+		saved_errno = errno;
+		warn("connect");
+		close(s);
+		errno = saved_errno;
+		return (-1);
+	}
+
+	return (s);
+}
diff --git a/lib/libuinet_memstat/u_sysctl.h b/lib/libuinet_memstat/u_sysctl.h
new file mode 100644
index 0000000..3f1b20e
--- /dev/null
+++ b/lib/libuinet_memstat/u_sysctl.h
@@ -0,0 +1,12 @@
+#ifndef __U_SYSCTL_H__
+#define __U_SYSCTL_H__
+
+extern int u_sysctlbyname(int ns, const char *name, void *oldp,
+ size_t *oldlenp, const void *newp, size_t newlen);
+
+extern int u_sysctl(int ns, int *oid, u_int namelen, void *oldp,
+ size_t *oldlenp, const void *newp, size_t newlen);
+
+extern int u_sysctl_open(void);
+
+#endif /* __U_SYSCTL_H__ */
From 021e5748b057a235a66162f5e6b7430f7d7e4e47 Mon Sep 17
00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Jul 2014 08:48:00 -0700 Subject: [PATCH 085/148] Migrate the malloc path to use u_sysctl. --- lib/libuinet_memstat/memstat_malloc.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/libuinet_memstat/memstat_malloc.c b/lib/libuinet_memstat/memstat_malloc.c index d02d40c..fea63fa 100644 --- a/lib/libuinet_memstat/memstat_malloc.c +++ b/lib/libuinet_memstat/memstat_malloc.c @@ -29,7 +29,7 @@ #include #include #include -#include +//#include #include #include @@ -42,6 +42,9 @@ #include "memstat.h" #include "memstat_internal.h" +#include "u_sysctl.h" +#include "uinet_host_sysctl_api.h" + static struct nlist namelist[] = { #define X_KMEMSTATISTICS 0 { .n_name = "_kmemstatistics" }, @@ -72,6 +75,13 @@ memstat_sysctl_malloc(struct memory_type_list *list, int flags) int count, hint_dontsearch, i, j, maxcpus; char *buffer, *p; size_t size; + int fd; + + fd = u_sysctl_open(); + if (fd < 0) { + fprintf(stderr, "%s: u_sysctl_open() failed\n", __func__); + return (-1); + } hint_dontsearch = LIST_EMPTY(&list->mtl_list); @@ -84,7 +94,7 @@ memstat_sysctl_malloc(struct memory_type_list *list, int flags) */ retry: size = sizeof(maxcpus); - if (sysctlbyname("kern.smp.maxcpus", &maxcpus, &size, NULL, 0) < 0) { + if (u_sysctlbyname(fd, "kern.smp.maxcpus", &maxcpus, &size, NULL, 0) < 0) { if (errno == EACCES || errno == EPERM) list->mtl_error = MEMSTAT_ERROR_PERMISSION; else @@ -97,7 +107,7 @@ memstat_sysctl_malloc(struct memory_type_list *list, int flags) } size = sizeof(count); - if (sysctlbyname("kern.malloc_count", &count, &size, NULL, 0) < 0) { + if (u_sysctlbyname(fd, "kern.malloc_count", &count, &size, NULL, 0) < 0) { if (errno == EACCES || errno == EPERM) list->mtl_error = MEMSTAT_ERROR_PERMISSION; else @@ -118,7 +128,7 @@ memstat_sysctl_malloc(struct memory_type_list *list, int flags) return (-1); } - if (sysctlbyname("kern.malloc_stats", buffer, &size, NULL, 0) < 0) { + if (u_sysctlbyname(fd, 
"kern.malloc_stats", buffer, &size, NULL, 0) < 0) { /* * XXXRW: ENOMEM is an ambiguous return, we should bound the * number of loops, perhaps. From 602a81d08e34599e5711001057d4a2d338bcaa86 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Jul 2014 09:30:40 -0700 Subject: [PATCH 086/148] fix off-by-one comparison. --- lib/libuinet/uinet_host_sysctl_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_host_sysctl_api.c b/lib/libuinet/uinet_host_sysctl_api.c index eabcf62..83c39f1 100644 --- a/lib/libuinet/uinet_host_sysctl_api.c +++ b/lib/libuinet/uinet_host_sysctl_api.c @@ -255,7 +255,7 @@ passive_sysctl_handle_resp(struct u_sysctl_state_t *us) * is within bounds for the response back to the * client. */ - if (us->wbuf != NULL && us->error == 0 && us->rval >= us->wbuf_len) { + if (us->wbuf != NULL && us->error == 0 && us->rval > us->wbuf_len) { UINET_SYSCTL_DPRINTF("%s: fd %d: rval (%llu) > wbuf_len (%llu)\n", __func__, us->ns, From 8146ed875910493072afc49d92931c10b3238acf Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 17 Jul 2014 09:30:48 -0700 Subject: [PATCH 087/148] Add SNMP maxid. Required for memstat consumer. --- lib/libuinet/uinet_subr_smp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/libuinet/uinet_subr_smp.c b/lib/libuinet/uinet_subr_smp.c index 6394681..f43252d 100644 --- a/lib/libuinet/uinet_subr_smp.c +++ b/lib/libuinet/uinet_subr_smp.c @@ -37,6 +37,7 @@ #include #include #include +#include /* This is used in modules that need to work in both SMP and UP. 
*/ @@ -49,6 +50,9 @@ int mp_maxcpus = MAXCPU; volatile int smp_started; u_int mp_maxid; +SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL, "Kernel SMP"); +SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0, + "Max CPU ID."); /* XXX temporary until final pcpu approach is determined */ struct mtx uinet_pcpu_locks[MAXCPU];
From c06361cbbd5f76f7c0e9fcc703f424131baf2045 Mon Sep 17 00:00:00 2001
From: Adrian Chadd
Date: Thu, 17 Jul 2014 09:57:54 -0700
Subject: [PATCH 088/148] Add in malloc stats - in case libuinet implements malloc tracking.

---
 bin/vmstat/vmstat_main.c    | 15 +++++++++
 bin/vmstat/vmstat_memstat.c | 65 +++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 bin/vmstat/vmstat_main.c
 create mode 100644 bin/vmstat/vmstat_memstat.c

diff --git a/bin/vmstat/vmstat_main.c b/bin/vmstat/vmstat_main.c
new file mode 100644
index 0000000..89a0d3d
--- /dev/null
+++ b/bin/vmstat/vmstat_main.c
@@ -0,0 +1,15 @@
+
+#include
+#include
+
+extern int domemstat_zone();
+extern int domemstat_malloc();
+
+int
+main(int argc, const char *argv[])
+{
+
+	/* libuinet doesn't track malloc statistics at the moment */
+	//domemstat_malloc();
+	domemstat_zone();
+}
diff --git a/bin/vmstat/vmstat_memstat.c b/bin/vmstat/vmstat_memstat.c
new file mode 100644
index 0000000..ec032eb
--- /dev/null
+++ b/bin/vmstat/vmstat_memstat.c
@@ -0,0 +1,65 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "memstat.h"
+
+/*
+ * Print a table of the memory types reported by memstat_sysctl_malloc():
+ * name, item count, bytes in KB (rounded up), number of allocations and the
+ * sizes selected by the type's size mask.  Types with no allocations and a
+ * zero count are skipped.
+ */
+void
+domemstat_malloc(void)
+{
+	struct memory_type_list *mtlp;
+	struct memory_type *mtp;
+	int first, i;
+
+	mtlp = memstat_mtl_alloc();
+	if (mtlp == NULL) {
+		warn("memstat_mtl_alloc");
+		return;
+	}
+	if (memstat_sysctl_malloc(mtlp, 0) < 0) {
+		warnx("memstat_sysctl_malloc: %s",
+		    memstat_strerror(memstat_mtl_geterror(mtlp)));
+		/* The list is ours to release on the error path as well. */
+		memstat_mtl_free(mtlp);
+		return;
+	}
+	printf("%13s %5s %6s %7s %8s Size(s)\n", "Type", "InUse", "MemUse",
+	    "HighUse", "Requests");
+	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
+	    mtp = memstat_mtl_next(mtp)) {
+		if (memstat_get_numallocs(mtp) == 0 &&
+		    memstat_get_count(mtp) == 0)
+			continue;
+		printf("%13s %5llu %5llu K %7s %8llu ",
+		    memstat_get_name(mtp),
+		    (unsigned long long) memstat_get_count(mtp),
+		    (unsigned long long) (memstat_get_bytes(mtp) + 1023) / 1024, "-",
+		    (unsigned long long) memstat_get_numallocs(mtp));
+		first = 1;
+		/*
+		 * The shifts are done on unsigned long long: with a plain
+		 * int, testing bit 31 and computing the size for i >= 27
+		 * would overflow.
+		 */
+		for (i = 0; i < 32; i++) {
+			if (memstat_get_sizemask(mtp) & (1ULL << i)) {
+				if (!first)
+					printf(",");
+				printf("%llu", 1ULL << (i + 4));
+				first = 0;
+			}
+		}
+		printf("\n");
+	}
+	memstat_mtl_free(mtlp);
+}
From c2885e01b65e22da8b5c20d4dc683487363331fa Mon Sep 17 00:00:00 2001
From: Adrian Chadd
Date: Thu, 17 Jul 2014 10:03:08 -0700
Subject: [PATCH 089/148] Add missing files!

---
 bin/vmstat/Makefile          | 14 +
 bin/vmstat/vmstat.c          | 1356 ++++++++++++++++++++++++++++++++++
 bin/vmstat/vmstat_zmemstat.c | 48 ++
 3 files changed, 1418 insertions(+)
 create mode 100644 bin/vmstat/Makefile
 create mode 100644 bin/vmstat/vmstat.c
 create mode 100644 bin/vmstat/vmstat_zmemstat.c

diff --git a/bin/vmstat/Makefile b/bin/vmstat/Makefile
new file mode 100644
index 0000000..168d718
--- /dev/null
+++ b/bin/vmstat/Makefile
@@ -0,0 +1,14 @@
+TOPDIR?=${CURDIR}/../..
+
+PROG=vmstat
+
+SRCS=vmstat_zmemstat.c vmstat_memstat.c vmstat_main.c
+
+UINET_LIBS=uinet
+
+CFLAGS= -I${TOPDIR}/lib/libuinet_memstat
+LDADD= ${TOPDIR}/lib/libuinet_memstat/libuinet_memstat.a ${TOPDIR}/lib/libuinetnv/libuinetnv.a
+
+DEBUG_FLAGS=-g -O0
+
+include ${TOPDIR}/mk/prog.mk
diff --git a/bin/vmstat/vmstat.c b/bin/vmstat/vmstat.c
new file mode 100644
index 0000000..c3a68d3
--- /dev/null
+++ b/bin/vmstat/vmstat.c
@@ -0,0 +1,1356 @@
+/*
+ * Copyright (c) 1980, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static const char copyright[] = +"@(#) Copyright (c) 1980, 1986, 1991, 1993\n\ + The Regents of the University of California. 
All rights reserved.\n"; +#endif /* not lint */ + +#if 0 +#ifndef lint +static char sccsid[] = "@(#)vmstat.c 8.1 (Berkeley) 6/6/93"; +#endif /* not lint */ +#endif + +#include +__FBSDID("$FreeBSD: stable/10/usr.bin/vmstat/vmstat.c 246034 2013-01-28 12:58:37Z zont $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static char da[] = "da"; + +static struct nlist namelist[] = { +#define X_SUM 0 + { "_cnt" }, +#define X_HZ 1 + { "_hz" }, +#define X_STATHZ 2 + { "_stathz" }, +#define X_NCHSTATS 3 + { "_nchstats" }, +#define X_INTRNAMES 4 + { "_intrnames" }, +#define X_SINTRNAMES 5 + { "_sintrnames" }, +#define X_INTRCNT 6 + { "_intrcnt" }, +#define X_SINTRCNT 7 + { "_sintrcnt" }, +#ifdef notyet +#define X_DEFICIT XXX + { "_deficit" }, +#define X_REC XXX + { "_rectime" }, +#define X_PGIN XXX + { "_pgintime" }, +#define X_XSTATS XXX + { "_xstats" }, +#define X_END XXX +#else +#define X_END 8 +#endif + { "" }, +}; + +static struct statinfo cur, last; +static int num_devices, maxshowdevs; +static long generation; +static struct device_selection *dev_select; +static int num_selected; +static struct devstat_match *matches; +static int num_matches = 0; +static int num_devices_specified, num_selections; +static long select_generation; +static char **specified_devices; +static devstat_select_mode select_mode; + +static struct vmmeter sum, osum; + +#define VMSTAT_DEFAULT_LINES 20 /* Default number of `winlines'. */ +volatile sig_atomic_t wresized; /* Tty resized, when non-zero. */ +static int winlines = VMSTAT_DEFAULT_LINES; /* Current number of tty rows. 
*/ + +static int aflag; +static int nflag; +static int Pflag; +static int hflag; + +static kvm_t *kd; + +#define FORKSTAT 0x01 +#define INTRSTAT 0x02 +#define MEMSTAT 0x04 +#define SUMSTAT 0x08 +#define TIMESTAT 0x10 +#define VMSTAT 0x20 +#define ZMEMSTAT 0x40 + +static void cpustats(void); +static void pcpustats(int, u_long, int); +static void devstats(void); +static void doforkst(void); +static void dointr(void); +static void dosum(void); +static void dovmstat(unsigned int, int); +static void domemstat_malloc(void); +static void domemstat_zone(void); +static void kread(int, void *, size_t); +static void kreado(int, void *, size_t, size_t); +static char *kgetstr(const char *); +static void needhdr(int); +static void needresize(int); +static void doresize(void); +static void printhdr(int, u_long); +static void usage(void); + +static long pct(long, long); +static long getuptime(void); + +static char **getdrivedata(char **); + +int +main(int argc, char *argv[]) +{ + int c, todo; + unsigned int interval; + float f; + int reps; + char *memf, *nlistf; + char errbuf[_POSIX2_LINE_MAX]; + + memf = nlistf = NULL; + interval = reps = todo = 0; + maxshowdevs = 2; + hflag = isatty(1); + while ((c = getopt(argc, argv, "ac:fhHiM:mN:n:Pp:stw:z")) != -1) { + switch (c) { + case 'a': + aflag++; + break; + case 'c': + reps = atoi(optarg); + break; + case 'P': + Pflag++; + break; + case 'f': + todo |= FORKSTAT; + break; + case 'h': + hflag = 1; + break; + case 'H': + hflag = 0; + break; + case 'i': + todo |= INTRSTAT; + break; + case 'M': + memf = optarg; + break; + case 'm': + todo |= MEMSTAT; + break; + case 'N': + nlistf = optarg; + break; + case 'n': + nflag = 1; + maxshowdevs = atoi(optarg); + if (maxshowdevs < 0) + errx(1, "number of devices %d is < 0", + maxshowdevs); + break; + case 'p': + if (devstat_buildmatch(optarg, &matches, &num_matches) != 0) + errx(1, "%s", devstat_errbuf); + break; + case 's': + todo |= SUMSTAT; + break; + case 't': +#ifdef notyet + todo |= TIMESTAT; 
+#else + errx(EX_USAGE, "sorry, -t is not (re)implemented yet"); +#endif + break; + case 'w': + /* Convert to milliseconds. */ + f = atof(optarg); + interval = f * 1000; + break; + case 'z': + todo |= ZMEMSTAT; + break; + case '?': + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (todo == 0) + todo = VMSTAT; + + if (memf != NULL) { + kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf); + if (kd == NULL) + errx(1, "kvm_openfiles: %s", errbuf); + } + + if (kd != NULL && (c = kvm_nlist(kd, namelist)) != 0) { + if (c > 0) { + warnx("undefined symbols:"); + for (c = 0; + c < (int)(sizeof(namelist)/sizeof(namelist[0])); + c++) + if (namelist[c].n_type == 0) + (void)fprintf(stderr, " %s", + namelist[c].n_name); + (void)fputc('\n', stderr); + } else + warnx("kvm_nlist: %s", kvm_geterr(kd)); + exit(1); + } + if (kd && Pflag) + errx(1, "Cannot use -P with crash dumps"); + + if (todo & VMSTAT) { + /* + * Make sure that the userland devstat version matches the + * kernel devstat version. If not, exit and print a + * message informing the user of his mistake. 
+ */ + if (devstat_checkversion(NULL) < 0) + errx(1, "%s", devstat_errbuf); + + + argv = getdrivedata(argv); + } + +#define BACKWARD_COMPATIBILITY +#ifdef BACKWARD_COMPATIBILITY + if (*argv) { + f = atof(*argv); + interval = f * 1000; + if (*++argv) + reps = atoi(*argv); + } +#endif + + if (interval) { + if (!reps) + reps = -1; + } else if (reps) + interval = 1 * 1000; + + if (todo & FORKSTAT) + doforkst(); + if (todo & MEMSTAT) + domemstat_malloc(); + if (todo & ZMEMSTAT) + domemstat_zone(); + if (todo & SUMSTAT) + dosum(); +#ifdef notyet + if (todo & TIMESTAT) + dotimes(); +#endif + if (todo & INTRSTAT) + dointr(); + if (todo & VMSTAT) + dovmstat(interval, reps); + exit(0); +} + +static int +mysysctl(const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int error; + + error = sysctlbyname(name, oldp, oldlenp, newp, newlen); + if (error != 0 && errno != ENOMEM) + err(1, "sysctl(%s)", name); + return (error); +} + +static char ** +getdrivedata(char **argv) +{ + if ((num_devices = devstat_getnumdevs(NULL)) < 0) + errx(1, "%s", devstat_errbuf); + + cur.dinfo = (struct devinfo *)calloc(1, sizeof(struct devinfo)); + last.dinfo = (struct devinfo *)calloc(1, sizeof(struct devinfo)); + + if (devstat_getdevs(NULL, &cur) == -1) + errx(1, "%s", devstat_errbuf); + + num_devices = cur.dinfo->numdevs; + generation = cur.dinfo->generation; + + specified_devices = (char **)malloc(sizeof(char *)); + for (num_devices_specified = 0; *argv; ++argv) { + if (isdigit(**argv)) + break; + num_devices_specified++; + specified_devices = (char **)realloc(specified_devices, + sizeof(char *) * + num_devices_specified); + specified_devices[num_devices_specified - 1] = *argv; + } + dev_select = NULL; + + if (nflag == 0 && maxshowdevs < num_devices_specified) + maxshowdevs = num_devices_specified; + + /* + * People are generally only interested in disk statistics when + * they're running vmstat. 
So, that's what we're going to give + * them if they don't specify anything by default. We'll also give + * them any other random devices in the system so that we get to + * maxshowdevs devices, if that many devices exist. If the user + * specifies devices on the command line, either through a pattern + * match or by naming them explicitly, we will give the user only + * those devices. + */ + if ((num_devices_specified == 0) && (num_matches == 0)) { + if (devstat_buildmatch(da, &matches, &num_matches) != 0) + errx(1, "%s", devstat_errbuf); + + select_mode = DS_SELECT_ADD; + } else + select_mode = DS_SELECT_ONLY; + + /* + * At this point, selectdevs will almost surely indicate that the + * device list has changed, so we don't look for return values of 0 + * or 1. If we get back -1, though, there is an error. + */ + if (devstat_selectdevs(&dev_select, &num_selected, &num_selections, + &select_generation, generation, cur.dinfo->devices, + num_devices, matches, num_matches, specified_devices, + num_devices_specified, select_mode, + maxshowdevs, 0) == -1) + errx(1, "%s", devstat_errbuf); + + return(argv); +} + +static long +getuptime(void) +{ + struct timespec sp; + + (void)clock_gettime(CLOCK_MONOTONIC, &sp); + + return(sp.tv_sec); +} + +static void +fill_pcpu(struct pcpu ***pcpup, int* maxcpup) +{ + struct pcpu **pcpu; + + int maxcpu, i; + + *pcpup = NULL; + + if (kd == NULL) + return; + + maxcpu = kvm_getmaxcpu(kd); + if (maxcpu < 0) + errx(1, "kvm_getmaxcpu: %s", kvm_geterr(kd)); + + pcpu = calloc(maxcpu, sizeof(struct pcpu *)); + if (pcpu == NULL) + err(1, "calloc"); + + for (i = 0; i < maxcpu; i++) { + pcpu[i] = kvm_getpcpu(kd, i); + if (pcpu[i] == (struct pcpu *)-1) + errx(1, "kvm_getpcpu: %s", kvm_geterr(kd)); + } + + *maxcpup = maxcpu; + *pcpup = pcpu; +} + +static void +free_pcpu(struct pcpu **pcpu, int maxcpu) +{ + int i; + + for (i = 0; i < maxcpu; i++) + free(pcpu[i]); + free(pcpu); +} + +static void +fill_vmmeter(struct vmmeter *vmmp) +{ + struct pcpu 
**pcpu; + int maxcpu, i; + + if (kd != NULL) { + kread(X_SUM, vmmp, sizeof(*vmmp)); + fill_pcpu(&pcpu, &maxcpu); + for (i = 0; i < maxcpu; i++) { + if (pcpu[i] == NULL) + continue; +#define ADD_FROM_PCPU(i, name) \ + vmmp->name += pcpu[i]->pc_cnt.name + ADD_FROM_PCPU(i, v_swtch); + ADD_FROM_PCPU(i, v_trap); + ADD_FROM_PCPU(i, v_syscall); + ADD_FROM_PCPU(i, v_intr); + ADD_FROM_PCPU(i, v_soft); + ADD_FROM_PCPU(i, v_vm_faults); + ADD_FROM_PCPU(i, v_io_faults); + ADD_FROM_PCPU(i, v_cow_faults); + ADD_FROM_PCPU(i, v_cow_optim); + ADD_FROM_PCPU(i, v_zfod); + ADD_FROM_PCPU(i, v_ozfod); + ADD_FROM_PCPU(i, v_swapin); + ADD_FROM_PCPU(i, v_swapout); + ADD_FROM_PCPU(i, v_swappgsin); + ADD_FROM_PCPU(i, v_swappgsout); + ADD_FROM_PCPU(i, v_vnodein); + ADD_FROM_PCPU(i, v_vnodeout); + ADD_FROM_PCPU(i, v_vnodepgsin); + ADD_FROM_PCPU(i, v_vnodepgsout); + ADD_FROM_PCPU(i, v_intrans); + ADD_FROM_PCPU(i, v_tfree); + ADD_FROM_PCPU(i, v_forks); + ADD_FROM_PCPU(i, v_vforks); + ADD_FROM_PCPU(i, v_rforks); + ADD_FROM_PCPU(i, v_kthreads); + ADD_FROM_PCPU(i, v_forkpages); + ADD_FROM_PCPU(i, v_vforkpages); + ADD_FROM_PCPU(i, v_rforkpages); + ADD_FROM_PCPU(i, v_kthreadpages); +#undef ADD_FROM_PCPU + } + free_pcpu(pcpu, maxcpu); + } else { + size_t size = sizeof(unsigned int); +#define GET_VM_STATS(cat, name) \ + mysysctl("vm.stats." #cat "." 
#name, &vmmp->name, &size, NULL, 0) + /* sys */ + GET_VM_STATS(sys, v_swtch); + GET_VM_STATS(sys, v_trap); + GET_VM_STATS(sys, v_syscall); + GET_VM_STATS(sys, v_intr); + GET_VM_STATS(sys, v_soft); + + /* vm */ + GET_VM_STATS(vm, v_vm_faults); + GET_VM_STATS(vm, v_io_faults); + GET_VM_STATS(vm, v_cow_faults); + GET_VM_STATS(vm, v_cow_optim); + GET_VM_STATS(vm, v_zfod); + GET_VM_STATS(vm, v_ozfod); + GET_VM_STATS(vm, v_swapin); + GET_VM_STATS(vm, v_swapout); + GET_VM_STATS(vm, v_swappgsin); + GET_VM_STATS(vm, v_swappgsout); + GET_VM_STATS(vm, v_vnodein); + GET_VM_STATS(vm, v_vnodeout); + GET_VM_STATS(vm, v_vnodepgsin); + GET_VM_STATS(vm, v_vnodepgsout); + GET_VM_STATS(vm, v_intrans); + GET_VM_STATS(vm, v_reactivated); + GET_VM_STATS(vm, v_pdwakeups); + GET_VM_STATS(vm, v_pdpages); + GET_VM_STATS(vm, v_tcached); + GET_VM_STATS(vm, v_dfree); + GET_VM_STATS(vm, v_pfree); + GET_VM_STATS(vm, v_tfree); + GET_VM_STATS(vm, v_page_size); + GET_VM_STATS(vm, v_page_count); + GET_VM_STATS(vm, v_free_reserved); + GET_VM_STATS(vm, v_free_target); + GET_VM_STATS(vm, v_free_min); + GET_VM_STATS(vm, v_free_count); + GET_VM_STATS(vm, v_wire_count); + GET_VM_STATS(vm, v_active_count); + GET_VM_STATS(vm, v_inactive_target); + GET_VM_STATS(vm, v_inactive_count); + GET_VM_STATS(vm, v_cache_count); + GET_VM_STATS(vm, v_cache_min); + GET_VM_STATS(vm, v_cache_max); + GET_VM_STATS(vm, v_pageout_free_min); + GET_VM_STATS(vm, v_interrupt_free_min); + /*GET_VM_STATS(vm, v_free_severe);*/ + GET_VM_STATS(vm, v_forks); + GET_VM_STATS(vm, v_vforks); + GET_VM_STATS(vm, v_rforks); + GET_VM_STATS(vm, v_kthreads); + GET_VM_STATS(vm, v_forkpages); + GET_VM_STATS(vm, v_vforkpages); + GET_VM_STATS(vm, v_rforkpages); + GET_VM_STATS(vm, v_kthreadpages); +#undef GET_VM_STATS + } +} + +static void +fill_vmtotal(struct vmtotal *vmtp) +{ + if (kd != NULL) { + /* XXX fill vmtp */ + errx(1, "not implemented"); + } else { + size_t size = sizeof(*vmtp); + mysysctl("vm.vmtotal", vmtp, &size, NULL, 0); + if (size != 
sizeof(*vmtp)) + errx(1, "vm.total size mismatch"); + } +} + +/* Determine how many cpu columns, and what index they are in kern.cp_times */ +static int +getcpuinfo(u_long *maskp, int *maxidp) +{ + int maxcpu; + int maxid; + int ncpus; + int i, j; + int empty; + size_t size; + long *times; + u_long mask; + + if (kd != NULL) + errx(1, "not implemented"); + mask = 0; + ncpus = 0; + size = sizeof(maxcpu); + mysysctl("kern.smp.maxcpus", &maxcpu, &size, NULL, 0); + if (size != sizeof(maxcpu)) + errx(1, "sysctl kern.smp.maxcpus"); + size = sizeof(long) * maxcpu * CPUSTATES; + times = malloc(size); + if (times == NULL) + err(1, "malloc %zd bytes", size); + mysysctl("kern.cp_times", times, &size, NULL, 0); + maxid = (size / CPUSTATES / sizeof(long)) - 1; + for (i = 0; i <= maxid; i++) { + empty = 1; + for (j = 0; empty && j < CPUSTATES; j++) { + if (times[i * CPUSTATES + j] != 0) + empty = 0; + } + if (!empty) { + mask |= (1ul << i); + ncpus++; + } + } + if (maskp) + *maskp = mask; + if (maxidp) + *maxidp = maxid; + return (ncpus); +} + + +static void +prthuman(u_int64_t val, int size) +{ + char buf[10]; + int flags; + + if (size < 5 || size > 9) + errx(1, "doofus"); + flags = HN_B | HN_NOSPACE | HN_DECIMAL; + humanize_number(buf, size, val, "", HN_AUTOSCALE, flags); + printf("%*s", size, buf); +} + +static int hz, hdrcnt; + +static long *cur_cp_times; +static long *last_cp_times; +static size_t size_cp_times; + +static void +dovmstat(unsigned int interval, int reps) +{ + struct vmtotal total; + time_t uptime, halfuptime; + struct devinfo *tmp_dinfo; + size_t size; + int ncpus, maxid; + u_long cpumask; + int rate_adj; + + uptime = getuptime(); + halfuptime = uptime / 2; + rate_adj = 1; + + /* + * If the user stops the program (control-Z) and then resumes it, + * print out the header again. 
+ */ + (void)signal(SIGCONT, needhdr); + + /* + * If our standard output is a tty, then install a SIGWINCH handler + * and set wresized so that our first iteration through the main + * vmstat loop will peek at the terminal's current rows to find out + * how many lines can fit in a screenful of output. + */ + if (isatty(fileno(stdout)) != 0) { + wresized = 1; + (void)signal(SIGWINCH, needresize); + } else { + wresized = 0; + winlines = VMSTAT_DEFAULT_LINES; + } + + if (kd != NULL) { + if (namelist[X_STATHZ].n_type != 0 && + namelist[X_STATHZ].n_value != 0) + kread(X_STATHZ, &hz, sizeof(hz)); + if (!hz) + kread(X_HZ, &hz, sizeof(hz)); + } else { + struct clockinfo clockrate; + + size = sizeof(clockrate); + mysysctl("kern.clockrate", &clockrate, &size, NULL, 0); + if (size != sizeof(clockrate)) + errx(1, "clockrate size mismatch"); + hz = clockrate.hz; + } + + if (Pflag) { + ncpus = getcpuinfo(&cpumask, &maxid); + size_cp_times = sizeof(long) * (maxid + 1) * CPUSTATES; + cur_cp_times = calloc(1, size_cp_times); + last_cp_times = calloc(1, size_cp_times); + } + for (hdrcnt = 1;;) { + if (!--hdrcnt) + printhdr(ncpus, cpumask); + if (kd != NULL) { + if (kvm_getcptime(kd, cur.cp_time) < 0) + errx(1, "kvm_getcptime: %s", kvm_geterr(kd)); + } else { + size = sizeof(cur.cp_time); + mysysctl("kern.cp_time", &cur.cp_time, &size, NULL, 0); + if (size != sizeof(cur.cp_time)) + errx(1, "cp_time size mismatch"); + } + if (Pflag) { + size = size_cp_times; + mysysctl("kern.cp_times", cur_cp_times, &size, NULL, 0); + if (size != size_cp_times) + errx(1, "cp_times mismatch"); + } + + tmp_dinfo = last.dinfo; + last.dinfo = cur.dinfo; + cur.dinfo = tmp_dinfo; + last.snap_time = cur.snap_time; + + /* + * Here what we want to do is refresh our device stats. + * getdevs() returns 1 when the device list has changed. + * If the device list has changed, we want to go through + * the selection process again, in case a device that we + * were previously displaying has gone away. 
+ */ + switch (devstat_getdevs(NULL, &cur)) { + case -1: + errx(1, "%s", devstat_errbuf); + break; + case 1: { + int retval; + + num_devices = cur.dinfo->numdevs; + generation = cur.dinfo->generation; + + retval = devstat_selectdevs(&dev_select, &num_selected, + &num_selections, &select_generation, + generation, cur.dinfo->devices, + num_devices, matches, num_matches, + specified_devices, + num_devices_specified, select_mode, + maxshowdevs, 0); + switch (retval) { + case -1: + errx(1, "%s", devstat_errbuf); + break; + case 1: + printhdr(ncpus, cpumask); + break; + default: + break; + } + } + default: + break; + } + + fill_vmmeter(&sum); + fill_vmtotal(&total); + (void)printf("%2d %1d %1d", + total.t_rq - 1, total.t_dw + total.t_pw, total.t_sw); +#define vmstat_pgtok(a) ((a) * (sum.v_page_size >> 10)) +#define rate(x) (((x) * rate_adj + halfuptime) / uptime) /* round */ + if (hflag) { + printf(" "); + prthuman(total.t_avm * (u_int64_t)sum.v_page_size, 7); + printf(" "); + prthuman(total.t_free * (u_int64_t)sum.v_page_size, 6); + printf(" "); + } else { + printf(" %7d ", vmstat_pgtok(total.t_avm)); + printf(" %6d ", vmstat_pgtok(total.t_free)); + } + (void)printf("%5lu ", + (unsigned long)rate(sum.v_vm_faults - osum.v_vm_faults)); + (void)printf("%3lu ", + (unsigned long)rate(sum.v_reactivated - osum.v_reactivated)); + (void)printf("%3lu ", + (unsigned long)rate(sum.v_swapin + sum.v_vnodein - + (osum.v_swapin + osum.v_vnodein))); + (void)printf("%3lu ", + (unsigned long)rate(sum.v_swapout + sum.v_vnodeout - + (osum.v_swapout + osum.v_vnodeout))); + (void)printf("%5lu ", + (unsigned long)rate(sum.v_tfree - osum.v_tfree)); + (void)printf("%3lu ", + (unsigned long)rate(sum.v_pdpages - osum.v_pdpages)); + devstats(); + (void)printf("%4lu %4lu %4lu", + (unsigned long)rate(sum.v_intr - osum.v_intr), + (unsigned long)rate(sum.v_syscall - osum.v_syscall), + (unsigned long)rate(sum.v_swtch - osum.v_swtch)); + if (Pflag) + pcpustats(ncpus, cpumask, maxid); + else + cpustats(); 
+ (void)printf("\n"); + (void)fflush(stdout); + if (reps >= 0 && --reps <= 0) + break; + osum = sum; + uptime = interval; + rate_adj = 1000; + /* + * We round upward to avoid losing low-frequency events + * (i.e., >= 1 per interval but < 1 per millisecond). + */ + if (interval != 1) + halfuptime = (uptime + 1) / 2; + else + halfuptime = 0; + (void)usleep(interval * 1000); + } +} + +static void +printhdr(int ncpus, u_long cpumask) +{ + int i, num_shown; + + num_shown = (num_selected < maxshowdevs) ? num_selected : maxshowdevs; + (void)printf(" procs memory page%*s", 19, ""); + if (num_shown > 1) + (void)printf(" disks %*s", num_shown * 4 - 7, ""); + else if (num_shown == 1) + (void)printf("disk"); + (void)printf(" faults "); + if (Pflag) { + for (i = 0; i < ncpus; i++) { + if (cpumask & (1ul << i)) + printf("cpu%-2d ", i); + } + printf("\n"); + } else + printf("cpu\n"); + (void)printf(" r b w avm fre flt re pi po fr sr "); + for (i = 0; i < num_devices; i++) + if ((dev_select[i].selected) + && (dev_select[i].selected <= maxshowdevs)) + (void)printf("%c%c%d ", dev_select[i].device_name[0], + dev_select[i].device_name[1], + dev_select[i].unit_number); + (void)printf(" in sy cs"); + if (Pflag) { + for (i = 0; i < ncpus; i++) + printf(" us sy id"); + printf("\n"); + } else + printf(" us sy id\n"); + if (wresized != 0) + doresize(); + hdrcnt = winlines; +} + +/* + * Force a header to be prepended to the next output. + */ +static void +needhdr(int dummy __unused) +{ + + hdrcnt = 1; +} + +/* + * When the terminal is resized, force an update of the maximum number of rows + * printed between each header repetition. Then force a new header to be + * prepended to the next output. + */ +void +needresize(int signo) +{ + + wresized = 1; + hdrcnt = 1; +} + +/* + * Update the global `winlines' count of terminal rows. 
+ */ +void +doresize(void) +{ + int status; + struct winsize w; + + for (;;) { + status = ioctl(fileno(stdout), TIOCGWINSZ, &w); + if (status == -1 && errno == EINTR) + continue; + else if (status == -1) + err(1, "ioctl"); + if (w.ws_row > 3) + winlines = w.ws_row - 3; + else + winlines = VMSTAT_DEFAULT_LINES; + break; + } + + /* + * Inhibit doresize() calls until we are rescheduled by SIGWINCH. + */ + wresized = 0; +} + +#ifdef notyet +static void +dotimes(void) +{ + unsigned int pgintime, rectime; + + kread(X_REC, &rectime, sizeof(rectime)); + kread(X_PGIN, &pgintime, sizeof(pgintime)); + kread(X_SUM, &sum, sizeof(sum)); + (void)printf("%u reclaims, %u total time (usec)\n", + sum.v_pgrec, rectime); + (void)printf("average: %u usec / reclaim\n", rectime / sum.v_pgrec); + (void)printf("\n"); + (void)printf("%u page ins, %u total time (msec)\n", + sum.v_pgin, pgintime / 10); + (void)printf("average: %8.1f msec / page in\n", + pgintime / (sum.v_pgin * 10.0)); +} +#endif + +static long +pct(long top, long bot) +{ + long ans; + + if (bot == 0) + return(0); + ans = (quad_t)top * 100 / bot; + return (ans); +} + +#define PCT(top, bot) pct((long)(top), (long)(bot)) + +static void +dosum(void) +{ + struct nchstats lnchstats; + long nchtotal; + + fill_vmmeter(&sum); + (void)printf("%9u cpu context switches\n", sum.v_swtch); + (void)printf("%9u device interrupts\n", sum.v_intr); + (void)printf("%9u software interrupts\n", sum.v_soft); + (void)printf("%9u traps\n", sum.v_trap); + (void)printf("%9u system calls\n", sum.v_syscall); + (void)printf("%9u kernel threads created\n", sum.v_kthreads); + (void)printf("%9u fork() calls\n", sum.v_forks); + (void)printf("%9u vfork() calls\n", sum.v_vforks); + (void)printf("%9u rfork() calls\n", sum.v_rforks); + (void)printf("%9u swap pager pageins\n", sum.v_swapin); + (void)printf("%9u swap pager pages paged in\n", sum.v_swappgsin); + (void)printf("%9u swap pager pageouts\n", sum.v_swapout); + (void)printf("%9u swap pager pages paged 
out\n", sum.v_swappgsout); + (void)printf("%9u vnode pager pageins\n", sum.v_vnodein); + (void)printf("%9u vnode pager pages paged in\n", sum.v_vnodepgsin); + (void)printf("%9u vnode pager pageouts\n", sum.v_vnodeout); + (void)printf("%9u vnode pager pages paged out\n", sum.v_vnodepgsout); + (void)printf("%9u page daemon wakeups\n", sum.v_pdwakeups); + (void)printf("%9u pages examined by the page daemon\n", sum.v_pdpages); + (void)printf("%9u pages reactivated\n", sum.v_reactivated); + (void)printf("%9u copy-on-write faults\n", sum.v_cow_faults); + (void)printf("%9u copy-on-write optimized faults\n", sum.v_cow_optim); + (void)printf("%9u zero fill pages zeroed\n", sum.v_zfod); + (void)printf("%9u zero fill pages prezeroed\n", sum.v_ozfod); + (void)printf("%9u intransit blocking page faults\n", sum.v_intrans); + (void)printf("%9u total VM faults taken\n", sum.v_vm_faults); + (void)printf("%9u page faults requiring I/O\n", sum.v_io_faults); + (void)printf("%9u pages affected by kernel thread creation\n", sum.v_kthreadpages); + (void)printf("%9u pages affected by fork()\n", sum.v_forkpages); + (void)printf("%9u pages affected by vfork()\n", sum.v_vforkpages); + (void)printf("%9u pages affected by rfork()\n", sum.v_rforkpages); + (void)printf("%9u pages cached\n", sum.v_tcached); + (void)printf("%9u pages freed\n", sum.v_tfree); + (void)printf("%9u pages freed by daemon\n", sum.v_dfree); + (void)printf("%9u pages freed by exiting processes\n", sum.v_pfree); + (void)printf("%9u pages active\n", sum.v_active_count); + (void)printf("%9u pages inactive\n", sum.v_inactive_count); + (void)printf("%9u pages in VM cache\n", sum.v_cache_count); + (void)printf("%9u pages wired down\n", sum.v_wire_count); + (void)printf("%9u pages free\n", sum.v_free_count); + (void)printf("%9u bytes per page\n", sum.v_page_size); + if (kd != NULL) { + kread(X_NCHSTATS, &lnchstats, sizeof(lnchstats)); + } else { + size_t size = sizeof(lnchstats); + mysysctl("vfs.cache.nchstats", &lnchstats, 
&size, NULL, 0); + if (size != sizeof(lnchstats)) + errx(1, "vfs.cache.nchstats size mismatch"); + } + nchtotal = lnchstats.ncs_goodhits + lnchstats.ncs_neghits + + lnchstats.ncs_badhits + lnchstats.ncs_falsehits + + lnchstats.ncs_miss + lnchstats.ncs_long; + (void)printf("%9ld total name lookups\n", nchtotal); + (void)printf( + "%9s cache hits (%ld%% pos + %ld%% neg) system %ld%% per-directory\n", + "", PCT(lnchstats.ncs_goodhits, nchtotal), + PCT(lnchstats.ncs_neghits, nchtotal), + PCT(lnchstats.ncs_pass2, nchtotal)); + (void)printf("%9s deletions %ld%%, falsehits %ld%%, toolong %ld%%\n", "", + PCT(lnchstats.ncs_badhits, nchtotal), + PCT(lnchstats.ncs_falsehits, nchtotal), + PCT(lnchstats.ncs_long, nchtotal)); +} + +static void +doforkst(void) +{ + fill_vmmeter(&sum); + (void)printf("%u forks, %u pages, average %.2f\n", + sum.v_forks, sum.v_forkpages, + sum.v_forks == 0 ? 0.0 : + (double)sum.v_forkpages / sum.v_forks); + (void)printf("%u vforks, %u pages, average %.2f\n", + sum.v_vforks, sum.v_vforkpages, + sum.v_vforks == 0 ? 0.0 : + (double)sum.v_vforkpages / sum.v_vforks); + (void)printf("%u rforks, %u pages, average %.2f\n", + sum.v_rforks, sum.v_rforkpages, + sum.v_rforks == 0 ? 
0.0 : + (double)sum.v_rforkpages / sum.v_rforks); +} + +static void +devstats(void) +{ + int dn, state; + long double transfers_per_second; + long double busy_seconds; + long tmp; + + for (state = 0; state < CPUSTATES; ++state) { + tmp = cur.cp_time[state]; + cur.cp_time[state] -= last.cp_time[state]; + last.cp_time[state] = tmp; + } + + busy_seconds = cur.snap_time - last.snap_time; + + for (dn = 0; dn < num_devices; dn++) { + int di; + + if ((dev_select[dn].selected == 0) + || (dev_select[dn].selected > maxshowdevs)) + continue; + + di = dev_select[dn].position; + + if (devstat_compute_statistics(&cur.dinfo->devices[di], + &last.dinfo->devices[di], busy_seconds, + DSM_TRANSFERS_PER_SECOND, &transfers_per_second, + DSM_NONE) != 0) + errx(1, "%s", devstat_errbuf); + + (void)printf("%3.0Lf ", transfers_per_second); + } +} + +static void +percent(double pct, int *over) +{ + char buf[10]; + int l; + + l = snprintf(buf, sizeof(buf), "%.0f", pct); + if (l == 1 && *over) { + printf("%s", buf); + (*over)--; + } else + printf("%2s", buf); + if (l > 2) + (*over)++; +} + +static void +cpustats(void) +{ + int state, over; + double lpct, total; + + total = 0; + for (state = 0; state < CPUSTATES; ++state) + total += cur.cp_time[state]; + if (total) + lpct = 100.0 / total; + else + lpct = 0.0; + over = 0; + printf(" "); + percent((cur.cp_time[CP_USER] + cur.cp_time[CP_NICE]) * lpct, &over); + printf(" "); + percent((cur.cp_time[CP_SYS] + cur.cp_time[CP_INTR]) * lpct, &over); + printf(" "); + percent(cur.cp_time[CP_IDLE] * lpct, &over); +} + +static void +pcpustats(int ncpus, u_long cpumask, int maxid) +{ + int state, i; + double lpct, total; + long tmp; + int over; + + /* devstats does this for cp_time */ + for (i = 0; i <= maxid; i++) { + if ((cpumask & (1ul << i)) == 0) + continue; + for (state = 0; state < CPUSTATES; ++state) { + tmp = cur_cp_times[i * CPUSTATES + state]; + cur_cp_times[i * CPUSTATES + state] -= last_cp_times[i * CPUSTATES + state]; + last_cp_times[i * 
CPUSTATES + state] = tmp; + } + } + + over = 0; + for (i = 0; i <= maxid; i++) { + if ((cpumask & (1ul << i)) == 0) + continue; + total = 0; + for (state = 0; state < CPUSTATES; ++state) + total += cur_cp_times[i * CPUSTATES + state]; + if (total) + lpct = 100.0 / total; + else + lpct = 0.0; + printf(" "); + percent((cur_cp_times[i * CPUSTATES + CP_USER] + + cur_cp_times[i * CPUSTATES + CP_NICE]) * lpct, &over); + printf(" "); + percent((cur_cp_times[i * CPUSTATES + CP_SYS] + + cur_cp_times[i * CPUSTATES + CP_INTR]) * lpct, &over); + printf(" "); + percent(cur_cp_times[i * CPUSTATES + CP_IDLE] * lpct, &over); + } +} + +static void +dointr(void) +{ + unsigned long *intrcnt, uptime; + uint64_t inttotal; + size_t clen, inamlen, intrcntlen, istrnamlen; + unsigned int i, nintr; + char *intrname, *tintrname; + + uptime = getuptime(); + if (kd != NULL) { + kread(X_SINTRCNT, &intrcntlen, sizeof(intrcntlen)); + kread(X_SINTRNAMES, &inamlen, sizeof(inamlen)); + if ((intrcnt = malloc(intrcntlen)) == NULL || + (intrname = malloc(inamlen)) == NULL) + err(1, "malloc()"); + kread(X_INTRCNT, intrcnt, intrcntlen); + kread(X_INTRNAMES, intrname, inamlen); + } else { + for (intrcnt = NULL, intrcntlen = 1024; ; intrcntlen *= 2) { + if ((intrcnt = reallocf(intrcnt, intrcntlen)) == NULL) + err(1, "reallocf()"); + if (mysysctl("hw.intrcnt", + intrcnt, &intrcntlen, NULL, 0) == 0) + break; + } + for (intrname = NULL, inamlen = 1024; ; inamlen *= 2) { + if ((intrname = reallocf(intrname, inamlen)) == NULL) + err(1, "reallocf()"); + if (mysysctl("hw.intrnames", + intrname, &inamlen, NULL, 0) == 0) + break; + } + } + nintr = intrcntlen / sizeof(unsigned long); + tintrname = intrname; + istrnamlen = strlen("interrupt"); + for (i = 0; i < nintr; i++) { + clen = strlen(tintrname); + if (clen > istrnamlen) + istrnamlen = clen; + tintrname += clen + 1; + } + (void)printf("%-*s %20s %10s\n", (int)istrnamlen, "interrupt", "total", + "rate"); + inttotal = 0; + for (i = 0; i < nintr; i++) { + if 
(intrname[0] != '\0' && (*intrcnt != 0 || aflag))
+			(void)printf("%-*s %20lu %10lu\n", (int)istrnamlen,
+			    intrname, *intrcnt, *intrcnt / uptime);
+		intrname += strlen(intrname) + 1;
+		inttotal += *intrcnt++;
+	}
+	(void)printf("%-*s %20" PRIu64 " %10" PRIu64 "\n", (int)istrnamlen,
+	    "Total", inttotal, inttotal / uptime);
+}
+
+/*
+ * Print one line per malloc type that has a non-zero allocation count or a
+ * non-zero in-use count, followed by the allocation sizes flagged in its
+ * size mask.  The data comes from sysctl when no kvm descriptor is open and
+ * from the kvm descriptor otherwise.  Failures are reported with
+ * warn()/warnx() rather than being fatal.
+ */
+static void
+domemstat_malloc(void)
+{
+	struct memory_type_list *mtlp;
+	struct memory_type *mtp;
+	int error, first, i;
+
+	mtlp = memstat_mtl_alloc();
+	if (mtlp == NULL) {
+		warn("memstat_mtl_alloc");
+		return;
+	}
+	if (kd == NULL) {
+		if (memstat_sysctl_malloc(mtlp, 0) < 0) {
+			warnx("memstat_sysctl_malloc: %s",
+			    memstat_strerror(memstat_mtl_geterror(mtlp)));
+			/* Do not leak the list on the early return. */
+			memstat_mtl_free(mtlp);
+			return;
+		}
+	} else {
+		if (memstat_kvm_malloc(mtlp, kd) < 0) {
+			error = memstat_mtl_geterror(mtlp);
+			if (error == MEMSTAT_ERROR_KVM)
+				warnx("memstat_kvm_malloc: %s",
+				    kvm_geterr(kd));
+			else
+				warnx("memstat_kvm_malloc: %s",
+				    memstat_strerror(error));
+		}
+	}
+	printf("%13s %5s %6s %7s %8s Size(s)\n", "Type", "InUse", "MemUse",
+	    "HighUse", "Requests");
+	for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
+	    mtp = memstat_mtl_next(mtp)) {
+		if (memstat_get_numallocs(mtp) == 0 &&
+		    memstat_get_count(mtp) == 0)
+			continue;
+		printf("%13s %5" PRIu64 " %5" PRIu64 "K %7s %8" PRIu64 " ",
+		    memstat_get_name(mtp), memstat_get_count(mtp),
+		    (memstat_get_bytes(mtp) + 1023) / 1024, "-",
+		    memstat_get_numallocs(mtp));
+		first = 1;
+		for (i = 0; i < 32; i++) {
+			/*
+			 * Shift in a 64-bit / widest unsigned type: i reaches
+			 * 31, so both 1 << i and 1 << (i + 4) would overflow
+			 * a signed int.
+			 */
+			if (memstat_get_sizemask(mtp) & ((uint64_t)1 << i)) {
+				if (!first)
+					printf(",");
+				printf("%ju", (uintmax_t)1 << (i + 4));
+				first = 0;
+			}
+		}
+		printf("\n");
+	}
+	memstat_mtl_free(mtlp);
+}
+
+static void
+domemstat_zone(void)
+{
+	struct memory_type_list *mtlp;
+	struct memory_type *mtp;
+	char name[MEMTYPE_MAXNAME + 1];
+	int error;
+
+	mtlp = memstat_mtl_alloc();
+	if (mtlp == NULL) {
+		warn("memstat_mtl_alloc");
+		return;
+	}
+	if (kd == NULL) {
+		if (memstat_sysctl_uma(mtlp, 0) < 0) {
+			warnx("memstat_sysctl_uma: %s",
memstat_strerror(memstat_mtl_geterror(mtlp))); + return; + } + } else { + if (memstat_kvm_uma(mtlp, kd) < 0) { + error = memstat_mtl_geterror(mtlp); + if (error == MEMSTAT_ERROR_KVM) + warnx("memstat_kvm_uma: %s", + kvm_geterr(kd)); + else + warnx("memstat_kvm_uma: %s", + memstat_strerror(error)); + } + } + printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE", + "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP"); + for (mtp = memstat_mtl_first(mtlp); mtp != NULL; + mtp = memstat_mtl_next(mtp)) { + strlcpy(name, memstat_get_name(mtp), MEMTYPE_MAXNAME); + strcat(name, ":"); + printf("%-20s %6" PRIu64 ", %6" PRIu64 ",%8" PRIu64 ",%8" PRIu64 + ",%8" PRIu64 ",%4" PRIu64 ",%4" PRIu64 "\n", name, + memstat_get_size(mtp), memstat_get_countlimit(mtp), + memstat_get_count(mtp), memstat_get_free(mtp), + memstat_get_numallocs(mtp), memstat_get_failures(mtp), + memstat_get_sleeps(mtp)); + } + memstat_mtl_free(mtlp); + printf("\n"); +} + +/* + * kread reads something from the kernel, given its nlist index. 
+ */ +static void +kreado(int nlx, void *addr, size_t size, size_t offset) +{ + const char *sym; + + if (namelist[nlx].n_type == 0 || namelist[nlx].n_value == 0) { + sym = namelist[nlx].n_name; + if (*sym == '_') + ++sym; + errx(1, "symbol %s not defined", sym); + } + if ((size_t)kvm_read(kd, namelist[nlx].n_value + offset, addr, + size) != size) { + sym = namelist[nlx].n_name; + if (*sym == '_') + ++sym; + errx(1, "%s: %s", sym, kvm_geterr(kd)); + } +} + +static void +kread(int nlx, void *addr, size_t size) +{ + kreado(nlx, addr, size, 0); +} + +static char * +kgetstr(const char *strp) +{ + int n = 0, size = 1; + char *ret = NULL; + + do { + if (size == n + 1) { + ret = realloc(ret, size); + if (ret == NULL) + err(1, "%s: realloc", __func__); + size *= 2; + } + if (kvm_read(kd, (u_long)strp + n, &ret[n], 1) != 1) + errx(1, "%s: %s", __func__, kvm_geterr(kd)); + } while (ret[n++] != '\0'); + return (ret); +} + +static void +usage(void) +{ + (void)fprintf(stderr, "%s%s", + "usage: vmstat [-afHhimPsz] [-c count] [-M core [-N system]] [-w wait]\n", + " [-n devs] [-p type,if,pass] [disks]\n"); + exit(1); +} diff --git a/bin/vmstat/vmstat_zmemstat.c b/bin/vmstat/vmstat_zmemstat.c new file mode 100644 index 0000000..b0ec680 --- /dev/null +++ b/bin/vmstat/vmstat_zmemstat.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include +#include + +#include +#include "memstat.h" + +void +domemstat_zone(void) +{ + struct memory_type_list *mtlp; + struct memory_type *mtp; + char name[MEMTYPE_MAXNAME + 1]; + int error; + + mtlp = memstat_mtl_alloc(); + if (mtlp == NULL) { + warn("memstat_mtl_alloc"); + return; + } + if (memstat_sysctl_uma(mtlp, 0) < 0) { + warnx("memstat_sysctl_uma: %s", + memstat_strerror(memstat_mtl_geterror(mtlp))); + return; + } + + printf("%-20s %6s %6s %8s %8s %8s %4s %4s\n\n", "ITEM", "SIZE", + "LIMIT", "USED", "FREE", "REQ", "FAIL", "SLEEP"); + for (mtp = memstat_mtl_first(mtlp); mtp != NULL; + mtp = memstat_mtl_next(mtp)) { + strlcpy(name, 
memstat_get_name(mtp), MEMTYPE_MAXNAME); + strcat(name, ":"); + printf("%-20s %6llu, %6llu,%8llu,%8llu,%8llu,%4llu,%4llu\n", + name, + (unsigned long long) memstat_get_size(mtp), + (unsigned long long) memstat_get_countlimit(mtp), + (unsigned long long) memstat_get_count(mtp), + (unsigned long long) memstat_get_free(mtp), + (unsigned long long) memstat_get_numallocs(mtp), + (unsigned long long) memstat_get_failures(mtp), + (unsigned long long) memstat_get_sleeps(mtp)); + } + memstat_mtl_free(mtlp); + printf("\n"); +} From be0916c4e9e34d0ecf63b0313d33cae53d7aedfb Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 06:47:38 -0700 Subject: [PATCH 090/148] Max number of CPUs. --- lib/libuinet/uinet_subr_smp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/libuinet/uinet_subr_smp.c b/lib/libuinet/uinet_subr_smp.c index f43252d..97be01d 100644 --- a/lib/libuinet/uinet_subr_smp.c +++ b/lib/libuinet/uinet_subr_smp.c @@ -53,6 +53,8 @@ u_int mp_maxid; SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL, "Kernel SMP"); SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0, "Max CPU ID."); +SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus, + 0, "Max number of CPUs that the system was compiled for."); /* XXX temporary until final pcpu approach is determined */ struct mtx uinet_pcpu_locks[MAXCPU]; From 3f192051afbb8dbf1b148222cb6341dc04532224 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 10:18:46 -0700 Subject: [PATCH 091/148] Add kdb definitions for use when witness and other debugging options are enabled. 
--- lib/libuinet/uinet_subr_kdb.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 lib/libuinet/uinet_subr_kdb.c diff --git a/lib/libuinet/uinet_subr_kdb.c b/lib/libuinet/uinet_subr_kdb.c new file mode 100644 index 0000000..83a3a53 --- /dev/null +++ b/lib/libuinet/uinet_subr_kdb.c @@ -0,0 +1,22 @@ +#include +#include +#include +#include +#include +#include + +int kdb_active = 0; + +void +kdb_backtrace(void) +{ + + printf("%s: called\n", __func__); +} + +void +kdb_backtrace_thread(struct thread *td) +{ + + printf("%s: called; thr=%p\n", __func__, td); +} From fdaf754e4ecaff6c6bda65e8880a33aa9bd45297 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 10:19:17 -0700 Subject: [PATCH 092/148] WITNESS assumes GIANT is held and will unlock/relock it during startup. So, just wrap all of the early SYSINIT linker set stuff in GIANT. --- lib/libuinet/uinet_init_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/libuinet/uinet_init_main.c b/lib/libuinet/uinet_init_main.c index 7351376..ebea05d 100644 --- a/lib/libuinet/uinet_init_main.c +++ b/lib/libuinet/uinet_init_main.c @@ -296,8 +296,10 @@ mi_startup(void) } #endif + mtx_lock(&Giant); /* Call function */ (*((*sipp)->func))((*sipp)->udata); + mtx_unlock(&Giant); #if defined(VERBOSE_SYSINIT) if (verbose) From 4530e967712fd1849ec90468ec74659d15fa0bdb Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 10:20:39 -0700 Subject: [PATCH 093/148] Expose a stack unwind function function that uses libunwind. It'll be a no-op unless UINET_STACK_UNWIND is defined. 
--- lib/libuinet/uinet_host_interface.c | 29 +++++++++++++++++++++++++++++ lib/libuinet/uinet_host_interface.h | 1 + 2 files changed, 30 insertions(+) diff --git a/lib/libuinet/uinet_host_interface.c b/lib/libuinet/uinet_host_interface.c index fc208f1..ee2c01f 100644 --- a/lib/libuinet/uinet_host_interface.c +++ b/lib/libuinet/uinet_host_interface.c @@ -94,6 +94,11 @@ typedef cpu_set_t cpuset_t; static struct itimerval prof_itimer; #endif /* UINET_PROFILE */ +#if defined(UINET_STACK_UNWIND) +#define UNW_LOCAL_ONLY +#include +#endif /* UINET_STACK_UNWIND */ + static pthread_key_t thread_specific_data_key; static unsigned int uhi_num_cpus; @@ -1082,3 +1087,27 @@ uhi_msg_rsp_wait(struct uhi_msg *msg, void *payload) { return (uhi_msg_sock_read(msg->fds[0], payload, msg->rsp_size)); } + +int +uhi_get_stacktrace(uintptr_t *pcs, int npcs) +{ +#if defined(UINET_STACK_UNWIND) + unw_cursor_t cursor; + unw_context_t uc; + unw_word_t ip, sp; + int i = 0; + + unw_getcontext(&uc); + unw_init_local(&cursor, &uc); + while (unw_step(&cursor) > 0 && i < npcs) { + unw_get_reg(&cursor, UNW_REG_IP, &ip); + unw_get_reg(&cursor, UNW_REG_SP, &sp); + pcs[i] = (uintptr_t) ip; +// printf ("ip = %lx, sp = %lx\n", (long) ip, (long) sp); + i++; + } + return (i); +#else + return (0); +#endif +} diff --git a/lib/libuinet/uinet_host_interface.h b/lib/libuinet/uinet_host_interface.h index c117f17..de97395 100644 --- a/lib/libuinet/uinet_host_interface.h +++ b/lib/libuinet/uinet_host_interface.h @@ -180,5 +180,6 @@ int uhi_msg_wait(struct uhi_msg *msg, void *payload); int uhi_msg_rsp_send(struct uhi_msg *msg, void *payload); int uhi_msg_rsp_wait(struct uhi_msg *msg, void *payload); +int uhi_get_stacktrace(uintptr_t *pcs, int npcs); #endif /* _UINET_HOST_INTERFACE_H_ */ From 73b3b2a883a9541dacaa4d61485d9e8646325f56 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 10:21:15 -0700 Subject: [PATCH 094/148] Add a subr_stack.c stub that uses the uinet host interface to expose the stack 
unwinding provided by the host environment. --- lib/libuinet/uinet_subr_stack.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 lib/libuinet/uinet_subr_stack.c diff --git a/lib/libuinet/uinet_subr_stack.c b/lib/libuinet/uinet_subr_stack.c new file mode 100644 index 0000000..a8aa538 --- /dev/null +++ b/lib/libuinet/uinet_subr_stack.c @@ -0,0 +1,27 @@ +#include +#include +#include +#include +#include +#include +#include + +void +stack_save_td(struct stack *st, struct thread *td) +{ + +} + +void +stack_save(struct stack *st) +{ + int i, n; + + uintptr_t pcs[STACK_MAX]; + + n = uhi_get_stacktrace(pcs, STACK_MAX); + for (i = 0; i < n; i++) { + st->pcs[i] = (vm_offset_t) pcs[i]; + } + st->depth = n; +} From e757314bcb535a4163f8e0661de0fb94ad0da9b9 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 10:21:51 -0700 Subject: [PATCH 095/148] Add panicstr - required by WITNESS and other things that call panic(). --- lib/libuinet/uinet_kern_shutdown.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_kern_shutdown.c b/lib/libuinet/uinet_kern_shutdown.c index c8110af..bbc6186 100644 --- a/lib/libuinet/uinet_kern_shutdown.c +++ b/lib/libuinet/uinet_kern_shutdown.c @@ -30,11 +30,11 @@ */ void panic(const char *, ...) __attribute__((__noreturn__)); - #include #include #include +const char *panicstr = NULL; void panic(const char *fmt, ...) From cb90d9b01ffdc243ff7346cd75f12c1c34653a03 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 10:22:13 -0700 Subject: [PATCH 096/148] Add rmlock debug function methods. 
--- lib/libuinet/uinet_kern_rmlock.c | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/lib/libuinet/uinet_kern_rmlock.c b/lib/libuinet/uinet_kern_rmlock.c index 19abb63..2ac427c 100644 --- a/lib/libuinet/uinet_kern_rmlock.c +++ b/lib/libuinet/uinet_kern_rmlock.c @@ -111,3 +111,40 @@ _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker) _rw_runlock((struct rwlock *)rm, __FILE__, __LINE__); } + + +#if LOCK_DEBUG > 0 +void +_rm_wlock_debug(struct rmlock *rm, const char *file, int line) +{ + + _rw_wlock((struct rwlock *) rm, file, line); +} + +void +_rm_wunlock_debug(struct rmlock *rm, const char *file, int line) +{ + + _rw_wunlock((struct rwlock *) rm, file, line); +} + +int +_rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, + int trylock, const char *file, int line) +{ + + if (trylock) + return _rw_try_rlock((struct rwlock *)rm, file, line); + + _rw_rlock((struct rwlock *)rm, file, line); + return (1); +} + +void +_rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, + const char *file, int line) +{ + + _rw_runlock((struct rwlock *)rm, file, line); +} +#endif From 87bdd5ab704e8b6460fc742d54df7f689573258b Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 10:22:46 -0700 Subject: [PATCH 097/148] sprinkle WITNESS lines appropriate throughout rwlock. 
--- lib/libuinet/uinet_kern_rwlock.c | 45 +++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/lib/libuinet/uinet_kern_rwlock.c b/lib/libuinet/uinet_kern_rwlock.c index 15038c6..d6e4d2b 100644 --- a/lib/libuinet/uinet_kern_rwlock.c +++ b/lib/libuinet/uinet_kern_rwlock.c @@ -94,7 +94,7 @@ rw_init_flags(struct rwlock *rw, const char *name, int opts) void rw_destroy(struct rwlock *rw) { - + uhi_rwlock_destroy(&rw->rw_lock); } @@ -102,53 +102,80 @@ void _rw_wlock(struct rwlock *rw, const char *file, int line) { + WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, + line, NULL); uhi_rwlock_wlock(&rw->rw_lock); + WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); } int _rw_try_wlock(struct rwlock *rw, const char *file, int line) { - - return (uhi_rwlock_trywlock(&rw->rw_lock)); + int rval; + + rval = uhi_rwlock_trywlock(&rw->rw_lock); + if (rval) { + WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, + file, line); + } + return (rval); } void _rw_wunlock(struct rwlock *rw, const char *file, int line) { - + + WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); uhi_rwlock_wunlock(&rw->rw_lock); } void _rw_rlock(struct rwlock *rw, const char *file, int line) { - + WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); uhi_rwlock_rlock(&rw->rw_lock); + WITNESS_LOCK(&rw->lock_object, 0, file, line); } int _rw_try_rlock(struct rwlock *rw, const char *file, int line) { - - return (uhi_rwlock_tryrlock(&rw->rw_lock)); + int rval; + rval = uhi_rwlock_tryrlock(&rw->rw_lock); + if (rval) { + WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); + } + return (rval); } void _rw_runlock(struct rwlock *rw, const char *file, int line) { - + + WITNESS_UNLOCK(&rw->lock_object, 0, file, line); uhi_rwlock_runlock(&rw->rw_lock); } int _rw_try_upgrade(struct rwlock *rw, const char *file, int line) { - return (uhi_rwlock_tryupgrade(&rw->rw_lock)); + int rval; + + rval = 
uhi_rwlock_tryupgrade(&rw->rw_lock); + /* 0 means fail; non-zero means success */ + /* XXX uhi_rwlock_tryupgrade always returns 0? */ + if (rval) { + WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, + file, line); + } + + return (rval); } void _rw_downgrade(struct rwlock *rw, const char *file, int line) { + WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); uhi_rwlock_downgrade(&rw->rw_lock); } From 6237715c22c950d5133a94c9380e9db10fdc880b Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 10:23:04 -0700 Subject: [PATCH 098/148] * Add mutex WITNESS bits * Add stubs for spinlocks in case they're used - WITNESS requires that the class at least be totally setup. --- lib/libuinet/uinet_kern_mutex.c | 48 +++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/lib/libuinet/uinet_kern_mutex.c b/lib/libuinet/uinet_kern_mutex.c index 3210a59..7da5047 100644 --- a/lib/libuinet/uinet_kern_mutex.c +++ b/lib/libuinet/uinet_kern_mutex.c @@ -65,6 +65,21 @@ unlock_mtx(struct lock_object *lock) return (0); } +static void +lock_spin(struct lock_object *lock, int how) +{ + + printf("%s: called!\n", __func__); +} + +static int +unlock_spin(struct lock_object *lock) +{ + + printf("%s: called!\n", __func__); + return (0); +} + /* * Lock classes for sleep and spin mutexes. 
*/ @@ -87,7 +102,21 @@ struct lock_class lock_class_mtx_sleep = { /* * XXX should never be used; provided here for linkage with subr_lock.c */ -struct lock_class lock_class_mtx_spin; +struct lock_class lock_class_mtx_spin = { + .lc_name = "spin mutex", + .lc_flags = LC_SPINLOCK | LC_RECURSABLE, + .lc_assert = assert_mtx, + .lc_lock = lock_spin, + .lc_unlock = unlock_spin, +#ifndef UINET +#ifdef DDB + .lc_ddb_show = db_show_mtx, +#endif +#ifdef KDTRACE_HOOKS + .lc_owner = owner_mtx, +#endif +#endif +}; void _thread_lock_flags(struct thread *td, int opts, const char *file, int line) @@ -135,33 +164,48 @@ void _mtx_lock_flags(struct mtx *m, int opts, const char *file, int line) { + WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, + file, line, NULL); uhi_mutex_lock(&m->mtx_lock); + WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } void _mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line) { + WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); uhi_mutex_unlock(&m->mtx_lock); } int _mtx_trylock(struct mtx *m, int opts, const char *file, int line) { + int rval; + + rval = uhi_mutex_trylock(&m->mtx_lock); + if (rval) { + WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, + file, line); + } - return (uhi_mutex_trylock(&m->mtx_lock)); + return (rval); } void _mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line) { + WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, + file, line, NULL); uhi_mutex_lock(&m->mtx_lock); + WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } void _mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line) { + WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); uhi_mutex_unlock(&m->mtx_lock); } From 3007cbc5c4c1dd6296244bb67c170159118be2d1 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 11:29:12 -0700 Subject: [PATCH 099/148] * Make witness compile! 
* Make panic() into WITNESS_PANIC() - and make it just print, rather than constantly panic * Some of the initial locks aren't really sx locks - the libuinet code implements them using mutexes. So it confuses things. * Don't use the interrupt disable/enable code here - instead, just pray that the per-thread data won't change in a nasty way. This seems to work under userspace but only for sleep locks - spinlocks use pcpu data and libuinet absolutely doesn't implement the same semantics for pcpu stuff. --- sys/kern/subr_witness.c | 94 +++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 42 deletions(-) diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index caab932..dc5c855 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -304,12 +304,14 @@ witness_lock_type_equal(struct witness *w1, struct witness *w2) (w2->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))); } +#if 0 static __inline int witness_lock_order_key_empty(const struct witness_lock_order_key *key) { return (key->from == 0 && key->to == 0); } +#endif static __inline int witness_lock_order_key_equal(const struct witness_lock_order_key *a, @@ -378,6 +380,8 @@ static void witness_setflag(struct lock_object *lock, int flag, int set); SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW, NULL, "Witness Locking"); +#define WITNESS_PANIC(...) do { printf(__VA_ARGS__); printf("\n"); } while (0) + /* * If set to 0, lock order checking is disabled. If set to -1, * witness is completely disabled. 
Otherwise witness performs full @@ -821,15 +825,15 @@ witness_init(struct lock_object *lock, const char *type) class = LOCK_CLASS(lock); if ((lock->lo_flags & LO_RECURSABLE) != 0 && (class->lc_flags & LC_RECURSABLE) == 0) - panic("%s: lock (%s) %s can not be recursable", __func__, + WITNESS_PANIC("%s: lock (%s) %s can not be recursable", __func__, class->lc_name, lock->lo_name); if ((lock->lo_flags & LO_SLEEPABLE) != 0 && (class->lc_flags & LC_SLEEPABLE) == 0) - panic("%s: lock (%s) %s can not be sleepable", __func__, + WITNESS_PANIC("%s: lock (%s) %s can not be sleepable", __func__, class->lc_name, lock->lo_name); if ((lock->lo_flags & LO_UPGRADABLE) != 0 && (class->lc_flags & LC_UPGRADABLE) == 0) - panic("%s: lock (%s) %s can not be upgradable", __func__, + WITNESS_PANIC("%s: lock (%s) %s can not be upgradable", __func__, class->lc_name, lock->lo_name); /* @@ -846,7 +850,7 @@ witness_init(struct lock_object *lock, const char *type) pending_locks[pending_cnt].wh_lock = lock; pending_locks[pending_cnt++].wh_type = type; if (pending_cnt > WITNESS_PENDLIST) - panic("%s: pending locks list is too small, bump it\n", + WITNESS_PANIC("%s: pending locks list is too small, bump it\n", __func__); } else lock->lo_witness = enroll(type, class); @@ -861,7 +865,7 @@ witness_destroy(struct lock_object *lock) class = LOCK_CLASS(lock); if (witness_cold) - panic("lock (%s) %s destroyed while witness_cold", + WITNESS_PANIC("lock (%s) %s destroyed while witness_cold", class->lc_name, lock->lo_name); /* XXX: need to verify that no one holds the lock */ @@ -1062,7 +1066,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file, * all spin locks. 
*/ if (td->td_critnest != 0 && !kdb_active) - panic("blockable sleep lock (%s) %s @ %s:%d", + WITNESS_PANIC("blockable sleep lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); @@ -1106,7 +1110,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file, fixup_filename(file), line); printf("while exclusively locked from %s:%d\n", fixup_filename(lock1->li_file), lock1->li_line); - panic("share->excl"); + WITNESS_PANIC("share->excl"); } if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && (flags & LOP_EXCLUSIVE) != 0) { @@ -1115,7 +1119,7 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file, fixup_filename(file), line); printf("while share locked from %s:%d\n", fixup_filename(lock1->li_file), lock1->li_line); - panic("excl->share"); + WITNESS_PANIC("excl->share"); } return; } @@ -1422,26 +1426,26 @@ witness_upgrade(struct lock_object *lock, int flags, const char *file, int line) class = LOCK_CLASS(lock); if (witness_watch) { if ((lock->lo_flags & LO_UPGRADABLE) == 0) - panic("upgrade of non-upgradable lock (%s) %s @ %s:%d", + WITNESS_PANIC("upgrade of non-upgradable lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((class->lc_flags & LC_SLEEPLOCK) == 0) - panic("upgrade of non-sleep lock (%s) %s @ %s:%d", + WITNESS_PANIC("upgrade of non-sleep lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); } instance = find_instance(curthread->td_sleeplocks, lock); if (instance == NULL) - panic("upgrade of unlocked lock (%s) %s @ %s:%d", + WITNESS_PANIC("upgrade of unlocked lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if (witness_watch) { if ((instance->li_flags & LI_EXCLUSIVE) != 0) - panic("upgrade of exclusive lock (%s) %s @ %s:%d", + WITNESS_PANIC("upgrade of exclusive lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((instance->li_flags & LI_RECURSEMASK) != 0) - 
panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d", + WITNESS_PANIC("upgrade of recursed lock (%s) %s r=%d @ %s:%d", class->lc_name, lock->lo_name, instance->li_flags & LI_RECURSEMASK, fixup_filename(file), line); @@ -1462,26 +1466,26 @@ witness_downgrade(struct lock_object *lock, int flags, const char *file, class = LOCK_CLASS(lock); if (witness_watch) { if ((lock->lo_flags & LO_UPGRADABLE) == 0) - panic("downgrade of non-upgradable lock (%s) %s @ %s:%d", + WITNESS_PANIC("downgrade of non-upgradable lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((class->lc_flags & LC_SLEEPLOCK) == 0) - panic("downgrade of non-sleep lock (%s) %s @ %s:%d", + WITNESS_PANIC("downgrade of non-sleep lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); } instance = find_instance(curthread->td_sleeplocks, lock); if (instance == NULL) - panic("downgrade of unlocked lock (%s) %s @ %s:%d", + WITNESS_PANIC("downgrade of unlocked lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if (witness_watch) { if ((instance->li_flags & LI_EXCLUSIVE) == 0) - panic("downgrade of shared lock (%s) %s @ %s:%d", + WITNESS_PANIC("downgrade of shared lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((instance->li_flags & LI_RECURSEMASK) != 0) - panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d", + WITNESS_PANIC("downgrade of recursed lock (%s) %s r=%d @ %s:%d", class->lc_name, lock->lo_name, instance->li_flags & LI_RECURSEMASK, fixup_filename(file), line); @@ -1496,7 +1500,7 @@ witness_unlock(struct lock_object *lock, int flags, const char *file, int line) struct lock_instance *instance; struct lock_class *class; struct thread *td; - register_t s; +// register_t s; int i, j; if (witness_cold || lock->lo_witness == NULL || panicstr != NULL) @@ -1524,9 +1528,9 @@ witness_unlock(struct lock_object *lock, int flags, const char *file, int line) * eventual register locks 
and remove them. */ if (witness_watch > 0) - panic("lock (%s) %s not locked @ %s:%d", class->lc_name, + WITNESS_PANIC("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, fixup_filename(file), line); - else +// else return; found: @@ -1537,7 +1541,7 @@ witness_unlock(struct lock_object *lock, int flags, const char *file, int line) lock->lo_name, fixup_filename(file), line); printf("while exclusively locked from %s:%d\n", fixup_filename(instance->li_file), instance->li_line); - panic("excl->ushare"); + WITNESS_PANIC("excl->ushare"); } if ((instance->li_flags & LI_EXCLUSIVE) == 0 && witness_watch > 0 && (flags & LOP_EXCLUSIVE) != 0) { @@ -1546,7 +1550,7 @@ witness_unlock(struct lock_object *lock, int flags, const char *file, int line) printf("while share locked from %s:%d\n", fixup_filename(instance->li_file), instance->li_line); - panic("share->uexcl"); + WITNESS_PANIC("share->uexcl"); } /* If we are recursed, unrecurse. */ if ((instance->li_flags & LI_RECURSEMASK) > 0) { @@ -1560,11 +1564,11 @@ witness_unlock(struct lock_object *lock, int flags, const char *file, int line) if ((instance->li_flags & LI_NORELEASE) != 0 && witness_watch > 0) { printf("forbidden unlock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, fixup_filename(file), line); - panic("lock marked norelease"); + WITNESS_PANIC("lock marked norelease"); } /* Otherwise, remove this item from the list. 
*/ - s = intr_disable(); +// s = intr_disable(); CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, td->td_proc->p_pid, instance->li_lock->lo_name, (*lock_list)->ll_count - 1); @@ -1572,7 +1576,7 @@ witness_unlock(struct lock_object *lock, int flags, const char *file, int line) (*lock_list)->ll_children[j] = (*lock_list)->ll_children[j + 1]; (*lock_list)->ll_count--; - intr_restore(s); +// intr_restore(s); /* * In order to reduce contention on w_mtx, we want to keep always an @@ -1615,7 +1619,7 @@ witness_thread_exit(struct thread *td) witness_list_lock(&lle->ll_children[i], printf); } - panic("Thread %p cannot exit while holding sleeplocks\n", td); + WITNESS_PANIC("Thread %p cannot exit while holding sleeplocks\n", td); } witness_lock_list_free(lle); } @@ -1696,7 +1700,7 @@ witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) } else sched_unpin(); if (flags & WARN_PANIC && n) - panic("%s", __func__); + WITNESS_PANIC("%s", __func__); else witness_debugger(n); return (n); @@ -1742,7 +1746,7 @@ enroll(const char *description, struct lock_class *lock_class) } else if ((lock_class->lc_flags & LC_SLEEPLOCK)) typelist = &w_sleep; else - panic("lock class %s is not sleep or spin", + WITNESS_PANIC("lock class %s is not sleep or spin", lock_class->lc_name); mtx_lock_spin(&w_mtx); @@ -1772,8 +1776,14 @@ enroll(const char *description, struct lock_class *lock_class) found: w->w_refcount++; mtx_unlock_spin(&w_mtx); + /* + * XXX libuinet currently shortcuts a bunch of lock types and + * implements them using other lock types. The locking stuff + * needs to be made .. saner. 
+ */ if (lock_class != w->w_class) - panic( +// WITNESS_PANIC( + printf( "lock (%s) %s does not match earlier (%s) lock", description, lock_class->lc_name, w->w_class->lc_name); @@ -1907,7 +1917,7 @@ itismychild(struct witness *parent, struct witness *child) if (!witness_lock_type_equal(parent, child)) { if (witness_cold == 0) mtx_unlock_spin(&w_mtx); - panic("%s: parent \"%s\" (%s) and child \"%s\" (%s) are not " + WITNESS_PANIC("%s: parent \"%s\" (%s) and child \"%s\" (%s) are not " "the same lock type", __func__, parent->w_name, parent->w_class->lc_name, child->w_name, child->w_class->lc_name); @@ -2181,7 +2191,7 @@ witness_save(struct lock_object *lock, const char **filep, int *linep) } instance = find_instance(lock_list, lock); if (instance == NULL) - panic("%s: lock (%s) %s not locked", __func__, + WITNESS_PANIC("%s: lock (%s) %s not locked", __func__, class->lc_name, lock->lo_name); *filep = instance->li_file; *linep = instance->li_line; @@ -2214,7 +2224,7 @@ witness_restore(struct lock_object *lock, const char *file, int line) } instance = find_instance(lock_list, lock); if (instance == NULL) - panic("%s: lock (%s) %s not locked", __func__, + WITNESS_PANIC("%s: lock (%s) %s not locked", __func__, class->lc_name, lock->lo_name); lock->lo_witness->w_file = file; lock->lo_witness->w_line = line; @@ -2237,13 +2247,13 @@ witness_assert(struct lock_object *lock, int flags, const char *file, int line) else if ((class->lc_flags & LC_SPINLOCK) != 0) instance = find_instance(PCPU_GET(spinlocks), lock); else { - panic("Lock (%s) %s is not sleep or spin!", + WITNESS_PANIC("Lock (%s) %s is not sleep or spin!", class->lc_name, lock->lo_name); } switch (flags) { case LA_UNLOCKED: if (instance != NULL) - panic("Lock (%s) %s locked @ %s:%d.", + WITNESS_PANIC("Lock (%s) %s locked @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); break; @@ -2257,34 +2267,34 @@ witness_assert(struct lock_object *lock, int flags, const char *file, int line) case LA_XLOCKED 
| LA_RECURSED: case LA_XLOCKED | LA_NOTRECURSED: if (instance == NULL) { - panic("Lock (%s) %s not locked @ %s:%d.", + WITNESS_PANIC("Lock (%s) %s not locked @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); break; } if ((flags & LA_XLOCKED) != 0 && (instance->li_flags & LI_EXCLUSIVE) == 0) - panic("Lock (%s) %s not exclusively locked @ %s:%d.", + WITNESS_PANIC("Lock (%s) %s not exclusively locked @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((flags & LA_SLOCKED) != 0 && (instance->li_flags & LI_EXCLUSIVE) != 0) - panic("Lock (%s) %s exclusively locked @ %s:%d.", + WITNESS_PANIC("Lock (%s) %s exclusively locked @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((flags & LA_RECURSED) != 0 && (instance->li_flags & LI_RECURSEMASK) == 0) - panic("Lock (%s) %s not recursed @ %s:%d.", + WITNESS_PANIC("Lock (%s) %s not recursed @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); if ((flags & LA_NOTRECURSED) != 0 && (instance->li_flags & LI_RECURSEMASK) != 0) - panic("Lock (%s) %s recursed @ %s:%d.", + WITNESS_PANIC("Lock (%s) %s recursed @ %s:%d.", class->lc_name, lock->lo_name, fixup_filename(file), line); break; default: - panic("Invalid lock assertion at %s:%d.", + WITNESS_PANIC("Invalid lock assertion at %s:%d.", fixup_filename(file), line); } @@ -2310,7 +2320,7 @@ witness_setflag(struct lock_object *lock, int flag, int set) } instance = find_instance(lock_list, lock); if (instance == NULL) - panic("%s: lock (%s) %s not locked", __func__, + WITNESS_PANIC("%s: lock (%s) %s not locked", __func__, class->lc_name, lock->lo_name); if (set) From c448781a3e63be7301892dd8e75188613f1a0b27 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 11:31:03 -0700 Subject: [PATCH 100/148] Add a blank witness option file. 
--- lib/libuinet/opt/opt_witness.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 lib/libuinet/opt/opt_witness.h diff --git a/lib/libuinet/opt/opt_witness.h b/lib/libuinet/opt/opt_witness.h new file mode 100644 index 0000000..e69de29 From b82e439e43e4d1b0d954a7f51f84b257afbe3626 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 11:31:12 -0700 Subject: [PATCH 101/148] Add placeholders for witness. --- lib/libuinet/opt/opt_global.h | 1 + lib/libuinet/opt/opt_stack.h | 1 + 2 files changed, 2 insertions(+) create mode 100644 lib/libuinet/opt/opt_stack.h diff --git a/lib/libuinet/opt/opt_global.h b/lib/libuinet/opt/opt_global.h index 0ab3399..710e837 100644 --- a/lib/libuinet/opt/opt_global.h +++ b/lib/libuinet/opt/opt_global.h @@ -2,3 +2,4 @@ #define MUTEX_NOINLINE 1 #define RWLOCK_NOINLINE 1 #define SX_NOINLINE 1 +//#define WITNESS 1 diff --git a/lib/libuinet/opt/opt_stack.h b/lib/libuinet/opt/opt_stack.h new file mode 100644 index 0000000..4dae41d --- /dev/null +++ b/lib/libuinet/opt/opt_stack.h @@ -0,0 +1 @@ +//#define STACK 1 From 8e29acc56ad6fbd803dbab6994c7f468969c29d8 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 11:54:22 -0700 Subject: [PATCH 102/148] Break out the witness bits into a separate source section so we can easily flip it on/off. 
--- lib/libuinet/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 37838cf..733d426 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -128,10 +128,12 @@ UINET_SRCS+= \ uinet_dev_random.c \ uinet_sched.c \ uinet_subr_bus.c \ + uinet_subr_kdb.c \ uinet_subr_pcpu.c \ uinet_subr_prf.c \ uinet_subr_rtc.c \ uinet_subr_smp.c \ + uinet_subr_stack.c \ uinet_subr_uio.c \ uinet_support.c \ uinet_tty.c \ @@ -207,6 +209,9 @@ KERN_SRCS+= \ uipc_sockbuf.c \ uipc_socket.c +KERN_WITNESS_SRCS= \ + subr_witness.c \ + subr_stack.c \ KERN_MHEADERS+= \ bus_if.m \ @@ -298,6 +303,9 @@ MSRCS= $(patsubst %.m,%.c,${KERN_MSRCS}) SRCS= ${UINET_SRCS} ${CRYPTO_SRCS} ${KERN_SRCS} ${LIBKERN_SRCS} ${MACHINE_SRCS} SRCS+= ${MSRCS} ${NET_SRCS} ${NETINET_SRCS} ${NETINET6_SRCS} ${VM_SRCS} +# If witness is enabled. +# SRCS+= ${KERN_WITNESS_SRCS} + HOST_SRCS = ${UINET_HOST_SRCS} OBJS+= $(patsubst %.c,%.o,${SRCS}) From 81f5af5a70483ad6387000aa001cacc65e1a30a2 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 12:23:09 -0700 Subject: [PATCH 103/148] Allow UINET_LOCK_DEBUG to enable lock debugging. This is used to enable actually compiling in lock debug strings. --- sys/sys/lock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/sys/lock.h b/sys/sys/lock.h index 1a3f1ea..0ccd8b1 100644 --- a/sys/sys/lock.h +++ b/sys/sys/lock.h @@ -121,7 +121,7 @@ struct lock_class { * calling conventions for this debugging code in modules so that modules can * work with both debug and non-debug kernels. 
*/ -#if defined(KLD_MODULE) || defined(WITNESS) || defined(INVARIANTS) || defined(INVARIANT_SUPPORT) || defined(KTR) || defined(LOCK_PROFILING) +#if defined(KLD_MODULE) || defined(WITNESS) || defined(INVARIANTS) || defined(INVARIANT_SUPPORT) || defined(KTR) || defined(LOCK_PROFILING) || defined(UINET_LOCK_DEBUG) #define LOCK_DEBUG 1 #else #define LOCK_DEBUG 0 From fa39be208b9d16a3e197b886d58b0456b3009157 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 12:41:31 -0700 Subject: [PATCH 104/148] Add in the ability to log locking operations. --- lib/libuinet/api_include/uinet_api.h | 3 + lib/libuinet/opt/opt_global.h | 1 + lib/libuinet/uinet_api.c | 24 +++++ lib/libuinet/uinet_api.symlist | 3 + lib/libuinet/uinet_host_interface.c | 134 ++++++++++++++++++++++++--- lib/libuinet/uinet_host_interface.h | 62 ++++++++++--- lib/libuinet/uinet_kern_mutex.c | 10 +- lib/libuinet/uinet_kern_rwlock.c | 16 ++-- 8 files changed, 215 insertions(+), 38 deletions(-) diff --git a/lib/libuinet/api_include/uinet_api.h b/lib/libuinet/api_include/uinet_api.h index 005f2a7..8c2c3d9 100644 --- a/lib/libuinet/api_include/uinet_api.h +++ b/lib/libuinet/api_include/uinet_api.h @@ -129,6 +129,9 @@ int uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg, const char *ifname); const char * uinet_mbuf_data(const struct uinet_mbuf *); size_t uinet_mbuf_len(const struct uinet_mbuf *); int uinet_if_xmit(void *cookie, const char *buf, int len); +int uinet_lock_log_set_file(const char *file); +int uinet_lock_log_enable(void); +int uinet_lock_log_disable(void); #ifdef __cplusplus } diff --git a/lib/libuinet/opt/opt_global.h b/lib/libuinet/opt/opt_global.h index 710e837..6f73446 100644 --- a/lib/libuinet/opt/opt_global.h +++ b/lib/libuinet/opt/opt_global.h @@ -2,4 +2,5 @@ #define MUTEX_NOINLINE 1 #define RWLOCK_NOINLINE 1 #define SX_NOINLINE 1 +#define UINET_LOCK_DEBUG 1 //#define WITNESS 1 diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index 36bf970..65dcfa5 100644 --- 
a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -1395,3 +1395,27 @@ uinet_if_xmit(void *cookie, const char *buf, int len) ifp = cif->ifp; return ((ifp->if_transmit)(ifp, m)); } + +int +uinet_lock_log_set_file(const char *file) +{ + + uhi_lock_log_set_file(file); + return (0); +} + +int +uinet_lock_log_enable(void) +{ + + uhi_lock_log_enable(); + return (0); +} + +int +uinet_lock_log_disable(void) +{ + + uhi_lock_log_disable(); + return (0); +} diff --git a/lib/libuinet/uinet_api.symlist b/lib/libuinet/uinet_api.symlist index 5b8266f..bb0c56c 100644 --- a/lib/libuinet/uinet_api.symlist +++ b/lib/libuinet/uinet_api.symlist @@ -78,3 +78,6 @@ uinet_register_pfil_in uinet_mbuf_data uinet_mbuf_len uinet_if_xmit +uinet_lock_log_set_file +uinet_lock_log_enable +uinet_lock_log_disable diff --git a/lib/libuinet/uinet_host_interface.c b/lib/libuinet/uinet_host_interface.c index ee2c01f..adf76ab 100644 --- a/lib/libuinet/uinet_host_interface.c +++ b/lib/libuinet/uinet_host_interface.c @@ -102,6 +102,88 @@ static struct itimerval prof_itimer; static pthread_key_t thread_specific_data_key; static unsigned int uhi_num_cpus; +static FILE *lock_log_fp = NULL; +static pthread_mutex_t lock_log_mtx; +static int lock_log_enabled = 0; +static char *lock_log_filename = NULL; + +void +uhi_lock_log_init(void) +{ + + pthread_mutex_init(&lock_log_mtx, NULL); +} + +void +uhi_lock_log_set_file(const char *file) +{ + + pthread_mutex_lock(&lock_log_mtx); + if (lock_log_filename) + free(lock_log_filename); + lock_log_filename = strdup(file); + pthread_mutex_unlock(&lock_log_mtx); +} + +void +uhi_lock_log_enable(void) +{ + + pthread_mutex_lock(&lock_log_mtx); + if (lock_log_enabled == 1) { + pthread_mutex_unlock(&lock_log_mtx); + return; + } + + lock_log_fp = fopen(lock_log_filename, "w+"); + lock_log_enabled = 1; + pthread_mutex_unlock(&lock_log_mtx); +} + +void +uhi_lock_log_disable(void) +{ + FILE *fp = NULL; + + pthread_mutex_lock(&lock_log_mtx); + if (lock_log_mtx == 0) { + 
pthread_mutex_unlock(&lock_log_mtx); + return; + } + fp = lock_log_fp; + lock_log_fp = NULL; + lock_log_enabled = 0; + pthread_mutex_unlock(&lock_log_mtx); + + /* This may take some time, so do it out of the lock */ + fclose(fp); +} + +static void +uhi_lock_log(const char *type, const char *what, void *ptr, const char *file, int line) +{ + struct timespec ts; + + if (lock_log_enabled == 0) + return; + + clock_gettime(CLOCK_MONOTONIC_FAST, &ts); + + pthread_mutex_lock(&lock_log_mtx); + if (lock_log_fp != NULL) { + fprintf(lock_log_fp, + "%llu.%06llu: type %s what %s where %s:%d ptr %p\n", + (unsigned long long) (ts.tv_sec), + (unsigned long long) (ts.tv_nsec / 1000), + type, + what, + file, + line, + ptr); + } + pthread_mutex_unlock(&lock_log_mtx); +} + void uhi_init(void) { @@ -121,6 +203,8 @@ uhi_init(void) if (error != 0) printf("Warning: unable to create pthread key for thread specific data (%d)\n", error); + uhi_lock_log_init(); + #if defined(UINET_PROFILE) printf("getting prof timer\n"); getitimer(ITIMER_PROF, &prof_itimer); @@ -684,8 +768,9 @@ uhi_mutex_destroy(uhi_mutex_t *m) void -uhi_mutex_lock(uhi_mutex_t *m) +_uhi_mutex_lock(uhi_mutex_t *m, const char *file, int line) { + uhi_lock_log("mtx", "lock", m, file, line); pthread_mutex_lock((pthread_mutex_t *)(*m)); } @@ -694,15 +779,20 @@ uhi_mutex_lock(uhi_mutex_t *m) * Returns 0 if the mutex cannot be acquired, non-zero if it can. 
*/ int -uhi_mutex_trylock(uhi_mutex_t *m) +_uhi_mutex_trylock(uhi_mutex_t *m, const char *file, int line) { - return (0 == pthread_mutex_trylock((pthread_mutex_t *)(*m))); + int ret; + ret = (0 == pthread_mutex_trylock((pthread_mutex_t *)(*m))); + if (ret) + uhi_lock_log("mtx", "trylock", m, file, line); + return (ret); } void -uhi_mutex_unlock(uhi_mutex_t *m) +_uhi_mutex_unlock(uhi_mutex_t *m, const char *file, int line) { + uhi_lock_log("mtx", "unlock", m, file, line); pthread_mutex_unlock((pthread_mutex_t *)(*m)); } @@ -765,65 +855,81 @@ uhi_rwlock_destroy(uhi_rwlock_t *rw) void -uhi_rwlock_wlock(uhi_rwlock_t *rw) +_uhi_rwlock_wlock(uhi_rwlock_t *rw, const char *file, int line) { + uhi_lock_log("rw", "wlock", rw, file, line); pthread_mutex_lock((pthread_mutex_t *)(*rw)); } int -uhi_rwlock_trywlock(uhi_rwlock_t *rw) +_uhi_rwlock_trywlock(uhi_rwlock_t *rw, const char *file, int line) { - return (0 == pthread_mutex_trylock((pthread_mutex_t *)(*rw))); + int ret; + + ret = (0 == pthread_mutex_trylock((pthread_mutex_t *)(*rw))); + if (ret) + uhi_lock_log("rw", "trywlock", rw, file, line); + return (ret); } void -uhi_rwlock_wunlock(uhi_rwlock_t *rw) +_uhi_rwlock_wunlock(uhi_rwlock_t *rw, const char *file, int line) { + uhi_lock_log("rw", "wunlock", rw, file, line); pthread_mutex_unlock((pthread_mutex_t *)(*rw)); } void -uhi_rwlock_rlock(uhi_rwlock_t *rw) +_uhi_rwlock_rlock(uhi_rwlock_t *rw, const char *file, int line) { + uhi_lock_log("rw", "rlock", rw, file, line); pthread_mutex_lock((pthread_mutex_t *)(*rw)); } int -uhi_rwlock_tryrlock(uhi_rwlock_t *rw) +_uhi_rwlock_tryrlock(uhi_rwlock_t *rw, const char *file, int line) { - return (0 == pthread_mutex_trylock((pthread_mutex_t *)(*rw))); + int ret; + + ret = (0 == pthread_mutex_trylock((pthread_mutex_t *)(*rw))); + if (ret) + uhi_lock_log("rw", "tryrlock", rw, file, line); + return (ret); } void -uhi_rwlock_runlock(uhi_rwlock_t *rw) +_uhi_rwlock_runlock(uhi_rwlock_t *rw, const char *file, int line) { + 
uhi_lock_log("rw", "runlock", rw, file, line); pthread_mutex_unlock((pthread_mutex_t *)(*rw)); } int -uhi_rwlock_tryupgrade(uhi_rwlock_t *rw) +_uhi_rwlock_tryupgrade(uhi_rwlock_t *rw, const char *file, int line) { /* * Always succeeds as this implementation is always an exclusive * lock */ + uhi_lock_log("rw", "tryupgrade", rw, file, line); return (0); } void -uhi_rwlock_downgrade(uhi_rwlock_t *rw) +_uhi_rwlock_downgrade(uhi_rwlock_t *rw, const char *file, int line) { /* * Nothing to do here. In this implementation, there is only one * grade of this lock. */ + uhi_lock_log("rw", "downgrade", rw, file, line); } diff --git a/lib/libuinet/uinet_host_interface.h b/lib/libuinet/uinet_host_interface.h index de97395..a898a64 100644 --- a/lib/libuinet/uinet_host_interface.h +++ b/lib/libuinet/uinet_host_interface.h @@ -110,6 +110,22 @@ struct uhi_msg { unsigned int rsp_size; }; +/* + * Enable to compile in both the lock file/line into the source tree for + * lock debugging. + */ +#if 0 +#define UINET_LOCK_FILE NULL +#define UINET_LOCK_LINE 0 +#else +#define UINET_LOCK_FILE __FILE__ +#define UINET_LOCK_LINE __LINE__ +#endif + +void uhi_lock_log_init(void); +void uhi_lock_log_set_file(const char *file); +void uhi_lock_log_enable(void); +void uhi_lock_log_disable(void); void uhi_init(void) __attribute__((constructor)); void uhi_set_num_cpus(unsigned int n); @@ -149,20 +165,44 @@ void uhi_cond_broadcast(uhi_cond_t *c); int uhi_mutex_init(uhi_mutex_t *m, int opts); void uhi_mutex_destroy(uhi_mutex_t *m); -void uhi_mutex_lock(uhi_mutex_t *m); -int uhi_mutex_trylock(uhi_mutex_t *m); -void uhi_mutex_unlock(uhi_mutex_t *m); +void _uhi_mutex_lock(uhi_mutex_t *m, const char *file, int line); +int _uhi_mutex_trylock(uhi_mutex_t *m, const char *file, int line); +void _uhi_mutex_unlock(uhi_mutex_t *m, const char *file, int line); + +#define uhi_mutex_lock(m) _uhi_mutex_lock((m), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) +#define uhi_mutex_trylock(m) _uhi_mutex_trylock((m), \ + 
UINET_LOCK_FILE, UINET_LOCK_LINE) +#define uhi_mutex_unlock(m) _uhi_mutex_unlock((m), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) int uhi_rwlock_init(uhi_rwlock_t *rw, int opts); void uhi_rwlock_destroy(uhi_rwlock_t *rw); -void uhi_rwlock_wlock(uhi_rwlock_t *rw); -int uhi_rwlock_trywlock(uhi_rwlock_t *rw); -void uhi_rwlock_wunlock(uhi_rwlock_t *rw); -void uhi_rwlock_rlock(uhi_rwlock_t *rw); -int uhi_rwlock_tryrlock(uhi_rwlock_t *rw); -void uhi_rwlock_runlock(uhi_rwlock_t *rw); -int uhi_rwlock_tryupgrade(uhi_rwlock_t *rw); -void uhi_rwlock_downgrade(uhi_rwlock_t *rw); +void _uhi_rwlock_wlock(uhi_rwlock_t *rw, const char *file, int line); +int _uhi_rwlock_trywlock(uhi_rwlock_t *rw, const char *file, int line); +void _uhi_rwlock_wunlock(uhi_rwlock_t *rw, const char *file, int line); +void _uhi_rwlock_rlock(uhi_rwlock_t *rw, const char *file, int line); +int _uhi_rwlock_tryrlock(uhi_rwlock_t *rw, const char *file, int line); +void _uhi_rwlock_runlock(uhi_rwlock_t *rw, const char *file, int line); +int _uhi_rwlock_tryupgrade(uhi_rwlock_t *rw, const char *file, int line); +void _uhi_rwlock_downgrade(uhi_rwlock_t *rw, const char *file, int line); + +#define uhi_rwlock_wlock(rw) _uhi_rwlock_wlock((rw), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) +#define uhi_rwlock_trywlock(rw) _uhi_rwlock_trywlock((rw), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) +#define uhi_rwlock_wunlock(rw) _uhi_rwlock_wunlock((rw), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) +#define uhi_rwlock_rlock(rw) _uhi_rwlock_rlock((rw), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) +#define uhi_rwlock_tryrlock(rw) _uhi_rwlock_tryrlock((rw), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) +#define uhi_rwlock_runlock(rw) _uhi_rwlock_runlock((rw), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) +#define uhi_rwlock_tryupgrade(rw) _uhi_rwlock_tryupgrade((rw), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) +#define uhi_rwlock_downgrade(rw) _uhi_rwlock_downgrade((rw), \ + UINET_LOCK_FILE, UINET_LOCK_LINE) int uhi_get_ifaddr(const char *ifname, uint8_t *ethaddr); diff --git 
a/lib/libuinet/uinet_kern_mutex.c b/lib/libuinet/uinet_kern_mutex.c index 7da5047..dbc1c2f 100644 --- a/lib/libuinet/uinet_kern_mutex.c +++ b/lib/libuinet/uinet_kern_mutex.c @@ -166,7 +166,7 @@ _mtx_lock_flags(struct mtx *m, int opts, const char *file, int line) WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - uhi_mutex_lock(&m->mtx_lock); + _uhi_mutex_lock(&m->mtx_lock, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } @@ -175,7 +175,7 @@ _mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line) { WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); - uhi_mutex_unlock(&m->mtx_lock); + _uhi_mutex_unlock(&m->mtx_lock, file, line); } int @@ -183,7 +183,7 @@ _mtx_trylock(struct mtx *m, int opts, const char *file, int line) { int rval; - rval = uhi_mutex_trylock(&m->mtx_lock); + rval = _uhi_mutex_trylock(&m->mtx_lock, file, line); if (rval) { WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); @@ -198,7 +198,7 @@ _mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line) WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - uhi_mutex_lock(&m->mtx_lock); + _uhi_mutex_lock(&m->mtx_lock, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } @@ -207,5 +207,5 @@ _mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line) { WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); - uhi_mutex_unlock(&m->mtx_lock); + _uhi_mutex_unlock(&m->mtx_lock, file, line); } diff --git a/lib/libuinet/uinet_kern_rwlock.c b/lib/libuinet/uinet_kern_rwlock.c index d6e4d2b..7f8ae8d 100644 --- a/lib/libuinet/uinet_kern_rwlock.c +++ b/lib/libuinet/uinet_kern_rwlock.c @@ -104,7 +104,7 @@ _rw_wlock(struct rwlock *rw, const char *file, int line) WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - uhi_rwlock_wlock(&rw->rw_lock); + 
_uhi_rwlock_wlock(&rw->rw_lock, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); } @@ -113,7 +113,7 @@ _rw_try_wlock(struct rwlock *rw, const char *file, int line) { int rval; - rval = uhi_rwlock_trywlock(&rw->rw_lock); + rval = _uhi_rwlock_trywlock(&rw->rw_lock, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); @@ -126,14 +126,14 @@ _rw_wunlock(struct rwlock *rw, const char *file, int line) { WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); - uhi_rwlock_wunlock(&rw->rw_lock); + _uhi_rwlock_wunlock(&rw->rw_lock, file, line); } void _rw_rlock(struct rwlock *rw, const char *file, int line) { WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); - uhi_rwlock_rlock(&rw->rw_lock); + _uhi_rwlock_rlock(&rw->rw_lock, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); } @@ -141,7 +141,7 @@ int _rw_try_rlock(struct rwlock *rw, const char *file, int line) { int rval; - rval = uhi_rwlock_tryrlock(&rw->rw_lock); + rval = _uhi_rwlock_tryrlock(&rw->rw_lock, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); } @@ -153,7 +153,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) { WITNESS_UNLOCK(&rw->lock_object, 0, file, line); - uhi_rwlock_runlock(&rw->rw_lock); + _uhi_rwlock_runlock(&rw->rw_lock, file, line); } int @@ -161,7 +161,7 @@ _rw_try_upgrade(struct rwlock *rw, const char *file, int line) { int rval; - rval = uhi_rwlock_tryupgrade(&rw->rw_lock); + rval = _uhi_rwlock_tryupgrade(&rw->rw_lock, file, line); /* 0 means fail; non-zero means success */ /* XXX uhi_rwlock_tryupgrade always returns 0? 
*/ if (rval) { @@ -176,6 +176,6 @@ void _rw_downgrade(struct rwlock *rw, const char *file, int line) { WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); - uhi_rwlock_downgrade(&rw->rw_lock); + _uhi_rwlock_downgrade(&rw->rw_lock, file, line); } From 7777166875bde3bd1580101e2cb58d6c18c5defd Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 12:44:40 -0700 Subject: [PATCH 105/148] Oops - don't enable this by default. --- lib/libuinet/opt/opt_global.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libuinet/opt/opt_global.h b/lib/libuinet/opt/opt_global.h index 6f73446..e997f8d 100644 --- a/lib/libuinet/opt/opt_global.h +++ b/lib/libuinet/opt/opt_global.h @@ -2,5 +2,5 @@ #define MUTEX_NOINLINE 1 #define RWLOCK_NOINLINE 1 #define SX_NOINLINE 1 -#define UINET_LOCK_DEBUG 1 +//#define UINET_LOCK_DEBUG 1 //#define WITNESS 1 From 743ebc9438ec6d85413d8a0903c6ca27488e3c9f Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 22 Jul 2014 14:10:49 -0700 Subject: [PATCH 106/148] Add some extra fields for logging - the tid and original lock pointer. curthread->td_tid doesn't work, le sigh, so for now use pthread_self(). Having access to the original lock pointer makes it much, much easier to link it back to a deadlock in a gdb session.
--- lib/libuinet/uinet_host_interface.c | 53 ++++++++++++++++------------- lib/libuinet/uinet_host_interface.h | 26 ++++++++------ lib/libuinet/uinet_kern_mutex.c | 10 +++--- lib/libuinet/uinet_kern_rwlock.c | 16 ++++----- 4 files changed, 57 insertions(+), 48 deletions(-) diff --git a/lib/libuinet/uinet_host_interface.c b/lib/libuinet/uinet_host_interface.c index adf76ab..c053e28 100644 --- a/lib/libuinet/uinet_host_interface.c +++ b/lib/libuinet/uinet_host_interface.c @@ -160,21 +160,26 @@ uhi_lock_log_disable(void) } static void -uhi_lock_log(const char *type, const char *what, void *ptr, const char *file, int line) +uhi_lock_log(const char *type, const char *what, void *lp, uint32_t tid, void *ptr, const char *file, int line) { struct timespec ts; + pthread_t curthr; if (lock_log_enabled == 0) return; clock_gettime(CLOCK_MONOTONIC_FAST, &ts); + curthr = pthread_self(); + pthread_mutex_lock(&lock_log_mtx); if (lock_log_fp != NULL) { fprintf(lock_log_fp, - "%llu.%06llu: type %s what %s where %s:%d ptr %p\n", + "%llu.%06llu: lp %p tid %x type %s what %s where %s:%d ptr %p\n", (unsigned long long) (ts.tv_sec), (unsigned long long) (ts.tv_nsec / 1000), + lp, + (int) curthr, type, what, file, @@ -768,9 +773,9 @@ uhi_mutex_destroy(uhi_mutex_t *m) void -_uhi_mutex_lock(uhi_mutex_t *m, const char *file, int line) +_uhi_mutex_lock(uhi_mutex_t *m, void *l, uint32_t tid, const char *file, int line) { - uhi_lock_log("mtx", "lock", m, file, line); + uhi_lock_log("mtx", "lock", l, tid, m, file, line); pthread_mutex_lock((pthread_mutex_t *)(*m)); } @@ -779,20 +784,20 @@ _uhi_mutex_lock(uhi_mutex_t *m, const char *file, int line) * Returns 0 if the mutex cannot be acquired, non-zero if it can. 
*/ int -_uhi_mutex_trylock(uhi_mutex_t *m, const char *file, int line) +_uhi_mutex_trylock(uhi_mutex_t *m, void *l, uint32_t tid, const char *file, int line) { int ret; ret = (0 == pthread_mutex_trylock((pthread_mutex_t *)(*m))); if (ret) - uhi_lock_log("mtx", "trylock", m, file, line); + uhi_lock_log("mtx", "trylock", l, tid, m, file, line); return (ret); } void -_uhi_mutex_unlock(uhi_mutex_t *m, const char *file, int line) +_uhi_mutex_unlock(uhi_mutex_t *m, void *l, uint32_t tid, const char *file, int line) { - uhi_lock_log("mtx", "unlock", m, file, line); + uhi_lock_log("mtx", "unlock", l, tid, m, file, line); pthread_mutex_unlock((pthread_mutex_t *)(*m)); } @@ -855,81 +860,81 @@ uhi_rwlock_destroy(uhi_rwlock_t *rw) void -_uhi_rwlock_wlock(uhi_rwlock_t *rw, const char *file, int line) +_uhi_rwlock_wlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line) { - uhi_lock_log("rw", "wlock", rw, file, line); + uhi_lock_log("rw", "wlock", l, tid, rw, file, line); pthread_mutex_lock((pthread_mutex_t *)(*rw)); } int -_uhi_rwlock_trywlock(uhi_rwlock_t *rw, const char *file, int line) +_uhi_rwlock_trywlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line) { int ret; ret = (0 == pthread_mutex_trylock((pthread_mutex_t *)(*rw))); if (ret) - uhi_lock_log("rw", "trywlock", rw, file, line); + uhi_lock_log("rw", "trywlock", l, tid, rw, file, line); return (ret); } void -_uhi_rwlock_wunlock(uhi_rwlock_t *rw, const char *file, int line) +_uhi_rwlock_wunlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line) { - uhi_lock_log("rw", "wunlock", rw, file, line); + uhi_lock_log("rw", "wunlock", l, tid, rw, file, line); pthread_mutex_unlock((pthread_mutex_t *)(*rw)); } void -_uhi_rwlock_rlock(uhi_rwlock_t *rw, const char *file, int line) +_uhi_rwlock_rlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line) { - uhi_lock_log("rw", "rlock", rw, file, line); + uhi_lock_log("rw", "rlock", l, tid, rw, file, line); 
pthread_mutex_lock((pthread_mutex_t *)(*rw)); } int -_uhi_rwlock_tryrlock(uhi_rwlock_t *rw, const char *file, int line) +_uhi_rwlock_tryrlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line) { int ret; ret = (0 == pthread_mutex_trylock((pthread_mutex_t *)(*rw))); if (ret) - uhi_lock_log("rw", "tryrlock", rw, file, line); + uhi_lock_log("rw", "tryrlock", l, tid, rw, file, line); return (ret); } void -_uhi_rwlock_runlock(uhi_rwlock_t *rw, const char *file, int line) +_uhi_rwlock_runlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line) { - uhi_lock_log("rw", "runlock", rw, file, line); + uhi_lock_log("rw", "runlock", l, tid, rw, file, line); pthread_mutex_unlock((pthread_mutex_t *)(*rw)); } int -_uhi_rwlock_tryupgrade(uhi_rwlock_t *rw, const char *file, int line) +_uhi_rwlock_tryupgrade(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line) { /* * Always succeeds as this implementation is always an exclusive * lock */ - uhi_lock_log("rw", "tryupgrade", rw, file, line); + uhi_lock_log("rw", "tryupgrade", l, tid, rw, file, line); return (0); } void -_uhi_rwlock_downgrade(uhi_rwlock_t *rw, const char *file, int line) +_uhi_rwlock_downgrade(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line) { /* * Nothing to do here. In this implementation, there is only one * grade of this lock. 
*/ - uhi_lock_log("rw", "downgrade", rw, file, line); + uhi_lock_log("rw", "downgrade", l, tid, rw, file, line); } diff --git a/lib/libuinet/uinet_host_interface.h b/lib/libuinet/uinet_host_interface.h index a898a64..b62bc8c 100644 --- a/lib/libuinet/uinet_host_interface.h +++ b/lib/libuinet/uinet_host_interface.h @@ -165,28 +165,31 @@ void uhi_cond_broadcast(uhi_cond_t *c); int uhi_mutex_init(uhi_mutex_t *m, int opts); void uhi_mutex_destroy(uhi_mutex_t *m); -void _uhi_mutex_lock(uhi_mutex_t *m, const char *file, int line); -int _uhi_mutex_trylock(uhi_mutex_t *m, const char *file, int line); -void _uhi_mutex_unlock(uhi_mutex_t *m, const char *file, int line); +void _uhi_mutex_lock(uhi_mutex_t *m, void *l, uint32_t tid, const char *file, int line); +int _uhi_mutex_trylock(uhi_mutex_t *m, void *l, uint32_t tid, const char *file, int line); +void _uhi_mutex_unlock(uhi_mutex_t *m, void *l, uint32_t tid, const char *file, int line); +#if 0 #define uhi_mutex_lock(m) _uhi_mutex_lock((m), \ UINET_LOCK_FILE, UINET_LOCK_LINE) #define uhi_mutex_trylock(m) _uhi_mutex_trylock((m), \ UINET_LOCK_FILE, UINET_LOCK_LINE) #define uhi_mutex_unlock(m) _uhi_mutex_unlock((m), \ UINET_LOCK_FILE, UINET_LOCK_LINE) +#endif int uhi_rwlock_init(uhi_rwlock_t *rw, int opts); void uhi_rwlock_destroy(uhi_rwlock_t *rw); -void _uhi_rwlock_wlock(uhi_rwlock_t *rw, const char *file, int line); -int _uhi_rwlock_trywlock(uhi_rwlock_t *rw, const char *file, int line); -void _uhi_rwlock_wunlock(uhi_rwlock_t *rw, const char *file, int line); -void _uhi_rwlock_rlock(uhi_rwlock_t *rw, const char *file, int line); -int _uhi_rwlock_tryrlock(uhi_rwlock_t *rw, const char *file, int line); -void _uhi_rwlock_runlock(uhi_rwlock_t *rw, const char *file, int line); -int _uhi_rwlock_tryupgrade(uhi_rwlock_t *rw, const char *file, int line); -void _uhi_rwlock_downgrade(uhi_rwlock_t *rw, const char *file, int line); +void _uhi_rwlock_wlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line); +int 
_uhi_rwlock_trywlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line); +void _uhi_rwlock_wunlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line); +void _uhi_rwlock_rlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line); +int _uhi_rwlock_tryrlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line); +void _uhi_rwlock_runlock(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line); +int _uhi_rwlock_tryupgrade(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line); +void _uhi_rwlock_downgrade(uhi_rwlock_t *rw, void *l, uint32_t tid, const char *file, int line); +#if 0 #define uhi_rwlock_wlock(rw) _uhi_rwlock_wlock((rw), \ UINET_LOCK_FILE, UINET_LOCK_LINE) #define uhi_rwlock_trywlock(rw) _uhi_rwlock_trywlock((rw), \ @@ -203,6 +206,7 @@ void _uhi_rwlock_downgrade(uhi_rwlock_t *rw, const char *file, int line); UINET_LOCK_FILE, UINET_LOCK_LINE) #define uhi_rwlock_downgrade(rw) _uhi_rwlock_downgrade((rw), \ UINET_LOCK_FILE, UINET_LOCK_LINE) +#endif int uhi_get_ifaddr(const char *ifname, uint8_t *ethaddr); diff --git a/lib/libuinet/uinet_kern_mutex.c b/lib/libuinet/uinet_kern_mutex.c index dbc1c2f..2ce9fe9 100644 --- a/lib/libuinet/uinet_kern_mutex.c +++ b/lib/libuinet/uinet_kern_mutex.c @@ -166,7 +166,7 @@ _mtx_lock_flags(struct mtx *m, int opts, const char *file, int line) WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - _uhi_mutex_lock(&m->mtx_lock, file, line); + _uhi_mutex_lock(&m->mtx_lock, m, curthread->td_tid, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } @@ -175,7 +175,7 @@ _mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line) { WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); - _uhi_mutex_unlock(&m->mtx_lock, file, line); + _uhi_mutex_unlock(&m->mtx_lock, m, curthread->td_tid, file, line); } int @@ -183,7 +183,7 @@ _mtx_trylock(struct mtx *m, int 
opts, const char *file, int line) { int rval; - rval = _uhi_mutex_trylock(&m->mtx_lock, file, line); + rval = _uhi_mutex_trylock(&m->mtx_lock, m, curthread->td_tid, file, line); if (rval) { WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); @@ -198,7 +198,7 @@ _mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line) WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - _uhi_mutex_lock(&m->mtx_lock, file, line); + _uhi_mutex_lock(&m->mtx_lock, m, curthread->td_tid, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } @@ -207,5 +207,5 @@ _mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line) { WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); - _uhi_mutex_unlock(&m->mtx_lock, file, line); + _uhi_mutex_unlock(&m->mtx_lock, m, curthread->td_tid, file, line); } diff --git a/lib/libuinet/uinet_kern_rwlock.c b/lib/libuinet/uinet_kern_rwlock.c index 7f8ae8d..a89219b 100644 --- a/lib/libuinet/uinet_kern_rwlock.c +++ b/lib/libuinet/uinet_kern_rwlock.c @@ -104,7 +104,7 @@ _rw_wlock(struct rwlock *rw, const char *file, int line) WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - _uhi_rwlock_wlock(&rw->rw_lock, file, line); + _uhi_rwlock_wlock(&rw->rw_lock, rw, curthread->td_tid, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); } @@ -113,7 +113,7 @@ _rw_try_wlock(struct rwlock *rw, const char *file, int line) { int rval; - rval = _uhi_rwlock_trywlock(&rw->rw_lock, file, line); + rval = _uhi_rwlock_trywlock(&rw->rw_lock, rw, curthread->td_tid, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); @@ -126,14 +126,14 @@ _rw_wunlock(struct rwlock *rw, const char *file, int line) { WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); - _uhi_rwlock_wunlock(&rw->rw_lock, file, line); + _uhi_rwlock_wunlock(&rw->rw_lock, rw, 
curthread->td_tid, file, line); } void _rw_rlock(struct rwlock *rw, const char *file, int line) { WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); - _uhi_rwlock_rlock(&rw->rw_lock, file, line); + _uhi_rwlock_rlock(&rw->rw_lock, rw, curthread->td_tid, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); } @@ -141,7 +141,7 @@ int _rw_try_rlock(struct rwlock *rw, const char *file, int line) { int rval; - rval = _uhi_rwlock_tryrlock(&rw->rw_lock, file, line); + rval = _uhi_rwlock_tryrlock(&rw->rw_lock, rw, curthread->td_tid, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); } @@ -153,7 +153,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line) { WITNESS_UNLOCK(&rw->lock_object, 0, file, line); - _uhi_rwlock_runlock(&rw->rw_lock, file, line); + _uhi_rwlock_runlock(&rw->rw_lock, rw, curthread->td_tid, file, line); } int @@ -161,7 +161,7 @@ _rw_try_upgrade(struct rwlock *rw, const char *file, int line) { int rval; - rval = _uhi_rwlock_tryupgrade(&rw->rw_lock, file, line); + rval = _uhi_rwlock_tryupgrade(&rw->rw_lock, rw, curthread->td_tid, file, line); /* 0 means fail; non-zero means success */ /* XXX uhi_rwlock_tryupgrade always returns 0? */ if (rval) { @@ -176,6 +176,6 @@ void _rw_downgrade(struct rwlock *rw, const char *file, int line) { WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); - _uhi_rwlock_downgrade(&rw->rw_lock, file, line); + _uhi_rwlock_downgrade(&rw->rw_lock, rw, curthread->td_tid, file, line); } From c6fd6e4c3799d552806cf5c738538dd2e078cb8a Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 23 Jul 2014 13:08:01 -0700 Subject: [PATCH 107/148] Disable witness on uio for now - sx locks aren't real sx locks and aren't marked as sleepable, so witness bitches. 
--- lib/libuinet/uinet_subr_uio.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/libuinet/uinet_subr_uio.c b/lib/libuinet/uinet_subr_uio.c index 7ca72a0..be56445 100644 --- a/lib/libuinet/uinet_subr_uio.c +++ b/lib/libuinet/uinet_subr_uio.c @@ -69,8 +69,8 @@ uiofill(uint8_t val, int n, struct uio *uio) KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td, ("uiofill proc")); - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, - "Calling uiofill()"); +// WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, +// "Calling uiofill()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; @@ -130,8 +130,9 @@ uiomove(void *cp, int n, struct uio *uio) ("uiomove: mode")); KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, ("uiomove proc")); - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, - "Calling uiomove()"); + +// WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, +// "Calling uiomove()"); save = td->td_pflags & TDP_DEADLKTREAT; td->td_pflags |= TDP_DEADLKTREAT; From c20550bc988f819356f60ab3c921c7f7104d21b4 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 23 Jul 2014 13:08:32 -0700 Subject: [PATCH 108/148] Fix some lock order and unlock-a-not-locked-lock due to how the passive code abuses tcp_do_segment(). tcp_do_segment() drops the locks upon return. The libuinet passive code uses it to process a synthetic syn/ack frame to fake up a passive inp/socket for the other side of the new connection but then locks are dropped when they shouldn't be. This leads to all kinds of issues. This is a total hack to paper over it for the time being until a better solution is prepared. I'd like to see the relevant bits refactored out of tcp_do_segment() so it can be called directly by the passive code and by tcp_do_segment(). 
--- sys/netinet/tcp_input.c | 43 +++++++++++++++++++++++++++++++------- sys/netinet/tcp_syncache.c | 2 +- sys/netinet/tcp_var.h | 2 +- 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 128b623..c8003b3 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1207,7 +1207,7 @@ tcp_input(struct mbuf *m, int off0) * the mbuf chain and unlocks the inpcb. */ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, - iptos, ti_locked); + iptos, ti_locked, 0); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return; } @@ -1455,7 +1455,7 @@ tcp_input(struct mbuf *m, int off0) * state. tcp_do_segment() always consumes the mbuf chain, unlocks * the inpcb, and unlocks pcbinfo. */ - tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked); + tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked, 0); INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); return; @@ -1504,10 +1504,27 @@ tcp_input(struct mbuf *m, int off0) m_freem(m); } +/* + * no_unlock is a total hack designed to get around locking issues with + * how libuinet uses tcp_do_segment(). + * + * By default it'll unlock the held inp lock and if it's held, the + * tcbinfo lock. + * + * But the libuinet passive mode uses tcp_do_segment() with an assembled + * synack to setup the passive peer! Here, it can't drop the damned + * locks or it'll confuse the following code that assumes the locks + * are still held. + * + * So this option is just a hack for the specific code path that + * the passive receive socket creation code uses. Eventually the + * relevant bits of tcp_do_segment() should be refactored out and + * used as appropriate. 
+ */ void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, - int ti_locked) + int ti_locked, int no_unlock) { int thflags, acked, ourfinisacked, needoutput = 0; int rstreason, todrop, win; @@ -3038,9 +3055,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, return; } } - if (ti_locked == TI_WLOCKED) + if (no_unlock == 0 && ti_locked == TI_WLOCKED) { INP_INFO_WUNLOCK(&V_tcbinfo); - ti_locked = TI_UNLOCKED; + ti_locked = TI_UNLOCKED; + } #ifdef TCPDEBUG if (so->so_options & SO_DEBUG) @@ -3051,20 +3069,29 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, /* * Return any desired output. */ - if (needoutput || (tp->t_flags & TF_ACKNOW)) + if (needoutput || (tp->t_flags & TF_ACKNOW)) { + if (no_unlock) { + //printf("%s: no_unlock set; but calling tcp_output?\n", __func__); + } (void) tcp_output(tp); + } check_delack: KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d", __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + if (no_unlock == 0) + INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (tp->t_flags & TF_DELACK) { + if (no_unlock == 0) { + //printf("%s: no_unlock set; but calling tcp_timer_activate()?\n", __func__); + } tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } - INP_WUNLOCK(tp->t_inpcb); + if (no_unlock == 0) + INP_WUNLOCK(tp->t_inpcb); return; dropafterack: diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 0177643..a89c85e 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1259,7 +1259,7 @@ syncache_passive_client_socket(struct syncache *sc, struct socket *lso, struct m th->th_win = ntohs(th->th_win); th->th_urp = ntohs(th->th_urp); - tcp_do_segment(m1, th, so, tp, 0, 0, IPTOS_ECN_NOTECT, TI_WLOCKED); + tcp_do_segment(m1, th, so, tp, 0, 0, IPTOS_ECN_NOTECT, TI_WLOCKED, 1); /* return with inp locked */ return 
(so); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 5ed84a8..910eb2c 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -702,7 +702,7 @@ void tcp_input(struct mbuf *, int); #define TI_WLOCKED 2 void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, - int ti_locked); + int ti_locked, int no_unlock); u_long tcp_maxmtu(struct in_conninfo *, int *); u_long tcp_maxmtu6(struct in_conninfo *, int *); void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, From 4d2c70a89e13587c5a838e8669743036c8420aaa Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 28 Jul 2014 12:58:00 -0700 Subject: [PATCH 109/148] Re-enable zero-copy. --- lib/libuinet/uinet_if_netmap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/libuinet/uinet_if_netmap.c b/lib/libuinet/uinet_if_netmap.c index 1426a0a..c6dc0c5 100644 --- a/lib/libuinet/uinet_if_netmap.c +++ b/lib/libuinet/uinet_if_netmap.c @@ -74,9 +74,7 @@ * Setting IF_NETMAP_RXRING_ZCOPY_FRAC_NUM to zero will disable zero copy * receive. */ -//#define IF_NETMAP_RXRING_ZCOPY_FRAC_NUM 1 -/* Disable zero-copy for now */ -#define IF_NETMAP_RXRING_ZCOPY_FRAC_NUM 0 +#define IF_NETMAP_RXRING_ZCOPY_FRAC_NUM 1 #define IF_NETMAP_RXRING_ZCOPY_FRAC_DEN 2 #define IF_NETMAP_THREAD_STOP_CHECK_MS 200 From 49d9c3c72bc4472b801fc6312383596e05191a01 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 28 Jul 2014 13:07:52 -0700 Subject: [PATCH 110/148] Don't compile witness by default. 
--- lib/libuinet/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 733d426..4858c50 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -209,9 +209,9 @@ KERN_SRCS+= \ uipc_sockbuf.c \ uipc_socket.c -KERN_WITNESS_SRCS= \ - subr_witness.c \ - subr_stack.c \ +#KERN_WITNESS_SRCS= \ +# subr_witness.c \ +# subr_stack.c \ KERN_MHEADERS+= \ bus_if.m \ From 586ea893209c0bf09c67e16cf5f04f0129acb2e7 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 28 Jul 2014 13:07:59 -0700 Subject: [PATCH 111/148] Experiment - use m_copypacket() instead. --- lib/libuinet/uinet_if_bridge.c | 4 ++-- lib/libuinet/uinet_if_span.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index 9a4df95..1eb77b4 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -127,7 +127,7 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->ifp == ifp) continue; - mc2 = m_dup(m, M_DONTWAIT); + mc2 = m_copypacket(m, M_DONTWAIT); /* XXX count failure */ if (mc2 == NULL) continue; @@ -137,7 +137,7 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) mtx_unlock(&sc->sc_mtx); /* Duplicate; pass up to the stack */ - mc2 = m_dup(m, M_DONTWAIT); + mc2 = m_copypacket(m, M_DONTWAIT); /* XXX count failure */ if (mc2 != NULL) { mc2->m_pkthdr.rcvif = bifp; diff --git a/lib/libuinet/uinet_if_span.c b/lib/libuinet/uinet_if_span.c index c2cb60d..abc8976 100644 --- a/lib/libuinet/uinet_if_span.c +++ b/lib/libuinet/uinet_if_span.c @@ -97,7 +97,7 @@ if_span_input(struct ifnet *ifp, struct mbuf *m) // printf("%s: m=%p: called\n", __func__, m); /* Duplicate; pass up to the stack */ - mc2 = m_dup(m, M_DONTWAIT); + mc2 = m_copypacket(m, M_DONTWAIT); /* XXX count failure */ if (mc2 != NULL) { mc2->m_pkthdr.rcvif = bifp; From 51b974b4d0210677dce1b5845f16c25873e96228 Mon 
Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 29 Jul 2014 19:20:53 -0700 Subject: [PATCH 112/148] it turns out that doing lots of 6 byte memcpy()'s is dumb on modern intel hardware. The backend gets less annoyed at you if you do 6 one-byte copies rather than a REP MOV for 6 bytes. So, introduce ETHER_ADDR_COPY(). --- sys/net/ethernet.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h index dde9b73..9bdc6bd 100644 --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -28,6 +28,16 @@ */ #define ETHER_ALIGN 2 /* driver adjust for IP hdr alignment */ +#define ETHER_ADDR_COPY(dst, src) \ + do { \ + ((char *) dst)[0] = ((char *) src)[0]; \ + ((char *) dst)[1] = ((char *) src)[1]; \ + ((char *) dst)[2] = ((char *) src)[2]; \ + ((char *) dst)[3] = ((char *) src)[3]; \ + ((char *) dst)[4] = ((char *) src)[4]; \ + ((char *) dst)[5] = ((char *) src)[5]; \ + } while (0) + /* * Compute the maximum frame size based on ethertype (i.e. possible * encapsulation) and whether or not an FCS is present. From b1f88b5aa035cb213c63b00aae8ab5b84f097b4b Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 29 Jul 2014 19:21:35 -0700 Subject: [PATCH 113/148] * Use ETHER_ADDR_COPY() appropriately * Move the bridge check _above_ the L2 mtag addition and the vlan stripping. What we actually want here is a bridge that implements frame copying _before_ vlan tags are stripped. Otherwise inline mode with 802.1q frames will end up rebroadcasting non-vlan tags to other interfaces. This work is for: * not doing stupid 6 byte memcpy()'s * don't populate the l2 tag information _before_ we do the bridge frame duplication, otherwise m_copypacket() will do a deep copy of the mbuf tags; * don't strip vlans before we pass to BRIDGE_INPUT() so bridge will forward whatever frames are provided. 
--- sys/net/if_ethersubr.c | 43 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 352c1b0..ec168bf 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -362,10 +362,10 @@ ether_output(struct ifnet *ifp, struct mbuf *m, d = mtod(m, uint8_t *); eh = (struct ether_header *)d; - (void)memcpy(d, l2i->inl2i_foreign_addr, ETHER_ADDR_LEN); + ETHER_ADDR_COPY(d, l2i->inl2i_foreign_addr); d += ETHER_ADDR_LEN; - (void)memcpy(d, l2i->inl2i_local_addr, ETHER_ADDR_LEN); + ETHER_ADDR_COPY(d, l2i->inl2i_local_addr); d += ETHER_ADDR_LEN; if (num_tag_bytes) { @@ -734,8 +734,40 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) } } + /* + * This code is much earlier than previous code - we want + * a VLAN bridge that actually just copies the mbufs before + * VLAN de-encapsulation. + * + * It also needs to be done before the promiscinet information + * is added via mbuf tags or those tags get copied into + * bridge/span interface mbufs during m_copypacket() - and + * that's a lot of wasted time. + */ + + /* + * Allow if_bridge(4) to claim this frame. + * The BRIDGE_INPUT() macro will update ifp if the bridge changed it + * and the frame should be delivered locally. + */ + if (ifp->if_bridge != NULL) { + m->m_flags &= ~M_PROMISC; + BRIDGE_INPUT(ifp, m); + if (m == NULL) { + CURVNET_RESTORE(); + return; + } + } #ifdef PROMISCUOUS_INET + /* + * Span mode debug - we shouldn't see PROMISCINET if if_bridge + * is set. 
+ */ + if ((ifp->if_flags & IFF_PROMISCINET) && (ifp->if_bridge != NULL)) { + printf("wtf?\n"); + } + if (ifp->if_flags & IFF_PROMISCINET) { struct ifl2info *l2info_tag; struct in_l2info *l2info; @@ -760,8 +792,8 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) l2info = &l2info_tag->ifl2i_info; l2ts = &l2info->inl2i_tagstack; - memcpy(l2info->inl2i_local_addr, eh->ether_dhost, ETHER_ADDR_LEN); - memcpy(l2info->inl2i_foreign_addr, eh->ether_shost, ETHER_ADDR_LEN); + ETHER_ADDR_COPY(l2info->inl2i_local_addr, eh->ether_dhost); + ETHER_ADDR_COPY(l2info->inl2i_foreign_addr, eh->ether_shost); l2ts->inl2t_cnt = 0; /* @@ -902,6 +934,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) } } +#if 0 /* * Allow if_bridge(4) to claim this frame. * The BRIDGE_INPUT() macro will update ifp if the bridge changed it @@ -915,6 +948,8 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) return; } } +#ne +#endif #if defined(INET) || defined(INET6) /* From ecc48273514cc67d14b94c6baa6592dbc58e6b48 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 29 Jul 2014 19:24:47 -0700 Subject: [PATCH 114/148] Disable TCP checksumming. It's chewing far too much time when doing lots of data through passive intercept. I'd like netmap to expose the hardware checksum information so it can be thrown into here and inspected, but unfortunately that doesn't seem to be doable without a bunch of netmap engineering. 
--- sys/netinet/tcp_input.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index c8003b3..daa9717 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -575,7 +575,7 @@ tcp_input(struct mbuf *m, int off0) u_char *optp = NULL; int optlen = 0; #ifdef INET - int len; +// int len; #endif int tlen = 0, off; int drop_hdrlen; @@ -686,6 +686,16 @@ tcp_input(struct mbuf *m, int off0) th = (struct tcphdr *)((caddr_t)ip + off0); tlen = ip->ip_len; + /* + * TCP checksums are disabled for now. + * + * It's pretty stupid, but the netmap interface + * doesn't seem to provide hardware checksums up to + * the userland code and so this kernel code + * recalculates them. This chews a noticeable + * amount of CPU time for large amounts of data. + */ +#if 0 if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; @@ -714,6 +724,8 @@ tcp_input(struct mbuf *m, int off0) TCPSTAT_INC(tcps_rcvbadsum); goto drop; } +#endif + th->th_sum = 0; /* Re-initialization for later version check */ ip->ip_v = IPVERSION; } From 675aca990f073429703fa22ccb06fd81d2795ee9 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 29 Jul 2014 21:41:25 -0700 Subject: [PATCH 115/148] Don't do local interface processing - it's not needed. Just punt it up to the bridge/span interface and be done with it.
--- lib/libuinet/uinet_if_bridge.c | 8 ++++++++ lib/libuinet/uinet_if_span.c | 9 ++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index 1eb77b4..edcadd2 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -136,6 +136,13 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) } mtx_unlock(&sc->sc_mtx); + /* We don't do local processing; just punt to the bridge */ + + m->m_pkthdr.rcvif = bifp; + (*bifp->if_input)(bifp, m); + return (NULL); + +#if 0 /* Duplicate; pass up to the stack */ mc2 = m_copypacket(m, M_DONTWAIT); /* XXX count failure */ @@ -146,6 +153,7 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) /* Return the original packet for local processing. */ return (m); +#endif } /* diff --git a/lib/libuinet/uinet_if_span.c b/lib/libuinet/uinet_if_span.c index abc8976..98603ca 100644 --- a/lib/libuinet/uinet_if_span.c +++ b/lib/libuinet/uinet_if_span.c @@ -89,13 +89,19 @@ if_span_input(struct ifnet *ifp, struct mbuf *m) { struct if_span_softc *sc; struct ifnet *bifp; +#if 0 struct mbuf *mc2; +#endif sc = ifp->if_bridge; bifp = sc->sc_ifp; -// printf("%s: m=%p: called\n", __func__, m); + /* Note: We don't need to locally process the frame */ + m->m_pkthdr.rcvif = bifp; + (*bifp->if_input)(bifp, m); + return (NULL); +#if 0 /* Duplicate; pass up to the stack */ mc2 = m_copypacket(m, M_DONTWAIT); /* XXX count failure */ @@ -106,6 +112,7 @@ if_span_input(struct ifnet *ifp, struct mbuf *m) /* Return the original packet for local processing. */ return (m); +#endif } /* From 01474196ecefb0416faf1b09415ab1f02a7ed197 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 1 Aug 2014 12:09:43 -0700 Subject: [PATCH 116/148] Go back to m_dup() for now. It seems that somewhere in the IP stack the IP header is being modified and we end up with invalid header lengths going out on the wire. So yeah, we can't do zero-copy bridging until that's taken care of. 
Sigh. --- lib/libuinet/uinet_if_bridge.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index edcadd2..4720363 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -127,7 +127,8 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->ifp == ifp) continue; - mc2 = m_copypacket(m, M_DONTWAIT); + //mc2 = m_copypacket(m, M_DONTWAIT); + mc2 = m_dup(m, M_DONTWAIT); /* XXX count failure */ if (mc2 == NULL) continue; From b4a21eb6a2c6110d8e63a8e99d3724025b8c2a1d Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sun, 31 Aug 2014 09:35:09 -0700 Subject: [PATCH 117/148] Disable sysctl debugging for now. --- lib/libuinet/uinet_host_sysctl_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/libuinet/uinet_host_sysctl_api.c b/lib/libuinet/uinet_host_sysctl_api.c index 83c39f1..8811e03 100644 --- a/lib/libuinet/uinet_host_sysctl_api.c +++ b/lib/libuinet/uinet_host_sysctl_api.c @@ -48,7 +48,7 @@ #include "uinet_host_sysctl_api.h" #include "uinet_host_sysctl_api_priv.h" -#define UINET_SYSCTL_DEBUG +//#define UINET_SYSCTL_DEBUG #ifdef UINET_SYSCTL_DEBUG #define UINET_SYSCTL_DPRINTF(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__) From a181f3c947579a2cda01608482451ab3718ee362 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sun, 31 Aug 2014 11:00:46 -0700 Subject: [PATCH 118/148] Make the sysctl socket path configurable. 
--- lib/libuinet/api_include/uinet_host_sysctl_api_priv.h | 6 ++++++ lib/libuinet/uinet_host_sysctl_api.c | 10 ++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/libuinet/api_include/uinet_host_sysctl_api_priv.h b/lib/libuinet/api_include/uinet_host_sysctl_api_priv.h index a5ab2cc..d4cda29 100644 --- a/lib/libuinet/api_include/uinet_host_sysctl_api_priv.h +++ b/lib/libuinet/api_include/uinet_host_sysctl_api_priv.h @@ -26,6 +26,12 @@ #ifndef __SYSCTL_API_PRIV_H__ #define __SYSCTL_API_PRIV_H__ +#define UINET_SYSCTL_MAXPATHLEN 1024 + +struct uinet_host_sysctl_cfg { + char sysctl_sock_path[UINET_SYSCTL_MAXPATHLEN]; +}; + extern void * uinet_host_sysctl_listener_thread(void *arg); #endif diff --git a/lib/libuinet/uinet_host_sysctl_api.c b/lib/libuinet/uinet_host_sysctl_api.c index 8811e03..56bf893 100644 --- a/lib/libuinet/uinet_host_sysctl_api.c +++ b/lib/libuinet/uinet_host_sysctl_api.c @@ -460,13 +460,19 @@ uinet_host_sysctl_listener_thread(void *arg) { int s, r; struct sockaddr_un sun; + struct uinet_host_sysctl_cfg *cfg = arg; + char *path; + path = "/tmp/sysctl.sock"; + if (cfg) { + path = cfg->sysctl_sock_path; + } uinet_initialize_thread(); - (void) unlink("/tmp/sysctl.sock"); + (void) unlink(path); bzero(&sun, sizeof(sun)); - strcpy(sun.sun_path, "/tmp/sysctl.sock"); + strcpy(sun.sun_path, path); sun.sun_len = 0; sun.sun_family = AF_UNIX; From ddc794e53f965a49d9daa982f7e9ccb8535698a9 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sun, 31 Aug 2014 13:17:28 -0700 Subject: [PATCH 119/148] Fix compilation. 
--- lib/libuinet/Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 8929e31..78c4c47 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -150,9 +150,7 @@ UINET_SRCS+= \ uinet_vm_glue.c \ uinet_vm_kern.c \ uinet_vm_meter.c \ - uinet_vm_object.c \ - uinet_if_bridge.c \ - uinet_if_span.c + uinet_vm_object.c ifneq (${HOST_OS},Darwin) UINET_SRCS+= uinet_if_netmap.c From 9f94f5d7323f7e0ebea4d462b7e5be132fc0491f Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sun, 31 Aug 2014 13:21:30 -0700 Subject: [PATCH 120/148] Allow the sysctl socket to be set by the SYSCTL_SOCK environment variable. --- bin/sysctl/u_sysctl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bin/sysctl/u_sysctl.c b/bin/sysctl/u_sysctl.c index 645c534..ba91857 100644 --- a/bin/sysctl/u_sysctl.c +++ b/bin/sysctl/u_sysctl.c @@ -245,11 +245,16 @@ u_sysctl_open(void) int s; struct sockaddr_un sun; int r; + char *spath; + + spath = getenv("SYSCTL_SOCK"); + if (spath == NULL) + spath = "/tmp/sysctl.sock"; /* Connect to the destination socket */ bzero(&sun, sizeof(sun)); - strcpy(sun.sun_path, "/tmp/sysctl.sock"); + strcpy(sun.sun_path, spath); sun.sun_len = 0; sun.sun_family = AF_UNIX; From 2d7a976b02215bd8a02e3efb203ac78047f1248b Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sun, 31 Aug 2014 14:46:17 -0700 Subject: [PATCH 121/148] Fix up the new lock types now that the rmlock/sx locks aren't just calling the rwlock methods. This should make the witness stuff less .. creepy. 
--- lib/libuinet/uinet_kern_rmlock.c | 53 +++++++++++++++++++++++++------- lib/libuinet/uinet_kern_sx.c | 32 ++++++++++++++++--- 2 files changed, 69 insertions(+), 16 deletions(-) diff --git a/lib/libuinet/uinet_kern_rmlock.c b/lib/libuinet/uinet_kern_rmlock.c index 1cf950a..887f84d 100644 --- a/lib/libuinet/uinet_kern_rmlock.c +++ b/lib/libuinet/uinet_kern_rmlock.c @@ -73,6 +73,14 @@ rm_init_flags(struct rmlock *rm, const char *name, int opts) liflags |= LO_WITNESS; if (opts & RM_RECURSE) liflags |= LO_RECURSABLE; + /* XXX validate - do we need more? */ +#if 0 + if (opts & RM_SLEEPABLE) { + liflags |= RM_SLEEPABLE; + sx_init_flags(&rm->rm_lock_sx, "rmlock_sx", SX_RECURSE); + } else + mtx_init(&rm->rm_lock_mtx, name, "rmlock_mtx", MTX_NOWITNESS); +#endif lock_init(&rm->lock_object, &lock_class_rm, name, NULL, liflags); if (0 != uhi_rwlock_init(&rm->rm_lock, opts & RM_RECURSE ? UHI_RW_WRECURSE : 0)) @@ -82,28 +90,32 @@ rm_init_flags(struct rmlock *rm, const char *name, int opts) void rm_destroy(struct rmlock *rm) { + uhi_rwlock_destroy(&rm->rm_lock); } void _rm_wlock(struct rmlock *rm) { - uhi_rwlock_wlock(&rm->rm_lock); + _uhi_rwlock_wlock(&rm->rm_lock, rm, curthread->td_tid, NULL, 0); } void _rm_wunlock(struct rmlock *rm) { - uhi_rwlock_wunlock(&rm->rm_lock); + + _uhi_rwlock_wunlock(&rm->rm_lock, rm, curthread->td_tid, NULL, 0); } int _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) { + if (trylock) - return uhi_rwlock_tryrlock(&rm->rm_lock); + return _uhi_rwlock_tryrlock(&rm->rm_lock, rm, + curthread->td_tid, NULL, 0); - uhi_rwlock_rlock(&rm->rm_lock); + _uhi_rwlock_rlock(&rm->rm_lock, rm, curthread->td_tid, NULL, 0); return (1); } @@ -111,23 +123,36 @@ _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) void _rm_runlock(struct rmlock *rm, struct rm_priotracker *tracker) { - uhi_rwlock_runlock(&rm->rm_lock); -} + _uhi_rwlock_runlock(&rm->rm_lock, rm, curthread->td_tid, NULL, 0); +} #if LOCK_DEBUG > 0 void 
_rm_wlock_debug(struct rmlock *rm, const char *file, int line) { - _rw_wlock((struct rwlock *) rm, file, line); + WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, + line, NULL); + _rm_wlock((struct rwlock *) rm); + if (rm->lock_object.lo_flags & RM_SLEEPABLE) + WITNESS_LOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE, + file, line); + else + WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); } void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) { - _rw_wunlock((struct rwlock *) rm, file, line); + if (rm->lock_object.lo_flags & RM_SLEEPABLE) + WITNESS_UNLOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE, + file, line); + else + WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); + + _rm_wunlock((struct rwlock *) rm, file, line); } int @@ -135,10 +160,15 @@ _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, int trylock, const char *file, int line) { + if (!trylock && (rm->lock_object.lo_flags & RM_SLEEPABLE)) + WITNESS_CHECKORDER(&rm->rm_lock_sx.lock_object, LOP_NEWORDER, + file, line, NULL); + WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER, file, line, NULL); + if (trylock) - return _rw_try_rlock((struct rwlock *)rm, file, line); + return _rm_try_rlock((struct rwlock *)rm, file, line); - _rw_rlock((struct rwlock *)rm, file, line); + _rm_rlock((struct rwlock *)rm, file, line); return (1); } @@ -147,6 +177,7 @@ _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, const char *file, int line) { - _rw_runlock((struct rwlock *)rm, file, line); + WITNESS_UNLOCK(&rm->lock_object, 0, file, line); + _rm_runlock((struct rwlock *)rm, file, line); } #endif diff --git a/lib/libuinet/uinet_kern_sx.c b/lib/libuinet/uinet_kern_sx.c index 5f1ba35..174371e 100644 --- a/lib/libuinet/uinet_kern_sx.c +++ b/lib/libuinet/uinet_kern_sx.c @@ -83,33 +83,55 @@ int _sx_xlock(struct sx *sx, int opts, const char *file, int line) { - uhi_rwlock_wlock(&sx->sx_lock); + + WITNESS_CHECKORDER(&sx->lock_object, 
LOP_NEWORDER | LOP_EXCLUSIVE, file, + line, NULL); + _uhi_rwlock_wlock(&sx->sx_lock, sx, curthread->td_tid, file, line); + WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); return (0); } int _sx_slock(struct sx *sx, int opts, const char *file, int line) { - uhi_rwlock_rlock(&sx->sx_lock); + + WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL); + _uhi_rwlock_rlock(&sx->sx_lock, sx, curthread->td_tid, file, line); + /* XXX always succeeds, so */ + WITNESS_LOCK(&sx->lock_object, 0, file, line); return (0); } void _sx_xunlock(struct sx *sx, const char *file, int line) { - uhi_rwlock_wunlock(&sx->sx_lock); + + WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); + _uhi_rwlock_wunlock(&sx->sx_lock, sx, curthread->td_tid, file, line); } void _sx_sunlock(struct sx *sx, const char *file, int line) { - uhi_rwlock_runlock(&sx->sx_lock); + + WITNESS_UNLOCK(&sx->lock_object, 0, file, line); + _uhi_rwlock_runlock(&sx->sx_lock, sx, curthread->td_tid, file, line); } int _sx_try_xlock(struct sx *sx, const char *file, int line) { - return (uhi_rwlock_trywlock(&sx->sx_lock)); + int ret; + + ret = (_uhi_rwlock_trywlock(&sx->sx_lock, sx, curthread->td_tid, + file, line)); + + if (ret) { + WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, + file, line); + } + + return (ret); } void From 33ec673e20028024cb966150a350b11e925929d5 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 1 Sep 2014 07:17:09 -0700 Subject: [PATCH 122/148] Make this code compile with witness enabled. rmlocks don't have sx lock support just yet, so comment it out. 
--- lib/libuinet/uinet_kern_rmlock.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/libuinet/uinet_kern_rmlock.c b/lib/libuinet/uinet_kern_rmlock.c index 887f84d..67a9fb8 100644 --- a/lib/libuinet/uinet_kern_rmlock.c +++ b/lib/libuinet/uinet_kern_rmlock.c @@ -134,11 +134,13 @@ _rm_wlock_debug(struct rmlock *rm, const char *file, int line) WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - _rm_wlock((struct rwlock *) rm); + _rm_wlock(rm); +#if 0 if (rm->lock_object.lo_flags & RM_SLEEPABLE) WITNESS_LOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE, file, line); else +#endif WITNESS_LOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); } @@ -146,13 +148,15 @@ void _rm_wunlock_debug(struct rmlock *rm, const char *file, int line) { +#if 0 if (rm->lock_object.lo_flags & RM_SLEEPABLE) WITNESS_UNLOCK(&rm->rm_lock_sx.lock_object, LOP_EXCLUSIVE, file, line); else +#endif WITNESS_UNLOCK(&rm->lock_object, LOP_EXCLUSIVE, file, line); - _rm_wunlock((struct rwlock *) rm, file, line); + _rm_wunlock(rm); } int @@ -160,15 +164,19 @@ _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, int trylock, const char *file, int line) { +#if 0 if (!trylock && (rm->lock_object.lo_flags & RM_SLEEPABLE)) WITNESS_CHECKORDER(&rm->rm_lock_sx.lock_object, LOP_NEWORDER, file, line, NULL); +#endif WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER, file, line, NULL); +#if 0 if (trylock) - return _rm_try_rlock((struct rwlock *)rm, file, line); + return _rm_try_rlock(rm); +#endif - _rm_rlock((struct rwlock *)rm, file, line); + _rm_rlock(rm, tracker, trylock); return (1); } @@ -178,6 +186,6 @@ _rm_runlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, { WITNESS_UNLOCK(&rm->lock_object, 0, file, line); - _rm_runlock((struct rwlock *)rm, file, line); + _rm_runlock(rm, tracker); } #endif From 6dacab042680740e1ece2eceeeb6a329c94ef1c6 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 1 Sep 2014 07:21:11 
-0700 Subject: [PATCH 123/148] This isn't used anymore! --- lib/libuinet_memstat/Makefile.orig | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 lib/libuinet_memstat/Makefile.orig diff --git a/lib/libuinet_memstat/Makefile.orig b/lib/libuinet_memstat/Makefile.orig deleted file mode 100644 index 89c52a8..0000000 --- a/lib/libuinet_memstat/Makefile.orig +++ /dev/null @@ -1,30 +0,0 @@ -# $FreeBSD: stable/9/lib/libmemstat/Makefile 195767 2009-07-19 17:25:24Z kensmith $ - -WARNS?= 3 -LIB= memstat -SHLIB_MAJOR= 3 -DPADD= ${LIBKVM} -LDADD= -lkvm -SRCS+= memstat.c -SRCS+= memstat_all.c -SRCS+= memstat_malloc.c -SRCS+= memstat_uma.c -INCS= memstat.h - -MAN= libmemstat.3 - -MLINKS+= libmemstat.3 memstat_mtl_alloc.3 -MLINKS+= libmemstat.3 memstat_mtl_first.3 -MLINKS+= libmemstat.3 memstat_mtl_next.3 -MLINKS+= libmemstat.3 memstat_mtl_find.3 -MLINKS+= libmemstat.3 memstat_mtl_free.3 -MLINKS+= libmemstat.3 memstat_mtl_geterror.3 -MLINKS+= libmemstat.3 memstat_strerror.3 -MLINKS+= libmemstat.3 memstat_sysctl_all.3 -MLINKS+= libmemstat.3 memstat_sysctl_malloc.3 -MLINKS+= libmemstat.3 memstat_sysctl_uma.3 -MLINKS+= libmemstat.3 memstat_kvm_all.3 -MLINKS+= libmemstat.3 memstat_kvm_malloc.3 -MLINKS+= libmemstat.3 memstat_kvm_uma.3 - -.include From 5411f887a30dbb5175fbd2d9afb1be5e2e903e77 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 1 Sep 2014 08:10:00 -0700 Subject: [PATCH 124/148] Remove this - it's no longer true. --- sys/kern/subr_witness.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index dc5c855..cb8b42b 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -1776,14 +1776,8 @@ enroll(const char *description, struct lock_class *lock_class) found: w->w_refcount++; mtx_unlock_spin(&w_mtx); - /* - * XXX libuinet currently shortcuts a bunch of lock types and - * implements them using other lock types. The locking stuff - * needs to be made .. 
saner. - */ if (lock_class != w->w_class) -// WITNESS_PANIC( - printf( + WITNESS_PANIC( "lock (%s) %s does not match earlier (%s) lock", description, lock_class->lc_name, w->w_class->lc_name); From 31b21d55bfba1ef3915be308022daf3a4645aa41 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 1 Sep 2014 09:14:56 -0700 Subject: [PATCH 125/148] This is already in uinet_api.h so get rid of it from here. --- lib/libuinet/api_include/uinet_config.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/lib/libuinet/api_include/uinet_config.h b/lib/libuinet/api_include/uinet_config.h index 7b7612b..2dd126a 100644 --- a/lib/libuinet/api_include/uinet_config.h +++ b/lib/libuinet/api_include/uinet_config.h @@ -152,18 +152,6 @@ const char *uinet_ifgenericname(uinet_ifcookie_t cookie); */ int uinet_config_blackhole(uinet_blackhole_t action); -/* - * general sysctl interface. - * - * XXX doesn't belong here! - */ -int -uinet_sysctlbyname(char *name, char *oldp, size_t *oldplen, - char *newp, size_t newplen, size_t *retval, int flags); -int -uinet_sysctl(int *name, u_int namelen, void *oid, size_t *oldlenp, - void *new, size_t newlen, size_t *retval, int flags); - #ifdef __cplusplus } #endif From c7d0749d581c360b0c50b65b5c0662f52452bb54 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 1 Sep 2014 09:16:32 -0700 Subject: [PATCH 126/148] It's not really a hack anymore, so don't say it is. --- lib/libuinet/uinet_api.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index 1349abd..7200941 100644 --- a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -1273,9 +1273,6 @@ uinet_sysctl(int *name, u_int namelen, void *oldp, size_t *oldplen, return (error); } -/* - * XXX static callback sucks, but it's what I have to go on. 
- */ static uinet_pfil_cb_t g_uinet_pfil_cb = NULL; static void * g_uinet_pfil_cbdata = NULL; static struct ifnet *g_uinet_pfil_ifp = NULL; @@ -1346,7 +1343,7 @@ uinet_pfil_in_hook_v4(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, } /* - * XXX test hack to play with pfil + * Register a single hook for the AF_INET pfil. */ int uinet_register_pfil_in(uinet_pfil_cb_t cb, void *arg, const char *ifname) From e8982d604c79988b7414e924d6208840a2808614 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 2 Sep 2014 09:42:40 -0700 Subject: [PATCH 127/148] * Correct witness checks in rmlock rlock * Correctly return the return value from the uhi call for rmlock rlock. --- lib/libuinet/uinet_kern_rmlock.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/libuinet/uinet_kern_rmlock.c b/lib/libuinet/uinet_kern_rmlock.c index 67a9fb8..96e403d 100644 --- a/lib/libuinet/uinet_kern_rmlock.c +++ b/lib/libuinet/uinet_kern_rmlock.c @@ -163,6 +163,7 @@ int _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, int trylock, const char *file, int line) { + int ret; #if 0 if (!trylock && (rm->lock_object.lo_flags & RM_SLEEPABLE)) @@ -171,13 +172,11 @@ _rm_rlock_debug(struct rmlock *rm, struct rm_priotracker *tracker, #endif WITNESS_CHECKORDER(&rm->lock_object, LOP_NEWORDER, file, line, NULL); -#if 0 - if (trylock) - return _rm_try_rlock(rm); -#endif + ret = (_rm_rlock(rm, tracker, trylock)); + if (ret) + WITNESS_LOCK(&rm->lock_object, 0, file, line); - _rm_rlock(rm, tracker, trylock); - return (1); + return (ret); } void From 3096315e5c8406426f84c36490b551da2d7a19b3 Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Wed, 3 Sep 2014 01:13:18 -0700 Subject: [PATCH 128/148] refs #2690 - API to clean tcpstats. 
--- lib/libuinet/api_include/uinet_api.h | 1 + lib/libuinet/uinet_api.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/lib/libuinet/api_include/uinet_api.h b/lib/libuinet/api_include/uinet_api.h index 9328ecd..f1aeccc 100644 --- a/lib/libuinet/api_include/uinet_api.h +++ b/lib/libuinet/api_include/uinet_api.h @@ -41,6 +41,7 @@ void uinet_finalize_thread(void); int uinet_getl2info(struct uinet_socket *so, struct uinet_in_l2info *l2i); int uinet_getifstat(const char *name, struct uinet_ifstat *stat); void uinet_gettcpstat(struct uinet_tcpstat *stat); +void uinet_cleartcpstat(void); char *uinet_inet_ntoa(struct uinet_in_addr in, char *buf, unsigned int size); const char *uinet_inet_ntop(int af, const void *src, char *dst, unsigned int size); int uinet_inet_pton(int af, const char *src, void *dst); diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index 7200941..47b89dc 100644 --- a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -168,6 +168,13 @@ uinet_gettcpstat(struct uinet_tcpstat *stat) *((struct tcpstat *)stat) = tcpstat; } +void +uinet_cleartcpstat(void) +{ + + bzero(&tcpstat, sizeof(tcpstat)); +} + char * uinet_inet_ntoa(struct uinet_in_addr in, char *buf, unsigned int size) From 46b42db1396f96fa3e88a418a9ce23c0f7970173 Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Wed, 3 Sep 2014 01:24:27 -0700 Subject: [PATCH 129/148] Need to export uinet_cleartcpstat. --- lib/libuinet/uinet_api.symlist | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/libuinet/uinet_api.symlist b/lib/libuinet/uinet_api.symlist index 615d8e2..c06ecde 100644 --- a/lib/libuinet/uinet_api.symlist +++ b/lib/libuinet/uinet_api.symlist @@ -1,3 +1,4 @@ +uinet_cleartcpstat uinet_config_blackhole uinet_errno_to_os uinet_finalize_thread From 0e6bc42962c5934e0ec3b47258bb7b49a7497234 Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Wed, 3 Sep 2014 01:34:42 -0700 Subject: [PATCH 130/148] refs #2690 - add uinet_clearifstat() to clear interface stats. 
--- lib/libuinet/api_include/uinet_api.h | 1 + lib/libuinet/uinet_api.c | 41 ++++++++++++++++++++++++++++ lib/libuinet/uinet_api.symlist | 1 + 3 files changed, 43 insertions(+) diff --git a/lib/libuinet/api_include/uinet_api.h b/lib/libuinet/api_include/uinet_api.h index f1aeccc..c735b61 100644 --- a/lib/libuinet/api_include/uinet_api.h +++ b/lib/libuinet/api_include/uinet_api.h @@ -37,6 +37,7 @@ extern "C" { #include "uinet_config.h" #include "uinet_queue.h" +int uinet_clearifstat(const char *name); void uinet_finalize_thread(void); int uinet_getl2info(struct uinet_socket *so, struct uinet_in_l2info *l2i); int uinet_getifstat(const char *name, struct uinet_ifstat *stat); diff --git a/lib/libuinet/uinet_api.c b/lib/libuinet/uinet_api.c index 47b89dc..70ce9c5 100644 --- a/lib/libuinet/uinet_api.c +++ b/lib/libuinet/uinet_api.c @@ -120,6 +120,47 @@ uinet_finalize_thread(void) } +int +uinet_clearifstat(const char *name) +{ + struct uinet_config_if *ifcfg; + struct ifnet *ifp; + + ifcfg = uinet_iffind_byname(name); + if (NULL == ifcfg) { + printf("could not find interface %s\n", name); + return (EINVAL); + } + + ifp = ifnet_byindex_ref(ifcfg->ifindex); + if (NULL == ifp) { + printf("could not find interface %s by index\n", name); + return (EINVAL); + } + + ifp->if_data.ifi_ipackets = 0; + ifp->if_data.ifi_ierrors = 0; + ifp->if_data.ifi_opackets = 0; + ifp->if_data.ifi_oerrors = 0; + ifp->if_data.ifi_collisions = 0; + ifp->if_data.ifi_ibytes = 0; + ifp->if_data.ifi_obytes = 0; + ifp->if_data.ifi_imcasts = 0; + ifp->if_data.ifi_omcasts = 0; + ifp->if_data.ifi_iqdrops = 0; + ifp->if_data.ifi_noproto = 0; + ifp->if_data.ifi_hwassist = 0; + ifp->if_data.ifi_epoch = 0; + ifp->if_data.ifi_icopies = 0; + ifp->if_data.ifi_izcopies = 0; + ifp->if_data.ifi_ocopies = 0; + ifp->if_data.ifi_ozcopies = 0; + + if_rele(ifp); + + return (0); +} + int uinet_getifstat(const char *name, struct uinet_ifstat *stat) { diff --git a/lib/libuinet/uinet_api.symlist 
b/lib/libuinet/uinet_api.symlist index c06ecde..4bbb962 100644 --- a/lib/libuinet/uinet_api.symlist +++ b/lib/libuinet/uinet_api.symlist @@ -1,3 +1,4 @@ +uinet_clearifstat uinet_cleartcpstat uinet_config_blackhole uinet_errno_to_os From 7891209e711ca7159f6fbad67905d70adb8cd400 Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Wed, 3 Sep 2014 01:35:27 -0700 Subject: [PATCH 131/148] Makefiles should abort when a target fails. Cleanup. --- Makefile | 14 ++------------ lib/Makefile | 10 ++-------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index a4a9bb1..98c169c 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,5 @@ SUBDIRS=lib bin -config: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) config ) ; done - -all: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done - -clean: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) clean ) ; done - -install: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) install ) ; done - +config all clean install: + for d in $(SUBDIRS); do ( cd $$d && $(MAKE) $@) || exit 1 ; done diff --git a/lib/Makefile b/lib/Makefile index 8fc62a2..1ca15f8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -9,13 +9,7 @@ default: all config: (cd libev ; env CFLAGS="${DEBUG_FLAGS}" ./configure --with-uinet=../libuinet/api_include --prefix="${UINET_DESTDIR}" --includedir="${UINET_DESTDIR}/include/libev" --enable-shared=no ) -all: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done - -clean: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) clean ) ; done - -install: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) install ) ; done +all clean install: + for d in $(SUBDIRS); do ( cd $$d && $(MAKE) $@ ) || exit 1 ; done default: config all From 2acaccb4909a6dcc58b3191c8279219ad9e74b00 Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Wed, 3 Sep 2014 13:39:30 +0000 Subject: [PATCH 132/148] add hooks for maintainer-clean --- Makefile | 2 +- bin/Makefile | 14 ++------------ bin/passive/Makefile | 3 +++ lib/Makefile | 2 +- 
lib/libhttp_parser/Makefile | 2 +- lib/libuinet/Makefile | 4 ++-- lib/libuinet_memstat/Makefile | 2 +- lib/libuinetnv/Makefile | 2 +- 8 files changed, 12 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 98c169c..8d83aff 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ SUBDIRS=lib bin -config all clean install: +config all clean install maintainer-clean: for d in $(SUBDIRS); do ( cd $$d && $(MAKE) $@) || exit 1 ; done diff --git a/bin/Makefile b/bin/Makefile index 0ed9df5..f40d1ef 100644 --- a/bin/Makefile +++ b/bin/Makefile @@ -1,15 +1,5 @@ SUBDIRS=passive -config: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) config ) ; done - -all: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done - -clean: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) clean ) ; done - -install: - for d in $(SUBDIRS); do ( cd $$d; $(MAKE) install ) ; done - +config all clean install maintainer-clean: + for d in $(SUBDIRS); do ( cd $$d && $(MAKE) $@) || exit 1 ; done diff --git a/bin/passive/Makefile b/bin/passive/Makefile index 92345dd..b64354f 100644 --- a/bin/passive/Makefile +++ b/bin/passive/Makefile @@ -15,4 +15,7 @@ CFLAGS+= -I${TOPDIR}/lib/libhttp_parser -DENABLE_EXTRACT LDADD+= -L${TOPDIR}/lib/libhttp_parser -lhttp_parser -lz endif +maintainer-clean: + $(MAKE) clean + include ${TOPDIR}/mk/prog.mk diff --git a/lib/Makefile b/lib/Makefile index 1ca15f8..4a0a218 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -9,7 +9,7 @@ default: all config: (cd libev ; env CFLAGS="${DEBUG_FLAGS}" ./configure --with-uinet=../libuinet/api_include --prefix="${UINET_DESTDIR}" --includedir="${UINET_DESTDIR}/include/libev" --enable-shared=no ) -all clean install: +all clean install maintainer-clean: for d in $(SUBDIRS); do ( cd $$d && $(MAKE) $@ ) || exit 1 ; done default: config all diff --git a/lib/libhttp_parser/Makefile b/lib/libhttp_parser/Makefile index 427e939..e13cf7a 100644 --- a/lib/libhttp_parser/Makefile +++ b/lib/libhttp_parser/Makefile @@ -104,7 +104,7 @@ default: all 
all: library package -clean: +clean maintainer-clean: rm -f *.o *.a tags test test_fast test_g \ http_parser.tar libhttp_parser.so* \ url_parser url_parser_g parsertrace parsertrace_g diff --git a/lib/libuinet/Makefile b/lib/libuinet/Makefile index 78c4c47..09cf597 100644 --- a/lib/libuinet/Makefile +++ b/lib/libuinet/Makefile @@ -350,8 +350,8 @@ ${OBJS}: %.o: %.c ${IMACROS_FILE} .m.h: ${AWK} -f $S/tools/makeobjops.awk $< -h -.PHONY: clean -clean: +.PHONY: maintainer-clean clean +maintainer-clean clean: rm -f libuinet.a rm -f ${MHEADERS} ${MSRCS} ${HOST_OBJS} ${OBJS} ${PROGRAM} ${IMACROS_FILE} rm -rf ${MACHINE_INCLUDES_ROOT} diff --git a/lib/libuinet_memstat/Makefile b/lib/libuinet_memstat/Makefile index 2149058..298f7d7 100644 --- a/lib/libuinet_memstat/Makefile +++ b/lib/libuinet_memstat/Makefile @@ -15,7 +15,7 @@ all: libuinet_memstat.a libuinet_memstat.a: $(OBJS) $(AR) -c -r libuinet_memstat.a $(OBJS) -clean: +clean maintainer-clean: $(RM) $(OBJS) libuinet_memstat.a install: diff --git a/lib/libuinetnv/Makefile b/lib/libuinetnv/Makefile index f8074d6..486d905 100644 --- a/lib/libuinetnv/Makefile +++ b/lib/libuinetnv/Makefile @@ -14,7 +14,7 @@ all: libuinetnv.a libuinetnv.a: $(OBJS) $(AR) -c -r libuinetnv.a $(OBJS) -clean: +clean maintainer-clean: $(RM) $(OBJS) libuinetnv.a install: From 4be0960fa8dff292f759bc19f88d757f9b1b2346 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 3 Sep 2014 09:00:54 -0700 Subject: [PATCH 133/148] bzero() the cpuset before using it. Thanks Valgrind! 
--- lib/libuinet/uinet_host_interface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/libuinet/uinet_host_interface.c b/lib/libuinet/uinet_host_interface.c index da02a28..80ab50a 100644 --- a/lib/libuinet/uinet_host_interface.c +++ b/lib/libuinet/uinet_host_interface.c @@ -459,6 +459,7 @@ int uhi_thread_bound_cpu() int bound_cpu; int i; + bzero(&cpuset, sizeof(cpuset)); pthread_getaffinity_np(pthread_self(), sizeof(cpuset_t), &cpuset); /* From 5a0910268f10a7abc1d54845c0957a780cd5d6b9 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 3 Sep 2014 09:01:14 -0700 Subject: [PATCH 134/148] Initialise the socklen parameter before passing it to accept(). Thanks Valgrind! --- lib/libuinet/uinet_host_sysctl_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/libuinet/uinet_host_sysctl_api.c b/lib/libuinet/uinet_host_sysctl_api.c index 56bf893..b943f1a 100644 --- a/lib/libuinet/uinet_host_sysctl_api.c +++ b/lib/libuinet/uinet_host_sysctl_api.c @@ -506,6 +506,7 @@ uinet_host_sysctl_listener_thread(void *arg) int ret; const char *type; + sl = sizeof(sun_n); ns = accept(s, (struct sockaddr *) &sun_n, &sl); if (ns < 0) { fprintf(stderr, "%s: accept failed: %d\n", __func__, errno); From 7d31758592c54a9d053e06e2f68393d0d34a73e3 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Thu, 4 Sep 2014 08:04:35 -0700 Subject: [PATCH 135/148] * Be more verbose if the call to RAND_pseudo_bytes returned an error; * Zero-out the buffer being passed to it - it uses the contents of buf to stir its local entropy pool before it returns something, and that's causing valgrind to trip up on uninitialised data. 
--- lib/libuinet/uinet_host_interface.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/libuinet/uinet_host_interface.c b/lib/libuinet/uinet_host_interface.c index 80ab50a..375879c 100644 --- a/lib/libuinet/uinet_host_interface.c +++ b/lib/libuinet/uinet_host_interface.c @@ -995,11 +995,16 @@ uhi_arc4rand(void *ptr, unsigned int len, int reseed) { #if !defined(__APPLE__) + int ret; + (void)reseed; /* XXX assuming that we don't have to manually seed this */ - RAND_pseudo_bytes(ptr, len); + ret = RAND_pseudo_bytes(ptr, len); + if (ret != 1) { + printf("%s: didn't return random data!\n", __func__); + } #else if (reseed) arc4random_stir(); @@ -1012,7 +1017,7 @@ uhi_arc4rand(void *ptr, unsigned int len, int reseed) uint32_t uhi_arc4random(void) { - uint32_t ret; + uint32_t ret = 0; uhi_arc4rand(&ret, sizeof ret, 0); return ret; From 0a79bfeaf76f8d99f114c877c83b9c6534cacff8 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 8 Sep 2014 10:35:35 -0700 Subject: [PATCH 136/148] g++ is a bad default. Bad gmake, bad. --- mk/prog.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/mk/prog.mk b/mk/prog.mk index 5283198..98f1b97 100644 --- a/mk/prog.mk +++ b/mk/prog.mk @@ -17,6 +17,7 @@ endif ifdef PROG_CXX PROG= ${PROG_CXX} +CXX= c++ endif ifndef PROG From 6a3ec5a446f6b725d26c076d881397507c8a6846 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Mon, 8 Sep 2014 10:37:50 -0700 Subject: [PATCH 137/148] Add cflags.mk to things so the install target works. --- bin/echo++/Makefile | 1 + bin/echo/Makefile | 5 +---- bin/sysctl/Makefile | 1 + bin/tproxy/Makefile | 3 +-- bin/vmstat/Makefile | 1 + 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/bin/echo++/Makefile b/bin/echo++/Makefile index 060f41d..2e0fe20 100644 --- a/bin/echo++/Makefile +++ b/bin/echo++/Makefile @@ -1,6 +1,7 @@ TOPDIR?=${CURDIR}/../.. 
+include ${TOPDIR}/cflags.mk PROG_CXX=echo++ diff --git a/bin/echo/Makefile b/bin/echo/Makefile index 570f284..17a1804 100644 --- a/bin/echo/Makefile +++ b/bin/echo/Makefile @@ -1,14 +1,11 @@ - TOPDIR?=${CURDIR}/../.. +include ${TOPDIR}/cflags.mk PROG=echo - UINET_LIBS=uinet - CFLAGS= -I${TOPDIR}/lib/libev LDADD= ${TOPDIR}/lib/libev/.libs/libev.a -lm -lpcap - DEBUG_FLAGS=-g -O0 include ${TOPDIR}/mk/prog.mk diff --git a/bin/sysctl/Makefile b/bin/sysctl/Makefile index b06f101..f48decf 100644 --- a/bin/sysctl/Makefile +++ b/bin/sysctl/Makefile @@ -1,4 +1,5 @@ TOPDIR?=${CURDIR}/../.. +include ${TOPDIR}/cflags.mk PROG=sysctl diff --git a/bin/tproxy/Makefile b/bin/tproxy/Makefile index f54a87e..203684d 100644 --- a/bin/tproxy/Makefile +++ b/bin/tproxy/Makefile @@ -1,6 +1,5 @@ - - TOPDIR?=${CURDIR}/../.. +include ${TOPDIR}/cflags.mk PROG=tproxy diff --git a/bin/vmstat/Makefile b/bin/vmstat/Makefile index 168d718..a1407df 100644 --- a/bin/vmstat/Makefile +++ b/bin/vmstat/Makefile @@ -1,4 +1,5 @@ TOPDIR?=${CURDIR}/../.. +include ${TOPDIR}/cflags.mk PROG=vmstat From cd68785a8d3016ecab249ba2569ad3eed770e1a1 Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Wed, 10 Sep 2014 10:32:45 +0000 Subject: [PATCH 138/148] maintainer-clean target. --- mk/prog.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mk/prog.mk b/mk/prog.mk index 98f1b97..7e170a2 100644 --- a/mk/prog.mk +++ b/mk/prog.mk @@ -58,7 +58,7 @@ else endif -clean: +maintainer-clean clean: @rm -f ${PROG} ${OBJS} all: ${PROG} From b71be6c091bb32cce748edada0f11a4cf77db141 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 10 Sep 2014 05:15:42 -0700 Subject: [PATCH 139/148] Cleanly shut down the bridge/netmap interfaces. 
--- lib/libuinet/uinet_config.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/libuinet/uinet_config.c b/lib/libuinet/uinet_config.c index 0d9298b..8fe4f44 100644 --- a/lib/libuinet/uinet_config.c +++ b/lib/libuinet/uinet_config.c @@ -185,6 +185,12 @@ uinet_ifdestroy_internal(struct uinet_config_if *cfg) case UINET_IFTYPE_PCAP: error = if_pcap_detach(cfg); break; + case UINET_IFTYPE_BRIDGE: + error = if_bridge_detach(cfg); + break; + case UINET_IFTYPE_SPAN: + error = if_span_detach(cfg); + break; default: printf("Error detaching interface %s: unknown interface type %d\n", cfg->name, cfg->type); error = ENXIO; From 5f7fa75e254434dd1eeab5aec4ddc122d675db4b Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Tue, 23 Sep 2014 14:07:34 +0000 Subject: [PATCH 140/148] Include a version file in the build. --- Makefile | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Makefile b/Makefile index 8d83aff..1de547c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,22 @@ +include cflags.mk SUBDIRS=lib bin config all clean install maintainer-clean: for d in $(SUBDIRS); do ( cd $$d && $(MAKE) $@) || exit 1 ; done + if [ "$@" = "all" -o "$@" = "install" ] ; then $(MAKE) $@-extra ; fi + #if [ "$@" = "all" ] ; then rm -f version.extended ; $(MAKE) version.extended ; fi + +.PHONY: version.extended + +all-extra: version.extended + +install-extra: + mkdir -p ${UINET_DESTDIR}/libuinet + cp version.extended ${UINET_DESTDIR}/libuinet/version.extended + +version.extended: + echo "buildroot: ${CURDIR}" > $@ + echo "date: `date`" >> $@ + echo "git-sha: `git rev-parse --short HEAD`" >> $@ + echo "git-branch: `git rev-parse --abbrev-ref HEAD`" >> $@ From 04d808317f7509b215a8b7639c01f48854f10165 Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Tue, 23 Sep 2014 14:09:51 +0000 Subject: [PATCH 141/148] Fixup install path for libuinet version file. 
--- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1de547c..f62832f 100644 --- a/Makefile +++ b/Makefile @@ -12,8 +12,8 @@ config all clean install maintainer-clean: all-extra: version.extended install-extra: - mkdir -p ${UINET_DESTDIR}/libuinet - cp version.extended ${UINET_DESTDIR}/libuinet/version.extended + mkdir -p ${UINET_DESTDIR}/etc/libuinet + cp version.extended ${UINET_DESTDIR}/etc/libuinet/version.extended version.extended: echo "buildroot: ${CURDIR}" > $@ From 4391646abb58a30306eec3980ecba85eea0d115d Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Wed, 24 Sep 2014 13:43:36 -0700 Subject: [PATCH 142/148] Need -lcrypto for FreeBSD 10.1 --- cflags.mk | 3 +++ lib.mk | 3 +++ lib/libev/ev.3 | 2 +- lib/libuinet/Makefile.inc | 3 +++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cflags.mk b/cflags.mk index 71d417b..e15967a 100644 --- a/cflags.mk +++ b/cflags.mk @@ -2,6 +2,9 @@ DEBUG_FLAGS ?= -O -gdwarf-2 UINET_DESTDIR ?= /usr/local/ +#CFLAGS+= -fPIC +#LDFLAGS+= -fPIC + UINET_INSTALL ?= install UINET_INSTALL_DIR ?= $(UINET_INSTALL) -m 0755 UINET_INSTALL_LIB ?= $(UINET_INSTALL) -m 0644 diff --git a/lib.mk b/lib.mk index b930e1a..8fe71f5 100644 --- a/lib.mk +++ b/lib.mk @@ -9,3 +9,6 @@ LDADD+= -lcrypto else LDADD+= -lssl endif +ifeq "${OSNAME}" "FreeBSD" +LDADD+= -lcrypto +endif diff --git a/lib/libev/ev.3 b/lib/libev/ev.3 index bb8501b..1e85616 100644 --- a/lib/libev/ev.3 +++ b/lib/libev/ev.3 @@ -124,7 +124,7 @@ .\" ======================================================================== .\" .IX Title "LIBEV 3" -.TH LIBEV 3 "2013-02-28" "libev-4.11" "libev - high performance full featured event loop" +.TH LIBEV 3 "2014-09-24" "libev-4.15" "libev - high performance full featured event loop" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l diff --git a/lib/libuinet/Makefile.inc b/lib/libuinet/Makefile.inc index b2a89c9..64277fa 100644 --- a/lib/libuinet/Makefile.inc +++ b/lib/libuinet/Makefile.inc @@ -9,3 +9,6 @@ LDADD+= -lcrypto else LDADD+= -lssl endif +ifeq ($(shell uname -s),FreeBSD) +LDADD+= -lcrypto +endif From ee0756f73c1e9dbc43dfb90fd232db52cabcd670 Mon Sep 17 00:00:00 2001 From: Usability Racoon Date: Fri, 26 Sep 2014 14:37:43 +0000 Subject: [PATCH 143/148] This file is generated remove it. Otherwise builds dirty this file and cause much sadness. --- lib/libev/ev.3 | 5626 ------------------------------------------------ 1 file changed, 5626 deletions(-) delete mode 100644 lib/libev/ev.3 diff --git a/lib/libev/ev.3 b/lib/libev/ev.3 deleted file mode 100644 index 1e85616..0000000 --- a/lib/libev/ev.3 +++ /dev/null @@ -1,5626 +0,0 @@ -.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.16) -.\" -.\" Standard preamble: -.\" ======================================================================== -.de Sp \" Vertical space (when we can't use .PP) -.if t .sp .5v -.if n .sp -.. -.de Vb \" Begin verbatim text -.ft CW -.nf -.ne \\$1 -.. -.de Ve \" End verbatim text -.ft R -.fi -.. -.\" Set up some character translations and predefined strings. \*(-- will -.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left -.\" double quote, and \*(R" will give a right double quote. \*(C+ will -.\" give a nicer C++. Capital omega is used to do unbreakable dashes and -.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, -.\" nothing in troff, for use with C<>. -.tr \(*W- -.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' -.ie n \{\ -. ds -- \(*W- -. ds PI pi -. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch -. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch -. ds L" "" -. ds R" "" -. ds C` "" -. ds C' "" -'br\} -.el\{\ -. ds -- \|\(em\| -. ds PI \(*p -. ds L" `` -. 
ds R" '' -'br\} -.\" -.\" Escape single quotes in literal strings from groff's Unicode transform. -.ie \n(.g .ds Aq \(aq -.el .ds Aq ' -.\" -.\" If the F register is turned on, we'll generate index entries on stderr for -.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index -.\" entries marked with X<> in POD. Of course, you'll have to process the -.\" output yourself in some meaningful fashion. -.ie \nF \{\ -. de IX -. tm Index:\\$1\t\\n%\t"\\$2" -.. -. nr % 0 -. rr F -.\} -.el \{\ -. de IX -.. -.\} -.\" -.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). -.\" Fear. Run. Save yourself. No user-serviceable parts. -. \" fudge factors for nroff and troff -.if n \{\ -. ds #H 0 -. ds #V .8m -. ds #F .3m -. ds #[ \f1 -. ds #] \fP -.\} -.if t \{\ -. ds #H ((1u-(\\\\n(.fu%2u))*.13m) -. ds #V .6m -. ds #F 0 -. ds #[ \& -. ds #] \& -.\} -. \" simple accents for nroff and troff -.if n \{\ -. ds ' \& -. ds ` \& -. ds ^ \& -. ds , \& -. ds ~ ~ -. ds / -.\} -.if t \{\ -. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" -. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' -. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' -. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' -. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' -. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' -.\} -. \" troff and (daisy-wheel) nroff accents -.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' -.ds 8 \h'\*(#H'\(*b\h'-\*(#H' -.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] -.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' -.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' -.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] -.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] -.ds ae a\h'-(\w'a'u*4/10)'e -.ds Ae A\h'-(\w'A'u*4/10)'E -. 
\" corrections for vroff -.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' -.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' -. \" for low resolution devices (crt and lpr) -.if \n(.H>23 .if \n(.V>19 \ -\{\ -. ds : e -. ds 8 ss -. ds o a -. ds d- d\h'-1'\(ga -. ds D- D\h'-1'\(hy -. ds th \o'bp' -. ds Th \o'LP' -. ds ae ae -. ds Ae AE -.\} -.rm #[ #] #H #V #F C -.\" ======================================================================== -.\" -.IX Title "LIBEV 3" -.TH LIBEV 3 "2014-09-24" "libev-4.15" "libev - high performance full featured event loop" -.\" For nroff, turn off justification. Always turn off hyphenation; it makes -.\" way too many mistakes in technical documents. -.if n .ad l -.nh -.SH "NAME" -libev \- a high performance full\-featured event loop written in C -.SH "SYNOPSIS" -.IX Header "SYNOPSIS" -.Vb 1 -\& #include -.Ve -.SS "\s-1EXAMPLE\s0 \s-1PROGRAM\s0" -.IX Subsection "EXAMPLE PROGRAM" -.Vb 2 -\& // a single header file is required -\& #include -\& -\& #include // for puts -\& -\& // every watcher type has its own typedef\*(Aqd struct -\& // with the name ev_TYPE -\& ev_io stdin_watcher; -\& ev_timer timeout_watcher; -\& -\& // all watcher callbacks have a similar signature -\& // this callback is called when data is readable on stdin -\& static void -\& stdin_cb (EV_P_ ev_io *w, int revents) -\& { -\& puts ("stdin ready"); -\& // for one\-shot events, one must manually stop the watcher -\& // with its corresponding stop function. 
-\& ev_io_stop (EV_A_ w); -\& -\& // this causes all nested ev_run\*(Aqs to stop iterating -\& ev_break (EV_A_ EVBREAK_ALL); -\& } -\& -\& // another callback, this time for a time\-out -\& static void -\& timeout_cb (EV_P_ ev_timer *w, int revents) -\& { -\& puts ("timeout"); -\& // this causes the innermost ev_run to stop iterating -\& ev_break (EV_A_ EVBREAK_ONE); -\& } -\& -\& int -\& main (void) -\& { -\& // use the default event loop unless you have special needs -\& struct ev_loop *loop = EV_DEFAULT; -\& -\& // initialise an io watcher, then start it -\& // this one will watch for stdin to become readable -\& ev_io_init (&stdin_watcher, stdin_cb, /*STDIN_FILENO*/ 0, EV_READ); -\& ev_io_start (loop, &stdin_watcher); -\& -\& // initialise a timer watcher, then start it -\& // simple non\-repeating 5.5 second timeout -\& ev_timer_init (&timeout_watcher, timeout_cb, 5.5, 0.); -\& ev_timer_start (loop, &timeout_watcher); -\& -\& // now wait for events to arrive -\& ev_run (loop, 0); -\& -\& // break was called, so exit -\& return 0; -\& } -.Ve -.SH "ABOUT THIS DOCUMENT" -.IX Header "ABOUT THIS DOCUMENT" -This document documents the libev software package. -.PP -The newest version of this document is also available as an html-formatted -web page you might find easier to navigate when reading it for the first -time: . -.PP -While this document tries to be as complete as possible in documenting -libev, its usage and the rationale behind its design, it is not a tutorial -on event-based programming, nor will it introduce event-based programming -with libev. -.PP -Familiarity with event based programming techniques in general is assumed -throughout this document. -.SH "WHAT TO READ WHEN IN A HURRY" -.IX Header "WHAT TO READ WHEN IN A HURRY" -This manual tries to be very detailed, but unfortunately, this also makes -it very long. 
If you just want to know the basics of libev, I suggest -reading \*(L"\s-1ANATOMY\s0 \s-1OF\s0 A \s-1WATCHER\s0\*(R", then the \*(L"\s-1EXAMPLE\s0 \s-1PROGRAM\s0\*(R" above and -look up the missing functions in \*(L"\s-1GLOBAL\s0 \s-1FUNCTIONS\s0\*(R" and the \f(CW\*(C`ev_io\*(C'\fR and -\&\f(CW\*(C`ev_timer\*(C'\fR sections in \*(L"\s-1WATCHER\s0 \s-1TYPES\s0\*(R". -.SH "ABOUT LIBEV" -.IX Header "ABOUT LIBEV" -Libev is an event loop: you register interest in certain events (such as a -file descriptor being readable or a timeout occurring), and it will manage -these event sources and provide your program with events. -.PP -To do this, it must take more or less complete control over your process -(or thread) by executing the \fIevent loop\fR handler, and will then -communicate events via a callback mechanism. -.PP -You register interest in certain events by registering so-called \fIevent -watchers\fR, which are relatively small C structures you initialise with the -details of the event, and then hand it over to libev by \fIstarting\fR the -watcher. 
-.SS "\s-1FEATURES\s0" -.IX Subsection "FEATURES" -Libev supports \f(CW\*(C`select\*(C'\fR, \f(CW\*(C`poll\*(C'\fR, the Linux-specific \f(CW\*(C`epoll\*(C'\fR, the -BSD-specific \f(CW\*(C`kqueue\*(C'\fR and the Solaris-specific event port mechanisms -for file descriptor events (\f(CW\*(C`ev_io\*(C'\fR), the Linux \f(CW\*(C`inotify\*(C'\fR interface -(for \f(CW\*(C`ev_stat\*(C'\fR), Linux eventfd/signalfd (for faster and cleaner -inter-thread wakeup (\f(CW\*(C`ev_async\*(C'\fR)/signal handling (\f(CW\*(C`ev_signal\*(C'\fR)) relative -timers (\f(CW\*(C`ev_timer\*(C'\fR), absolute timers with customised rescheduling -(\f(CW\*(C`ev_periodic\*(C'\fR), synchronous signals (\f(CW\*(C`ev_signal\*(C'\fR), process status -change events (\f(CW\*(C`ev_child\*(C'\fR), and event watchers dealing with the event -loop mechanism itself (\f(CW\*(C`ev_idle\*(C'\fR, \f(CW\*(C`ev_embed\*(C'\fR, \f(CW\*(C`ev_prepare\*(C'\fR and -\&\f(CW\*(C`ev_check\*(C'\fR watchers) as well as file watchers (\f(CW\*(C`ev_stat\*(C'\fR) and even -limited support for fork events (\f(CW\*(C`ev_fork\*(C'\fR). -.PP -It also is quite fast (see this -benchmark comparing it to libevent -for example). -.SS "\s-1CONVENTIONS\s0" -.IX Subsection "CONVENTIONS" -Libev is very configurable. In this manual the default (and most common) -configuration will be described, which supports multiple event loops. For -more info about various configuration options please have a look at -\&\fB\s-1EMBED\s0\fR section in this manual. If libev was configured without support -for multiple event loops, then all functions taking an initial argument of -name \f(CW\*(C`loop\*(C'\fR (which is always of type \f(CW\*(C`struct ev_loop *\*(C'\fR) will not have -this argument. 
-.SS "\s-1TIME\s0 \s-1REPRESENTATION\s0" -.IX Subsection "TIME REPRESENTATION" -Libev represents time as a single floating point number, representing -the (fractional) number of seconds since the (\s-1POSIX\s0) epoch (in practice -somewhere near the beginning of 1970, details are complicated, don't -ask). This type is called \f(CW\*(C`ev_tstamp\*(C'\fR, which is what you should use -too. It usually aliases to the \f(CW\*(C`double\*(C'\fR type in C. When you need to do -any calculations on it, you should treat it as some floating point value. -.PP -Unlike the name component \f(CW\*(C`stamp\*(C'\fR might indicate, it is also used for -time differences (e.g. delays) throughout libev. -.SH "ERROR HANDLING" -.IX Header "ERROR HANDLING" -Libev knows three classes of errors: operating system errors, usage errors -and internal errors (bugs). -.PP -When libev catches an operating system error it cannot handle (for example -a system call indicating a condition libev cannot fix), it calls the callback -set via \f(CW\*(C`ev_set_syserr_cb\*(C'\fR, which is supposed to fix the problem or -abort. The default is to print a diagnostic message and to call \f(CW\*(C`abort -()\*(C'\fR. -.PP -When libev detects a usage error such as a negative timer interval, then -it will print a diagnostic message and abort (via the \f(CW\*(C`assert\*(C'\fR mechanism, -so \f(CW\*(C`NDEBUG\*(C'\fR will disable this checking): these are programming errors in -the libev caller and need to be fixed there. -.PP -Libev also has a few internal error-checking \f(CW\*(C`assert\*(C'\fRions, and also has -extensive consistency checking code. These do not trigger under normal -circumstances, as they indicate either a bug in libev or worse. -.SH "GLOBAL FUNCTIONS" -.IX Header "GLOBAL FUNCTIONS" -These functions can be called anytime, even before initialising the -library in any way. -.IP "ev_tstamp ev_time ()" 4 -.IX Item "ev_tstamp ev_time ()" -Returns the current time as libev would use it. 
Please note that the -\&\f(CW\*(C`ev_now\*(C'\fR function is usually faster and also often returns the timestamp -you actually want to know. Also interesting is the combination of -\&\f(CW\*(C`ev_now_update\*(C'\fR and \f(CW\*(C`ev_now\*(C'\fR. -.IP "ev_sleep (ev_tstamp interval)" 4 -.IX Item "ev_sleep (ev_tstamp interval)" -Sleep for the given interval: The current thread will be blocked -until either it is interrupted or the given time interval has -passed (approximately \- it might return a bit earlier even if not -interrupted). Returns immediately if \f(CW\*(C`interval <= 0\*(C'\fR. -.Sp -Basically this is a sub-second-resolution \f(CW\*(C`sleep ()\*(C'\fR. -.Sp -The range of the \f(CW\*(C`interval\*(C'\fR is limited \- libev only guarantees to work -with sleep times of up to one day (\f(CW\*(C`interval <= 86400\*(C'\fR). -.IP "int ev_version_major ()" 4 -.IX Item "int ev_version_major ()" -.PD 0 -.IP "int ev_version_minor ()" 4 -.IX Item "int ev_version_minor ()" -.PD -You can find out the major and minor \s-1ABI\s0 version numbers of the library -you linked against by calling the functions \f(CW\*(C`ev_version_major\*(C'\fR and -\&\f(CW\*(C`ev_version_minor\*(C'\fR. If you want, you can compare against the global -symbols \f(CW\*(C`EV_VERSION_MAJOR\*(C'\fR and \f(CW\*(C`EV_VERSION_MINOR\*(C'\fR, which specify the -version of the library your program was compiled against. -.Sp -These version numbers refer to the \s-1ABI\s0 version of the library, not the -release version. -.Sp -Usually, it's a good idea to terminate if the major versions mismatch, -as this indicates an incompatible change. Minor versions are usually -compatible to older versions, so a larger minor version alone is usually -not a problem. -.Sp -Example: Make sure we haven't accidentally been linked against the wrong -version (note, however, that this will not detect other \s-1ABI\s0 mismatches, -such as \s-1LFS\s0 or reentrancy). 
-.Sp -.Vb 3 -\& assert (("libev version mismatch", -\& ev_version_major () == EV_VERSION_MAJOR -\& && ev_version_minor () >= EV_VERSION_MINOR)); -.Ve -.IP "unsigned int ev_supported_backends ()" 4 -.IX Item "unsigned int ev_supported_backends ()" -Return the set of all backends (i.e. their corresponding \f(CW\*(C`EV_BACKEND_*\*(C'\fR -value) compiled into this binary of libev (independent of their -availability on the system you are running on). See \f(CW\*(C`ev_default_loop\*(C'\fR for -a description of the set values. -.Sp -Example: make sure we have the epoll method, because yeah this is cool and -a must have and can we have a torrent of it please!!!11 -.Sp -.Vb 2 -\& assert (("sorry, no epoll, no sex", -\& ev_supported_backends () & EVBACKEND_EPOLL)); -.Ve -.IP "unsigned int ev_recommended_backends ()" 4 -.IX Item "unsigned int ev_recommended_backends ()" -Return the set of all backends compiled into this binary of libev and -also recommended for this platform, meaning it will work for most file -descriptor types. This set is often smaller than the one returned by -\&\f(CW\*(C`ev_supported_backends\*(C'\fR, as for example kqueue is broken on most BSDs -and will not be auto-detected unless you explicitly request it (assuming -you know what you are doing). This is the set of backends that libev will -probe for if you specify no backends explicitly. -.IP "unsigned int ev_embeddable_backends ()" 4 -.IX Item "unsigned int ev_embeddable_backends ()" -Returns the set of backends that are embeddable in other event loops. This -value is platform-specific but can include backends not available on the -current system. To find which embeddable backends might be supported on -the current system, you would need to look at \f(CW\*(C`ev_embeddable_backends () -& ev_supported_backends ()\*(C'\fR, likewise for recommended ones. -.Sp -See the description of \f(CW\*(C`ev_embed\*(C'\fR watchers for more info. 
-.IP "ev_set_allocator (void *(*cb)(void *ptr, long size) throw ())" 4 -.IX Item "ev_set_allocator (void *(*cb)(void *ptr, long size) throw ())" -Sets the allocation function to use (the prototype is similar \- the -semantics are identical to the \f(CW\*(C`realloc\*(C'\fR C89/SuS/POSIX function). It is -used to allocate and free memory (no surprises here). If it returns zero -when memory needs to be allocated (\f(CW\*(C`size != 0\*(C'\fR), the library might abort -or take some potentially destructive action. -.Sp -Since some systems (at least OpenBSD and Darwin) fail to implement -correct \f(CW\*(C`realloc\*(C'\fR semantics, libev will use a wrapper around the system -\&\f(CW\*(C`realloc\*(C'\fR and \f(CW\*(C`free\*(C'\fR functions by default. -.Sp -You could override this function in high-availability programs to, say, -free some memory if it cannot allocate memory, to use a special allocator, -or even to sleep a while and retry until some memory is available. -.Sp -Example: Replace the libev allocator with one that waits a bit and then -retries (example requires a standards-compliant \f(CW\*(C`realloc\*(C'\fR). -.Sp -.Vb 6 -\& static void * -\& persistent_realloc (void *ptr, size_t size) -\& { -\& for (;;) -\& { -\& void *newptr = realloc (ptr, size); -\& -\& if (newptr) -\& return newptr; -\& -\& sleep (60); -\& } -\& } -\& -\& ... -\& ev_set_allocator (persistent_realloc); -.Ve -.IP "ev_set_syserr_cb (void (*cb)(const char *msg) throw ())" 4 -.IX Item "ev_set_syserr_cb (void (*cb)(const char *msg) throw ())" -Set the callback function to call on a retryable system call error (such -as failed select, poll, epoll_wait). The message is a printable string -indicating the system call or subsystem causing the problem. If this -callback is set, then libev will expect it to remedy the situation, no -matter what, when it returns. That is, libev will generally retry the -requested operation, or, if the condition doesn't go away, do bad stuff -(such as abort). 
-.Sp -Example: This is basically the same thing that libev does internally, too. -.Sp -.Vb 6 -\& static void -\& fatal_error (const char *msg) -\& { -\& perror (msg); -\& abort (); -\& } -\& -\& ... -\& ev_set_syserr_cb (fatal_error); -.Ve -.IP "ev_feed_signal (int signum)" 4 -.IX Item "ev_feed_signal (int signum)" -This function can be used to \*(L"simulate\*(R" a signal receive. It is completely -safe to call this function at any time, from any context, including signal -handlers or random threads. -.Sp -Its main use is to customise signal handling in your process, especially -in the presence of threads. For example, you could block signals -by default in all threads (and specifying \f(CW\*(C`EVFLAG_NOSIGMASK\*(C'\fR when -creating any loops), and in one thread, use \f(CW\*(C`sigwait\*(C'\fR or any other -mechanism to wait for signals, then \*(L"deliver\*(R" them to libev by calling -\&\f(CW\*(C`ev_feed_signal\*(C'\fR. -.SH "FUNCTIONS CONTROLLING EVENT LOOPS" -.IX Header "FUNCTIONS CONTROLLING EVENT LOOPS" -An event loop is described by a \f(CW\*(C`struct ev_loop *\*(C'\fR (the \f(CW\*(C`struct\*(C'\fR is -\&\fInot\fR optional in this case unless libev 3 compatibility is disabled, as -libev 3 had an \f(CW\*(C`ev_loop\*(C'\fR function colliding with the struct name). -.PP -The library knows two types of such loops, the \fIdefault\fR loop, which -supports child process events, and dynamically created event loops which -do not. -.IP "struct ev_loop *ev_default_loop (unsigned int flags)" 4 -.IX Item "struct ev_loop *ev_default_loop (unsigned int flags)" -This returns the \*(L"default\*(R" event loop object, which is what you should -normally use when you just need \*(L"the event loop\*(R". Event loop objects and -the \f(CW\*(C`flags\*(C'\fR parameter are described in more detail in the entry for -\&\f(CW\*(C`ev_loop_new\*(C'\fR. -.Sp -If the default loop is already initialised then this function simply -returns it (and ignores the flags. 
If that is troubling you, check -\&\f(CW\*(C`ev_backend ()\*(C'\fR afterwards). Otherwise it will create it with the given -flags, which should almost always be \f(CW0\fR, unless the caller is also the -one calling \f(CW\*(C`ev_run\*(C'\fR or otherwise qualifies as \*(L"the main program\*(R". -.Sp -If you don't know what event loop to use, use the one returned from this -function (or via the \f(CW\*(C`EV_DEFAULT\*(C'\fR macro). -.Sp -Note that this function is \fInot\fR thread-safe, so if you want to use it -from multiple threads, you have to employ some kind of mutex (note also -that this case is unlikely, as loops cannot be shared easily between -threads anyway). -.Sp -The default loop is the only loop that can handle \f(CW\*(C`ev_child\*(C'\fR watchers, -and to do this, it always registers a handler for \f(CW\*(C`SIGCHLD\*(C'\fR. If this is -a problem for your application you can either create a dynamic loop with -\&\f(CW\*(C`ev_loop_new\*(C'\fR which doesn't do that, or you can simply overwrite the -\&\f(CW\*(C`SIGCHLD\*(C'\fR signal handler \fIafter\fR calling \f(CW\*(C`ev_default_init\*(C'\fR. -.Sp -Example: This is the most typical usage. -.Sp -.Vb 2 -\& if (!ev_default_loop (0)) -\& fatal ("could not initialise libev, bad $LIBEV_FLAGS in environment?"); -.Ve -.Sp -Example: Restrict libev to the select and poll backends, and do not allow -environment settings to be taken into account: -.Sp -.Vb 1 -\& ev_default_loop (EVBACKEND_POLL | EVBACKEND_SELECT | EVFLAG_NOENV); -.Ve -.IP "struct ev_loop *ev_loop_new (unsigned int flags)" 4 -.IX Item "struct ev_loop *ev_loop_new (unsigned int flags)" -This will create and initialise a new event loop object. If the loop -could not be initialised, returns false. -.Sp -This function is thread-safe, and one common way to use libev with -threads is indeed to create one loop per thread, and using the default -loop in the \*(L"main\*(R" or \*(L"initial\*(R" thread. 
-.Sp -The flags argument can be used to specify special behaviour or specific -backends to use, and is usually specified as \f(CW0\fR (or \f(CW\*(C`EVFLAG_AUTO\*(C'\fR). -.Sp -The following flags are supported: -.RS 4 -.ie n .IP """EVFLAG_AUTO""" 4 -.el .IP "\f(CWEVFLAG_AUTO\fR" 4 -.IX Item "EVFLAG_AUTO" -The default flags value. Use this if you have no clue (it's the right -thing, believe me). -.ie n .IP """EVFLAG_NOENV""" 4 -.el .IP "\f(CWEVFLAG_NOENV\fR" 4 -.IX Item "EVFLAG_NOENV" -If this flag bit is or'ed into the flag value (or the program runs setuid -or setgid) then libev will \fInot\fR look at the environment variable -\&\f(CW\*(C`LIBEV_FLAGS\*(C'\fR. Otherwise (the default), this environment variable will -override the flags completely if it is found in the environment. This is -useful to try out specific backends to test their performance, or to work -around bugs. -.ie n .IP """EVFLAG_FORKCHECK""" 4 -.el .IP "\f(CWEVFLAG_FORKCHECK\fR" 4 -.IX Item "EVFLAG_FORKCHECK" -Instead of calling \f(CW\*(C`ev_loop_fork\*(C'\fR manually after a fork, you can also -make libev check for a fork in each iteration by enabling this flag. -.Sp -This works by calling \f(CW\*(C`getpid ()\*(C'\fR on every iteration of the loop, -and thus this might slow down your event loop if you do a lot of loop -iterations and little real work, but is usually not noticeable (on my -GNU/Linux system for example, \f(CW\*(C`getpid\*(C'\fR is actually a simple 5\-insn sequence -without a system call and thus \fIvery\fR fast, but my GNU/Linux system also has -\&\f(CW\*(C`pthread_atfork\*(C'\fR which is even faster). -.Sp -The big advantage of this flag is that you can forget about fork (and -forget about forgetting to tell libev about forking) when you use this -flag. -.Sp -This flag setting cannot be overridden or specified in the \f(CW\*(C`LIBEV_FLAGS\*(C'\fR -environment variable. 
-.ie n .IP """EVFLAG_NOINOTIFY""" 4 -.el .IP "\f(CWEVFLAG_NOINOTIFY\fR" 4 -.IX Item "EVFLAG_NOINOTIFY" -When this flag is specified, then libev will not attempt to use the -\&\fIinotify\fR \s-1API\s0 for its \f(CW\*(C`ev_stat\*(C'\fR watchers. Apart from debugging and -testing, this flag can be useful to conserve inotify file descriptors, as -otherwise each loop using \f(CW\*(C`ev_stat\*(C'\fR watchers consumes one inotify handle. -.ie n .IP """EVFLAG_SIGNALFD""" 4 -.el .IP "\f(CWEVFLAG_SIGNALFD\fR" 4 -.IX Item "EVFLAG_SIGNALFD" -When this flag is specified, then libev will attempt to use the -\&\fIsignalfd\fR \s-1API\s0 for its \f(CW\*(C`ev_signal\*(C'\fR (and \f(CW\*(C`ev_child\*(C'\fR) watchers. This \s-1API\s0 -delivers signals synchronously, which makes it both faster and might make -it possible to get the queued signal data. It can also simplify signal -handling with threads, as long as you properly block signals in your -threads that are not interested in handling them. -.Sp -Signalfd will not be used by default as this changes your signal mask, and -there are a lot of shoddy libraries and programs (glib's threadpool for -example) that can't properly initialise their signal masks. -.ie n .IP """EVFLAG_NOSIGMASK""" 4 -.el .IP "\f(CWEVFLAG_NOSIGMASK\fR" 4 -.IX Item "EVFLAG_NOSIGMASK" -When this flag is specified, then libev will avoid to modify the signal -mask. Specifically, this means you have to make sure signals are unblocked -when you want to receive them. -.Sp -This behaviour is useful when you want to do your own signal handling, or -want to handle signals only in specific threads and want to avoid libev -unblocking the signals. -.Sp -It's also required by \s-1POSIX\s0 in a threaded program, as libev calls -\&\f(CW\*(C`sigprocmask\*(C'\fR, whose behaviour is officially unspecified. -.Sp -This flag's behaviour will become the default in future versions of libev. 
-.ie n .IP """EVBACKEND_SELECT"" (value 1, portable select backend)" 4 -.el .IP "\f(CWEVBACKEND_SELECT\fR (value 1, portable select backend)" 4 -.IX Item "EVBACKEND_SELECT (value 1, portable select backend)" -This is your standard \fIselect\fR\|(2) backend. Not \fIcompletely\fR standard, as -libev tries to roll its own fd_set with no limits on the number of fds, -but if that fails, expect a fairly low limit on the number of fds when -using this backend. It doesn't scale too well (O(highest_fd)), but its -usually the fastest backend for a low number of (low-numbered :) fds. -.Sp -To get good performance out of this backend you need a high amount of -parallelism (most of the file descriptors should be busy). If you are -writing a server, you should \f(CW\*(C`accept ()\*(C'\fR in a loop to accept as many -connections as possible during one iteration. You might also want to have -a look at \f(CW\*(C`ev_set_io_collect_interval ()\*(C'\fR to increase the amount of -readiness notifications you get per iteration. -.Sp -This backend maps \f(CW\*(C`EV_READ\*(C'\fR to the \f(CW\*(C`readfds\*(C'\fR set and \f(CW\*(C`EV_WRITE\*(C'\fR to the -\&\f(CW\*(C`writefds\*(C'\fR set (and to work around Microsoft Windows bugs, also onto the -\&\f(CW\*(C`exceptfds\*(C'\fR set on that platform). -.ie n .IP """EVBACKEND_POLL"" (value 2, poll backend, available everywhere except on windows)" 4 -.el .IP "\f(CWEVBACKEND_POLL\fR (value 2, poll backend, available everywhere except on windows)" 4 -.IX Item "EVBACKEND_POLL (value 2, poll backend, available everywhere except on windows)" -And this is your standard \fIpoll\fR\|(2) backend. It's more complicated -than select, but handles sparse fds better and has no artificial -limit on the number of fds you can use (except it will slow down -considerably with a lot of inactive fds). It scales similarly to select, -i.e. O(total_fds). See the entry for \f(CW\*(C`EVBACKEND_SELECT\*(C'\fR, above, for -performance tips. 
-.Sp -This backend maps \f(CW\*(C`EV_READ\*(C'\fR to \f(CW\*(C`POLLIN | POLLERR | POLLHUP\*(C'\fR, and -\&\f(CW\*(C`EV_WRITE\*(C'\fR to \f(CW\*(C`POLLOUT | POLLERR | POLLHUP\*(C'\fR. -.ie n .IP """EVBACKEND_EPOLL"" (value 4, Linux)" 4 -.el .IP "\f(CWEVBACKEND_EPOLL\fR (value 4, Linux)" 4 -.IX Item "EVBACKEND_EPOLL (value 4, Linux)" -Use the linux-specific \fIepoll\fR\|(7) interface (for both pre\- and post\-2.6.9 -kernels). -.Sp -For few fds, this backend is a little bit slower than poll and select, but -it scales phenomenally better. While poll and select usually scale like -O(total_fds) where total_fds is the total number of fds (or the highest -fd), epoll scales either O(1) or O(active_fds). -.Sp -The epoll mechanism deserves honorable mention as the most misdesigned -of the more advanced event mechanisms: mere annoyances include silently -dropping file descriptors, requiring a system call per change per file -descriptor (and unnecessary guessing of parameters), problems with dup, -returning before the timeout value, resulting in additional iterations -(and only giving 5ms accuracy while select on the same platform gives -0.1ms) and so on. The biggest issue is fork races, however \- if a program -forks then \fIboth\fR parent and child process have to recreate the epoll -set, which can take considerable time (one syscall per file descriptor) -and is of course hard to detect. -.Sp -Epoll is also notoriously buggy \- embedding epoll fds \fIshould\fR work, -but of course \fIdoesn't\fR, and epoll just loves to report events for -totally \fIdifferent\fR file descriptors (even already closed ones, so -one cannot even remove them from the set) than registered in the set -(especially on \s-1SMP\s0 systems). Libev tries to counter these spurious -notifications by employing an additional generation counter and comparing -that against the events to filter out spurious ones, recreating the set -when required.
Epoll also erroneously rounds down timeouts, but gives you -no way to know when and by how much, so sometimes you have to busy-wait -because epoll returns immediately despite a nonzero timeout. And last -but not least, it also refuses to work with some file descriptors which work -perfectly fine with \f(CW\*(C`select\*(C'\fR (files, many character devices...). -.Sp -Epoll is truly the train wreck among event poll mechanisms, a frankenpoll, -cobbled together in a hurry, no thought to design or interaction with -others. Oh, the pain, will it ever stop... -.Sp -While stopping, setting and starting an I/O watcher in the same iteration -will result in some caching, there is still a system call per such -incident (because the same \fIfile descriptor\fR could point to a different -\&\fIfile description\fR now), so it's best to avoid that. Also, \f(CW\*(C`dup ()\*(C'\fR'ed -file descriptors might not work very well if you register events for both -file descriptors. -.Sp -Best performance from this backend is achieved by not unregistering all -watchers for a file descriptor until it has been closed, if possible, -i.e. keep at least one watcher active per fd at all times. Stopping and -starting a watcher (without re-setting it) also usually doesn't cause -extra overhead. A fork can both result in spurious notifications as well -as in libev having to destroy and recreate the epoll object, which can -take considerable time and thus should be avoided. -.Sp -All this means that, in practice, \f(CW\*(C`EVBACKEND_SELECT\*(C'\fR can be as fast or -faster than epoll for maybe up to a hundred file descriptors, depending on -the usage. So sad. -.Sp -While nominally embeddable in other event loops, this feature is broken in -all kernel versions tested so far. -.Sp -This backend maps \f(CW\*(C`EV_READ\*(C'\fR and \f(CW\*(C`EV_WRITE\*(C'\fR in the same way as -\&\f(CW\*(C`EVBACKEND_POLL\*(C'\fR.
-.ie n .IP """EVBACKEND_KQUEUE"" (value 8, most \s-1BSD\s0 clones)" 4 -.el .IP "\f(CWEVBACKEND_KQUEUE\fR (value 8, most \s-1BSD\s0 clones)" 4 -.IX Item "EVBACKEND_KQUEUE (value 8, most BSD clones)" -Kqueue deserves special mention, as at the time of this writing, it -was broken on all BSDs except NetBSD (usually it doesn't work reliably -with anything but sockets and pipes, except on Darwin, where of course -it's completely useless). Unlike epoll, however, whose brokenness -is by design, these kqueue bugs can (and eventually will) be fixed -without \s-1API\s0 changes to existing programs. For this reason it's not being -\&\*(L"auto-detected\*(R" unless you explicitly specify it in the flags (i.e. using -\&\f(CW\*(C`EVBACKEND_KQUEUE\*(C'\fR) or libev was compiled on a known-to-be-good (\-enough) -system like NetBSD. -.Sp -You still can embed kqueue into a normal poll or select backend and use it -only for sockets (after having made sure that sockets work with kqueue on -the target platform). See \f(CW\*(C`ev_embed\*(C'\fR watchers for more info. -.Sp -It scales in the same way as the epoll backend, but the interface to the -kernel is more efficient (which says nothing about its actual speed, of -course). While stopping, setting and starting an I/O watcher does never -cause an extra system call as with \f(CW\*(C`EVBACKEND_EPOLL\*(C'\fR, it still adds up to -two event changes per incident. Support for \f(CW\*(C`fork ()\*(C'\fR is very bad (you -might have to leak fd's on fork, but it's more sane than epoll) and it -drops fds silently in similarly hard-to-detect cases. -.Sp -This backend usually performs well under most conditions. -.Sp -While nominally embeddable in other event loops, this doesn't work -everywhere, so you might need to test for this. And since it is broken -almost everywhere, you should only use it when you have a lot of sockets -(for which it usually works), by embedding it into another event loop -(e.g. 
\f(CW\*(C`EVBACKEND_SELECT\*(C'\fR or \f(CW\*(C`EVBACKEND_POLL\*(C'\fR (but \f(CW\*(C`poll\*(C'\fR is of course -also broken on \s-1OS\s0 X)) and, did I mention it, using it only for sockets. -.Sp -This backend maps \f(CW\*(C`EV_READ\*(C'\fR into an \f(CW\*(C`EVFILT_READ\*(C'\fR kevent with -\&\f(CW\*(C`NOTE_EOF\*(C'\fR, and \f(CW\*(C`EV_WRITE\*(C'\fR into an \f(CW\*(C`EVFILT_WRITE\*(C'\fR kevent with -\&\f(CW\*(C`NOTE_EOF\*(C'\fR. -.ie n .IP """EVBACKEND_DEVPOLL"" (value 16, Solaris 8)" 4 -.el .IP "\f(CWEVBACKEND_DEVPOLL\fR (value 16, Solaris 8)" 4 -.IX Item "EVBACKEND_DEVPOLL (value 16, Solaris 8)" -This is not implemented yet (and might never be, unless you send me an -implementation). According to reports, \f(CW\*(C`/dev/poll\*(C'\fR only supports sockets -and is not embeddable, which would limit the usefulness of this backend -immensely. -.ie n .IP """EVBACKEND_PORT"" (value 32, Solaris 10)" 4 -.el .IP "\f(CWEVBACKEND_PORT\fR (value 32, Solaris 10)" 4 -.IX Item "EVBACKEND_PORT (value 32, Solaris 10)" -This uses the Solaris 10 event port mechanism. As with everything on Solaris, -it's really slow, but it still scales very well (O(active_fds)). -.Sp -While this backend scales well, it requires one system call per active -file descriptor per loop iteration. For small and medium numbers of file -descriptors a \*(L"slow\*(R" \f(CW\*(C`EVBACKEND_SELECT\*(C'\fR or \f(CW\*(C`EVBACKEND_POLL\*(C'\fR backend -might perform better. -.Sp -On the positive side, this backend actually performed fully to -specification in all tests and is fully embeddable, which is a rare feat -among the OS-specific backends (I vastly prefer correctness over speed -hacks). 
-.Sp -On the negative side, the interface is \fIbizarre\fR \- so bizarre that -even sun itself gets it wrong in their code examples: The event polling -function sometimes returns events to the caller even though an error -occurred, but with no indication whether it has done so or not (yes, it's -even documented that way) \- deadly for edge-triggered interfaces where you -absolutely have to know whether an event occurred or not because you have -to re-arm the watcher. -.Sp -Fortunately libev seems to be able to work around these idiocies. -.Sp -This backend maps \f(CW\*(C`EV_READ\*(C'\fR and \f(CW\*(C`EV_WRITE\*(C'\fR in the same way as -\&\f(CW\*(C`EVBACKEND_POLL\*(C'\fR. -.ie n .IP """EVBACKEND_ALL""" 4 -.el .IP "\f(CWEVBACKEND_ALL\fR" 4 -.IX Item "EVBACKEND_ALL" -Try all backends (even potentially broken ones that wouldn't be tried -with \f(CW\*(C`EVFLAG_AUTO\*(C'\fR). Since this is a mask, you can do stuff such as -\&\f(CW\*(C`EVBACKEND_ALL & ~EVBACKEND_KQUEUE\*(C'\fR. -.Sp -It is definitely not recommended to use this flag, use whatever -\&\f(CW\*(C`ev_recommended_backends ()\*(C'\fR returns, or simply do not specify a backend -at all. -.ie n .IP """EVBACKEND_MASK""" 4 -.el .IP "\f(CWEVBACKEND_MASK\fR" 4 -.IX Item "EVBACKEND_MASK" -Not a backend at all, but a mask to select all backend bits from a -\&\f(CW\*(C`flags\*(C'\fR value, in case you want to mask out any backends from a flags -value (e.g. when modifying the \f(CW\*(C`LIBEV_FLAGS\*(C'\fR environment variable). -.RE -.RS 4 -.Sp -If one or more of the backend flags are or'ed into the flags value, -then only these backends will be tried (in the reverse order as listed -here). If none are specified, all backends in \f(CW\*(C`ev_recommended_backends -()\*(C'\fR will be tried. -.Sp -Example: Try to create an event loop that uses epoll and nothing else.
-.Sp -.Vb 3 -\& struct ev_loop *epoller = ev_loop_new (EVBACKEND_EPOLL | EVFLAG_NOENV); -\& if (!epoller) -\& fatal ("no epoll found here, maybe it hides under your chair"); -.Ve -.Sp -Example: Use whatever libev has to offer, but make sure that kqueue is -used if available. -.Sp -.Vb 1 -\& struct ev_loop *loop = ev_loop_new (ev_recommended_backends () | EVBACKEND_KQUEUE); -.Ve -.RE -.IP "ev_loop_destroy (loop)" 4 -.IX Item "ev_loop_destroy (loop)" -Destroys an event loop object (frees all memory and kernel state -etc.). None of the active event watchers will be stopped in the normal -sense, so e.g. \f(CW\*(C`ev_is_active\*(C'\fR might still return true. It is your -responsibility to either stop all watchers cleanly yourself \fIbefore\fR -calling this function, or cope with the fact afterwards (which is usually -the easiest thing, you can just ignore the watchers and/or \f(CW\*(C`free ()\*(C'\fR them -for example). -.Sp -Note that certain global state, such as signal state (and installed signal -handlers), will not be freed by this function, and related watchers (such -as signal and child watchers) would need to be stopped manually. -.Sp -This function is normally used on loop objects allocated by -\&\f(CW\*(C`ev_loop_new\*(C'\fR, but it can also be used on the default loop returned by -\&\f(CW\*(C`ev_default_loop\*(C'\fR, in which case it is not thread-safe. -.Sp -Note that it is not advisable to call this function on the default loop -except in the rare occasion where you really need to free its resources. -If you need dynamically allocated loops it is better to use \f(CW\*(C`ev_loop_new\*(C'\fR -and \f(CW\*(C`ev_loop_destroy\*(C'\fR. -.IP "ev_loop_fork (loop)" 4 -.IX Item "ev_loop_fork (loop)" -This function sets a flag that causes subsequent \f(CW\*(C`ev_run\*(C'\fR iterations to -reinitialise the kernel state for backends that have one. Despite the -name, you can call it anytime, but it makes most sense after forking, in -the child process. 
You \fImust\fR call it (or use \f(CW\*(C`EVFLAG_FORKCHECK\*(C'\fR) in the -child before resuming or calling \f(CW\*(C`ev_run\*(C'\fR. -.Sp -Again, you \fIhave\fR to call it on \fIany\fR loop that you want to re-use after -a fork, \fIeven if you do not plan to use the loop in the parent\fR. This is -because some kernel interfaces *cough* \fIkqueue\fR *cough* do funny things -during fork. -.Sp -On the other hand, you only need to call this function in the child -process if and only if you want to use the event loop in the child. If -you just fork+exec or create a new loop in the child, you don't have to -call it at all (in fact, \f(CW\*(C`epoll\*(C'\fR is so badly broken that it makes a -difference, but libev will usually detect this case on its own and do a -costly reset of the backend). -.Sp -The function itself is quite fast and it's usually not a problem to call -it just in case after a fork. -.Sp -Example: Automate calling \f(CW\*(C`ev_loop_fork\*(C'\fR on the default loop when -using pthreads. -.Sp -.Vb 5 -\& static void -\& post_fork_child (void) -\& { -\& ev_loop_fork (EV_DEFAULT); -\& } -\& -\& ... -\& pthread_atfork (0, 0, post_fork_child); -.Ve -.IP "int ev_is_default_loop (loop)" 4 -.IX Item "int ev_is_default_loop (loop)" -Returns true when the given loop is, in fact, the default loop, and false -otherwise. -.IP "unsigned int ev_iteration (loop)" 4 -.IX Item "unsigned int ev_iteration (loop)" -Returns the current iteration count for the event loop, which is identical -to the number of times libev did poll for new events. It starts at \f(CW0\fR -and happily wraps around with enough iterations. -.Sp -This value can sometimes be useful as a generation counter of sorts (it -\&\*(L"ticks\*(R" the number of loop iterations), as it roughly corresponds with -\&\f(CW\*(C`ev_prepare\*(C'\fR and \f(CW\*(C`ev_check\*(C'\fR calls \- and is incremented between the -prepare and check phases. 
-.IP "unsigned int ev_depth (loop)" 4 -.IX Item "unsigned int ev_depth (loop)" -Returns the number of times \f(CW\*(C`ev_run\*(C'\fR was entered minus the number of -times \f(CW\*(C`ev_run\*(C'\fR was exited normally, in other words, the recursion depth. -.Sp -Outside \f(CW\*(C`ev_run\*(C'\fR, this number is zero. In a callback, this number is -\&\f(CW1\fR, unless \f(CW\*(C`ev_run\*(C'\fR was invoked recursively (or from another thread), -in which case it is higher. -.Sp -Leaving \f(CW\*(C`ev_run\*(C'\fR abnormally (setjmp/longjmp, cancelling the thread, -throwing an exception etc.), doesn't count as \*(L"exit\*(R" \- consider this -as a hint to avoid such ungentleman-like behaviour unless it's really -convenient, in which case it is fully supported. -.IP "unsigned int ev_backend (loop)" 4 -.IX Item "unsigned int ev_backend (loop)" -Returns one of the \f(CW\*(C`EVBACKEND_*\*(C'\fR flags indicating the event backend in -use. -.IP "ev_tstamp ev_now (loop)" 4 -.IX Item "ev_tstamp ev_now (loop)" -Returns the current \*(L"event loop time\*(R", which is the time the event loop -received events and started processing them. This timestamp does not -change as long as callbacks are being processed, and this is also the base -time used for relative timers. You can treat it as the timestamp of the -event occurring (or more correctly, libev finding out about it). -.IP "ev_now_update (loop)" 4 -.IX Item "ev_now_update (loop)" -Establishes the current time by querying the kernel, updating the time -returned by \f(CW\*(C`ev_now ()\*(C'\fR in the process. This is a costly operation and -is usually done automatically within \f(CW\*(C`ev_run ()\*(C'\fR. -.Sp -This function is rarely useful, but when some event callback runs for a -very long time without entering the event loop, updating libev's idea of -the current time is a good idea. -.Sp -See also \*(L"The special problem of time updates\*(R" in the \f(CW\*(C`ev_timer\*(C'\fR section.
-.IP "ev_suspend (loop)" 4 -.IX Item "ev_suspend (loop)" -.PD 0 -.IP "ev_resume (loop)" 4 -.IX Item "ev_resume (loop)" -.PD -These two functions suspend and resume an event loop, for use when the -loop is not used for a while and timeouts should not be processed. -.Sp -A typical use case would be an interactive program such as a game: When -the user presses \f(CW\*(C`^Z\*(C'\fR to suspend the game and resumes it an hour later it -would be best to handle timeouts as if no time had actually passed while -the program was suspended. This can be achieved by calling \f(CW\*(C`ev_suspend\*(C'\fR -in your \f(CW\*(C`SIGTSTP\*(C'\fR handler, sending yourself a \f(CW\*(C`SIGSTOP\*(C'\fR and calling -\&\f(CW\*(C`ev_resume\*(C'\fR directly afterwards to resume timer processing. -.Sp -Effectively, all \f(CW\*(C`ev_timer\*(C'\fR watchers will be delayed by the time spent -between \f(CW\*(C`ev_suspend\*(C'\fR and \f(CW\*(C`ev_resume\*(C'\fR, and all \f(CW\*(C`ev_periodic\*(C'\fR watchers -will be rescheduled (that is, they will lose any events that would have -occurred while suspended). -.Sp -After calling \f(CW\*(C`ev_suspend\*(C'\fR you \fBmust not\fR call \fIany\fR function on the -given loop other than \f(CW\*(C`ev_resume\*(C'\fR, and you \fBmust not\fR call \f(CW\*(C`ev_resume\*(C'\fR -without a previous call to \f(CW\*(C`ev_suspend\*(C'\fR. -.Sp -Calling \f(CW\*(C`ev_suspend\*(C'\fR/\f(CW\*(C`ev_resume\*(C'\fR has the side effect of updating the -event loop time (see \f(CW\*(C`ev_now_update\*(C'\fR). -.IP "bool ev_run (loop, int flags)" 4 -.IX Item "bool ev_run (loop, int flags)" -Finally, this is it, the event handler. This function usually is called -after you have initialised all your watchers and you want to start -handling events. It will ask the operating system for any new events, call -the watcher callbacks, and then repeat the whole process indefinitely: This -is why event loops are called \fIloops\fR.
-.Sp -If the flags argument is specified as \f(CW0\fR, it will keep handling events -until either no event watchers are active anymore or \f(CW\*(C`ev_break\*(C'\fR was -called. -.Sp -The return value is false if there are no more active watchers (which -usually means \*(L"all jobs done\*(R" or \*(L"deadlock\*(R"), and true in all other cases -(which usually means " you should call \f(CW\*(C`ev_run\*(C'\fR again"). -.Sp -Please note that an explicit \f(CW\*(C`ev_break\*(C'\fR is usually better than -relying on all watchers to be stopped when deciding when a program has -finished (especially in interactive programs), but having a program -that automatically loops as long as it has to and no longer by virtue -of relying on its watchers stopping correctly, that is truly a thing of -beauty. -.Sp -This function is \fImostly\fR exception-safe \- you can break out of a -\&\f(CW\*(C`ev_run\*(C'\fR call by calling \f(CW\*(C`longjmp\*(C'\fR in a callback, throwing a \*(C+ -exception and so on. This does not decrement the \f(CW\*(C`ev_depth\*(C'\fR value, nor -will it clear any outstanding \f(CW\*(C`EVBREAK_ONE\*(C'\fR breaks. -.Sp -A flags value of \f(CW\*(C`EVRUN_NOWAIT\*(C'\fR will look for new events, will handle -those events and any already outstanding ones, but will not wait and -block your process in case there are no events and will return after one -iteration of the loop. This is sometimes useful to poll and handle new -events while doing lengthy calculations, to keep the program responsive. -.Sp -A flags value of \f(CW\*(C`EVRUN_ONCE\*(C'\fR will look for new events (waiting if -necessary) and will handle those and any already outstanding ones. It -will block your process until at least one new event arrives (which could -be an event internal to libev itself, so there is no guarantee that a -user-registered callback will be called), and will return after one -iteration of the loop. 
-.Sp -This is useful if you are waiting for some external event in conjunction -with something not expressible using other libev watchers (i.e. "roll your -own \f(CW\*(C`ev_run\*(C'\fR"). However, a pair of \f(CW\*(C`ev_prepare\*(C'\fR/\f(CW\*(C`ev_check\*(C'\fR watchers is -usually a better approach for this kind of thing. -.Sp -Here are the gory details of what \f(CW\*(C`ev_run\*(C'\fR does (this is for your -understanding, not a guarantee that things will work exactly like this in -future versions): -.Sp -.Vb 10 -\& \- Increment loop depth. -\& \- Reset the ev_break status. -\& \- Before the first iteration, call any pending watchers. -\& LOOP: -\& \- If EVFLAG_FORKCHECK was used, check for a fork. -\& \- If a fork was detected (by any means), queue and call all fork watchers. -\& \- Queue and call all prepare watchers. -\& \- If ev_break was called, goto FINISH. -\& \- If we have been forked, detach and recreate the kernel state -\& as to not disturb the other process. -\& \- Update the kernel state with all outstanding changes. -\& \- Update the "event loop time" (ev_now ()). -\& \- Calculate for how long to sleep or block, if at all -\& (active idle watchers, EVRUN_NOWAIT or not having -\& any active watchers at all will result in not sleeping). -\& \- Sleep if the I/O and timer collect interval say so. -\& \- Increment loop iteration counter. -\& \- Block the process, waiting for any events. -\& \- Queue all outstanding I/O (fd) events. -\& \- Update the "event loop time" (ev_now ()), and do time jump adjustments. -\& \- Queue all expired timers. -\& \- Queue all expired periodics. -\& \- Queue all idle watchers with priority higher than that of pending events. -\& \- Queue all check watchers. -\& \- Call all queued watchers in reverse order (i.e. check watchers first). -\& Signals and child watchers are implemented as I/O watchers, and will -\& be handled here by queueing them when their watcher gets executed. 
-\& \- If ev_break has been called, or EVRUN_ONCE or EVRUN_NOWAIT -\& were used, or there are no active watchers, goto FINISH, otherwise -\& continue with step LOOP. -\& FINISH: -\& \- Reset the ev_break status iff it was EVBREAK_ONE. -\& \- Decrement the loop depth. -\& \- Return. -.Ve -.Sp -Example: Queue some jobs and then loop until no events are outstanding -anymore. -.Sp -.Vb 4 -\& ... queue jobs here, make sure they register event watchers as long -\& ... as they still have work to do (even an idle watcher will do..) -\& ev_run (my_loop, 0); -\& ... jobs done or somebody called break. yeah! -.Ve -.IP "ev_break (loop, how)" 4 -.IX Item "ev_break (loop, how)" -Can be used to make a call to \f(CW\*(C`ev_run\*(C'\fR return early (but only after it -has processed all outstanding events). The \f(CW\*(C`how\*(C'\fR argument must be either -\&\f(CW\*(C`EVBREAK_ONE\*(C'\fR, which will make the innermost \f(CW\*(C`ev_run\*(C'\fR call return, or -\&\f(CW\*(C`EVBREAK_ALL\*(C'\fR, which will make all nested \f(CW\*(C`ev_run\*(C'\fR calls return. -.Sp -This \*(L"break state\*(R" will be cleared on the next call to \f(CW\*(C`ev_run\*(C'\fR. -.Sp -It is safe to call \f(CW\*(C`ev_break\*(C'\fR from outside any \f(CW\*(C`ev_run\*(C'\fR calls, too, in -which case it will have no effect. -.IP "ev_ref (loop)" 4 -.IX Item "ev_ref (loop)" -.PD 0 -.IP "ev_unref (loop)" 4 -.IX Item "ev_unref (loop)" -.PD -Ref/unref can be used to add or remove a reference count on the event -loop: Every watcher keeps one reference, and as long as the reference -count is nonzero, \f(CW\*(C`ev_run\*(C'\fR will not return on its own. -.Sp -This is useful when you have a watcher that you never intend to -unregister, but that nevertheless should not keep \f(CW\*(C`ev_run\*(C'\fR from -returning. In such a case, call \f(CW\*(C`ev_unref\*(C'\fR after starting, and \f(CW\*(C`ev_ref\*(C'\fR -before stopping it. 
-.Sp -As an example, libev itself uses this for its internal signal pipe: It -is not visible to the libev user and should not keep \f(CW\*(C`ev_run\*(C'\fR from -exiting if no event watchers registered by it are active. It is also an -excellent way to do this for generic recurring timers or from within -third-party libraries. Just remember to \fIunref after start\fR and \fIref -before stop\fR (but only if the watcher wasn't active before, or was active -before, respectively. Note also that libev might stop watchers itself -(e.g. non-repeating timers) in which case you have to \f(CW\*(C`ev_ref\*(C'\fR -in the callback). -.Sp -Example: Create a signal watcher, but keep it from keeping \f(CW\*(C`ev_run\*(C'\fR -running when nothing else is active. -.Sp -.Vb 4 -\& ev_signal exitsig; -\& ev_signal_init (&exitsig, sig_cb, SIGINT); -\& ev_signal_start (loop, &exitsig); -\& ev_unref (loop); -.Ve -.Sp -Example: For some weird reason, unregister the above signal handler again. -.Sp -.Vb 2 -\& ev_ref (loop); -\& ev_signal_stop (loop, &exitsig); -.Ve -.IP "ev_set_io_collect_interval (loop, ev_tstamp interval)" 4 -.IX Item "ev_set_io_collect_interval (loop, ev_tstamp interval)" -.PD 0 -.IP "ev_set_timeout_collect_interval (loop, ev_tstamp interval)" 4 -.IX Item "ev_set_timeout_collect_interval (loop, ev_tstamp interval)" -.PD -These advanced functions influence the time that libev will spend waiting -for events. Both time intervals are by default \f(CW0\fR, meaning that libev -will try to invoke timer/periodic callbacks and I/O callbacks with minimum -latency. -.Sp -Setting these to a higher value (the \f(CW\*(C`interval\*(C'\fR \fImust\fR be >= \f(CW0\fR) -allows libev to delay invocation of I/O and timer/periodic callbacks -to increase efficiency of loop iterations (or to increase power-saving -opportunities). -.Sp -The idea is that sometimes your program runs just fast enough to handle -one (or very few) event(s) per loop iteration. 
While this makes the -program responsive, it also wastes a lot of \s-1CPU\s0 time to poll for new -events, especially with backends like \f(CW\*(C`select ()\*(C'\fR which have a high -overhead for the actual polling but can deliver many events at once. -.Sp -By setting a higher \fIio collect interval\fR you allow libev to spend more -time collecting I/O events, so you can handle more events per iteration, -at the cost of increasing latency. Timeouts (both \f(CW\*(C`ev_periodic\*(C'\fR and -\&\f(CW\*(C`ev_timer\*(C'\fR) will not be affected. Setting this to a non-null value will -introduce an additional \f(CW\*(C`ev_sleep ()\*(C'\fR call into most loop iterations. The -sleep time ensures that libev will not poll for I/O events more often than -once per this interval, on average (as long as the host time resolution is -good enough). -.Sp -Likewise, by setting a higher \fItimeout collect interval\fR you allow libev -to spend more time collecting timeouts, at the expense of increased -latency/jitter/inexactness (the watcher callback will be called -later). \f(CW\*(C`ev_io\*(C'\fR watchers will not be affected. Setting this to a non-null -value will not introduce any overhead in libev. -.Sp -Many (busy) programs can usually benefit by setting the I/O collect -interval to a value near \f(CW0.1\fR or so, which is often enough for -interactive servers (of course not for games), likewise for timeouts. It -usually doesn't make much sense to set it to a lower value than \f(CW0.01\fR, -as this approaches the timing granularity of most systems. Note that if -you do transactions with the outside world and you can't increase the -parallelity, then this setting will limit your transaction rate (if you -need to poll once per transaction and the I/O collect interval is 0.01, -then you can't do more than 100 transactions per second).
-.Sp -Setting the \fItimeout collect interval\fR can improve the opportunity for -saving power, as the program will \*(L"bundle\*(R" timer callback invocations that -are \*(L"near\*(R" in time together, by delaying some, thus reducing the number of -times the process sleeps and wakes up again. Another useful technique to -reduce iterations/wake\-ups is to use \f(CW\*(C`ev_periodic\*(C'\fR watchers and make sure -they fire on, say, one-second boundaries only. -.Sp -Example: we only need 0.1s timeout granularity, and we wish not to poll -more often than 100 times per second: -.Sp -.Vb 2 -\& ev_set_timeout_collect_interval (EV_DEFAULT_UC_ 0.1); -\& ev_set_io_collect_interval (EV_DEFAULT_UC_ 0.01); -.Ve -.IP "ev_invoke_pending (loop)" 4 -.IX Item "ev_invoke_pending (loop)" -This call will simply invoke all pending watchers while resetting their -pending state. Normally, \f(CW\*(C`ev_run\*(C'\fR does this automatically when required, -but when overriding the invoke callback this call comes in handy. This -function can be invoked from a watcher \- this can be useful for example -when you want to do some lengthy calculation and want to pass further -event handling to another thread (you still have to make sure only one -thread executes within \f(CW\*(C`ev_invoke_pending\*(C'\fR or \f(CW\*(C`ev_run\*(C'\fR of course). -.IP "int ev_pending_count (loop)" 4 -.IX Item "int ev_pending_count (loop)" -Returns the number of pending watchers \- zero indicates that no watchers -are pending. -.IP "ev_set_invoke_pending_cb (loop, void (*invoke_pending_cb)(\s-1EV_P\s0))" 4 -.IX Item "ev_set_invoke_pending_cb (loop, void (*invoke_pending_cb)(EV_P))" -This overrides the invoke pending functionality of the loop: Instead of -invoking all pending watchers when there are any, \f(CW\*(C`ev_run\*(C'\fR will call -this callback instead. This is useful, for example, when you want to -invoke the actual watchers inside another context (another thread etc.).
-.Sp -If you want to reset the callback, use \f(CW\*(C`ev_invoke_pending\*(C'\fR as new -callback. -.IP "ev_set_loop_release_cb (loop, void (*release)(\s-1EV_P\s0) throw (), void (*acquire)(\s-1EV_P\s0) throw ())" 4 -.IX Item "ev_set_loop_release_cb (loop, void (*release)(EV_P) throw (), void (*acquire)(EV_P) throw ())" -Sometimes you want to share the same loop between multiple threads. This -can be done relatively simply by putting mutex_lock/unlock calls around -each call to a libev function. -.Sp -However, \f(CW\*(C`ev_run\*(C'\fR can run an indefinite time, so it is not feasible -to wait for it to return. One way around this is to wake up the event -loop via \f(CW\*(C`ev_break\*(C'\fR and \f(CW\*(C`ev_async_send\*(C'\fR, another way is to set these -\&\fIrelease\fR and \fIacquire\fR callbacks on the loop. -.Sp -When set, then \f(CW\*(C`release\*(C'\fR will be called just before the thread is -suspended waiting for new events, and \f(CW\*(C`acquire\*(C'\fR is called just -afterwards. -.Sp -Ideally, \f(CW\*(C`release\*(C'\fR will just call your mutex_unlock function, and -\&\f(CW\*(C`acquire\*(C'\fR will just call the mutex_lock function again. -.Sp -While event loop modifications are allowed between invocations of -\&\f(CW\*(C`release\*(C'\fR and \f(CW\*(C`acquire\*(C'\fR (that's their only purpose after all), no -modifications done will affect the event loop, i.e. adding watchers will -have no effect on the set of file descriptors being watched, or the time -waited. Use an \f(CW\*(C`ev_async\*(C'\fR watcher to wake up \f(CW\*(C`ev_run\*(C'\fR when you want it -to take note of any changes you made. -.Sp -In theory, threads executing \f(CW\*(C`ev_run\*(C'\fR will be async-cancel safe between -invocations of \f(CW\*(C`release\*(C'\fR and \f(CW\*(C`acquire\*(C'\fR. -.Sp -See also the locking example in the \f(CW\*(C`THREADS\*(C'\fR section later in this -document. 
-.IP "ev_set_userdata (loop, void *data)" 4 -.IX Item "ev_set_userdata (loop, void *data)" -.PD 0 -.IP "void *ev_userdata (loop)" 4 -.IX Item "void *ev_userdata (loop)" -.PD -Set and retrieve a single \f(CW\*(C`void *\*(C'\fR associated with a loop. When -\&\f(CW\*(C`ev_set_userdata\*(C'\fR has never been called, then \f(CW\*(C`ev_userdata\*(C'\fR returns -\&\f(CW0\fR. -.Sp -These two functions can be used to associate arbitrary data with a loop, -and are intended solely for the \f(CW\*(C`invoke_pending_cb\*(C'\fR, \f(CW\*(C`release\*(C'\fR and -\&\f(CW\*(C`acquire\*(C'\fR callbacks described above, but of course can be (ab\-)used for -any other purpose as well. -.IP "ev_verify (loop)" 4 -.IX Item "ev_verify (loop)" -This function only does something when \f(CW\*(C`EV_VERIFY\*(C'\fR support has been -compiled in, which is the default for non-minimal builds. It tries to go -through all internal structures and checks them for validity. If anything -is found to be inconsistent, it will print an error message to standard -error and call \f(CW\*(C`abort ()\*(C'\fR. -.Sp -This can be used to catch bugs inside libev itself: under normal -circumstances, this function will never abort as of course libev keeps its -data structures consistent. -.SH "ANATOMY OF A WATCHER" -.IX Header "ANATOMY OF A WATCHER" -In the following description, uppercase \f(CW\*(C`TYPE\*(C'\fR in names stands for the -watcher type, e.g. \f(CW\*(C`ev_TYPE_start\*(C'\fR can mean \f(CW\*(C`ev_timer_start\*(C'\fR for timer -watchers and \f(CW\*(C`ev_io_start\*(C'\fR for I/O watchers. -.PP -A watcher is an opaque structure that you allocate and register to record -your interest in some event. 
To make a concrete example, imagine you want -to wait for \s-1STDIN\s0 to become readable, you would create an \f(CW\*(C`ev_io\*(C'\fR watcher -for that: -.PP -.Vb 5 -\& static void my_cb (struct ev_loop *loop, ev_io *w, int revents) -\& { -\& ev_io_stop (w); -\& ev_break (loop, EVBREAK_ALL); -\& } -\& -\& struct ev_loop *loop = ev_default_loop (0); -\& -\& ev_io stdin_watcher; -\& -\& ev_init (&stdin_watcher, my_cb); -\& ev_io_set (&stdin_watcher, STDIN_FILENO, EV_READ); -\& ev_io_start (loop, &stdin_watcher); -\& -\& ev_run (loop, 0); -.Ve -.PP -As you can see, you are responsible for allocating the memory for your -watcher structures (and it is \fIusually\fR a bad idea to do this on the -stack). -.PP -Each watcher has an associated watcher structure (called \f(CW\*(C`struct ev_TYPE\*(C'\fR -or simply \f(CW\*(C`ev_TYPE\*(C'\fR, as typedefs are provided for all watcher structs). -.PP -Each watcher structure must be initialised by a call to \f(CW\*(C`ev_init (watcher -*, callback)\*(C'\fR, which expects a callback to be provided. This callback is -invoked each time the event occurs (or, in the case of I/O watchers, each -time the event loop detects that the file descriptor given is readable -and/or writable). -.PP -Each watcher type further has its own \f(CW\*(C`ev_TYPE_set (watcher *, ...)\*(C'\fR -macro to configure it, with arguments specific to the watcher type. There -is also a macro to combine initialisation and setting in one call: \f(CW\*(C`ev_TYPE_init (watcher *, callback, ...)\*(C'\fR. -.PP -To make the watcher actually watch out for events, you have to start it -with a watcher-specific start function (\f(CW\*(C`ev_TYPE_start (loop, watcher -*)\*(C'\fR), and you can stop watching for events at any time by calling the -corresponding stop function (\f(CW\*(C`ev_TYPE_stop (loop, watcher *)\*(C'\fR. -.PP -As long as your watcher is active (has been started but not stopped) you -must not touch the values stored in it. 
Most specifically you must never -reinitialise it or call its \f(CW\*(C`ev_TYPE_set\*(C'\fR macro. -.PP -Each and every callback receives the event loop pointer as first, the -registered watcher structure as second, and a bitset of received events as -third argument. -.PP -The received events usually include a single bit per event type received -(you can receive multiple events at the same time). The possible bit masks -are: -.ie n .IP """EV_READ""" 4 -.el .IP "\f(CWEV_READ\fR" 4 -.IX Item "EV_READ" -.PD 0 -.ie n .IP """EV_WRITE""" 4 -.el .IP "\f(CWEV_WRITE\fR" 4 -.IX Item "EV_WRITE" -.PD -The file descriptor in the \f(CW\*(C`ev_io\*(C'\fR watcher has become readable and/or -writable. -.ie n .IP """EV_TIMER""" 4 -.el .IP "\f(CWEV_TIMER\fR" 4 -.IX Item "EV_TIMER" -The \f(CW\*(C`ev_timer\*(C'\fR watcher has timed out. -.ie n .IP """EV_PERIODIC""" 4 -.el .IP "\f(CWEV_PERIODIC\fR" 4 -.IX Item "EV_PERIODIC" -The \f(CW\*(C`ev_periodic\*(C'\fR watcher has timed out. -.ie n .IP """EV_SIGNAL""" 4 -.el .IP "\f(CWEV_SIGNAL\fR" 4 -.IX Item "EV_SIGNAL" -The signal specified in the \f(CW\*(C`ev_signal\*(C'\fR watcher has been received by a thread. -.ie n .IP """EV_CHILD""" 4 -.el .IP "\f(CWEV_CHILD\fR" 4 -.IX Item "EV_CHILD" -The pid specified in the \f(CW\*(C`ev_child\*(C'\fR watcher has received a status change. -.ie n .IP """EV_STAT""" 4 -.el .IP "\f(CWEV_STAT\fR" 4 -.IX Item "EV_STAT" -The path specified in the \f(CW\*(C`ev_stat\*(C'\fR watcher changed its attributes somehow. -.ie n .IP """EV_IDLE""" 4 -.el .IP "\f(CWEV_IDLE\fR" 4 -.IX Item "EV_IDLE" -The \f(CW\*(C`ev_idle\*(C'\fR watcher has determined that you have nothing better to do. 
-.ie n .IP """EV_PREPARE""" 4 -.el .IP "\f(CWEV_PREPARE\fR" 4 -.IX Item "EV_PREPARE" -.PD 0 -.ie n .IP """EV_CHECK""" 4 -.el .IP "\f(CWEV_CHECK\fR" 4 -.IX Item "EV_CHECK" -.PD -All \f(CW\*(C`ev_prepare\*(C'\fR watchers are invoked just \fIbefore\fR \f(CW\*(C`ev_run\*(C'\fR starts to -gather new events, and all \f(CW\*(C`ev_check\*(C'\fR watchers are queued (not invoked) -just after \f(CW\*(C`ev_run\*(C'\fR has gathered them, but before it queues any callbacks -for any received events. That means \f(CW\*(C`ev_prepare\*(C'\fR watchers are the last -watchers invoked before the event loop sleeps or polls for new events, and -\&\f(CW\*(C`ev_check\*(C'\fR watchers will be invoked before any other watchers of the same -or lower priority within an event loop iteration. -.Sp -Callbacks of both watcher types can start and stop as many watchers as -they want, and all of them will be taken into account (for example, a -\&\f(CW\*(C`ev_prepare\*(C'\fR watcher might start an idle watcher to keep \f(CW\*(C`ev_run\*(C'\fR from -blocking). -.ie n .IP """EV_EMBED""" 4 -.el .IP "\f(CWEV_EMBED\fR" 4 -.IX Item "EV_EMBED" -The embedded event loop specified in the \f(CW\*(C`ev_embed\*(C'\fR watcher needs attention. -.ie n .IP """EV_FORK""" 4 -.el .IP "\f(CWEV_FORK\fR" 4 -.IX Item "EV_FORK" -The event loop has been resumed in the child process after fork (see -\&\f(CW\*(C`ev_fork\*(C'\fR). -.ie n .IP """EV_CLEANUP""" 4 -.el .IP "\f(CWEV_CLEANUP\fR" 4 -.IX Item "EV_CLEANUP" -The event loop is about to be destroyed (see \f(CW\*(C`ev_cleanup\*(C'\fR). -.ie n .IP """EV_ASYNC""" 4 -.el .IP "\f(CWEV_ASYNC\fR" 4 -.IX Item "EV_ASYNC" -The given async watcher has been asynchronously notified (see \f(CW\*(C`ev_async\*(C'\fR). -.ie n .IP """EV_CUSTOM""" 4 -.el .IP "\f(CWEV_CUSTOM\fR" 4 -.IX Item "EV_CUSTOM" -Not ever sent (or otherwise used) by libev itself, but can be freely used -by libev users to signal watchers (e.g. via \f(CW\*(C`ev_feed_event\*(C'\fR). 
-.ie n .IP """EV_ERROR""" 4 -.el .IP "\f(CWEV_ERROR\fR" 4 -.IX Item "EV_ERROR" -An unspecified error has occurred, the watcher has been stopped. This might -happen because the watcher could not be properly started because libev -ran out of memory, a file descriptor was found to be closed or any other -problem. Libev considers these application bugs. -.Sp -You best act on it by reporting the problem and somehow coping with the -watcher being stopped. Note that well-written programs should not receive -an error ever, so when your watcher receives it, this usually indicates a -bug in your program. -.Sp -Libev will usually signal a few \*(L"dummy\*(R" events together with an error, for -example it might indicate that a fd is readable or writable, and if your -callback is well-written it can just attempt the operation and cope with -the error from \fIread()\fR or \fIwrite()\fR. This will not work in multi-threaded -programs, though, as the fd could already be closed and reused for another -thing, so beware. -.SS "\s-1GENERIC\s0 \s-1WATCHER\s0 \s-1FUNCTIONS\s0" -.IX Subsection "GENERIC WATCHER FUNCTIONS" -.ie n .IP """ev_init"" (ev_TYPE *watcher, callback)" 4 -.el .IP "\f(CWev_init\fR (ev_TYPE *watcher, callback)" 4 -.IX Item "ev_init (ev_TYPE *watcher, callback)" -This macro initialises the generic portion of a watcher. The contents -of the watcher object can be arbitrary (so \f(CW\*(C`malloc\*(C'\fR will do). Only -the generic parts of the watcher are initialised, you \fIneed\fR to call -the type-specific \f(CW\*(C`ev_TYPE_set\*(C'\fR macro afterwards to initialise the -type-specific parts. For each type there is also a \f(CW\*(C`ev_TYPE_init\*(C'\fR macro -which rolls both calls into one. -.Sp -You can reinitialise a watcher at any time as long as it has been stopped -(or never started) and there are no pending events outstanding. -.Sp -The callback is always of type \f(CW\*(C`void (*)(struct ev_loop *loop, ev_TYPE *watcher, -int revents)\*(C'\fR.
-.Sp -Example: Initialise an \f(CW\*(C`ev_io\*(C'\fR watcher in two steps. -.Sp -.Vb 3 -\& ev_io w; -\& ev_init (&w, my_cb); -\& ev_io_set (&w, STDIN_FILENO, EV_READ); -.Ve -.ie n .IP """ev_TYPE_set"" (ev_TYPE *watcher, [args])" 4 -.el .IP "\f(CWev_TYPE_set\fR (ev_TYPE *watcher, [args])" 4 -.IX Item "ev_TYPE_set (ev_TYPE *watcher, [args])" -This macro initialises the type-specific parts of a watcher. You need to -call \f(CW\*(C`ev_init\*(C'\fR at least once before you call this macro, but you can -call \f(CW\*(C`ev_TYPE_set\*(C'\fR any number of times. You must not, however, call this -macro on a watcher that is active (it can be pending, however, which is a -difference to the \f(CW\*(C`ev_init\*(C'\fR macro). -.Sp -Although some watcher types do not have type-specific arguments -(e.g. \f(CW\*(C`ev_prepare\*(C'\fR) you still need to call its \f(CW\*(C`set\*(C'\fR macro. -.Sp -See \f(CW\*(C`ev_init\*(C'\fR, above, for an example. -.ie n .IP """ev_TYPE_init"" (ev_TYPE *watcher, callback, [args])" 4 -.el .IP "\f(CWev_TYPE_init\fR (ev_TYPE *watcher, callback, [args])" 4 -.IX Item "ev_TYPE_init (ev_TYPE *watcher, callback, [args])" -This convenience macro rolls both \f(CW\*(C`ev_init\*(C'\fR and \f(CW\*(C`ev_TYPE_set\*(C'\fR macro -calls into a single call. This is the most convenient method to initialise -a watcher. The same limitations apply, of course. -.Sp -Example: Initialise and set an \f(CW\*(C`ev_io\*(C'\fR watcher in one step. -.Sp -.Vb 1 -\& ev_io_init (&w, my_cb, STDIN_FILENO, EV_READ); -.Ve -.ie n .IP """ev_TYPE_start"" (loop, ev_TYPE *watcher)" 4 -.el .IP "\f(CWev_TYPE_start\fR (loop, ev_TYPE *watcher)" 4 -.IX Item "ev_TYPE_start (loop, ev_TYPE *watcher)" -Starts (activates) the given watcher. Only active watchers will receive -events. If the watcher is already active nothing will happen. -.Sp -Example: Start the \f(CW\*(C`ev_io\*(C'\fR watcher that is being abused as example in this -whole section. 
-.Sp -.Vb 1 -\& ev_io_start (EV_DEFAULT_UC, &w); -.Ve -.ie n .IP """ev_TYPE_stop"" (loop, ev_TYPE *watcher)" 4 -.el .IP "\f(CWev_TYPE_stop\fR (loop, ev_TYPE *watcher)" 4 -.IX Item "ev_TYPE_stop (loop, ev_TYPE *watcher)" -Stops the given watcher if active, and clears the pending status (whether -the watcher was active or not). -.Sp -It is possible that stopped watchers are pending \- for example, -non-repeating timers are being stopped when they become pending \- but -calling \f(CW\*(C`ev_TYPE_stop\*(C'\fR ensures that the watcher is neither active nor -pending. If you want to free or reuse the memory used by the watcher it is -therefore a good idea to always call its \f(CW\*(C`ev_TYPE_stop\*(C'\fR function. -.IP "bool ev_is_active (ev_TYPE *watcher)" 4 -.IX Item "bool ev_is_active (ev_TYPE *watcher)" -Returns a true value iff the watcher is active (i.e. it has been started -and not yet been stopped). As long as a watcher is active you must not modify -it. -.IP "bool ev_is_pending (ev_TYPE *watcher)" 4 -.IX Item "bool ev_is_pending (ev_TYPE *watcher)" -Returns a true value iff the watcher is pending, (i.e. it has outstanding -events but its callback has not yet been invoked). As long as a watcher -is pending (but not active) you must not call an init function on it (but -\&\f(CW\*(C`ev_TYPE_set\*(C'\fR is safe), you must not change its priority, and you must -make sure the watcher is available to libev (e.g. you cannot \f(CW\*(C`free ()\*(C'\fR -it). -.IP "callback ev_cb (ev_TYPE *watcher)" 4 -.IX Item "callback ev_cb (ev_TYPE *watcher)" -Returns the callback currently set on the watcher. -.IP "ev_set_cb (ev_TYPE *watcher, callback)" 4 -.IX Item "ev_set_cb (ev_TYPE *watcher, callback)" -Change the callback. You can change the callback at virtually any time -(modulo threads). 
-.IP "ev_set_priority (ev_TYPE *watcher, int priority)" 4 -.IX Item "ev_set_priority (ev_TYPE *watcher, int priority)" -.PD 0 -.IP "int ev_priority (ev_TYPE *watcher)" 4 -.IX Item "int ev_priority (ev_TYPE *watcher)" -.PD -Set and query the priority of the watcher. The priority is a small -integer between \f(CW\*(C`EV_MAXPRI\*(C'\fR (default: \f(CW2\fR) and \f(CW\*(C`EV_MINPRI\*(C'\fR -(default: \f(CW\*(C`\-2\*(C'\fR). Pending watchers with higher priority will be invoked -before watchers with lower priority, but priority will not keep watchers -from being executed (except for \f(CW\*(C`ev_idle\*(C'\fR watchers). -.Sp -If you need to suppress invocation when higher priority events are pending -you need to look at \f(CW\*(C`ev_idle\*(C'\fR watchers, which provide this functionality. -.Sp -You \fImust not\fR change the priority of a watcher as long as it is active or -pending. -.Sp -Setting a priority outside the range of \f(CW\*(C`EV_MINPRI\*(C'\fR to \f(CW\*(C`EV_MAXPRI\*(C'\fR is -fine, as long as you do not mind that the priority value you query might -or might not have been clamped to the valid range. -.Sp -The default priority used by watchers when no priority has been set is -always \f(CW0\fR, which is supposed to not be too high and not be too low :). -.Sp -See \*(L"\s-1WATCHER\s0 \s-1PRIORITY\s0 \s-1MODELS\s0\*(R", below, for a more thorough treatment of -priorities. -.IP "ev_invoke (loop, ev_TYPE *watcher, int revents)" 4 -.IX Item "ev_invoke (loop, ev_TYPE *watcher, int revents)" -Invoke the \f(CW\*(C`watcher\*(C'\fR with the given \f(CW\*(C`loop\*(C'\fR and \f(CW\*(C`revents\*(C'\fR. Neither -\&\f(CW\*(C`loop\*(C'\fR nor \f(CW\*(C`revents\*(C'\fR need to be valid as long as the watcher callback -can deal with that fact, as both are simply passed through to the -callback. 
-.IP "int ev_clear_pending (loop, ev_TYPE *watcher)" 4 -.IX Item "int ev_clear_pending (loop, ev_TYPE *watcher)" -If the watcher is pending, this function clears its pending status and -returns its \f(CW\*(C`revents\*(C'\fR bitset (as if its callback was invoked). If the -watcher isn't pending it does nothing and returns \f(CW0\fR. -.Sp -Sometimes it can be useful to \*(L"poll\*(R" a watcher instead of waiting for its -callback to be invoked, which can be accomplished with this function. -.IP "ev_feed_event (loop, ev_TYPE *watcher, int revents)" 4 -.IX Item "ev_feed_event (loop, ev_TYPE *watcher, int revents)" -Feeds the given event set into the event loop, as if the specified event -had happened for the specified watcher (which must be a pointer to an -initialised but not necessarily started event watcher). Obviously you must -not free the watcher as long as it has pending events. -.Sp -Stopping the watcher, letting libev invoke it, or calling -\&\f(CW\*(C`ev_clear_pending\*(C'\fR will clear the pending event, even if the watcher was -not started in the first place. -.Sp -See also \f(CW\*(C`ev_feed_fd_event\*(C'\fR and \f(CW\*(C`ev_feed_signal_event\*(C'\fR for related -functions that do not need a watcher. -.PP -See also the \*(L"\s-1ASSOCIATING\s0 \s-1CUSTOM\s0 \s-1DATA\s0 \s-1WITH\s0 A \s-1WATCHER\s0\*(R" and \*(L"\s-1BUILDING\s0 \s-1YOUR\s0 -\&\s-1OWN\s0 \s-1COMPOSITE\s0 \s-1WATCHERS\s0\*(R" idioms. -.SS "\s-1WATCHER\s0 \s-1STATES\s0" -.IX Subsection "WATCHER STATES" -There are various watcher states mentioned throughout this manual \- -active, pending and so on. In this section these states and the rules to -transition between them will be described in more detail \- and while these -rules might look complicated, they usually do \*(L"the right thing\*(R". -.IP "initialised" 4 -.IX Item "initialised" -Before a watcher can be registered with the event loop it has to be -initialised. 
This can be done with a call to \f(CW\*(C`ev_TYPE_init\*(C'\fR, or calls to -\&\f(CW\*(C`ev_init\*(C'\fR followed by the watcher-specific \f(CW\*(C`ev_TYPE_set\*(C'\fR function. -.Sp -In this state it is simply some block of memory that is suitable for -use in an event loop. It can be moved around, freed, reused etc. at -will \- as long as you either keep the memory contents intact, or call -\&\f(CW\*(C`ev_TYPE_init\*(C'\fR again. -.IP "started/running/active" 4 -.IX Item "started/running/active" -Once a watcher has been started with a call to \f(CW\*(C`ev_TYPE_start\*(C'\fR it becomes -property of the event loop, and is actively waiting for events. While in -this state it cannot be accessed (except in a few documented ways), moved, -freed or anything else \- the only legal thing is to keep a pointer to it, -and call libev functions on it that are documented to work on active watchers. -.IP "pending" 4 -.IX Item "pending" -If a watcher is active and libev determines that an event it is interested -in has occurred (such as a timer expiring), it will become pending. It will -stay in this pending state until either it is stopped or its callback is -about to be invoked, so it is not normally pending inside the watcher -callback. -.Sp -The watcher might or might not be active while it is pending (for example, -an expired non-repeating timer can be pending but no longer active). If it -is stopped, it can be freely accessed (e.g. by calling \f(CW\*(C`ev_TYPE_set\*(C'\fR), -but it is still property of the event loop at this time, so cannot be -moved, freed or reused. And if it is active the rules described in the -previous item still apply. -.Sp -It is also possible to feed an event on a watcher that is not active (e.g. -via \f(CW\*(C`ev_feed_event\*(C'\fR), in which case it becomes pending without being -active. 
-.IP "stopped" 4 -.IX Item "stopped" -A watcher can be stopped implicitly by libev (in which case it might still -be pending), or explicitly by calling its \f(CW\*(C`ev_TYPE_stop\*(C'\fR function. The -latter will clear any pending state the watcher might be in, regardless -of whether it was active or not, so stopping a watcher explicitly before -freeing it is often a good idea. -.Sp -While stopped (and not pending) the watcher is essentially in the -initialised state, that is, it can be reused, moved, modified in any way -you wish (but when you trash the memory block, you need to \f(CW\*(C`ev_TYPE_init\*(C'\fR -it again). -.SS "\s-1WATCHER\s0 \s-1PRIORITY\s0 \s-1MODELS\s0" -.IX Subsection "WATCHER PRIORITY MODELS" -Many event loops support \fIwatcher priorities\fR, which are usually small -integers that influence the ordering of event callback invocation -between watchers in some way, all else being equal. -.PP -In libev, Watcher priorities can be set using \f(CW\*(C`ev_set_priority\*(C'\fR. See its -description for the more technical details such as the actual priority -range. -.PP -There are two common ways how these priorities are being interpreted -by event loops: -.PP -In the more common lock-out model, higher priorities \*(L"lock out\*(R" invocation -of lower priority watchers, which means as long as higher priority -watchers receive events, lower priority watchers are not being invoked. -.PP -The less common only-for-ordering model uses priorities solely to order -callback invocation within a single event loop iteration: Higher priority -watchers are invoked before lower priority ones, but they all get invoked -before polling for new events. -.PP -Libev uses the second (only-for-ordering) model for all its watchers -except for idle watchers (which use the lock-out model).
-.PP -The rationale behind this is that implementing the lock-out model for -watchers is not well supported by most kernel interfaces, and most event -libraries will just poll for the same events again and again as long as -their callbacks have not been executed, which is very inefficient in the -common case of one high-priority watcher locking out a mass of lower -priority ones. -.PP -Static (ordering) priorities are most useful when you have two or more -watchers handling the same resource: a typical usage example is having an -\&\f(CW\*(C`ev_io\*(C'\fR watcher to receive data, and an associated \f(CW\*(C`ev_timer\*(C'\fR to handle -timeouts. Under load, data might be received while the program handles -other jobs, but since timers normally get invoked first, the timeout -handler will be executed before checking for data. In that case, giving -the timer a lower priority than the I/O watcher ensures that I/O will be -handled first even under adverse conditions (which is usually, but not -always, what you want). -.PP -Since idle watchers use the \*(L"lock-out\*(R" model, meaning that idle watchers -will only be executed when no same or higher priority watchers have -received events, they can be used to implement the \*(L"lock-out\*(R" model when -required. -.PP -For example, to emulate how many other event libraries handle priorities, -you can associate an \f(CW\*(C`ev_idle\*(C'\fR watcher to each such watcher, and in -the normal watcher callback, you just start the idle watcher. The real -processing is done in the idle watcher callback. This causes libev to -continuously poll and process kernel event data for the watcher, but when -the lock-out case is known to be rare (which in turn is rare :), this is -workable. -.PP -Usually, however, the lock-out model implemented that way will perform -miserably under the type of load it was designed to handle. 
In that case, -it might be preferable to stop the real watcher before starting the -idle watcher, so the kernel will not have to process the event in case -the actual processing will be delayed for considerable time. -.PP -Here is an example of an I/O watcher that should run at a strictly lower -priority than the default, and which should only process data when no -other events are pending: -.PP -.Vb 2 -\& ev_idle idle; // actual processing watcher -\& ev_io io; // actual event watcher -\& -\& static void -\& io_cb (EV_P_ ev_io *w, int revents) -\& { -\& // stop the I/O watcher, we received the event, but -\& // are not yet ready to handle it. -\& ev_io_stop (EV_A_ w); -\& -\& // start the idle watcher to handle the actual event. -\& // it will not be executed as long as other watchers -\& // with the default priority are receiving events. -\& ev_idle_start (EV_A_ &idle); -\& } -\& -\& static void -\& idle_cb (EV_P_ ev_idle *w, int revents) -\& { -\& // actual processing -\& read (STDIN_FILENO, ...); -\& -\& // have to start the I/O watcher again, as -\& // we have handled the event -\& ev_io_start (EV_P_ &io); -\& } -\& -\& // initialisation -\& ev_idle_init (&idle, idle_cb); -\& ev_io_init (&io, io_cb, STDIN_FILENO, EV_READ); -\& ev_io_start (EV_DEFAULT_ &io); -.Ve -.PP -In the \*(L"real\*(R" world, it might also be beneficial to start a timer, so that -low-priority connections can not be locked out forever under load. This -enables your program to keep a lower latency for important connections -during short periods of high load, while not completely locking out less -important ones. -.SH "WATCHER TYPES" -.IX Header "WATCHER TYPES" -This section describes each watcher in detail, but will not repeat -information given in the last section. Any initialisation/set macros, -functions and members specific to the watcher type are explained. 
-.PP -Members are additionally marked with either \fI[read\-only]\fR, meaning that, -while the watcher is active, you can look at the member and expect some -sensible content, but you must not modify it (you can modify it while the -watcher is stopped to your heart's content), or \fI[read\-write]\fR, which -means you can expect it to have some sensible content while the watcher -is active, but you can also modify it. Modifying it may not do something -sensible or take immediate effect (or do anything at all), but libev will -not crash or malfunction in any way. -.ie n .SS """ev_io"" \- is this file descriptor readable or writable?" -.el .SS "\f(CWev_io\fP \- is this file descriptor readable or writable?" -.IX Subsection "ev_io - is this file descriptor readable or writable?" -I/O watchers check whether a file descriptor is readable or writable -in each iteration of the event loop, or, more precisely, when reading -would not block the process and writing would at least be able to write -some data. This behaviour is called level-triggering because you keep -receiving events as long as the condition persists. Remember you can stop -the watcher if you don't want to act on the event and neither want to -receive future events. -.PP -In general you can register as many read and/or write event watchers per -fd as you want (as long as you don't confuse yourself). Setting all file -descriptors to non-blocking mode is also usually a good idea (but not -required if you know what you are doing). -.PP -Another thing you have to watch out for is that it is quite easy to -receive \*(L"spurious\*(R" readiness notifications, that is, your callback might -be called with \f(CW\*(C`EV_READ\*(C'\fR but a subsequent \f(CW\*(C`read\*(C'\fR(2) will actually block -because there is no data. It is very easy to get into this situation even -with a relatively standard program structure.
Thus it is best to always -use non-blocking I/O: An extra \f(CW\*(C`read\*(C'\fR(2) returning \f(CW\*(C`EAGAIN\*(C'\fR is far -preferable to a program hanging until some data arrives. -.PP -If you cannot run the fd in non-blocking mode (for example you should -not play around with an Xlib connection), then you have to separately -re-test whether a file descriptor is really ready with a known-to-be good -interface such as poll (fortunately in the case of Xlib, it already does -this on its own, so it's quite safe to use). Some people additionally -use \f(CW\*(C`SIGALRM\*(C'\fR and an interval timer, just to be sure you won't block -indefinitely. -.PP -But really, best use non-blocking mode. -.PP -\fIThe special problem of disappearing file descriptors\fR -.IX Subsection "The special problem of disappearing file descriptors" -.PP -Some backends (e.g. kqueue, epoll) need to be told about closing a file -descriptor (either due to calling \f(CW\*(C`close\*(C'\fR explicitly or any other means, -such as \f(CW\*(C`dup2\*(C'\fR). The reason is that you register interest in some file -descriptor, but when it goes away, the operating system will silently drop -this interest. If another file descriptor with the same number then is -registered with libev, there is no efficient way to see that this is, in -fact, a different file descriptor. -.PP -To avoid having to explicitly tell libev about such cases, libev follows -the following policy: Each time \f(CW\*(C`ev_io_set\*(C'\fR is being called, libev -will assume that this is potentially a new file descriptor, otherwise -it is assumed that the file descriptor stays the same. That means that -you \fIhave\fR to call \f(CW\*(C`ev_io_set\*(C'\fR (or \f(CW\*(C`ev_io_init\*(C'\fR) when you change the -descriptor even if the file descriptor number itself did not change.
-.PP -This is how one would do it normally anyway, the important point is that -the libev application should not optimise around libev but should leave -optimisations to libev. -.PP -\fIThe special problem of dup'ed file descriptors\fR -.IX Subsection "The special problem of dup'ed file descriptors" -.PP -Some backends (e.g. epoll), cannot register events for file descriptors, -but only events for the underlying file descriptions. That means when you -have \f(CW\*(C`dup ()\*(C'\fR'ed file descriptors or weirder constellations, and register -events for them, only one file descriptor might actually receive events. -.PP -There is no workaround possible except not registering events -for potentially \f(CW\*(C`dup ()\*(C'\fR'ed file descriptors, or to resort to -\&\f(CW\*(C`EVBACKEND_SELECT\*(C'\fR or \f(CW\*(C`EVBACKEND_POLL\*(C'\fR. -.PP -\fIThe special problem of files\fR -.IX Subsection "The special problem of files" -.PP -Many people try to use \f(CW\*(C`select\*(C'\fR (or libev) on file descriptors -representing files, and expect it to become ready when their program -doesn't block on disk accesses (which can take a long time on their own). -.PP -However, this cannot ever work in the \*(L"expected\*(R" way \- you get a readiness -notification as soon as the kernel knows whether and how much data is -there, and in the case of open files, that's always the case, so you -always get a readiness notification instantly, and your read (or possibly -write) will still block on the disk I/O. -.PP -Another way to view it is that in the case of sockets, pipes, character -devices and so on, there is another party (the sender) that delivers data -on its own, but in the case of files, there is no such thing: the disk -will not send data on its own, simply because it doesn't know what you -wish to read \- you would first have to request some data. 
-.PP -Since files are typically not-so-well supported by advanced notification -mechanism, libev tries hard to emulate \s-1POSIX\s0 behaviour with respect -to files, even though you should not use it. The reason for this is -convenience: sometimes you want to watch \s-1STDIN\s0 or \s-1STDOUT\s0, which is -usually a tty, often a pipe, but also sometimes files or special devices -(for example, \f(CW\*(C`epoll\*(C'\fR on Linux works with \fI/dev/random\fR but not with -\&\fI/dev/urandom\fR), and even though the file might better be served with -asynchronous I/O instead of with non-blocking I/O, it is still useful when -it \*(L"just works\*(R" instead of freezing. -.PP -So avoid file descriptors pointing to files when you know it (e.g. use -libeio), but use them when it is convenient, e.g. for \s-1STDIN/STDOUT\s0, or -when you rarely read from a file instead of from a socket, and want to -reuse the same code path. -.PP -\fIThe special problem of fork\fR -.IX Subsection "The special problem of fork" -.PP -Some backends (epoll, kqueue) do not support \f(CW\*(C`fork ()\*(C'\fR at all or exhibit -useless behaviour. Libev fully supports fork, but needs to be told about -it in the child if you want to continue to use it in the child. -.PP -To support fork in your child processes, you have to call \f(CW\*(C`ev_loop_fork -()\*(C'\fR after a fork in the child, enable \f(CW\*(C`EVFLAG_FORKCHECK\*(C'\fR, or resort to -\&\f(CW\*(C`EVBACKEND_SELECT\*(C'\fR or \f(CW\*(C`EVBACKEND_POLL\*(C'\fR. -.PP -\fIThe special problem of \s-1SIGPIPE\s0\fR -.IX Subsection "The special problem of SIGPIPE" -.PP -While not really specific to libev, it is easy to forget about \f(CW\*(C`SIGPIPE\*(C'\fR: -when writing to a pipe whose other end has been closed, your program gets -sent a \s-1SIGPIPE\s0, which, by default, aborts your program. For most programs -this is sensible behaviour, for daemons, this is usually undesirable. 
-.PP -So when you encounter spurious, unexplained daemon exits, make sure you -ignore \s-1SIGPIPE\s0 (and maybe make sure you log the exit status of your daemon -somewhere, as that would have given you a big clue). -.PP -\fIThe special problem of \fIaccept()\fIing when you can't\fR -.IX Subsection "The special problem of accept()ing when you can't" -.PP -Many implementations of the \s-1POSIX\s0 \f(CW\*(C`accept\*(C'\fR function (for example, -found in post\-2004 Linux) have the peculiar behaviour of not removing a -connection from the pending queue in all error cases. -.PP -For example, larger servers often run out of file descriptors (because -of resource limits), causing \f(CW\*(C`accept\*(C'\fR to fail with \f(CW\*(C`ENFILE\*(C'\fR but not -rejecting the connection, leading to libev signalling readiness on -the next iteration again (the connection still exists after all), and -typically causing the program to loop at 100% \s-1CPU\s0 usage. -.PP -Unfortunately, the set of errors that cause this issue differs between -operating systems, there is usually little the app can do to remedy the -situation, and no thread-safe method of removing the connection to -cope with overload is known (to me). -.PP -One of the easiest ways to handle this situation is to just ignore it -\&\- when the program encounters an overload, it will just loop until the -situation is over. While this is a form of busy waiting, no \s-1OS\s0 offers an -event-based way to handle this situation, so it's the best one can do. -.PP -A better way to handle the situation is to log any errors other than -\&\f(CW\*(C`EAGAIN\*(C'\fR and \f(CW\*(C`EWOULDBLOCK\*(C'\fR, making sure not to flood the log with such -messages, and continue as usual, which at least gives the user an idea of -what could be wrong (\*(L"raise the ulimit!\*(R"). For extra points one could stop -the \f(CW\*(C`ev_io\*(C'\fR watcher on the listening fd \*(L"for a while\*(R", which reduces \s-1CPU\s0 -usage.
-.PP -If your program is single-threaded, then you could also keep a dummy file -descriptor for overload situations (e.g. by opening \fI/dev/null\fR), and -when you run into \f(CW\*(C`ENFILE\*(C'\fR or \f(CW\*(C`EMFILE\*(C'\fR, close it, run \f(CW\*(C`accept\*(C'\fR, -close that fd, and create a new dummy fd. This will gracefully refuse -clients under typical overload conditions. -.PP -The last way to handle it is to simply log the error and \f(CW\*(C`exit\*(C'\fR, as -is often done with \f(CW\*(C`malloc\*(C'\fR failures, but this results in an easy -opportunity for a DoS attack. -.PP -\fIWatcher-Specific Functions\fR -.IX Subsection "Watcher-Specific Functions" -.IP "ev_io_init (ev_io *, callback, int fd, int events)" 4 -.IX Item "ev_io_init (ev_io *, callback, int fd, int events)" -.PD 0 -.IP "ev_io_set (ev_io *, int fd, int events)" 4 -.IX Item "ev_io_set (ev_io *, int fd, int events)" -.PD -Configures an \f(CW\*(C`ev_io\*(C'\fR watcher. The \f(CW\*(C`fd\*(C'\fR is the file descriptor to -receive events for and \f(CW\*(C`events\*(C'\fR is either \f(CW\*(C`EV_READ\*(C'\fR, \f(CW\*(C`EV_WRITE\*(C'\fR or -\&\f(CW\*(C`EV_READ | EV_WRITE\*(C'\fR, to express the desire to receive the given events. -.IP "int fd [read\-only]" 4 -.IX Item "int fd [read-only]" -The file descriptor being watched. -.IP "int events [read\-only]" 4 -.IX Item "int events [read-only]" -The events being watched. -.PP -\fIExamples\fR -.IX Subsection "Examples" -.PP -Example: Call \f(CW\*(C`stdin_readable_cb\*(C'\fR when \s-1STDIN_FILENO\s0 has become, well -readable, but only once. Since it is likely line-buffered, you could -attempt to read a whole line in the callback. -.PP -.Vb 6 -\& static void -\& stdin_readable_cb (struct ev_loop *loop, ev_io *w, int revents) -\& { -\& ev_io_stop (loop, w); -\& .. read from stdin here (or from w\->fd) and handle any I/O errors -\& } -\& -\& ... 
-\& struct ev_loop *loop = ev_default_init (0); -\& ev_io stdin_readable; -\& ev_io_init (&stdin_readable, stdin_readable_cb, STDIN_FILENO, EV_READ); -\& ev_io_start (loop, &stdin_readable); -\& ev_run (loop, 0); -.Ve -.ie n .SS """ev_timer"" \- relative and optionally repeating timeouts" -.el .SS "\f(CWev_timer\fP \- relative and optionally repeating timeouts" -.IX Subsection "ev_timer - relative and optionally repeating timeouts" -Timer watchers are simple relative timers that generate an event after a -given time, and optionally repeating in regular intervals after that. -.PP -The timers are based on real time, that is, if you register an event that -times out after an hour and you reset your system clock to January last -year, it will still time out after (roughly) one hour. \*(L"Roughly\*(R" because -detecting time jumps is hard, and some inaccuracies are unavoidable (the -monotonic clock option helps a lot here). -.PP -The callback is guaranteed to be invoked only \fIafter\fR its timeout has -passed (not \fIat\fR, so on systems with very low-resolution clocks this -might introduce a small delay, see \*(L"the special problem of being too -early\*(R", below). If multiple timers become ready during the same loop -iteration then the ones with earlier time-out values are invoked before -ones of the same priority with later time-out values (but this is no -longer true when a callback calls \f(CW\*(C`ev_run\*(C'\fR recursively). -.PP -\fIBe smart about timeouts\fR -.IX Subsection "Be smart about timeouts" -.PP -Many real-world problems involve some kind of timeout, usually for error -recovery. A typical example is an \s-1HTTP\s0 request \- if the other side hangs, -you want to raise some error after a while. -.PP -What follows are some ways to handle this problem, from obvious and -inefficient to smart and efficient. -.PP -In the following, a 60 second activity timeout is assumed \- a timeout that -gets reset to 60 seconds each time there is activity (e.g. 
each time some -data or other life sign was received). -.IP "1. Use a timer and stop, reinitialise and start it on activity." 4 -.IX Item "1. Use a timer and stop, reinitialise and start it on activity." -This is the most obvious, but not the most simple way: In the beginning, -start the watcher: -.Sp -.Vb 2 -\& ev_timer_init (timer, callback, 60., 0.); -\& ev_timer_start (loop, timer); -.Ve -.Sp -Then, each time there is some activity, \f(CW\*(C`ev_timer_stop\*(C'\fR it, initialise it -and start it again: -.Sp -.Vb 3 -\& ev_timer_stop (loop, timer); -\& ev_timer_set (timer, 60., 0.); -\& ev_timer_start (loop, timer); -.Ve -.Sp -This is relatively simple to implement, but means that each time there is -some activity, libev will first have to remove the timer from its internal -data structure and then add it again. Libev tries to be fast, but it's -still not a constant-time operation. -.ie n .IP "2. Use a timer and re-start it with ""ev_timer_again"" inactivity." 4 -.el .IP "2. Use a timer and re-start it with \f(CWev_timer_again\fR inactivity." 4 -.IX Item "2. Use a timer and re-start it with ev_timer_again inactivity." -This is the easiest way, and involves using \f(CW\*(C`ev_timer_again\*(C'\fR instead of -\&\f(CW\*(C`ev_timer_start\*(C'\fR. -.Sp -To implement this, configure an \f(CW\*(C`ev_timer\*(C'\fR with a \f(CW\*(C`repeat\*(C'\fR value -of \f(CW60\fR and then call \f(CW\*(C`ev_timer_again\*(C'\fR at start and each time you -successfully read or write some data. If you go into an idle state where -you do not expect data to travel on the socket, you can \f(CW\*(C`ev_timer_stop\*(C'\fR -the timer, and \f(CW\*(C`ev_timer_again\*(C'\fR will automatically restart it if need be. -.Sp -That means you can ignore both the \f(CW\*(C`ev_timer_start\*(C'\fR function and the -\&\f(CW\*(C`after\*(C'\fR argument to \f(CW\*(C`ev_timer_set\*(C'\fR, and only ever use the \f(CW\*(C`repeat\*(C'\fR -member and \f(CW\*(C`ev_timer_again\*(C'\fR. 
-.Sp -At start: -.Sp -.Vb 3 -\& ev_init (timer, callback); -\& timer\->repeat = 60.; -\& ev_timer_again (loop, timer); -.Ve -.Sp -Each time there is some activity: -.Sp -.Vb 1 -\& ev_timer_again (loop, timer); -.Ve -.Sp -It is even possible to change the time-out on the fly, regardless of -whether the watcher is active or not: -.Sp -.Vb 2 -\& timer\->repeat = 30.; -\& ev_timer_again (loop, timer); -.Ve -.Sp -This is slightly more efficient then stopping/starting the timer each time -you want to modify its timeout value, as libev does not have to completely -remove and re-insert the timer from/into its internal data structure. -.Sp -It is, however, even simpler than the \*(L"obvious\*(R" way to do it. -.IP "3. Let the timer time out, but then re-arm it as required." 4 -.IX Item "3. Let the timer time out, but then re-arm it as required." -This method is more tricky, but usually most efficient: Most timeouts are -relatively long compared to the intervals between other activity \- in -our example, within 60 seconds, there are usually many I/O events with -associated activity resets. -.Sp -In this case, it would be more efficient to leave the \f(CW\*(C`ev_timer\*(C'\fR alone, -but remember the time of last activity, and check for a real timeout only -within the callback: -.Sp -.Vb 3 -\& ev_tstamp timeout = 60.; -\& ev_tstamp last_activity; // time of last activity -\& ev_timer timer; -\& -\& static void -\& callback (EV_P_ ev_timer *w, int revents) -\& { -\& // calculate when the timeout would happen -\& ev_tstamp after = last_activity \- ev_now (EV_A) + timeout; -\& -\& // if negative, it means we the timeout already occurred -\& if (after < 0.) -\& { -\& // timeout occurred, take action -\& } -\& else -\& { -\& // callback was invoked, but there was some recent -\& // activity. simply restart the timer to time out -\& // after "after" seconds, which is the earliest time -\& // the timeout can occur. 
-\& ev_timer_set (w, after, 0.); -\& ev_timer_start (EV_A_ w); -\& } -\& } -.Ve -.Sp -To summarise the callback: first calculate in how many seconds the -timeout will occur (by calculating the absolute time when it would occur, -\&\f(CW\*(C`last_activity + timeout\*(C'\fR, and subtracting the current time, \f(CW\*(C`ev_now -(EV_A)\*(C'\fR from that). -.Sp -If this value is negative, then we are already past the timeout, i.e. we -timed out, and need to do whatever is needed in this case. -.Sp -Otherwise, we now the earliest time at which the timeout would trigger, -and simply start the timer with this timeout value. -.Sp -In other words, each time the callback is invoked it will check whether -the timeout occurred. If not, it will simply reschedule itself to check -again at the earliest time it could time out. Rinse. Repeat. -.Sp -This scheme causes more callback invocations (about one every 60 seconds -minus half the average time between activity), but virtually no calls to -libev to change the timeout. -.Sp -To start the machinery, simply initialise the watcher and set -\&\f(CW\*(C`last_activity\*(C'\fR to the current time (meaning there was some activity just -now), then call the callback, which will \*(L"do the right thing\*(R" and start -the timer: -.Sp -.Vb 3 -\& last_activity = ev_now (EV_A); -\& ev_init (&timer, callback); -\& callback (EV_A_ &timer, 0); -.Ve -.Sp -When there is some activity, simply store the current time in -\&\f(CW\*(C`last_activity\*(C'\fR, no libev calls at all: -.Sp -.Vb 2 -\& if (activity detected) -\& last_activity = ev_now (EV_A); -.Ve -.Sp -When your timeout value changes, then the timeout can be changed by simply -providing a new value, stopping the timer and calling the callback, which -will again do the right thing (for example, time out immediately :). 
-.Sp -.Vb 3 -\& timeout = new_value; -\& ev_timer_stop (EV_A_ &timer); -\& callback (EV_A_ &timer, 0); -.Ve -.Sp -This technique is slightly more complex, but in most cases where the -time-out is unlikely to be triggered, much more efficient. -.IP "4. Wee, just use a double-linked list for your timeouts." 4 -.IX Item "4. Wee, just use a double-linked list for your timeouts." -If there is not one request, but many thousands (millions...), all -employing some kind of timeout with the same timeout value, then one can -do even better: -.Sp -When starting the timeout, calculate the timeout value and put the timeout -at the \fIend\fR of the list. -.Sp -Then use an \f(CW\*(C`ev_timer\*(C'\fR to fire when the timeout at the \fIbeginning\fR of -the list is expected to fire (for example, using the technique #3). -.Sp -When there is some activity, remove the timer from the list, recalculate -the timeout, append it to the end of the list again, and make sure to -update the \f(CW\*(C`ev_timer\*(C'\fR if it was taken from the beginning of the list. -.Sp -This way, one can manage an unlimited number of timeouts in O(1) time for -starting, stopping and updating the timers, at the expense of a major -complication, and having to use a constant timeout. The constant timeout -ensures that the list stays sorted. -.PP -So which method the best? -.PP -Method #2 is a simple no-brain-required solution that is adequate in most -situations. Method #3 requires a bit more thinking, but handles many cases -better, and isn't very complicated either. In most case, choosing either -one is fine, with #3 being better in typical situations. -.PP -Method #1 is almost always a bad idea, and buys you nothing. Method #4 is -rather complicated, but extremely efficient, something that really pays -off after the first million or so of active timers, i.e. 
it's usually -overkill :) -.PP -\fIThe special problem of being too early\fR -.IX Subsection "The special problem of being too early" -.PP -If you ask a timer to call your callback after three seconds, then -you expect it to be invoked after three seconds \- but of course, this -cannot be guaranteed to infinite precision. Less obviously, it cannot be -guaranteed to any precision by libev \- imagine somebody suspending the -process with a \s-1STOP\s0 signal for a few hours for example. -.PP -So, libev tries to invoke your callback as soon as possible \fIafter\fR the -delay has occurred, but cannot guarantee this. -.PP -A less obvious failure mode is calling your callback too early: many event -loops compare timestamps with a \*(L"elapsed delay >= requested delay\*(R", but -this can cause your callback to be invoked much earlier than you would -expect. -.PP -To see why, imagine a system with a clock that only offers full second -resolution (think windows if you can't come up with a broken enough \s-1OS\s0 -yourself). If you schedule a one-second timer at the time 500.9, then the -event loop will schedule your timeout to elapse at a system time of 500 -(500.9 truncated to the resolution) + 1, or 501. -.PP -If an event library looks at the timeout 0.1s later, it will see \*(L"501 >= -501\*(R" and invoke the callback 0.1s after it was started, even though a -one-second delay was requested \- this is being \*(L"too early\*(R", despite best -intentions. -.PP -This is the reason why libev will never invoke the callback if the elapsed -delay equals the requested delay, but only when the elapsed delay is -larger than the requested delay. In the example above, libev would only invoke -the callback at system time 502, or 1.1s after the timer was started. 
-.PP -So, while libev cannot guarantee that your callback will be invoked -exactly when requested, it \fIcan\fR and \fIdoes\fR guarantee that the requested -delay has actually elapsed, or in other words, it always errs on the \*(L"too -late\*(R" side of things. -.PP -\fIThe special problem of time updates\fR -.IX Subsection "The special problem of time updates" -.PP -Establishing the current time is a costly operation (it usually takes -at least one system call): \s-1EV\s0 therefore updates its idea of the current -time only before and after \f(CW\*(C`ev_run\*(C'\fR collects new events, which causes a -growing difference between \f(CW\*(C`ev_now ()\*(C'\fR and \f(CW\*(C`ev_time ()\*(C'\fR when handling -lots of events in one iteration. -.PP -The relative timeouts are calculated relative to the \f(CW\*(C`ev_now ()\*(C'\fR -time. This is usually the right thing as this timestamp refers to the time -of the event triggering whatever timeout you are modifying/starting. If -you suspect event processing to be delayed and you \fIneed\fR to base the -timeout on the current time, use something like this to adjust for this: -.PP -.Vb 1 -\& ev_timer_set (&timer, after + ev_now () \- ev_time (), 0.); -.Ve -.PP -If the event loop is suspended for a long time, you can also force an -update of the time returned by \f(CW\*(C`ev_now ()\*(C'\fR by calling \f(CW\*(C`ev_now_update -()\*(C'\fR. -.PP -\fIThe special problem of unsynchronised clocks\fR -.IX Subsection "The special problem of unsynchronised clocks" -.PP -Modern systems have a variety of clocks \- libev itself uses the normal -\&\*(L"wall clock\*(R" clock and, if available, the monotonic clock (to avoid time -jumps). -.PP -Neither of these clocks is synchronised with each other or any other clock -on the system, so \f(CW\*(C`ev_time ()\*(C'\fR might return a considerably different time -than \f(CW\*(C`gettimeofday ()\*(C'\fR or \f(CW\*(C`time ()\*(C'\fR. 
On a GNU/Linux system, for example, -a call to \f(CW\*(C`gettimeofday\*(C'\fR might return a second count that is one higher -than a directly following call to \f(CW\*(C`time\*(C'\fR. -.PP -The moral of this is to only compare libev-related timestamps with -\&\f(CW\*(C`ev_time ()\*(C'\fR and \f(CW\*(C`ev_now ()\*(C'\fR, at least if you want better precision than -a second or so. -.PP -One more problem arises due to this lack of synchronisation: if libev uses -the system monotonic clock and you compare timestamps from \f(CW\*(C`ev_time\*(C'\fR -or \f(CW\*(C`ev_now\*(C'\fR from when you started your timer and when your callback is -invoked, you will find that sometimes the callback is a bit \*(L"early\*(R". -.PP -This is because \f(CW\*(C`ev_timer\*(C'\fRs work in real time, not wall clock time, so -libev makes sure your callback is not invoked before the delay happened, -\&\fImeasured according to the real time\fR, not the system clock. -.PP -If your timeouts are based on a physical timescale (e.g. \*(L"time out this -connection after 100 seconds\*(R") then this shouldn't bother you as it is -exactly the right behaviour. -.PP -If you want to compare wall clock/system timestamps to your timers, then -you need to use \f(CW\*(C`ev_periodic\*(C'\fRs, as these are based on the wall clock -time, where your comparisons will always generate correct results. -.PP -\fIThe special problems of suspended animation\fR -.IX Subsection "The special problems of suspended animation" -.PP -When you leave the server world it is quite customary to hit machines that -can suspend/hibernate \- what happens to the clocks during such a suspend? -.PP -Some quick tests made with a Linux 2.6.28 indicate that a suspend freezes -all processes, while the clocks (\f(CW\*(C`times\*(C'\fR, \f(CW\*(C`CLOCK_MONOTONIC\*(C'\fR) continue -to run until the system is suspended, but they will not advance while the -system is suspended. 
That means, on resume, it will be as if the program -was frozen for a few seconds, but the suspend time will not be counted -towards \f(CW\*(C`ev_timer\*(C'\fR when a monotonic clock source is used. The real time -clock advanced as expected, but if it is used as sole clocksource, then a -long suspend would be detected as a time jump by libev, and timers would -be adjusted accordingly. -.PP -I would not be surprised to see different behaviour in different between -operating systems, \s-1OS\s0 versions or even different hardware. -.PP -The other form of suspend (job control, or sending a \s-1SIGSTOP\s0) will see a -time jump in the monotonic clocks and the realtime clock. If the program -is suspended for a very long time, and monotonic clock sources are in use, -then you can expect \f(CW\*(C`ev_timer\*(C'\fRs to expire as the full suspension time -will be counted towards the timers. When no monotonic clock source is in -use, then libev will again assume a timejump and adjust accordingly. -.PP -It might be beneficial for this latter case to call \f(CW\*(C`ev_suspend\*(C'\fR -and \f(CW\*(C`ev_resume\*(C'\fR in code that handles \f(CW\*(C`SIGTSTP\*(C'\fR, to at least get -deterministic behaviour in this case (you can do nothing against -\&\f(CW\*(C`SIGSTOP\*(C'\fR). -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_timer_init (ev_timer *, callback, ev_tstamp after, ev_tstamp repeat)" 4 -.IX Item "ev_timer_init (ev_timer *, callback, ev_tstamp after, ev_tstamp repeat)" -.PD 0 -.IP "ev_timer_set (ev_timer *, ev_tstamp after, ev_tstamp repeat)" 4 -.IX Item "ev_timer_set (ev_timer *, ev_tstamp after, ev_tstamp repeat)" -.PD -Configure the timer to trigger after \f(CW\*(C`after\*(C'\fR seconds. If \f(CW\*(C`repeat\*(C'\fR -is \f(CW0.\fR, then it will automatically be stopped once the timeout is -reached. 
If it is positive, then the timer will automatically be -configured to trigger again \f(CW\*(C`repeat\*(C'\fR seconds later, again, and again, -until stopped manually. -.Sp -The timer itself will do a best-effort at avoiding drift, that is, if -you configure a timer to trigger every 10 seconds, then it will normally -trigger at exactly 10 second intervals. If, however, your program cannot -keep up with the timer (because it takes longer than those 10 seconds to -do stuff) the timer will not fire more than once per event loop iteration. -.IP "ev_timer_again (loop, ev_timer *)" 4 -.IX Item "ev_timer_again (loop, ev_timer *)" -This will act as if the timer timed out, and restarts it again if it is -repeating. It basically works like calling \f(CW\*(C`ev_timer_stop\*(C'\fR, updating the -timeout to the \f(CW\*(C`repeat\*(C'\fR value and calling \f(CW\*(C`ev_timer_start\*(C'\fR. -.Sp -The exact semantics are as in the following rules, all of which will be -applied to the watcher: -.RS 4 -.IP "If the timer is pending, the pending status is always cleared." 4 -.IX Item "If the timer is pending, the pending status is always cleared." -.PD 0 -.IP "If the timer is started but non-repeating, stop it (as if it timed out, without invoking it)." 4 -.IX Item "If the timer is started but non-repeating, stop it (as if it timed out, without invoking it)." -.ie n .IP "If the timer is repeating, make the ""repeat"" value the new timeout and start the timer, if necessary." 4 -.el .IP "If the timer is repeating, make the \f(CWrepeat\fR value the new timeout and start the timer, if necessary." 4 -.IX Item "If the timer is repeating, make the repeat value the new timeout and start the timer, if necessary." -.RE -.RS 4 -.PD -.Sp -This sounds a bit complicated, see \*(L"Be smart about timeouts\*(R", above, for a -usage example. 
-.RE -.IP "ev_tstamp ev_timer_remaining (loop, ev_timer *)" 4 -.IX Item "ev_tstamp ev_timer_remaining (loop, ev_timer *)" -Returns the remaining time until a timer fires. If the timer is active, -then this time is relative to the current event loop time, otherwise it's -the timeout value currently configured. -.Sp -That is, after an \f(CW\*(C`ev_timer_set (w, 5, 7)\*(C'\fR, \f(CW\*(C`ev_timer_remaining\*(C'\fR returns -\&\f(CW5\fR. When the timer is started and one second passes, \f(CW\*(C`ev_timer_remaining\*(C'\fR -will return \f(CW4\fR. When the timer expires and is restarted, it will return -roughly \f(CW7\fR (likely slightly less as callback invocation takes some time, -too), and so on. -.IP "ev_tstamp repeat [read\-write]" 4 -.IX Item "ev_tstamp repeat [read-write]" -The current \f(CW\*(C`repeat\*(C'\fR value. Will be used each time the watcher times out -or \f(CW\*(C`ev_timer_again\*(C'\fR is called, and determines the next timeout (if any), -which is also when any modifications are taken into account. -.PP -\fIExamples\fR -.IX Subsection "Examples" -.PP -Example: Create a timer that fires after 60 seconds. -.PP -.Vb 5 -\& static void -\& one_minute_cb (struct ev_loop *loop, ev_timer *w, int revents) -\& { -\& .. one minute over, w is actually stopped right here -\& } -\& -\& ev_timer mytimer; -\& ev_timer_init (&mytimer, one_minute_cb, 60., 0.); -\& ev_timer_start (loop, &mytimer); -.Ve -.PP -Example: Create a timeout timer that times out after 10 seconds of -inactivity. -.PP -.Vb 5 -\& static void -\& timeout_cb (struct ev_loop *loop, ev_timer *w, int revents) -\& { -\& .. 
ten seconds without any activity -\& } -\& -\& ev_timer mytimer; -\& ev_timer_init (&mytimer, timeout_cb, 0., 10.); /* note, only repeat used */ -\& ev_timer_again (&mytimer); /* start timer */ -\& ev_run (loop, 0); -\& -\& // and in some piece of code that gets executed on any "activity": -\& // reset the timeout to start ticking again at 10 seconds -\& ev_timer_again (&mytimer); -.Ve -.ie n .SS """ev_periodic"" \- to cron or not to cron?" -.el .SS "\f(CWev_periodic\fP \- to cron or not to cron?" -.IX Subsection "ev_periodic - to cron or not to cron?" -Periodic watchers are also timers of a kind, but they are very versatile -(and unfortunately a bit complex). -.PP -Unlike \f(CW\*(C`ev_timer\*(C'\fR, periodic watchers are not based on real time (or -relative time, the physical time that passes) but on wall clock time -(absolute time, the thing you can read on your calender or clock). The -difference is that wall clock time can run faster or slower than real -time, and time jumps are not uncommon (e.g. when you adjust your -wrist-watch). -.PP -You can tell a periodic watcher to trigger after some specific point -in time: for example, if you tell a periodic watcher to trigger \*(L"in 10 -seconds\*(R" (by specifying e.g. \f(CW\*(C`ev_now () + 10.\*(C'\fR, that is, an absolute time -not a delay) and then reset your system clock to January of the previous -year, then it will take a year or more to trigger the event (unlike an -\&\f(CW\*(C`ev_timer\*(C'\fR, which would still trigger roughly 10 seconds after starting -it, as it uses a relative timeout). -.PP -\&\f(CW\*(C`ev_periodic\*(C'\fR watchers can also be used to implement vastly more complex -timers, such as triggering an event on each \*(L"midnight, local time\*(R", or -other complicated rules. This cannot be done with \f(CW\*(C`ev_timer\*(C'\fR watchers, as -those cannot react to time jumps. 
-.PP -As with timers, the callback is guaranteed to be invoked only when the -point in time where it is supposed to trigger has passed. If multiple -timers become ready during the same loop iteration then the ones with -earlier time-out values are invoked before ones with later time-out values -(but this is no longer true when a callback calls \f(CW\*(C`ev_run\*(C'\fR recursively). -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_periodic_init (ev_periodic *, callback, ev_tstamp offset, ev_tstamp interval, reschedule_cb)" 4 -.IX Item "ev_periodic_init (ev_periodic *, callback, ev_tstamp offset, ev_tstamp interval, reschedule_cb)" -.PD 0 -.IP "ev_periodic_set (ev_periodic *, ev_tstamp offset, ev_tstamp interval, reschedule_cb)" 4 -.IX Item "ev_periodic_set (ev_periodic *, ev_tstamp offset, ev_tstamp interval, reschedule_cb)" -.PD -Lots of arguments, let's sort it out... There are basically three modes of -operation, and we will explain them from simplest to most complex: -.RS 4 -.IP "\(bu" 4 -absolute timer (offset = absolute time, interval = 0, reschedule_cb = 0) -.Sp -In this configuration the watcher triggers an event after the wall clock -time \f(CW\*(C`offset\*(C'\fR has passed. It will not repeat and will not adjust when a -time jump occurs, that is, if it is to be run at January 1st 2011 then it -will be stopped and invoked when the system clock reaches or surpasses -this point in time. -.IP "\(bu" 4 -repeating interval timer (offset = offset within interval, interval > 0, reschedule_cb = 0) -.Sp -In this mode the watcher will always be scheduled to time out at the next -\&\f(CW\*(C`offset + N * interval\*(C'\fR time (for some integer N, which can also be -negative) and then repeat, regardless of any time jumps. The \f(CW\*(C`offset\*(C'\fR -argument is merely an offset into the \f(CW\*(C`interval\*(C'\fR periods. 
-.Sp -This can be used to create timers that do not drift with respect to the -system clock, for example, here is an \f(CW\*(C`ev_periodic\*(C'\fR that triggers each -hour, on the hour (with respect to \s-1UTC\s0): -.Sp -.Vb 1 -\& ev_periodic_set (&periodic, 0., 3600., 0); -.Ve -.Sp -This doesn't mean there will always be 3600 seconds in between triggers, -but only that the callback will be called when the system time shows a -full hour (\s-1UTC\s0), or more correctly, when the system time is evenly divisible -by 3600. -.Sp -Another way to think about it (for the mathematically inclined) is that -\&\f(CW\*(C`ev_periodic\*(C'\fR will try to run the callback in this mode at the next possible -time where \f(CW\*(C`time = offset (mod interval)\*(C'\fR, regardless of any time jumps. -.Sp -The \f(CW\*(C`interval\*(C'\fR \fI\s-1MUST\s0\fR be positive, and for numerical stability, the -interval value should be higher than \f(CW\*(C`1/8192\*(C'\fR (which is around 100 -microseconds) and \f(CW\*(C`offset\*(C'\fR should be higher than \f(CW0\fR and should have -at most a similar magnitude as the current time (say, within a factor of -ten). Typical values for offset are, in fact, \f(CW0\fR or something between -\&\f(CW0\fR and \f(CW\*(C`interval\*(C'\fR, which is also the recommended range. -.Sp -Note also that there is an upper limit to how often a timer can fire (\s-1CPU\s0 -speed for example), so if \f(CW\*(C`interval\*(C'\fR is very small then timing stability -will of course deteriorate. Libev itself tries to be exact to be about one -millisecond (if the \s-1OS\s0 supports it and the machine is fast enough). -.IP "\(bu" 4 -manual reschedule mode (offset ignored, interval ignored, reschedule_cb = callback) -.Sp -In this mode the values for \f(CW\*(C`interval\*(C'\fR and \f(CW\*(C`offset\*(C'\fR are both being -ignored. 
Instead, each time the periodic watcher gets scheduled, the -reschedule callback will be called with the watcher as first, and the -current time as second argument. -.Sp -\&\s-1NOTE:\s0 \fIThis callback \s-1MUST\s0 \s-1NOT\s0 stop or destroy any periodic watcher, ever, -or make \s-1ANY\s0 other event loop modifications whatsoever, unless explicitly -allowed by documentation here\fR. -.Sp -If you need to stop it, return \f(CW\*(C`now + 1e30\*(C'\fR (or so, fudge fudge) and stop -it afterwards (e.g. by starting an \f(CW\*(C`ev_prepare\*(C'\fR watcher, which is the -only event loop modification you are allowed to do). -.Sp -The callback prototype is \f(CW\*(C`ev_tstamp (*reschedule_cb)(ev_periodic -*w, ev_tstamp now)\*(C'\fR, e.g.: -.Sp -.Vb 5 -\& static ev_tstamp -\& my_rescheduler (ev_periodic *w, ev_tstamp now) -\& { -\& return now + 60.; -\& } -.Ve -.Sp -It must return the next time to trigger, based on the passed time value -(that is, the lowest time value larger than to the second argument). It -will usually be called just before the callback will be triggered, but -might be called at other times, too. -.Sp -\&\s-1NOTE:\s0 \fIThis callback must always return a time that is higher than or -equal to the passed \f(CI\*(C`now\*(C'\fI value\fR. -.Sp -This can be used to create very complex timers, such as a timer that -triggers on \*(L"next midnight, local time\*(R". To do this, you would calculate the -next midnight after \f(CW\*(C`now\*(C'\fR and return the timestamp value for this. How -you do this is, again, up to you (but it is not trivial, which is the main -reason I omitted it as an example). -.RE -.RS 4 -.RE -.IP "ev_periodic_again (loop, ev_periodic *)" 4 -.IX Item "ev_periodic_again (loop, ev_periodic *)" -Simply stops and restarts the periodic watcher again. This is only useful -when you changed some parameters or the reschedule callback would return -a different time than the last time it was called (e.g. 
in a crond like -program when the crontabs have changed). -.IP "ev_tstamp ev_periodic_at (ev_periodic *)" 4 -.IX Item "ev_tstamp ev_periodic_at (ev_periodic *)" -When active, returns the absolute time that the watcher is supposed -to trigger next. This is not the same as the \f(CW\*(C`offset\*(C'\fR argument to -\&\f(CW\*(C`ev_periodic_set\*(C'\fR, but indeed works even in interval and manual -rescheduling modes. -.IP "ev_tstamp offset [read\-write]" 4 -.IX Item "ev_tstamp offset [read-write]" -When repeating, this contains the offset value, otherwise this is the -absolute point in time (the \f(CW\*(C`offset\*(C'\fR value passed to \f(CW\*(C`ev_periodic_set\*(C'\fR, -although libev might modify this value for better numerical stability). -.Sp -Can be modified any time, but changes only take effect when the periodic -timer fires or \f(CW\*(C`ev_periodic_again\*(C'\fR is being called. -.IP "ev_tstamp interval [read\-write]" 4 -.IX Item "ev_tstamp interval [read-write]" -The current interval value. Can be modified any time, but changes only -take effect when the periodic timer fires or \f(CW\*(C`ev_periodic_again\*(C'\fR is being -called. -.IP "ev_tstamp (*reschedule_cb)(ev_periodic *w, ev_tstamp now) [read\-write]" 4 -.IX Item "ev_tstamp (*reschedule_cb)(ev_periodic *w, ev_tstamp now) [read-write]" -The current reschedule callback, or \f(CW0\fR, if this functionality is -switched off. Can be changed any time, but changes only take effect when -the periodic timer fires or \f(CW\*(C`ev_periodic_again\*(C'\fR is being called. -.PP -\fIExamples\fR -.IX Subsection "Examples" -.PP -Example: Call a callback every hour, or, more precisely, whenever the -system time is divisible by 3600. The callback invocation times have -potentially a lot of jitter, but good long-term stability. -.PP -.Vb 5 -\& static void -\& clock_cb (struct ev_loop *loop, ev_periodic *w, int revents) -\& { -\& ... 
its now a full hour (UTC, or TAI or whatever your clock follows) -\& } -\& -\& ev_periodic hourly_tick; -\& ev_periodic_init (&hourly_tick, clock_cb, 0., 3600., 0); -\& ev_periodic_start (loop, &hourly_tick); -.Ve -.PP -Example: The same as above, but use a reschedule callback to do it: -.PP -.Vb 1 -\& #include -\& -\& static ev_tstamp -\& my_scheduler_cb (ev_periodic *w, ev_tstamp now) -\& { -\& return now + (3600. \- fmod (now, 3600.)); -\& } -\& -\& ev_periodic_init (&hourly_tick, clock_cb, 0., 0., my_scheduler_cb); -.Ve -.PP -Example: Call a callback every hour, starting now: -.PP -.Vb 4 -\& ev_periodic hourly_tick; -\& ev_periodic_init (&hourly_tick, clock_cb, -\& fmod (ev_now (loop), 3600.), 3600., 0); -\& ev_periodic_start (loop, &hourly_tick); -.Ve -.ie n .SS """ev_signal"" \- signal me when a signal gets signalled!" -.el .SS "\f(CWev_signal\fP \- signal me when a signal gets signalled!" -.IX Subsection "ev_signal - signal me when a signal gets signalled!" -Signal watchers will trigger an event when the process receives a specific -signal one or more times. Even though signals are very asynchronous, libev -will try its best to deliver signals synchronously, i.e. as part of the -normal event processing, like any other event. -.PP -If you want signals to be delivered truly asynchronously, just use -\&\f(CW\*(C`sigaction\*(C'\fR as you would do without libev and forget about sharing -the signal. You can even use \f(CW\*(C`ev_async\*(C'\fR from a signal handler to -synchronously wake up an event loop. -.PP -You can configure as many watchers as you like for the same signal, but -only within the same loop, i.e. you can watch for \f(CW\*(C`SIGINT\*(C'\fR in your -default loop and for \f(CW\*(C`SIGIO\*(C'\fR in another loop, but you cannot watch for -\&\f(CW\*(C`SIGINT\*(C'\fR in both the default loop and another loop at the same time. At -the moment, \f(CW\*(C`SIGCHLD\*(C'\fR is permanently tied to the default loop. 
-.PP -When the first watcher gets started will libev actually register something -with the kernel (thus it coexists with your own signal handlers as long as -you don't register any with libev for the same signal). -.PP -If possible and supported, libev will install its handlers with -\&\f(CW\*(C`SA_RESTART\*(C'\fR (or equivalent) behaviour enabled, so system calls should -not be unduly interrupted. If you have a problem with system calls getting -interrupted by signals you can block all signals in an \f(CW\*(C`ev_check\*(C'\fR watcher -and unblock them in an \f(CW\*(C`ev_prepare\*(C'\fR watcher. -.PP -\fIThe special problem of inheritance over fork/execve/pthread_create\fR -.IX Subsection "The special problem of inheritance over fork/execve/pthread_create" -.PP -Both the signal mask (\f(CW\*(C`sigprocmask\*(C'\fR) and the signal disposition -(\f(CW\*(C`sigaction\*(C'\fR) are unspecified after starting a signal watcher (and after -stopping it again), that is, libev might or might not block the signal, -and might or might not set or restore the installed signal handler (but -see \f(CW\*(C`EVFLAG_NOSIGMASK\*(C'\fR). -.PP -While this does not matter for the signal disposition (libev never -sets signals to \f(CW\*(C`SIG_IGN\*(C'\fR, so handlers will be reset to \f(CW\*(C`SIG_DFL\*(C'\fR on -\&\f(CW\*(C`execve\*(C'\fR), this matters for the signal mask: many programs do not expect -certain signals to be blocked. -.PP -This means that before calling \f(CW\*(C`exec\*(C'\fR (from the child) you should reset -the signal mask to whatever \*(L"default\*(R" you expect (all clear is a good -choice usually). -.PP -The simplest way to ensure that the signal mask is reset in the child is -to install a fork handler with \f(CW\*(C`pthread_atfork\*(C'\fR that resets it. That will -catch fork calls done by libraries (such as the libc) as well. 
-.PP -In current versions of libev, the signal will not be blocked indefinitely -unless you use the \f(CW\*(C`signalfd\*(C'\fR \s-1API\s0 (\f(CW\*(C`EV_SIGNALFD\*(C'\fR). While this reduces -the window of opportunity for problems, it will not go away, as libev -\&\fIhas\fR to modify the signal mask, at least temporarily. -.PP -So I can't stress this enough: \fIIf you do not reset your signal mask when -you expect it to be empty, you have a race condition in your code\fR. This -is not a libev-specific thing, this is true for most event libraries. -.PP -\fIThe special problem of threads signal handling\fR -.IX Subsection "The special problem of threads signal handling" -.PP -\&\s-1POSIX\s0 threads has problematic signal handling semantics, specifically, -a lot of functionality (sigfd, sigwait etc.) only really works if all -threads in a process block signals, which is hard to achieve. -.PP -When you want to use sigwait (or mix libev signal handling with your own -for the same signals), you can tackle this problem by globally blocking -all signals before creating any threads (or creating them with a fully set -sigprocmask) and also specifying the \f(CW\*(C`EVFLAG_NOSIGMASK\*(C'\fR when creating -loops. Then designate one thread as \*(L"signal receiver thread\*(R" which handles -these signals. You can pass on any signals that libev might be interested -in by calling \f(CW\*(C`ev_feed_signal\*(C'\fR. -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_signal_init (ev_signal *, callback, int signum)" 4 -.IX Item "ev_signal_init (ev_signal *, callback, int signum)" -.PD 0 -.IP "ev_signal_set (ev_signal *, int signum)" 4 -.IX Item "ev_signal_set (ev_signal *, int signum)" -.PD -Configures the watcher to trigger on the given signal number (usually one -of the \f(CW\*(C`SIGxxx\*(C'\fR constants). 
-.IP "int signum [read\-only]" 4 -.IX Item "int signum [read-only]" -The signal the watcher watches out for. -.PP -\fIExamples\fR -.IX Subsection "Examples" -.PP -Example: Try to exit cleanly on \s-1SIGINT\s0. -.PP -.Vb 5 -\& static void -\& sigint_cb (struct ev_loop *loop, ev_signal *w, int revents) -\& { -\& ev_break (loop, EVBREAK_ALL); -\& } -\& -\& ev_signal signal_watcher; -\& ev_signal_init (&signal_watcher, sigint_cb, SIGINT); -\& ev_signal_start (loop, &signal_watcher); -.Ve -.ie n .SS """ev_child"" \- watch out for process status changes" -.el .SS "\f(CWev_child\fP \- watch out for process status changes" -.IX Subsection "ev_child - watch out for process status changes" -Child watchers trigger when your process receives a \s-1SIGCHLD\s0 in response to -some child status changes (most typically when a child of yours dies or -exits). It is permissible to install a child watcher \fIafter\fR the child -has been forked (which implies it might have already exited), as long -as the event loop isn't entered (or is continued from a watcher), i.e., -forking and then immediately registering a watcher for the child is fine, -but forking and registering a watcher a few event loop iterations later or -in the next callback invocation is not. -.PP -Only the default event loop is capable of handling signals, and therefore -you can only register child watchers in the default event loop. -.PP -Due to some design glitches inside libev, child watchers will always be -handled at maximum priority (their priority is set to \f(CW\*(C`EV_MAXPRI\*(C'\fR by -libev) -.PP -\fIProcess Interaction\fR -.IX Subsection "Process Interaction" -.PP -Libev grabs \f(CW\*(C`SIGCHLD\*(C'\fR as soon as the default event loop is -initialised. This is necessary to guarantee proper behaviour even if the -first child watcher is started after the child exits. 
The occurrence -of \f(CW\*(C`SIGCHLD\*(C'\fR is recorded asynchronously, but child reaping is done -synchronously as part of the event loop processing. Libev always reaps all -children, even ones not watched. -.PP -\fIOverriding the Built-In Processing\fR -.IX Subsection "Overriding the Built-In Processing" -.PP -Libev offers no special support for overriding the built-in child -processing, but if your application collides with libev's default child -handler, you can override it easily by installing your own handler for -\&\f(CW\*(C`SIGCHLD\*(C'\fR after initialising the default loop, and making sure the -default loop never gets destroyed. You are encouraged, however, to use an -event-based approach to child reaping and thus use libev's support for -that, so other libev users can use \f(CW\*(C`ev_child\*(C'\fR watchers freely. -.PP -\fIStopping the Child Watcher\fR -.IX Subsection "Stopping the Child Watcher" -.PP -Currently, the child watcher never gets stopped, even when the -child terminates, so normally one needs to stop the watcher in the -callback. Future versions of libev might stop the watcher automatically -when a child exit is detected (calling \f(CW\*(C`ev_child_stop\*(C'\fR twice is not a -problem). -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_child_init (ev_child *, callback, int pid, int trace)" 4 -.IX Item "ev_child_init (ev_child *, callback, int pid, int trace)" -.PD 0 -.IP "ev_child_set (ev_child *, int pid, int trace)" 4 -.IX Item "ev_child_set (ev_child *, int pid, int trace)" -.PD -Configures the watcher to wait for status changes of process \f(CW\*(C`pid\*(C'\fR (or -\&\fIany\fR process if \f(CW\*(C`pid\*(C'\fR is specified as \f(CW0\fR). 
The callback can look -at the \f(CW\*(C`rstatus\*(C'\fR member of the \f(CW\*(C`ev_child\*(C'\fR watcher structure to see -the status word (use the macros from \f(CW\*(C`sys/wait.h\*(C'\fR and see your system's -\&\f(CW\*(C`waitpid\*(C'\fR documentation). The \f(CW\*(C`rpid\*(C'\fR member contains the pid of the -process causing the status change. \f(CW\*(C`trace\*(C'\fR must be either \f(CW0\fR (only -activate the watcher when the process terminates) or \f(CW1\fR (additionally -activate the watcher when the process is stopped or continued). -.IP "int pid [read\-only]" 4 -.IX Item "int pid [read-only]" -The process id this watcher watches out for, or \f(CW0\fR, meaning any process id. -.IP "int rpid [read\-write]" 4 -.IX Item "int rpid [read-write]" -The process id of the process for which a status change was detected. -.IP "int rstatus [read\-write]" 4 -.IX Item "int rstatus [read-write]" -The process exit/trace status caused by \f(CW\*(C`rpid\*(C'\fR (see your system's -\&\f(CW\*(C`waitpid\*(C'\fR and \f(CW\*(C`sys/wait.h\*(C'\fR documentation for details). -.PP -\fIExamples\fR -.IX Subsection "Examples" -.PP -Example: \f(CW\*(C`fork()\*(C'\fR a new process and install a child handler to wait for -its completion. -.PP -.Vb 1 -\& ev_child cw; -\& -\& static void -\& child_cb (EV_P_ ev_child *w, int revents) -\& { -\& ev_child_stop (EV_A_ w); -\& printf ("process %d exited with status %x\en", w\->rpid, w\->rstatus); -\& } -\& -\& pid_t pid = fork (); -\& -\& if (pid < 0) -\& // error -\& else if (pid == 0) -\& { -\& // the forked child executes here -\& exit (1); -\& } -\& else -\& { -\& ev_child_init (&cw, child_cb, pid, 0); -\& ev_child_start (EV_DEFAULT_ &cw); -\& } -.Ve -.ie n .SS """ev_stat"" \- did the file attributes just change?" -.el .SS "\f(CWev_stat\fP \- did the file attributes just change?" -.IX Subsection "ev_stat - did the file attributes just change?" -This watches a file system path for attribute changes.
That is, it calls -\&\f(CW\*(C`stat\*(C'\fR on that path in regular intervals (or when the \s-1OS\s0 says it changed) -and sees if it changed compared to the last time, invoking the callback -if it did. Starting the watcher \f(CW\*(C`stat\*(C'\fR's the file, so only changes that -happen after the watcher has been started will be reported. -.PP -The path does not need to exist: changing from \*(L"path exists\*(R" to \*(L"path does -not exist\*(R" is a status change like any other. The condition \*(L"path does not -exist\*(R" (or more correctly \*(L"path cannot be stat'ed\*(R") is signified by the -\&\f(CW\*(C`st_nlink\*(C'\fR field being zero (which is otherwise always forced to be at -least one) and all the other fields of the stat buffer having unspecified -contents. -.PP -The path \fImust not\fR end in a slash or contain special components such as -\&\f(CW\*(C`.\*(C'\fR or \f(CW\*(C`..\*(C'\fR. The path \fIshould\fR be absolute: If it is relative and -your working directory changes, then the behaviour is undefined. -.PP -Since there is no portable change notification interface available, the -portable implementation simply calls \f(CWstat(2)\fR regularly on the path -to see if it changed somehow. You can specify a recommended polling -interval for this case. If you specify a polling interval of \f(CW0\fR (highly -recommended!) then a \fIsuitable, unspecified default\fR value will be used -(which you can expect to be around five seconds, although this might -change dynamically). Libev will also impose a minimum interval which is -currently around \f(CW0.1\fR, but that's usually overkill. -.PP -This watcher type is not meant for massive numbers of stat watchers, -as even with OS-supported change notifications, this can be -resource-intensive. -.PP -At the time of this writing, the only OS-specific interface implemented -is the Linux inotify interface (implementing kqueue support is left as an -exercise for the reader. 
Note, however, that the author sees no way of -implementing \f(CW\*(C`ev_stat\*(C'\fR semantics with kqueue, except as a hint). -.PP -\fI\s-1ABI\s0 Issues (Largefile Support)\fR -.IX Subsection "ABI Issues (Largefile Support)" -.PP -Libev by default (unless the user overrides this) uses the default -compilation environment, which means that on systems with large file -support disabled by default, you get the 32 bit version of the stat -structure. When using the library from programs that change the \s-1ABI\s0 to -use 64 bit file offsets the programs will fail. In that case you have to -compile libev with the same flags to get binary compatibility. This is -obviously the case with any flags that change the \s-1ABI\s0, but the problem is -most noticeably displayed with ev_stat and large file support. -.PP -The solution for this is to lobby your distribution maker to make large -file interfaces available by default (as e.g. FreeBSD does) and not -optional. Libev cannot simply switch on large file support because it has -to exchange stat structures with application programs compiled using the -default compilation environment. -.PP -\fIInotify and Kqueue\fR -.IX Subsection "Inotify and Kqueue" -.PP -When \f(CW\*(C`inotify (7)\*(C'\fR support has been compiled into libev and present at -runtime, it will be used to speed up change detection where possible. The -inotify descriptor will be created lazily when the first \f(CW\*(C`ev_stat\*(C'\fR -watcher is being started. -.PP -Inotify presence does not change the semantics of \f(CW\*(C`ev_stat\*(C'\fR watchers -except that changes might be detected earlier, and in some cases, to avoid -making regular \f(CW\*(C`stat\*(C'\fR calls. Even in the presence of inotify support -there are many cases where libev has to resort to regular \f(CW\*(C`stat\*(C'\fR polling, -but as long as kernel 2.6.25 or newer is used (2.6.24 and older have too -many bugs), the path exists (i.e. 
stat succeeds), and the path resides on -a local filesystem (libev currently assumes only ext2/3, jfs, reiserfs and -xfs are fully working) libev usually gets away without polling. -.PP -There is no support for kqueue, as apparently it cannot be used to -implement this functionality, due to the requirement of having a file -descriptor open on the object at all times, and detecting renames, unlinks -etc. is difficult. -.PP -\fI\f(CI\*(C`stat ()\*(C'\fI is a synchronous operation\fR -.IX Subsection "stat () is a synchronous operation" -.PP -Libev doesn't normally do any kind of I/O itself, and so is not blocking -the process. The exception are \f(CW\*(C`ev_stat\*(C'\fR watchers \- those call \f(CW\*(C`stat -()\*(C'\fR, which is a synchronous operation. -.PP -For local paths, this usually doesn't matter: unless the system is very -busy or the intervals between stat's are large, a stat call will be fast, -as the path data is usually in memory already (except when starting the -watcher). -.PP -For networked file systems, calling \f(CW\*(C`stat ()\*(C'\fR can block an indefinite -time due to network issues, and even under good conditions, a stat call -often takes multiple milliseconds. -.PP -Therefore, it is best to avoid using \f(CW\*(C`ev_stat\*(C'\fR watchers on networked -paths, although this is fully supported by libev. -.PP -\fIThe special problem of stat time resolution\fR -.IX Subsection "The special problem of stat time resolution" -.PP -The \f(CW\*(C`stat ()\*(C'\fR system call only supports full-second resolution portably, -and even on systems where the resolution is higher, most file systems -still only support whole seconds. -.PP -That means that, if the time is the only thing that changes, you can -easily miss updates: on the first update, \f(CW\*(C`ev_stat\*(C'\fR detects a change and -calls your callback, which does something. 
When there is another update -within the same second, \f(CW\*(C`ev_stat\*(C'\fR will be unable to detect it unless the -stat data does change in other ways (e.g. file size). -.PP -The solution to this is to delay acting on a change for slightly more -than a second (or till slightly after the next full second boundary), using -a roughly one-second-delay \f(CW\*(C`ev_timer\*(C'\fR (e.g. \f(CW\*(C`ev_timer_set (w, 0., 1.02); -ev_timer_again (loop, w)\*(C'\fR). -.PP -The \f(CW.02\fR offset is added to work around small timing inconsistencies -of some operating systems (where the second counter of the current time -might be delayed. One such system is the Linux kernel, where a call to -\&\f(CW\*(C`gettimeofday\*(C'\fR might return a timestamp with a full second later than -a subsequent \f(CW\*(C`time\*(C'\fR call \- if the equivalent of \f(CW\*(C`time ()\*(C'\fR is used to -update file times then there will be a small window where the kernel uses -the previous second to update file times but libev might already execute -the timer callback). -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_stat_init (ev_stat *, callback, const char *path, ev_tstamp interval)" 4 -.IX Item "ev_stat_init (ev_stat *, callback, const char *path, ev_tstamp interval)" -.PD 0 -.IP "ev_stat_set (ev_stat *, const char *path, ev_tstamp interval)" 4 -.IX Item "ev_stat_set (ev_stat *, const char *path, ev_tstamp interval)" -.PD -Configures the watcher to wait for status changes of the given -\&\f(CW\*(C`path\*(C'\fR. The \f(CW\*(C`interval\*(C'\fR is a hint on how quickly a change is expected to -be detected and should normally be specified as \f(CW0\fR to let libev choose -a suitable value. The memory pointed to by \f(CW\*(C`path\*(C'\fR must point to the same -path for as long as the watcher is active.
-.Sp -The callback will receive an \f(CW\*(C`EV_STAT\*(C'\fR event when a change was detected, -relative to the attributes at the time the watcher was started (or the -last change was detected). -.IP "ev_stat_stat (loop, ev_stat *)" 4 -.IX Item "ev_stat_stat (loop, ev_stat *)" -Updates the stat buffer immediately with new values. If you change the -watched path in your callback, you could call this function to avoid -detecting this change (while introducing a race condition if you are not -the only one changing the path). Can also be useful simply to find out the -new values. -.IP "ev_statdata attr [read\-only]" 4 -.IX Item "ev_statdata attr [read-only]" -The most-recently detected attributes of the file. Although the type is -\&\f(CW\*(C`ev_statdata\*(C'\fR, this is usually the (or one of the) \f(CW\*(C`struct stat\*(C'\fR types -suitable for your system, but you can only rely on the POSIX-standardised -members to be present. If the \f(CW\*(C`st_nlink\*(C'\fR member is \f(CW0\fR, then there was -some error while \f(CW\*(C`stat\*(C'\fRing the file. -.IP "ev_statdata prev [read\-only]" 4 -.IX Item "ev_statdata prev [read-only]" -The previous attributes of the file. The callback gets invoked whenever -\&\f(CW\*(C`prev\*(C'\fR != \f(CW\*(C`attr\*(C'\fR, or, more precisely, one or more of these members -differ: \f(CW\*(C`st_dev\*(C'\fR, \f(CW\*(C`st_ino\*(C'\fR, \f(CW\*(C`st_mode\*(C'\fR, \f(CW\*(C`st_nlink\*(C'\fR, \f(CW\*(C`st_uid\*(C'\fR, -\&\f(CW\*(C`st_gid\*(C'\fR, \f(CW\*(C`st_rdev\*(C'\fR, \f(CW\*(C`st_size\*(C'\fR, \f(CW\*(C`st_atime\*(C'\fR, \f(CW\*(C`st_mtime\*(C'\fR, \f(CW\*(C`st_ctime\*(C'\fR. -.IP "ev_tstamp interval [read\-only]" 4 -.IX Item "ev_tstamp interval [read-only]" -The specified interval. -.IP "const char *path [read\-only]" 4 -.IX Item "const char *path [read-only]" -The file system path that is being watched. -.PP -\fIExamples\fR -.IX Subsection "Examples" -.PP -Example: Watch \f(CW\*(C`/etc/passwd\*(C'\fR for attribute changes. 
-.PP -.Vb 10 -\& static void -\& passwd_cb (struct ev_loop *loop, ev_stat *w, int revents) -\& { -\& /* /etc/passwd changed in some way */ -\& if (w\->attr.st_nlink) -\& { -\& printf ("passwd current size %ld\en", (long)w\->attr.st_size); -\& printf ("passwd current atime %ld\en", (long)w\->attr.st_atime); -\& printf ("passwd current mtime %ld\en", (long)w\->attr.st_mtime); -\& } -\& else -\& /* you shalt not abuse printf for puts */ -\& puts ("wow, /etc/passwd is not there, expect problems. " -\& "if this is windows, they already arrived\en"); -\& } -\& -\& ... -\& ev_stat passwd; -\& -\& ev_stat_init (&passwd, passwd_cb, "/etc/passwd", 0.); -\& ev_stat_start (loop, &passwd); -.Ve -.PP -Example: Like above, but additionally use a one-second delay so we do not -miss updates (however, frequent updates will delay processing, too, so -one might do the work both on \f(CW\*(C`ev_stat\*(C'\fR callback invocation \fIand\fR on -\&\f(CW\*(C`ev_timer\*(C'\fR callback invocation). -.PP -.Vb 2 -\& static ev_stat passwd; -\& static ev_timer timer; -\& -\& static void -\& timer_cb (EV_P_ ev_timer *w, int revents) -\& { -\& ev_timer_stop (EV_A_ w); -\& -\& /* now it\*(Aqs one second after the most recent passwd change */ -\& } -\& -\& static void -\& stat_cb (EV_P_ ev_stat *w, int revents) -\& { -\& /* reset the one\-second timer */ -\& ev_timer_again (EV_A_ &timer); -\& } -\& -\& ... -\& ev_stat_init (&passwd, stat_cb, "/etc/passwd", 0.); -\& ev_stat_start (loop, &passwd); -\& ev_timer_init (&timer, timer_cb, 0., 1.02); -.Ve -.ie n .SS """ev_idle"" \- when you've got nothing better to do..." -.el .SS "\f(CWev_idle\fP \- when you've got nothing better to do..." -.IX Subsection "ev_idle - when you've got nothing better to do..." -Idle watchers trigger events when no other events of the same or higher -priority are pending (prepare, check and other idle watchers do not count -as receiving \*(L"events\*(R").
-.PP -That is, as long as your process is busy handling sockets or timeouts -(or even signals, imagine) of the same or higher priority it will not be -triggered. But when your process is idle (or only lower-priority watchers -are pending), the idle watchers are being called once per event loop -iteration \- until stopped, that is, or your process receives more events -and becomes busy again with higher priority stuff. -.PP -The most noteworthy effect is that as long as any idle watchers are -active, the process will not block when waiting for new events. -.PP -Apart from keeping your process non-blocking (which is a useful -effect on its own sometimes), idle watchers are a good place to do -\&\*(L"pseudo-background processing\*(R", or delay processing stuff to after the -event loop has handled all outstanding events. -.PP -\fIAbusing an \f(CI\*(C`ev_idle\*(C'\fI watcher for its side-effect\fR -.IX Subsection "Abusing an ev_idle watcher for its side-effect" -.PP -As long as there is at least one active idle watcher, libev will never -sleep unnecessarily. Or in other words, it will loop as fast as possible. -For this to work, the idle watcher doesn't need to be invoked at all \- the -lowest priority will do. -.PP -This mode of operation can be useful together with an \f(CW\*(C`ev_check\*(C'\fR watcher, -to do something on each event loop iteration \- for example to balance load -between different connections. -.PP -See \*(L"Abusing an ev_check watcher for its side-effect\*(R" for a longer -example. -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_idle_init (ev_idle *, callback)" 4 -.IX Item "ev_idle_init (ev_idle *, callback)" -Initialises and configures the idle watcher \- it has no parameters of any -kind. There is a \f(CW\*(C`ev_idle_set\*(C'\fR macro, but using it is utterly pointless, -believe me. 
-.PP -\fIExamples\fR -.IX Subsection "Examples" -.PP -Example: Dynamically allocate an \f(CW\*(C`ev_idle\*(C'\fR watcher, start it, and in the -callback, free it. Also, use no error checking, as usual. -.PP -.Vb 5 -\& static void -\& idle_cb (struct ev_loop *loop, ev_idle *w, int revents) -\& { -\& // stop the watcher -\& ev_idle_stop (loop, w); -\& -\& // now we can free it -\& free (w); -\& -\& // now do something you wanted to do when the program has -\& // no longer anything immediate to do. -\& } -\& -\& ev_idle *idle_watcher = malloc (sizeof (ev_idle)); -\& ev_idle_init (idle_watcher, idle_cb); -\& ev_idle_start (loop, idle_watcher); -.Ve -.ie n .SS """ev_prepare"" and ""ev_check"" \- customise your event loop!" -.el .SS "\f(CWev_prepare\fP and \f(CWev_check\fP \- customise your event loop!" -.IX Subsection "ev_prepare and ev_check - customise your event loop!" -Prepare and check watchers are often (but not always) used in pairs: -prepare watchers get invoked before the process blocks and check watchers -afterwards. -.PP -You \fImust not\fR call \f(CW\*(C`ev_run\*(C'\fR or similar functions that enter -the current event loop from either \f(CW\*(C`ev_prepare\*(C'\fR or \f(CW\*(C`ev_check\*(C'\fR -watchers. Other loops than the current one are fine, however. The -rationale behind this is that you do not need to check for recursion in -those watchers, i.e. the sequence will always be \f(CW\*(C`ev_prepare\*(C'\fR, blocking, -\&\f(CW\*(C`ev_check\*(C'\fR so if you have one watcher of each kind they will always be -called in pairs bracketing the blocking call. -.PP -Their main purpose is to integrate other event mechanisms into libev and -their use is somewhat advanced. They could be used, for example, to track -variable changes, implement your own watchers, integrate net-snmp or a -coroutine library and lots more. 
They are also occasionally useful if -you cache some data and want to flush it before blocking (for example, -in X programs you might want to do an \f(CW\*(C`XFlush ()\*(C'\fR in an \f(CW\*(C`ev_prepare\*(C'\fR -watcher). -.PP -This is done by examining in each prepare call which file descriptors -need to be watched by the other library, registering \f(CW\*(C`ev_io\*(C'\fR watchers -for them and starting an \f(CW\*(C`ev_timer\*(C'\fR watcher for any timeouts (many -libraries provide exactly this functionality). Then, in the check watcher, -you check for any events that occurred (by checking the pending status -of all watchers and stopping them) and call back into the library. The -I/O and timer callbacks will never actually be called (but must be valid -nevertheless, because you never know, you know?). -.PP -As another example, the Perl Coro module uses these hooks to integrate -coroutines into libev programs, by yielding to other active coroutines -during each prepare and only letting the process block if no coroutines -are ready to run (it's actually more complicated: it only runs coroutines -with priority higher than or equal to the event loop and one coroutine -of lower priority, but only once, using idle watchers to keep the event -loop from blocking if lower-priority coroutines are active, thus mapping -low-priority coroutines to idle/background tasks). -.PP -When used for this purpose, it is recommended to give \f(CW\*(C`ev_check\*(C'\fR watchers -highest (\f(CW\*(C`EV_MAXPRI\*(C'\fR) priority, to ensure that they are being run before -any other watchers after the poll (this doesn't matter for \f(CW\*(C`ev_prepare\*(C'\fR -watchers). -.PP -Also, \f(CW\*(C`ev_check\*(C'\fR watchers (and \f(CW\*(C`ev_prepare\*(C'\fR watchers, too) should not -activate (\*(L"feed\*(R") events into libev. While libev fully supports this, they -might get executed before other \f(CW\*(C`ev_check\*(C'\fR watchers did their job. 
As -\&\f(CW\*(C`ev_check\*(C'\fR watchers are often used to embed other (non-libev) event -loops those other event loops might be in an unusable state until their -\&\f(CW\*(C`ev_check\*(C'\fR watcher ran (always remind yourself to coexist peacefully with -others). -.PP -\fIAbusing an \f(CI\*(C`ev_check\*(C'\fI watcher for its side-effect\fR -.IX Subsection "Abusing an ev_check watcher for its side-effect" -.PP -\&\f(CW\*(C`ev_check\*(C'\fR (and less often also \f(CW\*(C`ev_prepare\*(C'\fR) watchers can also be -useful because they are called once per event loop iteration. For -example, if you want to handle a large number of connections fairly, you -normally only do a bit of work for each active connection, and if there -is more work to do, you wait for the next event loop iteration, so other -connections have a chance of making progress. -.PP -Using an \f(CW\*(C`ev_check\*(C'\fR watcher is almost enough: it will be called on the -next event loop iteration. However, that isn't as soon as possible \- -without external events, your \f(CW\*(C`ev_check\*(C'\fR watcher will not be invoked. -.PP -This is where \f(CW\*(C`ev_idle\*(C'\fR watchers come in handy \- all you need is a -single global idle watcher that is active as long as you have one active -\&\f(CW\*(C`ev_check\*(C'\fR watcher. The \f(CW\*(C`ev_idle\*(C'\fR watcher makes sure the event loop -will not sleep, and the \f(CW\*(C`ev_check\*(C'\fR watcher makes sure a callback gets -invoked. Neither watcher alone can do that. -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_prepare_init (ev_prepare *, callback)" 4 -.IX Item "ev_prepare_init (ev_prepare *, callback)" -.PD 0 -.IP "ev_check_init (ev_check *, callback)" 4 -.IX Item "ev_check_init (ev_check *, callback)" -.PD -Initialises and configures the prepare or check watcher \- they have no -parameters of any kind. 
There are \f(CW\*(C`ev_prepare_set\*(C'\fR and \f(CW\*(C`ev_check_set\*(C'\fR -macros, but using them is utterly, utterly, utterly and completely -pointless. -.PP -\fIExamples\fR -.IX Subsection "Examples" -.PP -There are a number of principal ways to embed other event loops or modules -into libev. Here are some ideas on how to include libadns into libev -(there is a Perl module named \f(CW\*(C`EV::ADNS\*(C'\fR that does this, which you could -use as a working example. Another Perl module named \f(CW\*(C`EV::Glib\*(C'\fR embeds a -Glib main context into libev, and finally, \f(CW\*(C`Glib::EV\*(C'\fR embeds \s-1EV\s0 into the -Glib event loop). -.PP -Method 1: Add \s-1IO\s0 watchers and a timeout watcher in a prepare handler, -and in a check watcher, destroy them and call into libadns. What follows -is pseudo-code only of course. This requires you to either use a low -priority for the check watcher or use \f(CW\*(C`ev_clear_pending\*(C'\fR explicitly, as -the callbacks for the IO/timeout watchers might not have been called yet. -.PP -.Vb 2 -\& static ev_io iow [nfd]; -\& static ev_timer tw; -\& -\& static void -\& io_cb (struct ev_loop *loop, ev_io *w, int revents) -\& { -\& } -\& -\& // create io watchers for each fd and a timer before blocking -\& static void -\& adns_prepare_cb (struct ev_loop *loop, ev_prepare *w, int revents) -\& { -\& int timeout = 3600000; -\& struct pollfd fds [nfd]; -\& // actual code will need to loop here and realloc etc. -\& adns_beforepoll (ads, fds, &nfd, &timeout, timeval_from (ev_time ())); -\& -\& /* the callback is illegal, but won\*(Aqt be called as we stop during check */ -\& ev_timer_init (&tw, 0, timeout * 1e\-3, 0.); -\& ev_timer_start (loop, &tw); -\& -\& // create one ev_io per pollfd -\& for (int i = 0; i < nfd; ++i) -\& { -\& ev_io_init (iow + i, io_cb, fds [i].fd, -\& ((fds [i].events & POLLIN ? EV_READ : 0) -\& | (fds [i].events & POLLOUT ? 
EV_WRITE : 0))); -\& -\& fds [i].revents = 0; -\& ev_io_start (loop, iow + i); -\& } -\& } -\& -\& // stop all watchers after blocking -\& static void -\& adns_check_cb (struct ev_loop *loop, ev_check *w, int revents) -\& { -\& ev_timer_stop (loop, &tw); -\& -\& for (int i = 0; i < nfd; ++i) -\& { -\& // set the relevant poll flags -\& // could also call adns_processreadable etc. here -\& struct pollfd *fd = fds + i; -\& int revents = ev_clear_pending (iow + i); -\& if (revents & EV_READ ) fd\->revents |= fd\->events & POLLIN; -\& if (revents & EV_WRITE) fd\->revents |= fd\->events & POLLOUT; -\& -\& // now stop the watcher -\& ev_io_stop (loop, iow + i); -\& } -\& -\& adns_afterpoll (adns, fds, nfd, timeval_from (ev_now (loop)); -\& } -.Ve -.PP -Method 2: This would be just like method 1, but you run \f(CW\*(C`adns_afterpoll\*(C'\fR -in the prepare watcher and would dispose of the check watcher. -.PP -Method 3: If the module to be embedded supports explicit event -notification (libadns does), you can also make use of the actual watcher -callbacks, and only destroy/create the watchers in the prepare watcher. -.PP -.Vb 5 -\& static void -\& timer_cb (EV_P_ ev_timer *w, int revents) -\& { -\& adns_state ads = (adns_state)w\->data; -\& update_now (EV_A); -\& -\& adns_processtimeouts (ads, &tv_now); -\& } -\& -\& static void -\& io_cb (EV_P_ ev_io *w, int revents) -\& { -\& adns_state ads = (adns_state)w\->data; -\& update_now (EV_A); -\& -\& if (revents & EV_READ ) adns_processreadable (ads, w\->fd, &tv_now); -\& if (revents & EV_WRITE) adns_processwriteable (ads, w\->fd, &tv_now); -\& } -\& -\& // do not ever call adns_afterpoll -.Ve -.PP -Method 4: Do not use a prepare or check watcher because the module you -want to embed is not flexible enough to support it. Instead, you can -override their poll function. The drawback with this solution is that the -main loop is now no longer controllable by \s-1EV\s0. 
The \f(CW\*(C`Glib::EV\*(C'\fR module uses -this approach, effectively embedding \s-1EV\s0 as a client into the horrible -libglib event loop. -.PP -.Vb 4 -\& static gint -\& event_poll_func (GPollFD *fds, guint nfds, gint timeout) -\& { -\& int got_events = 0; -\& -\& for (n = 0; n < nfds; ++n) -\& // create/start io watcher that sets the relevant bits in fds[n] and increment got_events -\& -\& if (timeout >= 0) -\& // create/start timer -\& -\& // poll -\& ev_run (EV_A_ 0); -\& -\& // stop timer again -\& if (timeout >= 0) -\& ev_timer_stop (EV_A_ &to); -\& -\& // stop io watchers again \- their callbacks should have set -\& for (n = 0; n < nfds; ++n) -\& ev_io_stop (EV_A_ iow [n]); -\& -\& return got_events; -\& } -.Ve -.ie n .SS """ev_embed"" \- when one backend isn't enough..." -.el .SS "\f(CWev_embed\fP \- when one backend isn't enough..." -.IX Subsection "ev_embed - when one backend isn't enough..." -This is a rather advanced watcher type that lets you embed one event loop -into another (currently only \f(CW\*(C`ev_io\*(C'\fR events are supported in the embedded -loop, other types of watchers might be handled in a delayed or incorrect -fashion and must not be used). -.PP -There are primarily two reasons you would want that: work around bugs and -prioritise I/O. -.PP -As an example for a bug workaround, the kqueue backend might only support -sockets on some platform, so it is unusable as generic backend, but you -still want to make use of it because you have many sockets and it scales -so nicely. In this case, you would create a kqueue-based loop and embed -it into your default loop (which might use e.g. poll). 
Overall operation -will be a bit slower because first libev has to call \f(CW\*(C`poll\*(C'\fR and then -\&\f(CW\*(C`kevent\*(C'\fR, but at least you can use both mechanisms for what they are -best: \f(CW\*(C`kqueue\*(C'\fR for scalable sockets and \f(CW\*(C`poll\*(C'\fR if you want it to work :) -.PP -As for prioritising I/O: under rare circumstances you have the case where -some fds have to be watched and handled very quickly (with low latency), -and even priorities and idle watchers might have too much overhead. In -this case you would put all the high priority stuff in one loop and all -the rest in a second one, and embed the second one in the first. -.PP -As long as the watcher is active, the callback will be invoked every -time there might be events pending in the embedded loop. The callback -must then call \f(CW\*(C`ev_embed_sweep (mainloop, watcher)\*(C'\fR to make a single -sweep and invoke their callbacks (the callback doesn't need to invoke the -\&\f(CW\*(C`ev_embed_sweep\*(C'\fR function directly, it could also start an idle watcher -to give the embedded loop strictly lower priority for example). -.PP -You can also set the callback to \f(CW0\fR, in which case the embed watcher -will automatically execute the embedded loop sweep whenever necessary. -.PP -Fork detection will be handled transparently while the \f(CW\*(C`ev_embed\*(C'\fR watcher -is active, i.e., the embedded loop will automatically be forked when the -embedding loop forks. In other cases, the user is responsible for calling -\&\f(CW\*(C`ev_loop_fork\*(C'\fR on the embedded loop. -.PP -Unfortunately, not all backends are embeddable: only the ones returned by -\&\f(CW\*(C`ev_embeddable_backends\*(C'\fR are, which, unfortunately, does not include any -portable one. -.PP -So when you want to use this feature you will always have to be prepared -that you cannot get an embeddable loop. 
The recommended way to get around -this is to have a separate variable for your embeddable loop, try to -create it, and if that fails, use the normal loop for everything. -.PP -\fI\f(CI\*(C`ev_embed\*(C'\fI and fork\fR -.IX Subsection "ev_embed and fork" -.PP -While the \f(CW\*(C`ev_embed\*(C'\fR watcher is running, forks in the embedding loop will -automatically be applied to the embedded loop as well, so no special -fork handling is required in that case. When the watcher is not running, -however, it is still the task of the libev user to call \f(CW\*(C`ev_loop_fork ()\*(C'\fR -as applicable. -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_embed_init (ev_embed *, callback, struct ev_loop *embedded_loop)" 4 -.IX Item "ev_embed_init (ev_embed *, callback, struct ev_loop *embedded_loop)" -.PD 0 -.IP "ev_embed_set (ev_embed *, struct ev_loop *embedded_loop)" 4 -.IX Item "ev_embed_set (ev_embed *, struct ev_loop *embedded_loop)" -.PD -Configures the watcher to embed the given loop, which must be -embeddable. If the callback is \f(CW0\fR, then \f(CW\*(C`ev_embed_sweep\*(C'\fR will be -invoked automatically, otherwise it is the responsibility of the callback -to invoke it (it will continue to be called until the sweep has been done, -if you do not want that, you need to temporarily stop the embed watcher). -.IP "ev_embed_sweep (loop, ev_embed *)" 4 -.IX Item "ev_embed_sweep (loop, ev_embed *)" -Make a single, non-blocking sweep over the embedded loop. This works -similarly to \f(CW\*(C`ev_run (embedded_loop, EVRUN_NOWAIT)\*(C'\fR, but in the most -appropriate way for embedded loops. -.IP "struct ev_loop *other [read\-only]" 4 -.IX Item "struct ev_loop *other [read-only]" -The embedded event loop. -.PP -\fIExamples\fR -.IX Subsection "Examples" -.PP -Example: Try to get an embeddable event loop and embed it into the default -event loop. If that is not possible, use the default loop. 
The default -loop is stored in \f(CW\*(C`loop_hi\*(C'\fR, while the embeddable loop is stored in -\&\f(CW\*(C`loop_lo\*(C'\fR (which is \f(CW\*(C`loop_hi\*(C'\fR in the case no embeddable loop can be -used). -.PP -.Vb 3 -\& struct ev_loop *loop_hi = ev_default_init (0); -\& struct ev_loop *loop_lo = 0; -\& ev_embed embed; -\& -\& // see if there is a chance of getting one that works -\& // (remember that a flags value of 0 means autodetection) -\& loop_lo = ev_embeddable_backends () & ev_recommended_backends () -\& ? ev_loop_new (ev_embeddable_backends () & ev_recommended_backends ()) -\& : 0; -\& -\& // if we got one, then embed it, otherwise default to loop_hi -\& if (loop_lo) -\& { -\& ev_embed_init (&embed, 0, loop_lo); -\& ev_embed_start (loop_hi, &embed); -\& } -\& else -\& loop_lo = loop_hi; -.Ve -.PP -Example: Check if kqueue is available but not recommended and create -a kqueue backend for use with sockets (which usually work with any -kqueue implementation). Store the kqueue/socket\-only event loop in -\&\f(CW\*(C`loop_socket\*(C'\fR. (One might optionally use \f(CW\*(C`EVFLAG_NOENV\*(C'\fR, too). 
-.PP -.Vb 3 -\& struct ev_loop *loop = ev_default_init (0); -\& struct ev_loop *loop_socket = 0; -\& ev_embed embed; -\& -\& if (ev_supported_backends () & ~ev_recommended_backends () & EVBACKEND_KQUEUE) -\& if ((loop_socket = ev_loop_new (EVBACKEND_KQUEUE))) -\& { -\& ev_embed_init (&embed, 0, loop_socket); -\& ev_embed_start (loop, &embed); -\& } -\& -\& if (!loop_socket) -\& loop_socket = loop; -\& -\& // now use loop_socket for all sockets, and loop for everything else -.Ve -.ie n .SS """ev_fork"" \- the audacity to resume the event loop after a fork" -.el .SS "\f(CWev_fork\fP \- the audacity to resume the event loop after a fork" -.IX Subsection "ev_fork - the audacity to resume the event loop after a fork" -Fork watchers are called when a \f(CW\*(C`fork ()\*(C'\fR was detected (usually because -whoever is a good citizen cared to tell libev about it by calling -\&\f(CW\*(C`ev_loop_fork\*(C'\fR). The invocation is done before the event loop blocks next -and before \f(CW\*(C`ev_check\*(C'\fR watchers are being called, and only in the child -after the fork. If whoever good citizen calling \f(CW\*(C`ev_default_fork\*(C'\fR cheats -and calls it in the wrong process, the fork handlers will be invoked, too, -of course. -.PP -\fIThe special problem of life after fork \- how is it possible?\fR -.IX Subsection "The special problem of life after fork - how is it possible?" -.PP -Most uses of \f(CW\*(C`fork()\*(C'\fR consist of forking, then some simple calls to set -up/change the process environment, followed by a call to \f(CW\*(C`exec()\*(C'\fR. This -sequence should be handled by libev without any problems. -.PP -This changes when the application actually wants to do event handling -in the child, or in both parent and child, in effect \*(L"continuing\*(R" after the -fork. 
-.PP -The default mode of operation (for libev, with application help to detect -forks) is to duplicate all the state in the child, as would be expected -when \fIeither\fR the parent \fIor\fR the child process continues. -.PP -When both processes want to continue using libev, then this is usually the -wrong result. In that case, usually one process (typically the parent) is -supposed to continue with all watchers in place as before, while the other -process typically wants to start fresh, i.e. without any active watchers. -.PP -The cleanest and most efficient way to achieve that with libev is to -simply create a new event loop, which of course will be \*(L"empty\*(R", and -use that for new watchers. This has the advantage of not touching more -memory than necessary, and thus avoiding the copy-on-write, and the -disadvantage of having to use multiple event loops (which do not support -signal watchers). -.PP -When this is not possible, or you want to use the default loop for -other reasons, then in the process that wants to start \*(L"fresh\*(R", call -\&\f(CW\*(C`ev_loop_destroy (EV_DEFAULT)\*(C'\fR followed by \f(CW\*(C`ev_default_loop (...)\*(C'\fR. -Destroying the default loop will \*(L"orphan\*(R" (not stop) all registered -watchers, so you have to be careful not to execute code that modifies -those watchers. Note also that in that case, you have to re-register any -signal watchers. -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_fork_init (ev_fork *, callback)" 4 -.IX Item "ev_fork_init (ev_fork *, callback)" -Initialises and configures the fork watcher \- it has no parameters of any -kind. There is a \f(CW\*(C`ev_fork_set\*(C'\fR macro, but using it is utterly pointless, -really. 
-.ie n .SS """ev_cleanup"" \- even the best things end" -.el .SS "\f(CWev_cleanup\fP \- even the best things end" -.IX Subsection "ev_cleanup - even the best things end" -Cleanup watchers are called just before the event loop is being destroyed -by a call to \f(CW\*(C`ev_loop_destroy\*(C'\fR. -.PP -While there is no guarantee that the event loop gets destroyed, cleanup -watchers provide a convenient method to install cleanup hooks for your -program, worker threads and so on \- you just have to make sure to destroy the -loop when you want them to be invoked. -.PP -Cleanup watchers are invoked in the same way as any other watcher. Unlike -all other watchers, they do not keep a reference to the event loop (which -makes a lot of sense if you think about it). Like all other watchers, you -can call libev functions in the callback, except \f(CW\*(C`ev_cleanup_start\*(C'\fR. -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_cleanup_init (ev_cleanup *, callback)" 4 -.IX Item "ev_cleanup_init (ev_cleanup *, callback)" -Initialises and configures the cleanup watcher \- it has no parameters of -any kind. There is a \f(CW\*(C`ev_cleanup_set\*(C'\fR macro, but using it is utterly -pointless, I assure you. -.PP -Example: Register an atexit handler to destroy the default loop, so any -cleanup functions are called. -.PP -.Vb 5 -\& static void -\& program_exits (void) -\& { -\& ev_loop_destroy (EV_DEFAULT_UC); -\& } -\& -\& ... -\& atexit (program_exits); -.Ve -.ie n .SS """ev_async"" \- how to wake up an event loop" -.el .SS "\f(CWev_async\fP \- how to wake up an event loop" -.IX Subsection "ev_async - how to wake up an event loop" -In general, you cannot use an \f(CW\*(C`ev_loop\*(C'\fR from multiple threads or other -asynchronous sources such as signal handlers (as opposed to multiple event -loops \- those are of course safe to use in different threads). 
-.PP -Sometimes, however, you need to wake up an event loop you do not control, -for example because it belongs to another thread. This is what \f(CW\*(C`ev_async\*(C'\fR -watchers do: as long as the \f(CW\*(C`ev_async\*(C'\fR watcher is active, you can signal -it by calling \f(CW\*(C`ev_async_send\*(C'\fR, which is thread\- and signal safe. -.PP -This functionality is very similar to \f(CW\*(C`ev_signal\*(C'\fR watchers, as signals, -too, are asynchronous in nature, and signals, too, will be compressed -(i.e. the number of callback invocations may be less than the number of -\&\f(CW\*(C`ev_async_send\*(C'\fR calls). In fact, you could use signal watchers as a kind -of \*(L"global async watchers\*(R" by using a watcher on an otherwise unused -signal, and \f(CW\*(C`ev_feed_signal\*(C'\fR to signal this watcher from another thread, -even without knowing which loop owns the signal. -.PP -\fIQueueing\fR -.IX Subsection "Queueing" -.PP -\&\f(CW\*(C`ev_async\*(C'\fR does not support queueing of data in any way. The reason -is that the author does not know of a simple (or any) algorithm for a -multiple-writer-single-reader queue that works in all cases and doesn't -need elaborate support such as pthreads or unportable memory access -semantics. -.PP -That means that if you want to queue data, you have to provide your own -queue. But at least I can tell you how to implement locking around your -queue: -.IP "queueing from a signal handler context" 4 -.IX Item "queueing from a signal handler context" -To implement race-free queueing, you simply add to the queue in the signal -handler but you block the signal handler in the watcher callback. Here is -an example that does that for some fictitious \s-1SIGUSR1\s0 handler: -.Sp -.Vb 1 -\& static ev_async mysig; -\& -\& static void -\& sigusr1_handler (void) -\& { -\& sometype data; -\& -\& // no locking etc. 
-\& queue_put (data); -\& ev_async_send (EV_DEFAULT_ &mysig); -\& } -\& -\& static void -\& mysig_cb (EV_P_ ev_async *w, int revents) -\& { -\& sometype data; -\& sigset_t block, prev; -\& -\& sigemptyset (&block); -\& sigaddset (&block, SIGUSR1); -\& sigprocmask (SIG_BLOCK, &block, &prev); -\& -\& while (queue_get (&data)) -\& process (data); -\& -\& if (sigismember (&prev, SIGUSR1)) -\& sigprocmask (SIG_UNBLOCK, &block, 0); -\& } -.Ve -.Sp -(Note: pthreads in theory requires you to use \f(CW\*(C`pthread_sigmask\*(C'\fR -instead of \f(CW\*(C`sigprocmask\*(C'\fR when you use threads, but libev doesn't do it -either...). -.IP "queueing from a thread context" 4 -.IX Item "queueing from a thread context" -The strategy for threads is different, as you cannot (easily) block -threads but you can easily preempt them, so to queue safely you need to -employ a traditional mutex lock, such as in this pthread example: -.Sp -.Vb 2 -\& static ev_async mysig; -\& static pthread_mutex_t mymutex = PTHREAD_MUTEX_INITIALIZER; -\& -\& static void -\& otherthread (void) -\& { -\& // only need to lock the actual queueing operation -\& pthread_mutex_lock (&mymutex); -\& queue_put (data); -\& pthread_mutex_unlock (&mymutex); -\& -\& ev_async_send (EV_DEFAULT_ &mysig); -\& } -\& -\& static void -\& mysig_cb (EV_P_ ev_async *w, int revents) -\& { -\& pthread_mutex_lock (&mymutex); -\& -\& while (queue_get (&data)) -\& process (data); -\& -\& pthread_mutex_unlock (&mymutex); -\& } -.Ve -.PP -\fIWatcher-Specific Functions and Data Members\fR -.IX Subsection "Watcher-Specific Functions and Data Members" -.IP "ev_async_init (ev_async *, callback)" 4 -.IX Item "ev_async_init (ev_async *, callback)" -Initialises and configures the async watcher \- it has no parameters of any -kind. There is a \f(CW\*(C`ev_async_set\*(C'\fR macro, but using it is utterly pointless, -trust me. 
-.IP "ev_async_send (loop, ev_async *)" 4 -.IX Item "ev_async_send (loop, ev_async *)" -Sends/signals/activates the given \f(CW\*(C`ev_async\*(C'\fR watcher, that is, feeds -an \f(CW\*(C`EV_ASYNC\*(C'\fR event on the watcher into the event loop, and instantly -returns. -.Sp -Unlike \f(CW\*(C`ev_feed_event\*(C'\fR, this call is safe to do from other threads, -signal or similar contexts (see the discussion of \f(CW\*(C`EV_ATOMIC_T\*(C'\fR in the -embedding section below on what exactly this means). -.Sp -Note that, as with other watchers in libev, multiple events might get -compressed into a single callback invocation (another way to look at -this is that \f(CW\*(C`ev_async\*(C'\fR watchers are level-triggered: they are set on -\&\f(CW\*(C`ev_async_send\*(C'\fR, reset when the event loop detects that). -.Sp -This call incurs the overhead of at most one extra system call per event -loop iteration, if the event loop is blocked, and no syscall at all if -the event loop (or your program) is processing events. That means that -repeated calls are basically free (there is no need to avoid calls for -performance reasons) and that the overhead becomes smaller (typically -zero) under load. -.IP "bool = ev_async_pending (ev_async *)" 4 -.IX Item "bool = ev_async_pending (ev_async *)" -Returns a non-zero value when \f(CW\*(C`ev_async_send\*(C'\fR has been called on the -watcher but the event has not yet been processed (or even noted) by the -event loop. -.Sp -\&\f(CW\*(C`ev_async_send\*(C'\fR sets a flag in the watcher and wakes up the loop. When -the loop iterates next and checks for the watcher to have become active, -it will reset the flag again. \f(CW\*(C`ev_async_pending\*(C'\fR can be used to very -quickly check whether invoking the loop might be a good idea. 
-.Sp -Note that this does \fInot\fR check whether the watcher itself is pending, -only whether it has been requested to make this watcher pending: there -is a time window between the event loop checking and resetting the async -notification, and the callback being invoked. -.SH "OTHER FUNCTIONS" -.IX Header "OTHER FUNCTIONS" -There are some other functions of possible interest. Described. Here. Now. -.IP "ev_once (loop, int fd, int events, ev_tstamp timeout, callback)" 4 -.IX Item "ev_once (loop, int fd, int events, ev_tstamp timeout, callback)" -This function combines a simple timer and an I/O watcher, calls your -callback on whichever event happens first and automatically stops both -watchers. This is useful if you want to wait for a single event on an fd -or timeout without having to allocate/configure/start/stop/free one or -more watchers yourself. -.Sp -If \f(CW\*(C`fd\*(C'\fR is less than 0, then no I/O watcher will be started and the -\&\f(CW\*(C`events\*(C'\fR argument is being ignored. Otherwise, an \f(CW\*(C`ev_io\*(C'\fR watcher for -the given \f(CW\*(C`fd\*(C'\fR and \f(CW\*(C`events\*(C'\fR set will be created and started. -.Sp -If \f(CW\*(C`timeout\*(C'\fR is less than 0, then no timeout watcher will be -started. Otherwise an \f(CW\*(C`ev_timer\*(C'\fR watcher with after = \f(CW\*(C`timeout\*(C'\fR (and -repeat = 0) will be started. \f(CW0\fR is a valid timeout. -.Sp -The callback has the type \f(CW\*(C`void (*cb)(int revents, void *arg)\*(C'\fR and is -passed an \f(CW\*(C`revents\*(C'\fR set like normal event callbacks (a combination of -\&\f(CW\*(C`EV_ERROR\*(C'\fR, \f(CW\*(C`EV_READ\*(C'\fR, \f(CW\*(C`EV_WRITE\*(C'\fR or \f(CW\*(C`EV_TIMER\*(C'\fR) and the \f(CW\*(C`arg\*(C'\fR -value passed to \f(CW\*(C`ev_once\*(C'\fR. Note that it is possible to receive \fIboth\fR -a timeout and an io event at the same time \- you probably should give io -events precedence. -.Sp -Example: wait up to ten seconds for data to appear on \s-1STDIN_FILENO\s0. 
-.Sp -.Vb 7 -\& static void stdin_ready (int revents, void *arg) -\& { -\& if (revents & EV_READ) -\& /* stdin might have data for us, joy! */; -\& else if (revents & EV_TIMER) -\& /* doh, nothing entered */; -\& } -\& -\& ev_once (STDIN_FILENO, EV_READ, 10., stdin_ready, 0); -.Ve -.IP "ev_feed_fd_event (loop, int fd, int revents)" 4 -.IX Item "ev_feed_fd_event (loop, int fd, int revents)" -Feed an event on the given fd, as if a file descriptor backend detected -the given events. -.IP "ev_feed_signal_event (loop, int signum)" 4 -.IX Item "ev_feed_signal_event (loop, int signum)" -Feed an event as if the given signal occurred. See also \f(CW\*(C`ev_feed_signal\*(C'\fR, -which is async-safe. -.SH "COMMON OR USEFUL IDIOMS (OR BOTH)" -.IX Header "COMMON OR USEFUL IDIOMS (OR BOTH)" -This section explains some common idioms that are not immediately -obvious. Note that examples are sprinkled over the whole manual, and this -section only contains stuff that wouldn't fit anywhere else. -.SS "\s-1ASSOCIATING\s0 \s-1CUSTOM\s0 \s-1DATA\s0 \s-1WITH\s0 A \s-1WATCHER\s0" -.IX Subsection "ASSOCIATING CUSTOM DATA WITH A WATCHER" -Each watcher has, by default, a \f(CW\*(C`void *data\*(C'\fR member that you can read -or modify at any time: libev will completely ignore it. This can be used -to associate arbitrary data with your watcher. If you need more data and -don't want to allocate memory separately and store a pointer to it in that -data member, you can also \*(L"subclass\*(R" the watcher type and provide your own -data: -.PP -.Vb 7 -\& struct my_io -\& { -\& ev_io io; -\& int otherfd; -\& void *somedata; -\& struct whatever *mostinteresting; -\& }; -\& -\& ... -\& struct my_io w; -\& ev_io_init (&w.io, my_cb, fd, EV_READ); -.Ve -.PP -And since your callback will be called with a pointer to the watcher, you -can cast it back to your own type: -.PP -.Vb 5 -\& static void my_cb (struct ev_loop *loop, ev_io *w_, int revents) -\& { -\& struct my_io *w = (struct my_io *)w_; -\& ... 
-\& } -.Ve -.PP -More interesting and less C\-conformant ways of casting your callback -function type instead have been omitted. -.SS "\s-1BUILDING\s0 \s-1YOUR\s0 \s-1OWN\s0 \s-1COMPOSITE\s0 \s-1WATCHERS\s0" -.IX Subsection "BUILDING YOUR OWN COMPOSITE WATCHERS" -Another common scenario is to use some data structure with multiple -embedded watchers, in effect creating your own watcher that combines -multiple libev event sources into one \*(L"super-watcher\*(R": -.PP -.Vb 6 -\& struct my_biggy -\& { -\& int some_data; -\& ev_timer t1; -\& ev_timer t2; -\& } -.Ve -.PP -In this case getting the pointer to \f(CW\*(C`my_biggy\*(C'\fR is a bit more -complicated: Either you store the address of your \f(CW\*(C`my_biggy\*(C'\fR struct in -the \f(CW\*(C`data\*(C'\fR member of the watcher (for woozies or \*(C+ coders), or you need -to use some pointer arithmetic using \f(CW\*(C`offsetof\*(C'\fR inside your watchers (for -real programmers): -.PP -.Vb 1 -\& #include <stddef.h> -\& -\& static void -\& t1_cb (EV_P_ ev_timer *w, int revents) -\& { -\& struct my_biggy *big = (struct my_biggy *) -\& (((char *)w) \- offsetof (struct my_biggy, t1)); -\& } -\& -\& static void -\& t2_cb (EV_P_ ev_timer *w, int revents) -\& { -\& struct my_biggy *big = (struct my_biggy *) -\& (((char *)w) \- offsetof (struct my_biggy, t2)); -\& } -.Ve -.SS "\s-1AVOIDING\s0 \s-1FINISHING\s0 \s-1BEFORE\s0 \s-1RETURNING\s0" -.IX Subsection "AVOIDING FINISHING BEFORE RETURNING" -Often you have structures like this in event-based programs: -.PP -.Vb 4 -\& callback () -\& { -\& free (request); -\& } -\& -\& request = start_new_request (..., callback); -.Ve -.PP -The intent is to start some \*(L"lengthy\*(R" operation. The \f(CW\*(C`request\*(C'\fR could be -used to cancel the operation, or do other things with it. -.PP -It's not uncommon to have code paths in \f(CW\*(C`start_new_request\*(C'\fR that -immediately invoke the callback, for example, to report errors. 
Or you add -some caching layer that finds that it can skip the lengthy aspects of the -operation and simply invoke the callback with the result. -.PP -The problem here is that this will happen \fIbefore\fR \f(CW\*(C`start_new_request\*(C'\fR -has returned, so \f(CW\*(C`request\*(C'\fR is not set. -.PP -Even if you pass the request by some safer means to the callback, you -might want to do something to the request after starting it, such as -canceling it, which probably isn't working so well when the callback has -already been invoked. -.PP -A common way around all these issues is to make sure that -\&\f(CW\*(C`start_new_request\*(C'\fR \fIalways\fR returns before the callback is invoked. If -\&\f(CW\*(C`start_new_request\*(C'\fR immediately knows the result, it can artificially -delay invoking the callback by using a \f(CW\*(C`prepare\*(C'\fR or \f(CW\*(C`idle\*(C'\fR watcher for -example, or more sneakily, by reusing an existing (stopped) watcher and -pushing it into the pending queue: -.PP -.Vb 2 -\& ev_set_cb (watcher, callback); -\& ev_feed_event (EV_A_ watcher, 0); -.Ve -.PP -This way, \f(CW\*(C`start_new_request\*(C'\fR can safely return before the callback is -invoked, while not delaying callback invocation too much. -.SS "\s-1MODEL/NESTED\s0 \s-1EVENT\s0 \s-1LOOP\s0 \s-1INVOCATIONS\s0 \s-1AND\s0 \s-1EXIT\s0 \s-1CONDITIONS\s0" -.IX Subsection "MODEL/NESTED EVENT LOOP INVOCATIONS AND EXIT CONDITIONS" -Often (especially in \s-1GUI\s0 toolkits) there are places where you have -\&\fImodal\fR interaction, which is most easily implemented by recursively -invoking \f(CW\*(C`ev_run\*(C'\fR. -.PP -This brings the problem of exiting \- a callback might want to finish the -main \f(CW\*(C`ev_run\*(C'\fR call, but not the nested one (e.g. user clicked \*(L"Quit\*(R", but -a modal \*(L"Are you sure?\*(R" dialog is still waiting), or just the nested one -and not the main one (e.g. 
user clicked \*(L"Ok\*(R" in a modal dialog), or some -other combination: In these cases, a simple \f(CW\*(C`ev_break\*(C'\fR will not work. -.PP -The solution is to maintain a \*(L"break this loop\*(R" variable for each \f(CW\*(C`ev_run\*(C'\fR -invocation, and use a loop around \f(CW\*(C`ev_run\*(C'\fR until the condition is -triggered, using \f(CW\*(C`EVRUN_ONCE\*(C'\fR: -.PP -.Vb 2 -\& // main loop -\& int exit_main_loop = 0; -\& -\& while (!exit_main_loop) -\& ev_run (EV_DEFAULT_ EVRUN_ONCE); -\& -\& // in a modal watcher -\& int exit_nested_loop = 0; -\& -\& while (!exit_nested_loop) -\& ev_run (EV_A_ EVRUN_ONCE); -.Ve -.PP -To exit from any of these loops, just set the corresponding exit variable: -.PP -.Vb 2 -\& // exit modal loop -\& exit_nested_loop = 1; -\& -\& // exit main program, after modal loop is finished -\& exit_main_loop = 1; -\& -\& // exit both -\& exit_main_loop = exit_nested_loop = 1; -.Ve -.SS "\s-1THREAD\s0 \s-1LOCKING\s0 \s-1EXAMPLE\s0" -.IX Subsection "THREAD LOCKING EXAMPLE" -Here is a fictitious example of how to run an event loop in a different -thread from where callbacks are being invoked and watchers are -created/added/removed. -.PP -For a real-world example, see the \f(CW\*(C`EV::Loop::Async\*(C'\fR perl module, -which uses exactly this technique (which is suited for many high-level -languages). -.PP -The example uses a pthread mutex to protect the loop data, a condition -variable to wait for callback invocations, an async watcher to notify the -event loop thread and an unspecified mechanism to wake up the main thread. -.PP -First, you need to associate some data with the event loop: -.PP -.Vb 6 -\& typedef struct { -\& mutex_t lock; /* global loop lock */ -\& ev_async async_w; -\& thread_t tid; -\& cond_t invoke_cv; -\& } userdata; -\& -\& void prepare_loop (EV_P) -\& { -\& // for simplicity, we use a static userdata struct. 
-\& static userdata u; -\& -\& ev_async_init (&u\->async_w, async_cb); -\& ev_async_start (EV_A_ &u\->async_w); -\& -\& pthread_mutex_init (&u\->lock, 0); -\& pthread_cond_init (&u\->invoke_cv, 0); -\& -\& // now associate this with the loop -\& ev_set_userdata (EV_A_ u); -\& ev_set_invoke_pending_cb (EV_A_ l_invoke); -\& ev_set_loop_release_cb (EV_A_ l_release, l_acquire); -\& -\& // then create the thread running ev_run -\& pthread_create (&u\->tid, 0, l_run, EV_A); -\& } -.Ve -.PP -The callback for the \f(CW\*(C`ev_async\*(C'\fR watcher does nothing: the watcher is used -solely to wake up the event loop so it takes notice of any new watchers -that might have been added: -.PP -.Vb 5 -\& static void -\& async_cb (EV_P_ ev_async *w, int revents) -\& { -\& // just used for the side effects -\& } -.Ve -.PP -The \f(CW\*(C`l_release\*(C'\fR and \f(CW\*(C`l_acquire\*(C'\fR callbacks simply unlock/lock the mutex -protecting the loop data, respectively. -.PP -.Vb 6 -\& static void -\& l_release (EV_P) -\& { -\& userdata *u = ev_userdata (EV_A); -\& pthread_mutex_unlock (&u\->lock); -\& } -\& -\& static void -\& l_acquire (EV_P) -\& { -\& userdata *u = ev_userdata (EV_A); -\& pthread_mutex_lock (&u\->lock); -\& } -.Ve -.PP -The event loop thread first acquires the mutex, and then jumps straight -into \f(CW\*(C`ev_run\*(C'\fR: -.PP -.Vb 4 -\& void * -\& l_run (void *thr_arg) -\& { -\& struct ev_loop *loop = (struct ev_loop *)thr_arg; -\& -\& l_acquire (EV_A); -\& pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, 0); -\& ev_run (EV_A_ 0); -\& l_release (EV_A); -\& -\& return 0; -\& } -.Ve -.PP -Instead of invoking all pending watchers, the \f(CW\*(C`l_invoke\*(C'\fR callback will -signal the main thread via some unspecified mechanism (signals? pipe -writes? \f(CW\*(C`Async::Interrupt\*(C'\fR?) 
and then waits until all pending watchers -have been called (in a while loop because a) spurious wakeups are possible -and b) skipping inter-thread-communication when there are no pending -watchers is very beneficial): -.PP -.Vb 4 -\& static void -\& l_invoke (EV_P) -\& { -\& userdata *u = ev_userdata (EV_A); -\& -\& while (ev_pending_count (EV_A)) -\& { -\& wake_up_other_thread_in_some_magic_or_not_so_magic_way (); -\& pthread_cond_wait (&u\->invoke_cv, &u\->lock); -\& } -\& } -.Ve -.PP -Now, whenever the main thread gets told to invoke pending watchers, it -will grab the lock, call \f(CW\*(C`ev_invoke_pending\*(C'\fR and then signal the loop -thread to continue: -.PP -.Vb 4 -\& static void -\& real_invoke_pending (EV_P) -\& { -\& userdata *u = ev_userdata (EV_A); -\& -\& pthread_mutex_lock (&u\->lock); -\& ev_invoke_pending (EV_A); -\& pthread_cond_signal (&u\->invoke_cv); -\& pthread_mutex_unlock (&u\->lock); -\& } -.Ve -.PP -Whenever you want to start/stop a watcher or do other modifications to an -event loop, you will now have to lock: -.PP -.Vb 2 -\& ev_timer timeout_watcher; -\& userdata *u = ev_userdata (EV_A); -\& -\& ev_timer_init (&timeout_watcher, timeout_cb, 5.5, 0.); -\& -\& pthread_mutex_lock (&u\->lock); -\& ev_timer_start (EV_A_ &timeout_watcher); -\& ev_async_send (EV_A_ &u\->async_w); -\& pthread_mutex_unlock (&u\->lock); -.Ve -.PP -Note that sending the \f(CW\*(C`ev_async\*(C'\fR watcher is required because otherwise -an event loop currently blocking in the kernel will have no knowledge -about the newly added timer. By waking up the loop it will pick up any new -watchers in the next event loop iteration. -.SS "\s-1THREADS\s0, \s-1COROUTINES\s0, \s-1CONTINUATIONS\s0, \s-1QUEUES\s0... \s-1INSTEAD\s0 \s-1OF\s0 \s-1CALLBACKS\s0" -.IX Subsection "THREADS, COROUTINES, CONTINUATIONS, QUEUES... INSTEAD OF CALLBACKS" -While the overhead of a callback that e.g. schedules a thread is small, it -is still an overhead. 
If you embed libev, and your main usage is with some -kind of threads or coroutines, you might want to customise libev so that it -doesn't need callbacks anymore. -.PP -Imagine you have coroutines that you can switch to using a function -\&\f(CW\*(C`switch_to (coro)\*(C'\fR, that libev runs in a coroutine called \f(CW\*(C`libev_coro\*(C'\fR -and that due to some magic, the currently active coroutine is stored in a -global called \f(CW\*(C`current_coro\*(C'\fR. Then you can build your own \*(L"wait for libev -event\*(R" primitive by changing \f(CW\*(C`EV_CB_DECLARE\*(C'\fR and \f(CW\*(C`EV_CB_INVOKE\*(C'\fR (note -the differing \f(CW\*(C`;\*(C'\fR conventions): -.PP -.Vb 2 -\& #define EV_CB_DECLARE(type) struct my_coro *cb; -\& #define EV_CB_INVOKE(watcher) switch_to ((watcher)\->cb) -.Ve -.PP -That means instead of having a C callback function, you store the -coroutine to switch to in each watcher, and instead of having libev call -your callback, you instead have it switch to that coroutine. -.PP -A coroutine might now wait for an event with a function called -\&\f(CW\*(C`wait_for_event\*(C'\fR. (the watcher needs to be started, as always, but it doesn't -matter when, or whether the watcher is active or not when this function is -called): -.PP -.Vb 6 -\& void -\& wait_for_event (ev_watcher *w) -\& { -\& ev_set_cb (w, current_coro); -\& switch_to (libev_coro); -\& } -.Ve -.PP -That basically suspends the coroutine inside \f(CW\*(C`wait_for_event\*(C'\fR and -continues the libev coroutine, which, when appropriate, switches back to -this or any other coroutine. -.PP -You can do similar tricks if you have, say, threads with an event queue \- -instead of storing a coroutine, you store the queue object and instead of -switching to a coroutine, you push the watcher onto the queue and notify -any waiters. 
-.PP -To embed libev, see \*(L"\s-1EMBEDDING\s0\*(R", but in short, it's easiest to create two -files, \fImy_ev.h\fR and \fImy_ev.c\fR that include the respective libev files: -.PP -.Vb 4 -\& // my_ev.h -\& #define EV_CB_DECLARE(type) struct my_coro *cb; -\& #define EV_CB_INVOKE(watcher) switch_to ((watcher)\->cb); -\& #include "../libev/ev.h" -\& -\& // my_ev.c -\& #define EV_H "my_ev.h" -\& #include "../libev/ev.c" -.Ve -.PP -And then use \fImy_ev.h\fR when you would normally use \fIev.h\fR, and compile -\&\fImy_ev.c\fR into your project. When properly specifying include paths, you -can even use \fIev.h\fR as header file name directly. -.SH "LIBEVENT EMULATION" -.IX Header "LIBEVENT EMULATION" -Libev offers a compatibility emulation layer for libevent. It cannot -emulate the internals of libevent, so here are some usage hints: -.IP "\(bu" 4 -Only the libevent\-1.4.1\-beta \s-1API\s0 is being emulated. -.Sp -This was the newest libevent version available when libev was implemented, -and is still mostly unchanged in 2010. -.IP "\(bu" 4 -Use it by including <event.h>, as usual. -.IP "\(bu" 4 -The following members are fully supported: ev_base, ev_callback, -ev_arg, ev_fd, ev_res, ev_events. -.IP "\(bu" 4 -Avoid using ev_flags and the EVLIST_*\-macros, while it is -maintained by libev, it does not work exactly the same way as in libevent (consider -it a private \s-1API\s0). -.IP "\(bu" 4 -Priorities are not currently supported. Initialising priorities -will fail and all watchers will have the same priority, even though there -is an ev_pri field. -.IP "\(bu" 4 -In libevent, the last base created gets the signals, in libev, the -base that registered the signal gets the signals. -.IP "\(bu" 4 -Other members are not supported. -.IP "\(bu" 4 -The libev emulation is \fInot\fR \s-1ABI\s0 compatible to libevent, you need -to use the libev header file and library.
-.SH "\*(C+ SUPPORT" -.IX Header " SUPPORT" -.SS "C \s-1API\s0" -.IX Subsection "C API" -The normal C \s-1API\s0 should work fine when used from \*(C+: both ev.h and the -libev sources can be compiled as \*(C+. Therefore, code that uses the C \s-1API\s0 -will work fine. -.PP -Proper exception specifications might have to be added to callbacks passed -to libev: exceptions may be thrown only from watcher callbacks, all -other callbacks (allocator, syserr, loop acquire/release and periodic -reschedule callbacks) must not throw exceptions, and might need a \f(CW\*(C`throw -()\*(C'\fR specification. If you have code that needs to be compiled as both C -and \*(C+ you can use the \f(CW\*(C`EV_THROW\*(C'\fR macro for this: -.PP -.Vb 6 -\& static void -\& fatal_error (const char *msg) EV_THROW -\& { -\& perror (msg); -\& abort (); -\& } -\& -\& ... -\& ev_set_syserr_cb (fatal_error); -.Ve -.PP -The only \s-1API\s0 functions that can currently throw exceptions are \f(CW\*(C`ev_run\*(C'\fR, -\&\f(CW\*(C`ev_invoke\*(C'\fR, \f(CW\*(C`ev_invoke_pending\*(C'\fR and \f(CW\*(C`ev_loop_destroy\*(C'\fR (the latter -because it runs cleanup watchers). -.PP -Throwing exceptions in watcher callbacks is only supported if libev itself -is compiled with a \*(C+ compiler or your C and \*(C+ environments allow -throwing exceptions through C libraries (most do). -.SS "\*(C+ \s-1API\s0" -.IX Subsection " API" -Libev comes with some simplistic wrapper classes for \*(C+ that mainly allow -you to use some convenience methods to start/stop watchers and also change -the callback model to a model using method callbacks on objects. -.PP -To use it, -.PP -.Vb 1 -\& #include <ev++.h> -.Ve -.PP -This automatically includes \fIev.h\fR and puts all of its definitions (many -of them macros) into the global namespace. All \*(C+ specific things are -put into the \f(CW\*(C`ev\*(C'\fR namespace. It should support all the same embedding -options as \fIev.h\fR, most notably \f(CW\*(C`EV_MULTIPLICITY\*(C'\fR.
-.PP -Care has been taken to keep the overhead low. The only data member the \*(C+ -classes add (compared to plain C\-style watchers) is the event loop pointer -that the watcher is associated with (or no additional members at all if -you disable \f(CW\*(C`EV_MULTIPLICITY\*(C'\fR when embedding libev). -.PP -Currently, functions, static and non-static member functions and classes -with \f(CW\*(C`operator ()\*(C'\fR can be used as callbacks. Other types should be easy -to add as long as they only need one additional pointer for context. If -you need support for other types of functors please contact the author -(preferably after implementing it). -.PP -For all this to work, your \*(C+ compiler either has to use the same calling -conventions as your C compiler (for static member functions), or you have -to embed libev and compile libev itself as \*(C+. -.PP -Here is a list of things available in the \f(CW\*(C`ev\*(C'\fR namespace: -.ie n .IP """ev::READ"", ""ev::WRITE"" etc." 4 -.el .IP "\f(CWev::READ\fR, \f(CWev::WRITE\fR etc." 4 -.IX Item "ev::READ, ev::WRITE etc." -These are just enum values with the same values as the \f(CW\*(C`EV_READ\*(C'\fR etc. -macros from \fIev.h\fR. -.ie n .IP """ev::tstamp"", ""ev::now""" 4 -.el .IP "\f(CWev::tstamp\fR, \f(CWev::now\fR" 4 -.IX Item "ev::tstamp, ev::now" -Aliases to the same types/functions as with the \f(CW\*(C`ev_\*(C'\fR prefix. -.ie n .IP """ev::io"", ""ev::timer"", ""ev::periodic"", ""ev::idle"", ""ev::sig"" etc." 4 -.el .IP "\f(CWev::io\fR, \f(CWev::timer\fR, \f(CWev::periodic\fR, \f(CWev::idle\fR, \f(CWev::sig\fR etc." 4 -.IX Item "ev::io, ev::timer, ev::periodic, ev::idle, ev::sig etc." -For each \f(CW\*(C`ev_TYPE\*(C'\fR watcher in \fIev.h\fR there is a corresponding class of -the same name in the \f(CW\*(C`ev\*(C'\fR namespace, with the exception of \f(CW\*(C`ev_signal\*(C'\fR -which is called \f(CW\*(C`ev::sig\*(C'\fR to avoid clashes with the \f(CW\*(C`signal\*(C'\fR macro -defined by many implementations. 
-.Sp -All of those classes have these methods: -.RS 4 -.IP "ev::TYPE::TYPE ()" 4 -.IX Item "ev::TYPE::TYPE ()" -.PD 0 -.IP "ev::TYPE::TYPE (loop)" 4 -.IX Item "ev::TYPE::TYPE (loop)" -.IP "ev::TYPE::~TYPE" 4 -.IX Item "ev::TYPE::~TYPE" -.PD -The constructor (optionally) takes an event loop to associate the watcher -with. If it is omitted, it will use \f(CW\*(C`EV_DEFAULT\*(C'\fR. -.Sp -The constructor calls \f(CW\*(C`ev_init\*(C'\fR for you, which means you have to call the -\&\f(CW\*(C`set\*(C'\fR method before starting it. -.Sp -It will not set a callback, however: You have to call the templated \f(CW\*(C`set\*(C'\fR -method to set a callback before you can start the watcher. -.Sp -(The reason why you have to use a method is a limitation in \*(C+ which does -not allow explicit template arguments for constructors). -.Sp -The destructor automatically stops the watcher if it is active. -.IP "w\->set (object *)" 4 -.IX Item "w->set (object *)" -This method sets the callback method to call. The method has to have a -signature of \f(CW\*(C`void (*)(ev_TYPE &, int)\*(C'\fR, it receives the watcher as -first argument and the \f(CW\*(C`revents\*(C'\fR as second. The object must be given as -parameter and is stored in the \f(CW\*(C`data\*(C'\fR member of the watcher. -.Sp -This method synthesizes efficient thunking code to call your method from -the C callback that libev requires. If your compiler can inline your -callback (i.e. it is visible to it at the place of the \f(CW\*(C`set\*(C'\fR call and -your compiler is good :), then the method will be fully inlined into the -thunking function, making it as fast as a direct C callback. 
-.Sp -Example: simple class declaration and watcher initialisation -.Sp -.Vb 4 -\& struct myclass -\& { -\& void io_cb (ev::io &w, int revents) { } -\& } -\& -\& myclass obj; -\& ev::io iow; -\& iow.set (&obj); -.Ve -.IP "w\->set (object *)" 4 -.IX Item "w->set (object *)" -This is a variation of a method callback \- leaving out the method to call -will default the method to \f(CW\*(C`operator ()\*(C'\fR, which makes it possible to use -functor objects without having to manually specify the \f(CW\*(C`operator ()\*(C'\fR all -the time. Incidentally, you can then also leave out the template argument -list. -.Sp -The \f(CW\*(C`operator ()\*(C'\fR method prototype must be \f(CW\*(C`void operator ()(watcher &w, -int revents)\*(C'\fR. -.Sp -See the method\-\f(CW\*(C`set\*(C'\fR above for more details. -.Sp -Example: use a functor object as callback. -.Sp -.Vb 7 -\& struct myfunctor -\& { -\& void operator() (ev::io &w, int revents) -\& { -\& ... -\& } -\& } -\& -\& myfunctor f; -\& -\& ev::io w; -\& w.set (&f); -.Ve -.IP "w\->set (void *data = 0)" 4 -.IX Item "w->set (void *data = 0)" -Also sets a callback, but uses a static method or plain function as -callback. The optional \f(CW\*(C`data\*(C'\fR argument will be stored in the watcher's -\&\f(CW\*(C`data\*(C'\fR member and is free for you to use. -.Sp -The prototype of the \f(CW\*(C`function\*(C'\fR must be \f(CW\*(C`void (*)(ev::TYPE &w, int)\*(C'\fR. -.Sp -See the method\-\f(CW\*(C`set\*(C'\fR above for more details. -.Sp -Example: Use a plain function as callback. -.Sp -.Vb 2 -\& static void io_cb (ev::io &w, int revents) { } -\& iow.set (); -.Ve -.IP "w\->set (loop)" 4 -.IX Item "w->set (loop)" -Associates a different \f(CW\*(C`struct ev_loop\*(C'\fR with this watcher. You can only -do this when the watcher is inactive (and not pending either). 
-.IP "w\->set ([arguments])" 4 -.IX Item "w->set ([arguments])" -Basically the same as \f(CW\*(C`ev_TYPE_set\*(C'\fR (except for \f(CW\*(C`ev::embed\*(C'\fR watchers), -with the same arguments. Either this method or a suitable start method -must be called at least once. Unlike the C counterpart, an active watcher -gets automatically stopped and restarted when reconfiguring it with this -method. -.Sp -For \f(CW\*(C`ev::embed\*(C'\fR watchers this method is called \f(CW\*(C`set_embed\*(C'\fR, to avoid -clashing with the \f(CW\*(C`set (loop)\*(C'\fR method. -.IP "w\->start ()" 4 -.IX Item "w->start ()" -Starts the watcher. Note that there is no \f(CW\*(C`loop\*(C'\fR argument, as the -constructor already stores the event loop. -.IP "w\->start ([arguments])" 4 -.IX Item "w->start ([arguments])" -Instead of calling \f(CW\*(C`set\*(C'\fR and \f(CW\*(C`start\*(C'\fR methods separately, it is often -convenient to wrap them in one call. Uses the same type of arguments as -the configure \f(CW\*(C`set\*(C'\fR method of the watcher. -.IP "w\->stop ()" 4 -.IX Item "w->stop ()" -Stops the watcher if it is active. Again, no \f(CW\*(C`loop\*(C'\fR argument. -.ie n .IP "w\->again () (""ev::timer"", ""ev::periodic"" only)" 4 -.el .IP "w\->again () (\f(CWev::timer\fR, \f(CWev::periodic\fR only)" 4 -.IX Item "w->again () (ev::timer, ev::periodic only)" -For \f(CW\*(C`ev::timer\*(C'\fR and \f(CW\*(C`ev::periodic\*(C'\fR, this invokes the corresponding -\&\f(CW\*(C`ev_TYPE_again\*(C'\fR function. -.ie n .IP "w\->sweep () (""ev::embed"" only)" 4 -.el .IP "w\->sweep () (\f(CWev::embed\fR only)" 4 -.IX Item "w->sweep () (ev::embed only)" -Invokes \f(CW\*(C`ev_embed_sweep\*(C'\fR. -.ie n .IP "w\->update () (""ev::stat"" only)" 4 -.el .IP "w\->update () (\f(CWev::stat\fR only)" 4 -.IX Item "w->update () (ev::stat only)" -Invokes \f(CW\*(C`ev_stat_stat\*(C'\fR. -.RE -.RS 4 -.RE -.PP -Example: Define a class with two I/O and idle watchers, start the I/O -watchers in the constructor.
-.PP -.Vb 5 -\& class myclass -\& { -\& ev::io io ; void io_cb (ev::io &w, int revents); -\& ev::io io2 ; void io2_cb (ev::io &w, int revents); -\& ev::idle idle; void idle_cb (ev::idle &w, int revents); -\& -\& myclass (int fd) -\& { -\& io .set (this); -\& io2 .set (this); -\& idle.set (this); -\& -\& io.set (fd, ev::WRITE); // configure the watcher -\& io.start (); // start it whenever convenient -\& -\& io2.start (fd, ev::READ); // set + start in one call -\& } -\& }; -.Ve -.SH "OTHER LANGUAGE BINDINGS" -.IX Header "OTHER LANGUAGE BINDINGS" -Libev does not offer other language bindings itself, but bindings for a -number of languages exist in the form of third-party packages. If you know -any interesting language binding in addition to the ones listed here, drop -me a note. -.IP "Perl" 4 -.IX Item "Perl" -The \s-1EV\s0 module implements the full libev \s-1API\s0 and is actually used to test -libev. \s-1EV\s0 is developed together with libev. Apart from the \s-1EV\s0 core module, -there are additional modules that implement libev-compatible interfaces -to \f(CW\*(C`libadns\*(C'\fR (\f(CW\*(C`EV::ADNS\*(C'\fR, but \f(CW\*(C`AnyEvent::DNS\*(C'\fR is preferred nowadays), -\&\f(CW\*(C`Net::SNMP\*(C'\fR (\f(CW\*(C`Net::SNMP::EV\*(C'\fR) and the \f(CW\*(C`libglib\*(C'\fR event core (\f(CW\*(C`Glib::EV\*(C'\fR -and \f(CW\*(C`EV::Glib\*(C'\fR). -.Sp -It can be found and installed via \s-1CPAN\s0, its homepage is at -. -.IP "Python" 4 -.IX Item "Python" -Python bindings can be found at . It -seems to be quite complete and well-documented. -.IP "Ruby" 4 -.IX Item "Ruby" -Tony Arcieri has written a ruby extension that offers access to a subset -of the libev \s-1API\s0 and adds file handle abstractions, asynchronous \s-1DNS\s0 and -more on top of it. It can be found via gem servers. Its homepage is at -. -.Sp -Roger Pack reports that using the link order \f(CW\*(C`\-lws2_32 \-lmsvcrt\-ruby\-190\*(C'\fR -makes rev work even on mingw. 
-.IP "Haskell" 4 -.IX Item "Haskell" -A haskell binding to libev is available at -http://hackage.haskell.org/cgi\-bin/hackage\-scripts/package/hlibev . -.IP "D" 4 -.IX Item "D" -Leandro Lucarella has written a D language binding (\fIev.d\fR) for libev, to -be found at . -.IP "Ocaml" 4 -.IX Item "Ocaml" -Erkki Seppala has written Ocaml bindings for libev, to be found at -http://modeemi.cs.tut.fi/~flux/software/ocaml\-ev/ . -.IP "Lua" 4 -.IX Item "Lua" -Brian Maher has written a partial interface to libev for lua (at the -time of this writing, only \f(CW\*(C`ev_io\*(C'\fR and \f(CW\*(C`ev_timer\*(C'\fR), to be found at -http://github.com/brimworks/lua\-ev . -.IP "Javascript" 4 -.IX Item "Javascript" -Node.js () uses libev as the underlying event library. -.IP "Others" 4 -.IX Item "Others" -There are others, and I stopped counting. -.SH "MACRO MAGIC" -.IX Header "MACRO MAGIC" -Libev can be compiled with a variety of options, the most fundamental -of which is \f(CW\*(C`EV_MULTIPLICITY\*(C'\fR. This option determines whether (most) -functions and callbacks have an initial \f(CW\*(C`struct ev_loop *\*(C'\fR argument. -.PP -To make it easier to write programs that cope with either variant, the -following macros are defined: -.ie n .IP """EV_A"", ""EV_A_""" 4 -.el .IP "\f(CWEV_A\fR, \f(CWEV_A_\fR" 4 -.IX Item "EV_A, EV_A_" -This provides the loop \fIargument\fR for functions, if one is required (\*(L"ev -loop argument\*(R"). The \f(CW\*(C`EV_A\*(C'\fR form is used when this is the sole argument, -\&\f(CW\*(C`EV_A_\*(C'\fR is used when other arguments are following. Example: -.Sp -.Vb 3 -\& ev_unref (EV_A); -\& ev_timer_add (EV_A_ watcher); -\& ev_run (EV_A_ 0); -.Ve -.Sp -It assumes the variable \f(CW\*(C`loop\*(C'\fR of type \f(CW\*(C`struct ev_loop *\*(C'\fR is in scope, -which is often provided by the following macro. 
-.ie n .IP """EV_P"", ""EV_P_""" 4 -.el .IP "\f(CWEV_P\fR, \f(CWEV_P_\fR" 4 -.IX Item "EV_P, EV_P_" -This provides the loop \fIparameter\fR for functions, if one is required (\*(L"ev -loop parameter\*(R"). The \f(CW\*(C`EV_P\*(C'\fR form is used when this is the sole parameter, -\&\f(CW\*(C`EV_P_\*(C'\fR is used when other parameters are following. Example: -.Sp -.Vb 2 -\& // this is how ev_unref is being declared -\& static void ev_unref (EV_P); -\& -\& // this is how you can declare your typical callback -\& static void cb (EV_P_ ev_timer *w, int revents) -.Ve -.Sp -It declares a parameter \f(CW\*(C`loop\*(C'\fR of type \f(CW\*(C`struct ev_loop *\*(C'\fR, quite -suitable for use with \f(CW\*(C`EV_A\*(C'\fR. -.ie n .IP """EV_DEFAULT"", ""EV_DEFAULT_""" 4 -.el .IP "\f(CWEV_DEFAULT\fR, \f(CWEV_DEFAULT_\fR" 4 -.IX Item "EV_DEFAULT, EV_DEFAULT_" -Similar to the other two macros, this gives you the value of the default -loop, if multiple loops are supported (\*(L"ev loop default\*(R"). The default loop -will be initialised if it isn't already initialised. -.Sp -For non-multiplicity builds, these macros do nothing, so you always have -to initialise the loop somewhere. -.ie n .IP """EV_DEFAULT_UC"", ""EV_DEFAULT_UC_""" 4 -.el .IP "\f(CWEV_DEFAULT_UC\fR, \f(CWEV_DEFAULT_UC_\fR" 4 -.IX Item "EV_DEFAULT_UC, EV_DEFAULT_UC_" -Usage identical to \f(CW\*(C`EV_DEFAULT\*(C'\fR and \f(CW\*(C`EV_DEFAULT_\*(C'\fR, but requires that the -default loop has been initialised (\f(CW\*(C`UC\*(C'\fR == unchecked). Their behaviour -is undefined when the default loop has not been initialised by a previous -execution of \f(CW\*(C`EV_DEFAULT\*(C'\fR, \f(CW\*(C`EV_DEFAULT_\*(C'\fR or \f(CW\*(C`ev_default_init (...)\*(C'\fR. -.Sp -It is often prudent to use \f(CW\*(C`EV_DEFAULT\*(C'\fR when initialising the first -watcher in a function but use \f(CW\*(C`EV_DEFAULT_UC\*(C'\fR afterwards. 
-.PP -Example: Declare and initialise a check watcher, utilising the above -macros so it will work regardless of whether multiple loops are supported -or not. -.PP -.Vb 5 -\& static void -\& check_cb (EV_P_ ev_check *w, int revents) -\& { -\& ev_check_stop (EV_A_ w); -\& } -\& -\& ev_check check; -\& ev_check_init (&check, check_cb); -\& ev_check_start (EV_DEFAULT_ &check); -\& ev_run (EV_DEFAULT_ 0); -.Ve -.SH "EMBEDDING" -.IX Header "EMBEDDING" -Libev can be (and often is) directly embedded into host -applications. Examples of applications that embed it include the Deliantra -Game Server, the \s-1EV\s0 perl module, the \s-1GNU\s0 Virtual Private Ethernet (gvpe) -and rxvt-unicode. -.PP -The goal is to enable you to just copy the necessary files into your -source directory without having to change even a single line in them, so -you can easily upgrade by simply copying (or having a checked-out copy of -libev somewhere in your source tree). -.SS "\s-1FILESETS\s0" -.IX Subsection "FILESETS" -Depending on what features you need you need to include one or more sets of files -in your application. -.PP -\fI\s-1CORE\s0 \s-1EVENT\s0 \s-1LOOP\s0\fR -.IX Subsection "CORE EVENT LOOP" -.PP -To include only the libev core (all the \f(CW\*(C`ev_*\*(C'\fR functions), with manual -configuration (no autoconf): -.PP -.Vb 2 -\& #define EV_STANDALONE 1 -\& #include "ev.c" -.Ve -.PP -This will automatically include \fIev.h\fR, too, and should be done in a -single C source file only to provide the function implementations. To use -it, do the same for \fIev.h\fR in all files wishing to use this \s-1API\s0 (best -done by writing a wrapper around \fIev.h\fR that you can include instead and -where you can put other configuration options): -.PP -.Vb 2 -\& #define EV_STANDALONE 1 -\& #include "ev.h" -.Ve -.PP -Both header files and implementation files can be compiled with a \*(C+ -compiler (at least, that's a stated goal, and breakage will be treated -as a bug).
-.PP -You need the following files in your source tree, or in a directory -in your include path (e.g. in libev/ when using \-Ilibev): -.PP -.Vb 4 -\& ev.h -\& ev.c -\& ev_vars.h -\& ev_wrap.h -\& -\& ev_win32.c required on win32 platforms only -\& -\& ev_select.c only when select backend is enabled (which is enabled by default) -\& ev_poll.c only when poll backend is enabled (disabled by default) -\& ev_epoll.c only when the epoll backend is enabled (disabled by default) -\& ev_kqueue.c only when the kqueue backend is enabled (disabled by default) -\& ev_port.c only when the solaris port backend is enabled (disabled by default) -.Ve -.PP -\&\fIev.c\fR includes the backend files directly when enabled, so you only need -to compile this single file. -.PP -\fI\s-1LIBEVENT\s0 \s-1COMPATIBILITY\s0 \s-1API\s0\fR -.IX Subsection "LIBEVENT COMPATIBILITY API" -.PP -To include the libevent compatibility \s-1API\s0, also include: -.PP -.Vb 1 -\& #include "event.c" -.Ve -.PP -in the file including \fIev.c\fR, and: -.PP -.Vb 1 -\& #include "event.h" -.Ve -.PP -in the files that want to use the libevent \s-1API\s0. This also includes \fIev.h\fR. -.PP -You need the following additional files for this: -.PP -.Vb 2 -\& event.h -\& event.c -.Ve -.PP -\fI\s-1AUTOCONF\s0 \s-1SUPPORT\s0\fR -.IX Subsection "AUTOCONF SUPPORT" -.PP -Instead of using \f(CW\*(C`EV_STANDALONE=1\*(C'\fR and providing your configuration in -whatever way you want, you can also \f(CW\*(C`m4_include([libev.m4])\*(C'\fR in your -\&\fIconfigure.ac\fR and leave \f(CW\*(C`EV_STANDALONE\*(C'\fR undefined. \fIev.c\fR will then -include \fIconfig.h\fR and configure itself accordingly. -.PP -For this of course you need the m4 file: -.PP -.Vb 1 -\& libev.m4 -.Ve -.SS "\s-1PREPROCESSOR\s0 \s-1SYMBOLS/MACROS\s0" -.IX Subsection "PREPROCESSOR SYMBOLS/MACROS" -Libev can be configured via a variety of preprocessor symbols you have to -define before including (or compiling) any of its files. 
The default in -the absence of autoconf is documented for every option. -.PP -Symbols marked with \*(L"(h)\*(R" do not change the \s-1ABI\s0, and can have different -values when compiling libev vs. including \fIev.h\fR, so it is permissible -to redefine them before including \fIev.h\fR without breaking compatibility -to a compiled library. All other symbols change the \s-1ABI\s0, which means all -users of libev and the libev code itself must be compiled with compatible -settings. -.IP "\s-1EV_COMPAT3\s0 (h)" 4 -.IX Item "EV_COMPAT3 (h)" -Backwards compatibility is a major concern for libev. This is why this -release of libev comes with wrappers for the functions and symbols that -have been renamed between libev version 3 and 4. -.Sp -You can disable these wrappers (to test compatibility with future -versions) by defining \f(CW\*(C`EV_COMPAT3\*(C'\fR to \f(CW0\fR when compiling your -sources. This has the additional advantage that you can drop the \f(CW\*(C`struct\*(C'\fR -from \f(CW\*(C`struct ev_loop\*(C'\fR declarations, as libev will provide an \f(CW\*(C`ev_loop\*(C'\fR -typedef in that case. -.Sp -In some future version, the default for \f(CW\*(C`EV_COMPAT3\*(C'\fR will become \f(CW0\fR, -and in some even more future version the compatibility code will be -removed completely. -.IP "\s-1EV_STANDALONE\s0 (h)" 4 -.IX Item "EV_STANDALONE (h)" -Must always be \f(CW1\fR if you do not use autoconf configuration, which -keeps libev from including \fIconfig.h\fR, and it also defines dummy -implementations for some libevent functions (such as logging, which is not -supported). It will also not define any of the structs usually found in -\&\fIevent.h\fR that are not directly supported by the libev core alone. -.Sp -In standalone mode, libev will still try to automatically deduce the -configuration, but has to be more conservative. 
-.IP "\s-1EV_USE_FLOOR\s0" 4 -.IX Item "EV_USE_FLOOR" -If defined to be \f(CW1\fR, libev will use the \f(CW\*(C`floor ()\*(C'\fR function for its -periodic reschedule calculations, otherwise libev will fall back on a -portable (slower) implementation. If you enable this, you usually have to -link against libm or something equivalent. Enabling this when the \f(CW\*(C`floor\*(C'\fR -function is not available will fail, so the safe default is to not enable -this. -.IP "\s-1EV_USE_MONOTONIC\s0" 4 -.IX Item "EV_USE_MONOTONIC" -If defined to be \f(CW1\fR, libev will try to detect the availability of the -monotonic clock option at both compile time and runtime. Otherwise no -use of the monotonic clock option will be attempted. If you enable this, -you usually have to link against librt or something similar. Enabling it -when the functionality isn't available is safe, though, although you have -to make sure you link against any libraries where the \f(CW\*(C`clock_gettime\*(C'\fR -function is hiding in (often \fI\-lrt\fR). See also \f(CW\*(C`EV_USE_CLOCK_SYSCALL\*(C'\fR. -.IP "\s-1EV_USE_REALTIME\s0" 4 -.IX Item "EV_USE_REALTIME" -If defined to be \f(CW1\fR, libev will try to detect the availability of the -real-time clock option at compile time (and assume its availability -at runtime if successful). Otherwise no use of the real-time clock -option will be attempted. This effectively replaces \f(CW\*(C`gettimeofday\*(C'\fR -by \f(CW\*(C`clock_get (CLOCK_REALTIME, ...)\*(C'\fR and will not normally affect -correctness. See the note about libraries in the description of -\&\f(CW\*(C`EV_USE_MONOTONIC\*(C'\fR, though. Defaults to the opposite value of -\&\f(CW\*(C`EV_USE_CLOCK_SYSCALL\*(C'\fR. -.IP "\s-1EV_USE_CLOCK_SYSCALL\s0" 4 -.IX Item "EV_USE_CLOCK_SYSCALL" -If defined to be \f(CW1\fR, libev will try to use a direct syscall instead -of calling the system-provided \f(CW\*(C`clock_gettime\*(C'\fR function. 
This option -exists because on GNU/Linux, \f(CW\*(C`clock_gettime\*(C'\fR is in \f(CW\*(C`librt\*(C'\fR, but \f(CW\*(C`librt\*(C'\fR -unconditionally pulls in \f(CW\*(C`libpthread\*(C'\fR, slowing down single-threaded -programs needlessly. Using a direct syscall is slightly slower (in -theory), because no optimised vdso implementation can be used, but avoids -the pthread dependency. Defaults to \f(CW1\fR on GNU/Linux with glibc 2.x or -higher, as it simplifies linking (no need for \f(CW\*(C`\-lrt\*(C'\fR). -.IP "\s-1EV_USE_NANOSLEEP\s0" 4 -.IX Item "EV_USE_NANOSLEEP" -If defined to be \f(CW1\fR, libev will assume that \f(CW\*(C`nanosleep ()\*(C'\fR is available -and will use it for delays. Otherwise it will use \f(CW\*(C`select ()\*(C'\fR. -.IP "\s-1EV_USE_EVENTFD\s0" 4 -.IX Item "EV_USE_EVENTFD" -If defined to be \f(CW1\fR, then libev will assume that \f(CW\*(C`eventfd ()\*(C'\fR is -available and will probe for kernel support at runtime. This will improve -\&\f(CW\*(C`ev_signal\*(C'\fR and \f(CW\*(C`ev_async\*(C'\fR performance and reduce resource consumption. -If undefined, it will be enabled if the headers indicate GNU/Linux + Glibc -2.7 or newer, otherwise disabled. -.IP "\s-1EV_USE_SELECT\s0" 4 -.IX Item "EV_USE_SELECT" -If undefined or defined to be \f(CW1\fR, libev will compile in support for the -\&\f(CW\*(C`select\*(C'\fR(2) backend. No attempt at auto-detection will be done: if no -other method takes over, select will be it. Otherwise the select backend -will not be compiled in. -.IP "\s-1EV_SELECT_USE_FD_SET\s0" 4 -.IX Item "EV_SELECT_USE_FD_SET" -If defined to \f(CW1\fR, then the select backend will use the system \f(CW\*(C`fd_set\*(C'\fR -structure. This is useful if libev doesn't compile due to a missing -\&\f(CW\*(C`NFDBITS\*(C'\fR or \f(CW\*(C`fd_mask\*(C'\fR definition or it mis-guesses the bitset layout -on exotic systems. 
This usually limits the range of file descriptors to -some low limit such as 1024 or might have other limitations (winsocket -only allows 64 sockets). The \f(CW\*(C`FD_SETSIZE\*(C'\fR macro, set before compilation, -configures the maximum size of the \f(CW\*(C`fd_set\*(C'\fR. -.IP "\s-1EV_SELECT_IS_WINSOCKET\s0" 4 -.IX Item "EV_SELECT_IS_WINSOCKET" -When defined to \f(CW1\fR, the select backend will assume that -select/socket/connect etc. don't understand file descriptors but -wants osf handles on win32 (this is the case when the select to -be used is the winsock select). This means that it will call -\&\f(CW\*(C`_get_osfhandle\*(C'\fR on the fd to convert it to an \s-1OS\s0 handle. Otherwise, -it is assumed that all these functions actually work on fds, even -on win32. Should not be defined on non\-win32 platforms. -.IP "\s-1EV_FD_TO_WIN32_HANDLE\s0(fd)" 4 -.IX Item "EV_FD_TO_WIN32_HANDLE(fd)" -If \f(CW\*(C`EV_SELECT_IS_WINSOCKET\*(C'\fR is enabled, then libev needs a way to map -file descriptors to socket handles. When not defining this symbol (the -default), then libev will call \f(CW\*(C`_get_osfhandle\*(C'\fR, which is usually -correct. In some cases, programs use their own file descriptor management, -in which case they can provide this function to map fds to socket handles. -.IP "\s-1EV_WIN32_HANDLE_TO_FD\s0(handle)" 4 -.IX Item "EV_WIN32_HANDLE_TO_FD(handle)" -If \f(CW\*(C`EV_SELECT_IS_WINSOCKET\*(C'\fR then libev maps handles to file descriptors -using the standard \f(CW\*(C`_open_osfhandle\*(C'\fR function. For programs implementing -their own fd to handle mapping, overwriting this function makes it easier -to do so. This can be done by defining this macro to an appropriate value. -.IP "\s-1EV_WIN32_CLOSE_FD\s0(fd)" 4 -.IX Item "EV_WIN32_CLOSE_FD(fd)" -If programs implement their own fd to handle mapping on win32, then this -macro can be used to override the \f(CW\*(C`close\*(C'\fR function, useful to unregister -file descriptors again. 
Note that the replacement function has to close -the underlying \s-1OS\s0 handle. -.IP "\s-1EV_USE_WSASOCKET\s0" 4 -.IX Item "EV_USE_WSASOCKET" -If defined to be \f(CW1\fR, libev will use \f(CW\*(C`WSASocket\*(C'\fR to create its internal -communication socket, which works better in some environments. Otherwise, -the normal \f(CW\*(C`socket\*(C'\fR function will be used, which works better in other -environments. -.IP "\s-1EV_USE_POLL\s0" 4 -.IX Item "EV_USE_POLL" -If defined to be \f(CW1\fR, libev will compile in support for the \f(CW\*(C`poll\*(C'\fR(2) -backend. Otherwise it will be enabled on non\-win32 platforms. It -takes precedence over select. -.IP "\s-1EV_USE_EPOLL\s0" 4 -.IX Item "EV_USE_EPOLL" -If defined to be \f(CW1\fR, libev will compile in support for the Linux -\&\f(CW\*(C`epoll\*(C'\fR(7) backend. Its availability will be detected at runtime, -otherwise another method will be used as fallback. This is the preferred -backend for GNU/Linux systems. If undefined, it will be enabled if the -headers indicate GNU/Linux + Glibc 2.4 or newer, otherwise disabled. -.IP "\s-1EV_USE_KQUEUE\s0" 4 -.IX Item "EV_USE_KQUEUE" -If defined to be \f(CW1\fR, libev will compile in support for the \s-1BSD\s0 style -\&\f(CW\*(C`kqueue\*(C'\fR(2) backend. Its actual availability will be detected at runtime, -otherwise another method will be used as fallback. This is the preferred -backend for \s-1BSD\s0 and BSD-like systems, although on most BSDs kqueue only -supports some types of fds correctly (the only platform we found that -supports ptys for example was NetBSD), so kqueue might be compiled in, but -not be used unless explicitly requested. The best way to use it is to find -out whether kqueue supports your type of fd properly and use an embedded -kqueue loop. -.IP "\s-1EV_USE_PORT\s0" 4 -.IX Item "EV_USE_PORT" -If defined to be \f(CW1\fR, libev will compile in support for the Solaris -10 port style backend. 
Its availability will be detected at runtime, -otherwise another method will be used as fallback. This is the preferred -backend for Solaris 10 systems. -.IP "\s-1EV_USE_DEVPOLL\s0" 4 -.IX Item "EV_USE_DEVPOLL" -Reserved for future expansion, works like the \s-1USE\s0 symbols above. -.IP "\s-1EV_USE_INOTIFY\s0" 4 -.IX Item "EV_USE_INOTIFY" -If defined to be \f(CW1\fR, libev will compile in support for the Linux inotify -interface to speed up \f(CW\*(C`ev_stat\*(C'\fR watchers. Its actual availability will -be detected at runtime. If undefined, it will be enabled if the headers -indicate GNU/Linux + Glibc 2.4 or newer, otherwise disabled. -.IP "\s-1EV_NO_SMP\s0" 4 -.IX Item "EV_NO_SMP" -If defined to be \f(CW1\fR, libev will assume that memory is always coherent -between threads, that is, threads can be used, but threads never run on -different cpus (or different cpu cores). This reduces dependencies -and makes libev faster. -.IP "\s-1EV_NO_THREADS\s0" 4 -.IX Item "EV_NO_THREADS" -If defined to be \f(CW1\fR, libev will assume that it will never be called from -different threads (that includes signal handlers), which is a stronger -assumption than \f(CW\*(C`EV_NO_SMP\*(C'\fR, above. This reduces dependencies and makes -libev faster. -.IP "\s-1EV_ATOMIC_T\s0" 4 -.IX Item "EV_ATOMIC_T" -Libev requires an integer type (suitable for storing \f(CW0\fR or \f(CW1\fR) whose -access is atomic with respect to other threads or signal contexts. No -such type is easily found in the C language, so you can provide your own -type that you know is safe for your purposes. It is used both for signal -handler \*(L"locking\*(R" as well as for signal and thread safety in \f(CW\*(C`ev_async\*(C'\fR -watchers. -.Sp -In the absence of this define, libev will use \f(CW\*(C`sig_atomic_t volatile\*(C'\fR -(from \fIsignal.h\fR), which is usually good enough on most platforms. -.IP "\s-1EV_H\s0 (h)" 4 -.IX Item "EV_H (h)" -The name of the \fIev.h\fR header file used to include it. 
The default if -undefined is \f(CW"ev.h"\fR in \fIevent.h\fR, \fIev.c\fR and \fIev++.h\fR. This can be -used to virtually rename the \fIev.h\fR header file in case of conflicts. -.IP "\s-1EV_CONFIG_H\s0 (h)" 4 -.IX Item "EV_CONFIG_H (h)" -If \f(CW\*(C`EV_STANDALONE\*(C'\fR isn't \f(CW1\fR, this variable can be used to override -\&\fIev.c\fR's idea of where to find the \fIconfig.h\fR file, similarly to -\&\f(CW\*(C`EV_H\*(C'\fR, above. -.IP "\s-1EV_EVENT_H\s0 (h)" 4 -.IX Item "EV_EVENT_H (h)" -Similarly to \f(CW\*(C`EV_H\*(C'\fR, this macro can be used to override \fIevent.c\fR's idea -of how the \fIevent.h\fR header can be found, the default is \f(CW"event.h"\fR. -.IP "\s-1EV_PROTOTYPES\s0 (h)" 4 -.IX Item "EV_PROTOTYPES (h)" -If defined to be \f(CW0\fR, then \fIev.h\fR will not define any function -prototypes, but still define all the structs and other symbols. This is -occasionally useful if you want to provide your own wrapper functions -around libev functions. -.IP "\s-1EV_MULTIPLICITY\s0" 4 -.IX Item "EV_MULTIPLICITY" -If undefined or defined to \f(CW1\fR, then all event-loop-specific functions -will have the \f(CW\*(C`struct ev_loop *\*(C'\fR as first argument, and you can create -additional independent event loops. Otherwise there will be no support -for multiple event loops and there is no first event loop pointer -argument. Instead, all functions act on the single default loop. -.Sp -Note that \f(CW\*(C`EV_DEFAULT\*(C'\fR and \f(CW\*(C`EV_DEFAULT_\*(C'\fR will no longer provide a -default loop when multiplicity is switched off \- you always have to -initialise the loop manually in this case. -.IP "\s-1EV_MINPRI\s0" 4 -.IX Item "EV_MINPRI" -.PD 0 -.IP "\s-1EV_MAXPRI\s0" 4 -.IX Item "EV_MAXPRI" -.PD -The range of allowed priorities. \f(CW\*(C`EV_MINPRI\*(C'\fR must be smaller or equal to -\&\f(CW\*(C`EV_MAXPRI\*(C'\fR, but otherwise there are no non-obvious limitations. 
You can -provide for more priorities by overriding those symbols (usually defined -to be \f(CW\*(C`\-2\*(C'\fR and \f(CW2\fR, respectively). -.Sp -When doing priority-based operations, libev usually has to linearly search -all the priorities, so having many of them (hundreds) uses a lot of space -and time, so using the defaults of five priorities (\-2 .. +2) is usually -fine. -.Sp -If your embedding application does not need any priorities, defining these -both to \f(CW0\fR will save some memory and \s-1CPU\s0. -.IP "\s-1EV_PERIODIC_ENABLE\s0, \s-1EV_IDLE_ENABLE\s0, \s-1EV_EMBED_ENABLE\s0, \s-1EV_STAT_ENABLE\s0, \s-1EV_PREPARE_ENABLE\s0, \s-1EV_CHECK_ENABLE\s0, \s-1EV_FORK_ENABLE\s0, \s-1EV_SIGNAL_ENABLE\s0, \s-1EV_ASYNC_ENABLE\s0, \s-1EV_CHILD_ENABLE\s0." 4 -.IX Item "EV_PERIODIC_ENABLE, EV_IDLE_ENABLE, EV_EMBED_ENABLE, EV_STAT_ENABLE, EV_PREPARE_ENABLE, EV_CHECK_ENABLE, EV_FORK_ENABLE, EV_SIGNAL_ENABLE, EV_ASYNC_ENABLE, EV_CHILD_ENABLE." -If undefined or defined to be \f(CW1\fR (and the platform supports it), then -the respective watcher type is supported. If defined to be \f(CW0\fR, then it -is not. Disabling watcher types mainly saves code size. -.IP "\s-1EV_FEATURES\s0" 4 -.IX Item "EV_FEATURES" -If you need to shave off some kilobytes of code at the expense of some -speed (but with the full \s-1API\s0), you can define this symbol to request -certain subsets of functionality. The default is to enable all features -that can be enabled on the platform. 
-.Sp -A typical way to use this symbol is to define it to \f(CW0\fR (or to a bitset -with some broad features you want) and then selectively re-enable -additional parts you want, for example if you want everything minimal, -but multiple event loop support, async and child watchers and the poll -backend, use this: -.Sp -.Vb 5 -\& #define EV_FEATURES 0 -\& #define EV_MULTIPLICITY 1 -\& #define EV_USE_POLL 1 -\& #define EV_CHILD_ENABLE 1 -\& #define EV_ASYNC_ENABLE 1 -.Ve -.Sp -The actual value is a bitset, it can be a combination of the following -values (by default, all of these are enabled): -.RS 4 -.ie n .IP "1 \- faster/larger code" 4 -.el .IP "\f(CW1\fR \- faster/larger code" 4 -.IX Item "1 - faster/larger code" -Use larger code to speed up some operations. -.Sp -Currently this is used to override some inlining decisions (enlarging the -code size by roughly 30% on amd64). -.Sp -When optimising for size, use of compiler flags such as \f(CW\*(C`\-Os\*(C'\fR with -gcc is recommended, as well as \f(CW\*(C`\-DNDEBUG\*(C'\fR, as libev contains a number of -assertions. -.Sp -The default is off when \f(CW\*(C`_\|_OPTIMIZE_SIZE_\|_\*(C'\fR is defined by your compiler -(e.g. gcc with \f(CW\*(C`\-Os\*(C'\fR). -.ie n .IP "2 \- faster/larger data structures" 4 -.el .IP "\f(CW2\fR \- faster/larger data structures" 4 -.IX Item "2 - faster/larger data structures" -Replaces the small 2\-heap for timer management by a faster 4\-heap, larger -hash table sizes and so on. This will usually further increase code size -and can additionally have an effect on the size of data structures at -runtime. -.Sp -The default is off when \f(CW\*(C`_\|_OPTIMIZE_SIZE_\|_\*(C'\fR is defined by your compiler -(e.g. gcc with \f(CW\*(C`\-Os\*(C'\fR). 
-.ie n .IP "4 \- full \s-1API\s0 configuration" 4 -.el .IP "\f(CW4\fR \- full \s-1API\s0 configuration" 4 -.IX Item "4 - full API configuration" -This enables priorities (sets \f(CW\*(C`EV_MAXPRI\*(C'\fR=2 and \f(CW\*(C`EV_MINPRI\*(C'\fR=\-2), and -enables multiplicity (\f(CW\*(C`EV_MULTIPLICITY\*(C'\fR=1). -.ie n .IP "8 \- full \s-1API\s0" 4 -.el .IP "\f(CW8\fR \- full \s-1API\s0" 4 -.IX Item "8 - full API" -This enables a lot of the \*(L"lesser used\*(R" \s-1API\s0 functions. See \f(CW\*(C`ev.h\*(C'\fR for -details on which parts of the \s-1API\s0 are still available without this -feature, and do not complain if this subset changes over time. -.ie n .IP "16 \- enable all optional watcher types" 4 -.el .IP "\f(CW16\fR \- enable all optional watcher types" 4 -.IX Item "16 - enable all optional watcher types" -Enables all optional watcher types. If you want to selectively enable -only some watcher types other than I/O and timers (e.g. prepare, -embed, async, child...) you can enable them manually by defining -\&\f(CW\*(C`EV_watchertype_ENABLE\*(C'\fR to \f(CW1\fR instead. -.ie n .IP "32 \- enable all backends" 4 -.el .IP "\f(CW32\fR \- enable all backends" 4 -.IX Item "32 - enable all backends" -This enables all backends \- without this feature, you need to enable at -least one backend manually (\f(CW\*(C`EV_USE_SELECT\*(C'\fR is a good choice). -.ie n .IP "64 \- enable OS-specific ""helper"" APIs" 4 -.el .IP "\f(CW64\fR \- enable OS-specific ``helper'' APIs" 4 -.IX Item "64 - enable OS-specific helper APIs" -Enable inotify, eventfd, signalfd and similar OS-specific helper APIs by -default. -.RE -.RS 4 -.Sp -Compiling with \f(CW\*(C`gcc \-Os \-DEV_STANDALONE \-DEV_USE_EPOLL=1 \-DEV_FEATURES=0\*(C'\fR -reduces the compiled size of libev from 24.7Kb code/2.8Kb data to 6.5Kb -code/0.3Kb data on my GNU/Linux amd64 system, while still giving you I/O -watchers, timers and monotonic clock support. 
-.Sp -With an intelligent-enough linker (gcc+binutils are intelligent enough -when you use \f(CW\*(C`\-Wl,\-\-gc\-sections \-ffunction\-sections\*(C'\fR) functions unused by -your program might be left out as well \- a binary starting a timer and an -I/O watcher then might come out at only 5Kb. -.RE -.IP "\s-1EV_API_STATIC\s0" 4 -.IX Item "EV_API_STATIC" -If this symbol is defined (by default it is not), then all identifiers -will have static linkage. This means that libev will not export any -identifiers, and you cannot link against libev anymore. This can be useful -when you embed libev, only want to use libev functions in a single file, -and do not want its identifiers to be visible. -.Sp -To use this, define \f(CW\*(C`EV_API_STATIC\*(C'\fR and include \fIev.c\fR in the file that -wants to use libev. -.Sp -This option only works when libev is compiled with a C compiler, as \*(C+ -doesn't support the required declaration syntax. -.IP "\s-1EV_AVOID_STDIO\s0" 4 -.IX Item "EV_AVOID_STDIO" -If this is set to \f(CW1\fR at compiletime, then libev will avoid using stdio -functions (printf, scanf, perror etc.). This will increase the code size -somewhat, but if your program doesn't otherwise depend on stdio and your -libc allows it, this avoids linking in the stdio library which is quite -big. -.Sp -Note that error messages might become less precise when this option is -enabled. -.IP "\s-1EV_NSIG\s0" 4 -.IX Item "EV_NSIG" -The highest supported signal number, +1 (or, the number of -signals): Normally, libev tries to deduce the maximum number of signals -automatically, but sometimes this fails, in which case it can be -specified. Also, using a lower number than detected (\f(CW32\fR should be -good for about any system in existence) can save some memory, as libev -statically allocates some 12\-24 bytes per signal number. -.IP "\s-1EV_PID_HASHSIZE\s0" 4 -.IX Item "EV_PID_HASHSIZE" -\&\f(CW\*(C`ev_child\*(C'\fR watchers use a small hash table to distribute workload by -pid. 
The default size is \f(CW16\fR (or \f(CW1\fR with \f(CW\*(C`EV_FEATURES\*(C'\fR disabled), -usually more than enough. If you need to manage thousands of children you -might want to increase this value (\fImust\fR be a power of two). -.IP "\s-1EV_INOTIFY_HASHSIZE\s0" 4 -.IX Item "EV_INOTIFY_HASHSIZE" -\&\f(CW\*(C`ev_stat\*(C'\fR watchers use a small hash table to distribute workload by -inotify watch id. The default size is \f(CW16\fR (or \f(CW1\fR with \f(CW\*(C`EV_FEATURES\*(C'\fR -disabled), usually more than enough. If you need to manage thousands of -\&\f(CW\*(C`ev_stat\*(C'\fR watchers you might want to increase this value (\fImust\fR be a -power of two). -.IP "\s-1EV_USE_4HEAP\s0" 4 -.IX Item "EV_USE_4HEAP" -Heaps are not very cache-efficient. To improve the cache-efficiency of the -timer and periodics heaps, libev uses a 4\-heap when this symbol is defined -to \f(CW1\fR. The 4\-heap uses more complicated (longer) code but has noticeably -faster performance with many (thousands) of watchers. -.Sp -The default is \f(CW1\fR, unless \f(CW\*(C`EV_FEATURES\*(C'\fR overrides it, in which case it -will be \f(CW0\fR. -.IP "\s-1EV_HEAP_CACHE_AT\s0" 4 -.IX Item "EV_HEAP_CACHE_AT" -Heaps are not very cache-efficient. To improve the cache-efficiency of the -timer and periodics heaps, libev can cache the timestamp (\fIat\fR) within -the heap structure (selected by defining \f(CW\*(C`EV_HEAP_CACHE_AT\*(C'\fR to \f(CW1\fR), -which uses 8\-12 bytes more per watcher and a few hundred bytes more code, -but avoids random read accesses on heap changes. This improves performance -noticeably with many (hundreds) of watchers. -.Sp -The default is \f(CW1\fR, unless \f(CW\*(C`EV_FEATURES\*(C'\fR overrides it, in which case it -will be \f(CW0\fR. -.IP "\s-1EV_VERIFY\s0" 4 -.IX Item "EV_VERIFY" -Controls how much internal verification (see \f(CW\*(C`ev_verify ()\*(C'\fR) will -be done: If set to \f(CW0\fR, no internal verification code will be compiled -in. 
If set to \f(CW1\fR, then verification code will be compiled in, but not -called. If set to \f(CW2\fR, then the internal verification code will be -called once per loop, which can slow down libev. If set to \f(CW3\fR, then the -verification code will be called very frequently, which will slow down -libev considerably. -.Sp -The default is \f(CW1\fR, unless \f(CW\*(C`EV_FEATURES\*(C'\fR overrides it, in which case it -will be \f(CW0\fR. -.IP "\s-1EV_COMMON\s0" 4 -.IX Item "EV_COMMON" -By default, all watchers have a \f(CW\*(C`void *data\*(C'\fR member. By redefining -this macro to something else you can include more and other types of -members. You have to define it each time you include one of the files, -though, and it must be identical each time. -.Sp -For example, the perl \s-1EV\s0 module uses something like this: -.Sp -.Vb 3 -\& #define EV_COMMON \e -\& SV *self; /* contains this struct */ \e -\& SV *cb_sv, *fh /* note no trailing ";" */ -.Ve -.IP "\s-1EV_CB_DECLARE\s0 (type)" 4 -.IX Item "EV_CB_DECLARE (type)" -.PD 0 -.IP "\s-1EV_CB_INVOKE\s0 (watcher, revents)" 4 -.IX Item "EV_CB_INVOKE (watcher, revents)" -.IP "ev_set_cb (ev, cb)" 4 -.IX Item "ev_set_cb (ev, cb)" -.PD -Can be used to change the callback member declaration in each watcher, -and the way callbacks are invoked and set. Must expand to a struct member -definition and a statement, respectively. See the \fIev.h\fR header file for -their default definitions. One possible use for overriding these is to -avoid the \f(CW\*(C`struct ev_loop *\*(C'\fR as first argument in all cases, or to use -method calls instead of plain function calls in \*(C+. -.SS "\s-1EXPORTED\s0 \s-1API\s0 \s-1SYMBOLS\s0" -.IX Subsection "EXPORTED API SYMBOLS" -If you need to re-export the \s-1API\s0 (e.g. 
via a \s-1DLL\s0) and you need a list of -exported symbols, you can use the provided \fISymbols.*\fR files which list -all public symbols, one per line: -.PP -.Vb 2 -\& Symbols.ev for libev proper -\& Symbols.event for the libevent emulation -.Ve -.PP -This can also be used to rename all public symbols to avoid clashes with -multiple versions of libev linked together (which is obviously bad in -itself, but sometimes it is inconvenient to avoid this). -.PP -A sed command like this will create wrapper \f(CW\*(C`#define\*(C'\fR's that you need to -include before including \fIev.h\fR: -.PP -.Vb 1 -\& <Symbols.ev sed \-e "s/.*/#define & myprefix_&/" >wrap.h -.Ve -.PP -This would create a file \fIwrap.h\fR which essentially looks like this: -.PP -.Vb 4 -\& #define ev_backend myprefix_ev_backend -\& #define ev_check_start myprefix_ev_check_start -\& #define ev_check_stop myprefix_ev_check_stop -\& ... -.Ve -.SS "\s-1EXAMPLES\s0" -.IX Subsection "EXAMPLES" -For a real-world example of a program that includes libev -verbatim, you can have a look at the \s-1EV\s0 perl module -(<http://software.schmorp.de/pkg/EV.html>). It has the libev files in -the \fIlibev/\fR subdirectory and includes them in the \fI\s-1EV/EVAPI\s0.h\fR (public -interface) and \fI\s-1EV\s0.xs\fR (implementation) files. Only the \fI\s-1EV\s0.xs\fR file -will be compiled. It is pretty complex because it provides its own header -file. -.PP -The usage in rxvt-unicode is simpler.
It has a \fIev_cpp.h\fR header file -that everybody includes and which overrides some configure choices: -.PP -.Vb 8 -\& #define EV_FEATURES 8 -\& #define EV_USE_SELECT 1 -\& #define EV_PREPARE_ENABLE 1 -\& #define EV_IDLE_ENABLE 1 -\& #define EV_SIGNAL_ENABLE 1 -\& #define EV_CHILD_ENABLE 1 -\& #define EV_USE_STDEXCEPT 0 -\& #define EV_CONFIG_H <config.h> -\& -\& #include "ev++.h" -.Ve -.PP -And a \fIev_cpp.C\fR implementation file that contains libev proper and is compiled: -.PP -.Vb 2 -\& #include "ev_cpp.h" -\& #include "ev.c" -.Ve -.SH "INTERACTION WITH OTHER PROGRAMS, LIBRARIES OR THE ENVIRONMENT" -.IX Header "INTERACTION WITH OTHER PROGRAMS, LIBRARIES OR THE ENVIRONMENT" -.SS "\s-1THREADS\s0 \s-1AND\s0 \s-1COROUTINES\s0" -.IX Subsection "THREADS AND COROUTINES" -\fI\s-1THREADS\s0\fR -.IX Subsection "THREADS" -.PP -All libev functions are reentrant and thread-safe unless explicitly -documented otherwise, but libev implements no locking itself. This means -that you can use as many loops as you want in parallel, as long as there -are no concurrent calls into any libev function with the same loop -parameter (\f(CW\*(C`ev_default_*\*(C'\fR calls have an implicit default loop parameter, -of course): libev guarantees that different event loops share no data -structures that need any locking. -.PP -Or to put it differently: calls with different loop parameters can be done -concurrently from multiple threads, calls with the same loop parameter -must be done serially (but can be done from different threads, as long as -only one thread ever is inside a call at any point in time, e.g. by using -a mutex per loop). -.PP -Specifically to support threads (and signal handlers), libev implements -so-called \f(CW\*(C`ev_async\*(C'\fR watchers, which allow some limited form of -concurrency on the same event loop, namely waking it up \*(L"from the -outside\*(R".
-.PP -If you want to know which design (one loop, locking, or multiple loops -without or something else still) is best for your problem, then I cannot -help you, but here is some generic advice: -.IP "\(bu" 4 -most applications have a main thread: use the default libev loop -in that thread, or create a separate thread running only the default loop. -.Sp -This helps integrating other libraries or software modules that use libev -themselves and don't care/know about threading. -.IP "\(bu" 4 -one loop per thread is usually a good model. -.Sp -Doing this is almost never wrong, sometimes a better-performance model -exists, but it is always a good start. -.IP "\(bu" 4 -other models exist, such as the leader/follower pattern, where one -loop is handed through multiple threads in a kind of round-robin fashion. -.Sp -Choosing a model is hard \- look around, learn, know that usually you can do -better than you currently do :\-) -.IP "\(bu" 4 -often you need to talk to some other thread which blocks in the -event loop. -.Sp -\&\f(CW\*(C`ev_async\*(C'\fR watchers can be used to wake them up from other threads safely -(or from signal contexts...). -.Sp -An example use would be to communicate signals or other events that only -work in the default loop by registering the signal watcher with the -default loop and triggering an \f(CW\*(C`ev_async\*(C'\fR watcher from the default loop -watcher callback into the event loop interested in the signal. -.PP -See also \*(L"\s-1THREAD\s0 \s-1LOCKING\s0 \s-1EXAMPLE\s0\*(R". -.PP -\fI\s-1COROUTINES\s0\fR -.IX Subsection "COROUTINES" -.PP -Libev is very accommodating to coroutines (\*(L"cooperative threads\*(R"): -libev fully supports nesting calls to its functions from different -coroutines (e.g. you can call \f(CW\*(C`ev_run\*(C'\fR on the same loop from two -different coroutines, and switch freely between both coroutines running -the loop, as long as you don't confuse yourself). 
The only exception is -that you must not do this from \f(CW\*(C`ev_periodic\*(C'\fR reschedule callbacks. -.PP -Care has been taken to ensure that libev does not keep local state inside -\&\f(CW\*(C`ev_run\*(C'\fR, and other calls do not usually allow for coroutine switches as -they do not call any callbacks. -.SS "\s-1COMPILER\s0 \s-1WARNINGS\s0" -.IX Subsection "COMPILER WARNINGS" -Depending on your compiler and compiler settings, you might get no or a -lot of warnings when compiling libev code. Some people are apparently -scared by this. -.PP -However, these are unavoidable for many reasons. For one, each compiler -has different warnings, and each user has different tastes regarding -warning options. \*(L"Warn-free\*(R" code therefore cannot be a goal except when -targeting a specific compiler and compiler-version. -.PP -Another reason is that some compiler warnings require elaborate -workarounds, or other changes to the code that make it less clear and less -maintainable. -.PP -And of course, some compiler warnings are just plain stupid, or simply -wrong (because they don't actually warn about the condition their message -seems to warn about). For example, certain older gcc versions had some -warnings that resulted in an extreme number of false positives. These have -been fixed, but some people still insist on making code warn-free with -such buggy versions. -.PP -While libev is written to generate as few warnings as possible, -\&\*(L"warn-free\*(R" code is not a goal, and it is recommended not to build libev -with any compiler warnings enabled unless you are prepared to cope with -them (e.g. by ignoring them). Remember that warnings are just that: -warnings, not errors, or proof of bugs. -.SS "\s-1VALGRIND\s0" -.IX Subsection "VALGRIND" -Valgrind has a special section here because it is a popular tool that is -highly useful. Unfortunately, valgrind reports are very hard to interpret. 
-.PP -If you think you found a bug (memory leak, uninitialised data access etc.) -in libev, then check twice: If valgrind reports something like: -.PP -.Vb 3 -\& ==2274== definitely lost: 0 bytes in 0 blocks. -\& ==2274== possibly lost: 0 bytes in 0 blocks. -\& ==2274== still reachable: 256 bytes in 1 blocks. -.Ve -.PP -Then there is no memory leak, just as memory accounted to global variables -is not a memleak \- the memory is still being referenced, and didn't leak. -.PP -Similarly, under some circumstances, valgrind might report kernel bugs -as if it were a bug in libev (e.g. in realloc or in the poll backend, -although an acceptable workaround has been found here), or it might be -confused. -.PP -Keep in mind that valgrind is a very good tool, but only a tool. Don't -make it into some kind of religion. -.PP -If you are unsure about something, feel free to contact the mailing list -with the full valgrind report and an explanation on why you think this -is a bug in libev (best check the archives, too :). However, don't be -annoyed when you get a brisk \*(L"this is no bug\*(R" answer and take the chance -of learning how to interpret valgrind properly. -.PP -If you need, for some reason, empty reports from valgrind for your project -I suggest using suppression lists. -.SH "PORTABILITY NOTES" -.IX Header "PORTABILITY NOTES" -.SS "\s-1GNU/LINUX\s0 32 \s-1BIT\s0 \s-1LIMITATIONS\s0" -.IX Subsection "GNU/LINUX 32 BIT LIMITATIONS" -GNU/Linux is the only common platform that supports 64 bit file/large file -interfaces but \fIdisables\fR them by default. -.PP -That means that libev compiled in the default environment doesn't support -files larger than 2GiB or so, which mainly affects \f(CW\*(C`ev_stat\*(C'\fR watchers. -.PP -Unfortunately, many programs try to work around this GNU/Linux issue -by enabling the large file \s-1API\s0, which makes them incompatible with the -standard libev compiled for their system. 
-.PP -Likewise, libev cannot enable the large file \s-1API\s0 itself as this would -suddenly make it incompatible to the default compile time environment, -i.e. all programs not using special compile switches. -.SS "\s-1OS/X\s0 \s-1AND\s0 \s-1DARWIN\s0 \s-1BUGS\s0" -.IX Subsection "OS/X AND DARWIN BUGS" -The whole thing is a bug if you ask me \- basically any system interface -you touch is broken, whether it is locales, poll, kqueue or even the -OpenGL drivers. -.PP -\fI\f(CI\*(C`kqueue\*(C'\fI is buggy\fR -.IX Subsection "kqueue is buggy" -.PP -The kqueue syscall is broken in all known versions \- most versions support -only sockets, many support pipes. -.PP -Libev tries to work around this by not using \f(CW\*(C`kqueue\*(C'\fR by default on this -rotten platform, but of course you can still ask for it when creating a -loop \- embedding a socket-only kqueue loop into a select-based one is -probably going to work well. -.PP -\fI\f(CI\*(C`poll\*(C'\fI is buggy\fR -.IX Subsection "poll is buggy" -.PP -Instead of fixing \f(CW\*(C`kqueue\*(C'\fR, Apple replaced their (working) \f(CW\*(C`poll\*(C'\fR -implementation by something calling \f(CW\*(C`kqueue\*(C'\fR internally around the 10.5.6 -release, so now \f(CW\*(C`kqueue\*(C'\fR \fIand\fR \f(CW\*(C`poll\*(C'\fR are broken. -.PP -Libev tries to work around this by not using \f(CW\*(C`poll\*(C'\fR by default on -this rotten platform, but of course you can still ask for it when creating -a loop. -.PP -\fI\f(CI\*(C`select\*(C'\fI is buggy\fR -.IX Subsection "select is buggy" -.PP -All that's left is \f(CW\*(C`select\*(C'\fR, and of course Apple found a way to fuck this -one up as well: On \s-1OS/X\s0, \f(CW\*(C`select\*(C'\fR actively limits the number of file -descriptors you can pass in to 1024 \- your program suddenly crashes when -you use more. 
-.PP -There is an undocumented \*(L"workaround\*(R" for this \- defining -\&\f(CW\*(C`_DARWIN_UNLIMITED_SELECT\*(C'\fR, which libev tries to use, so select \fIshould\fR -work on \s-1OS/X\s0. -.SS "\s-1SOLARIS\s0 \s-1PROBLEMS\s0 \s-1AND\s0 \s-1WORKAROUNDS\s0" -.IX Subsection "SOLARIS PROBLEMS AND WORKAROUNDS" -\fI\f(CI\*(C`errno\*(C'\fI reentrancy\fR -.IX Subsection "errno reentrancy" -.PP -The default compile environment on Solaris is unfortunately so -thread-unsafe that you can't even use components/libraries compiled -without \f(CW\*(C`\-D_REENTRANT\*(C'\fR in a threaded program, which, of course, isn't -defined by default. A valid, if stupid, implementation choice. -.PP -If you want to use libev in threaded environments you have to make sure -it's compiled with \f(CW\*(C`_REENTRANT\*(C'\fR defined. -.PP -\fIEvent port backend\fR -.IX Subsection "Event port backend" -.PP -The scalable event interface for Solaris is called \*(L"event -ports\*(R". Unfortunately, this mechanism is very buggy in all major -releases. If you run into high \s-1CPU\s0 usage, your program freezes or you get -a large number of spurious wakeups, make sure you have all the relevant -and latest kernel patches applied. No, I don't know which ones, but there -are multiple ones to apply, and afterwards, event ports actually work -great. -.PP -If you can't get it to work, you can try running the program by setting -the environment variable \f(CW\*(C`LIBEV_FLAGS=3\*(C'\fR to only allow \f(CW\*(C`poll\*(C'\fR and -\&\f(CW\*(C`select\*(C'\fR backends. -.SS "\s-1AIX\s0 \s-1POLL\s0 \s-1BUG\s0" -.IX Subsection "AIX POLL BUG" -\&\s-1AIX\s0 unfortunately has a broken \f(CW\*(C`poll.h\*(C'\fR header. Libev works around -this by trying to avoid the poll backend altogether (i.e. it's not even -compiled in), which normally isn't a big problem as \f(CW\*(C`select\*(C'\fR works fine -with large bitsets on \s-1AIX\s0, and \s-1AIX\s0 is dead anyway. 
-.SS "\s-1WIN32\s0 \s-1PLATFORM\s0 \s-1LIMITATIONS\s0 \s-1AND\s0 \s-1WORKAROUNDS\s0" -.IX Subsection "WIN32 PLATFORM LIMITATIONS AND WORKAROUNDS" -\fIGeneral issues\fR -.IX Subsection "General issues" -.PP -Win32 doesn't support any of the standards (e.g. \s-1POSIX\s0) that libev -requires, and its I/O model is fundamentally incompatible with the \s-1POSIX\s0 -model. Libev still offers limited functionality on this platform in -the form of the \f(CW\*(C`EVBACKEND_SELECT\*(C'\fR backend, and only supports socket -descriptors. This only applies when using Win32 natively, not when using -e.g. cygwin. Actually, it only applies to the microsofts own compilers, -as every compiler comes with a slightly differently broken/incompatible -environment. -.PP -Lifting these limitations would basically require the full -re-implementation of the I/O system. If you are into this kind of thing, -then note that glib does exactly that for you in a very portable way (note -also that glib is the slowest event library known to man). -.PP -There is no supported compilation method available on windows except -embedding it into other applications. -.PP -Sensible signal handling is officially unsupported by Microsoft \- libev -tries its best, but under most conditions, signals will simply not work. -.PP -Not a libev limitation but worth mentioning: windows apparently doesn't -accept large writes: instead of resulting in a partial write, windows will -either accept everything or return \f(CW\*(C`ENOBUFS\*(C'\fR if the buffer is too large, -so make sure you only write small amounts into your sockets (less than a -megabyte seems safe, but this apparently depends on the amount of memory -available). -.PP -Due to the many, low, and arbitrary limits on the win32 platform and -the abysmal performance of winsockets, using a large number of sockets -is not recommended (and not reasonable). 
If your program needs to use -more than a hundred or so sockets, then likely it needs to use a totally -different implementation for windows, as libev offers the \s-1POSIX\s0 readiness -notification model, which cannot be implemented efficiently on windows -(due to Microsoft monopoly games). -.PP -A typical way to use libev under windows is to embed it (see the embedding -section for details) and use the following \fIevwrap.h\fR header file instead -of \fIev.h\fR: -.PP -.Vb 2 -\& #define EV_STANDALONE /* keeps ev from requiring config.h */ -\& #define EV_SELECT_IS_WINSOCKET 1 /* configure libev for windows select */ -\& -\& #include "ev.h" -.Ve -.PP -And compile the following \fIevwrap.c\fR file into your project (make sure -you do \fInot\fR compile the \fIev.c\fR or any other embedded source files!): -.PP -.Vb 2 -\& #include "evwrap.h" -\& #include "ev.c" -.Ve -.PP -\fIThe winsocket \f(CI\*(C`select\*(C'\fI function\fR -.IX Subsection "The winsocket select function" -.PP -The winsocket \f(CW\*(C`select\*(C'\fR function doesn't follow \s-1POSIX\s0 in that it -requires socket \fIhandles\fR and not socket \fIfile descriptors\fR (it is -also extremely buggy). This makes select very inefficient, and also -requires a mapping from file descriptors to socket handles (the Microsoft -C runtime provides the function \f(CW\*(C`_open_osfhandle\*(C'\fR for this). See the -discussion of the \f(CW\*(C`EV_SELECT_USE_FD_SET\*(C'\fR, \f(CW\*(C`EV_SELECT_IS_WINSOCKET\*(C'\fR and -\&\f(CW\*(C`EV_FD_TO_WIN32_HANDLE\*(C'\fR preprocessor symbols for more info. -.PP -The configuration for a \*(L"naked\*(R" win32 using the Microsoft runtime -libraries and raw winsocket select is: -.PP -.Vb 2 -\& #define EV_USE_SELECT 1 -\& #define EV_SELECT_IS_WINSOCKET 1 /* forces EV_SELECT_USE_FD_SET, too */ -.Ve -.PP -Note that winsockets handling of fd sets is O(n), so you can easily get a -complexity in the O(n^2) range when using win32.
-.PP -\fILimited number of file descriptors\fR -.IX Subsection "Limited number of file descriptors" -.PP -Windows has numerous arbitrary (and low) limits on things. -.PP -Early versions of winsocket's select only supported waiting for a maximum -of \f(CW64\fR handles (probably owing to the fact that all windows kernels -can only wait for \f(CW64\fR things at the same time internally; Microsoft -recommends spawning a chain of threads and wait for 63 handles and the -previous thread in each. Sounds great!). -.PP -Newer versions support more handles, but you need to define \f(CW\*(C`FD_SETSIZE\*(C'\fR -to some high number (e.g. \f(CW2048\fR) before compiling the winsocket select -call (which might be in libev or elsewhere, for example, perl and many -other interpreters do their own select emulation on windows). -.PP -Another limit is the number of file descriptors in the Microsoft runtime -libraries, which by default is \f(CW64\fR (there must be a hidden \fI64\fR -fetish or something like this inside Microsoft). You can increase this -by calling \f(CW\*(C`_setmaxstdio\*(C'\fR, which can increase this limit to \f(CW2048\fR -(another arbitrary limit), but is broken in many versions of the Microsoft -runtime libraries. This might get you to about \f(CW512\fR or \f(CW2048\fR sockets -(depending on windows version and/or the phase of the moon). To get more, -you need to wrap all I/O functions and provide your own fd management, but -the cost of calling select (O(n^2)) will likely make this unworkable. -.SS "\s-1PORTABILITY\s0 \s-1REQUIREMENTS\s0" -.IX Subsection "PORTABILITY REQUIREMENTS" -In addition to a working ISO-C implementation and of course the -backend-specific APIs, libev relies on a few additional extensions: -.ie n .IP """void (*)(ev_watcher_type *, int revents)"" must have compatible calling conventions regardless of ""ev_watcher_type *""."
4 -.el .IP "\f(CWvoid (*)(ev_watcher_type *, int revents)\fR must have compatible calling conventions regardless of \f(CWev_watcher_type *\fR." 4 -.IX Item "void (*)(ev_watcher_type *, int revents) must have compatible calling conventions regardless of ev_watcher_type *." -Libev assumes not only that all watcher pointers have the same internal -structure (guaranteed by \s-1POSIX\s0 but not by \s-1ISO\s0 C for example), but it also -assumes that the same (machine) code can be used to call any watcher -callback: The watcher callbacks have different type signatures, but libev -calls them using an \f(CW\*(C`ev_watcher *\*(C'\fR internally. -.IP "pointer accesses must be thread-atomic" 4 -.IX Item "pointer accesses must be thread-atomic" -Accessing a pointer value must be atomic, it must both be readable and -writable in one piece \- this is the case on all current architectures. -.ie n .IP """sig_atomic_t volatile"" must be thread-atomic as well" 4 -.el .IP "\f(CWsig_atomic_t volatile\fR must be thread-atomic as well" 4 -.IX Item "sig_atomic_t volatile must be thread-atomic as well" -The type \f(CW\*(C`sig_atomic_t volatile\*(C'\fR (or whatever is defined as -\&\f(CW\*(C`EV_ATOMIC_T\*(C'\fR) must be atomic with respect to accesses from different -threads. This is not part of the specification for \f(CW\*(C`sig_atomic_t\*(C'\fR, but is -believed to be sufficiently portable. -.ie n .IP """sigprocmask"" must work in a threaded environment" 4 -.el .IP "\f(CWsigprocmask\fR must work in a threaded environment" 4 -.IX Item "sigprocmask must work in a threaded environment" -Libev uses \f(CW\*(C`sigprocmask\*(C'\fR to temporarily block signals. This is not -allowed in a threaded program (\f(CW\*(C`pthread_sigmask\*(C'\fR has to be used). Typical -pthread implementations will either allow \f(CW\*(C`sigprocmask\*(C'\fR in the \*(L"main -thread\*(R" or will block signals process-wide, both behaviours would -be compatible with libev. 
Interaction between \f(CW\*(C`sigprocmask\*(C'\fR and -\&\f(CW\*(C`pthread_sigmask\*(C'\fR could complicate things, however. -.Sp -The most portable way to handle signals is to block signals in all threads -except the initial one, and run the signal handling loop in the initial -thread as well. -.ie n .IP """long"" must be large enough for common memory allocation sizes" 4 -.el .IP "\f(CWlong\fR must be large enough for common memory allocation sizes" 4 -.IX Item "long must be large enough for common memory allocation sizes" -To improve portability and simplify its \s-1API\s0, libev uses \f(CW\*(C`long\*(C'\fR internally -instead of \f(CW\*(C`size_t\*(C'\fR when allocating its data structures. On non-POSIX -systems (Microsoft...) this might be unexpectedly low, but is still at -least 31 bits everywhere, which is enough for hundreds of millions of -watchers. -.ie n .IP """double"" must hold a time value in seconds with enough accuracy" 4 -.el .IP "\f(CWdouble\fR must hold a time value in seconds with enough accuracy" 4 -.IX Item "double must hold a time value in seconds with enough accuracy" -The type \f(CW\*(C`double\*(C'\fR is used to represent timestamps. It is required to -have at least 51 bits of mantissa (and 9 bits of exponent), which is -good enough for at least into the year 4000 with millisecond accuracy -(the design goal for libev). This requirement is overfulfilled by -implementations using \s-1IEEE\s0 754, which is basically all existing ones. -.Sp -With \s-1IEEE\s0 754 doubles, you get microsecond accuracy until at least the -year 2255 (and millisecond accuracy till the year 287396 \- by then, libev -is either obsolete or somebody patched it to use \f(CW\*(C`long double\*(C'\fR or -something like that, just kidding). -.PP -If you know of other additional requirements drop me a note. -.SH "ALGORITHMIC COMPLEXITIES" -.IX Header "ALGORITHMIC COMPLEXITIES" -In this section the complexities of (many of) the algorithms used inside -libev will be documented. 
For complexity discussions about backends see -the documentation for \f(CW\*(C`ev_default_init\*(C'\fR. -.PP -All of the following are about amortised time: If an array needs to be -extended, libev needs to realloc and move the whole array, but this -happens asymptotically rarer with higher number of elements, so O(1) might -mean that libev does a lengthy realloc operation in rare cases, but on -average it is much faster and asymptotically approaches constant time. -.IP "Starting and stopping timer/periodic watchers: O(log skipped_other_timers)" 4 -.IX Item "Starting and stopping timer/periodic watchers: O(log skipped_other_timers)" -This means that, when you have a watcher that triggers in one hour and -there are 100 watchers that would trigger before that, then inserting will -have to skip roughly seven (\f(CW\*(C`ld 100\*(C'\fR) of these watchers. -.IP "Changing timer/periodic watchers (by autorepeat or calling again): O(log skipped_other_timers)" 4 -.IX Item "Changing timer/periodic watchers (by autorepeat or calling again): O(log skipped_other_timers)" -That means that changing a timer costs less than removing/adding them, -as only the relative motion in the event queue has to be paid for. -.IP "Starting io/check/prepare/idle/signal/child/fork/async watchers: O(1)" 4 -.IX Item "Starting io/check/prepare/idle/signal/child/fork/async watchers: O(1)" -These just add the watcher into an array or at the head of a list. -.IP "Stopping check/prepare/idle/fork/async watchers: O(1)" 4 -.IX Item "Stopping check/prepare/idle/fork/async watchers: O(1)" -.PD 0 -.IP "Stopping an io/signal/child watcher: O(number_of_watchers_for_this_(fd/signal/pid % \s-1EV_PID_HASHSIZE\s0))" 4 -.IX Item "Stopping an io/signal/child watcher: O(number_of_watchers_for_this_(fd/signal/pid % EV_PID_HASHSIZE))" -.PD -These watchers are stored in lists, so they need to be walked to find the -correct watcher to remove. 
The lists are usually short (you don't usually -have many watchers waiting for the same fd or signal: one is typical, two -is rare). -.IP "Finding the next timer in each loop iteration: O(1)" 4 -.IX Item "Finding the next timer in each loop iteration: O(1)" -By virtue of using a binary or 4\-heap, the next timer is always found at a -fixed position in the storage array. -.IP "Each change on a file descriptor per loop iteration: O(number_of_watchers_for_this_fd)" 4 -.IX Item "Each change on a file descriptor per loop iteration: O(number_of_watchers_for_this_fd)" -A change means an I/O watcher gets started or stopped, which requires -libev to recalculate its status (and possibly tell the kernel, depending -on backend and whether \f(CW\*(C`ev_io_set\*(C'\fR was used). -.IP "Activating one watcher (putting it into the pending state): O(1)" 4 -.IX Item "Activating one watcher (putting it into the pending state): O(1)" -.PD 0 -.IP "Priority handling: O(number_of_priorities)" 4 -.IX Item "Priority handling: O(number_of_priorities)" -.PD -Priorities are implemented by allocating some space for each -priority. When doing priority-based operations, libev usually has to -linearly search all the priorities, but starting/stopping and activating -watchers becomes O(1) with respect to priority handling. -.IP "Sending an ev_async: O(1)" 4 -.IX Item "Sending an ev_async: O(1)" -.PD 0 -.IP "Processing ev_async_send: O(number_of_async_watchers)" 4 -.IX Item "Processing ev_async_send: O(number_of_async_watchers)" -.IP "Processing signals: O(max_signal_number)" 4 -.IX Item "Processing signals: O(max_signal_number)" -.PD -Sending involves a system call \fIiff\fR there were no other \f(CW\*(C`ev_async_send\*(C'\fR -calls in the current loop iteration and the loop is currently -blocked. Checking for async and signal events involves iterating over all -running async watchers or all signal numbers. 
-.SH "PORTING FROM LIBEV 3.X TO 4.X" -.IX Header "PORTING FROM LIBEV 3.X TO 4.X" -The major version 4 introduced some incompatible changes to the \s-1API\s0. -.PP -At the moment, the \f(CW\*(C`ev.h\*(C'\fR header file provides compatibility definitions -for all changes, so most programs should still compile. The compatibility -layer might be removed in later versions of libev, so better update to the -new \s-1API\s0 early than late. -.ie n .IP """EV_COMPAT3"" backwards compatibility mechanism" 4 -.el .IP "\f(CWEV_COMPAT3\fR backwards compatibility mechanism" 4 -.IX Item "EV_COMPAT3 backwards compatibility mechanism" -The backward compatibility mechanism can be controlled by -\&\f(CW\*(C`EV_COMPAT3\*(C'\fR. See \*(L"\s-1PREPROCESSOR\s0 \s-1SYMBOLS/MACROS\s0\*(R" in the \*(L"\s-1EMBEDDING\s0\*(R" -section. -.ie n .IP """ev_default_destroy"" and ""ev_default_fork"" have been removed" 4 -.el .IP "\f(CWev_default_destroy\fR and \f(CWev_default_fork\fR have been removed" 4 -.IX Item "ev_default_destroy and ev_default_fork have been removed" -These calls can be replaced easily by their \f(CW\*(C`ev_loop_xxx\*(C'\fR counterparts: -.Sp -.Vb 2 -\& ev_loop_destroy (EV_DEFAULT_UC); -\& ev_loop_fork (EV_DEFAULT); -.Ve -.IP "function/symbol renames" 4 -.IX Item "function/symbol renames" -A number of functions and symbols have been renamed: -.Sp -.Vb 3 -\& ev_loop => ev_run -\& EVLOOP_NONBLOCK => EVRUN_NOWAIT -\& EVLOOP_ONESHOT => EVRUN_ONCE -\& -\& ev_unloop => ev_break -\& EVUNLOOP_CANCEL => EVBREAK_CANCEL -\& EVUNLOOP_ONE => EVBREAK_ONE -\& EVUNLOOP_ALL => EVBREAK_ALL -\& -\& EV_TIMEOUT => EV_TIMER -\& -\& ev_loop_count => ev_iteration -\& ev_loop_depth => ev_depth -\& ev_loop_verify => ev_verify -.Ve -.Sp -Most functions working on \f(CW\*(C`struct ev_loop\*(C'\fR objects don't have an -\&\f(CW\*(C`ev_loop_\*(C'\fR prefix, so it was removed; \f(CW\*(C`ev_loop\*(C'\fR, \f(CW\*(C`ev_unloop\*(C'\fR and -associated constants have been renamed to not collide with the 
\f(CW\*(C`struct -ev_loop\*(C'\fR anymore and \f(CW\*(C`EV_TIMER\*(C'\fR now follows the same naming scheme -as all other watcher types. Note that \f(CW\*(C`ev_loop_fork\*(C'\fR is still called -\&\f(CW\*(C`ev_loop_fork\*(C'\fR because it would otherwise clash with the \f(CW\*(C`ev_fork\*(C'\fR -typedef. -.ie n .IP """EV_MINIMAL"" mechanism replaced by ""EV_FEATURES""" 4 -.el .IP "\f(CWEV_MINIMAL\fR mechanism replaced by \f(CWEV_FEATURES\fR" 4 -.IX Item "EV_MINIMAL mechanism replaced by EV_FEATURES" -The preprocessor symbol \f(CW\*(C`EV_MINIMAL\*(C'\fR has been replaced by a different -mechanism, \f(CW\*(C`EV_FEATURES\*(C'\fR. Programs using \f(CW\*(C`EV_MINIMAL\*(C'\fR usually compile -and work, but the library code will of course be larger. -.SH "GLOSSARY" -.IX Header "GLOSSARY" -.IP "active" 4 -.IX Item "active" -A watcher is active as long as it has been started and not yet stopped. -See \*(L"\s-1WATCHER\s0 \s-1STATES\s0\*(R" for details. -.IP "application" 4 -.IX Item "application" -In this document, an application is whatever is using libev. -.IP "backend" 4 -.IX Item "backend" -The part of the code dealing with the operating system interfaces. -.IP "callback" 4 -.IX Item "callback" -The address of a function that is called when some event has been -detected. Callbacks are being passed the event loop, the watcher that -received the event, and the actual event bitset. -.IP "callback/watcher invocation" 4 -.IX Item "callback/watcher invocation" -The act of calling the callback associated with a watcher. -.IP "event" 4 -.IX Item "event" -A change of state of some external event, such as data now being available -for reading on a file descriptor, time having passed or simply not having -any other events happening anymore. -.Sp -In libev, events are represented as single bits (such as \f(CW\*(C`EV_READ\*(C'\fR or -\&\f(CW\*(C`EV_TIMER\*(C'\fR). -.IP "event library" 4 -.IX Item "event library" -A software package implementing an event model and loop. 
-.IP "event loop" 4 -.IX Item "event loop" -An entity that handles and processes external events and converts them -into callback invocations. -.IP "event model" 4 -.IX Item "event model" -The model used to describe how an event loop handles and processes -watchers and events. -.IP "pending" 4 -.IX Item "pending" -A watcher is pending as soon as the corresponding event has been -detected. See \*(L"\s-1WATCHER\s0 \s-1STATES\s0\*(R" for details. -.IP "real time" 4 -.IX Item "real time" -The physical time that is observed. It is apparently strictly monotonic :) -.IP "wall-clock time" 4 -.IX Item "wall-clock time" -The time and date as shown on clocks. Unlike real time, it can actually -be wrong and jump forwards and backwards, e.g. when you adjust your -clock. -.IP "watcher" 4 -.IX Item "watcher" -A data structure that describes interest in certain events. Watchers need -to be started (attached to an event loop) before they can receive events. -.SH "AUTHOR" -.IX Header "AUTHOR" -Marc Lehmann , with repeated corrections by Mikael -Magnusson and Emanuele Giaquinta, and minor corrections by many others. From d1ec0548faf07e82679f421f335a29eeb180748c Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 26 Sep 2014 12:16:24 -0700 Subject: [PATCH 144/148] Add in a little hack to tag mbufs with bridge directionality. This allows users of uinet_if_bridge to know which interface set a packet originated from - the internal or the external interfaces. * interfaces passed to uinet_if_bridge already have directionality tagged. * change uinet_if_bridge to store the bif in the physical interface if_bridge parameter, rather than the bridge softc. * This allows the input path to check the directionality of the interface. * Whilst here, don't forward packets to interfaces that have the same directionality as the receiving interface. * Tag the bridge packet with an mbuf flag indicating the directionality. * Percolate that through to l2info * .. and into libuinet_api.
--- lib/libuinet/api_include/uinet_api_types.h | 2 + lib/libuinet/uinet_if_bridge.c | 52 +++++++++++++--------- sys/net/ethernet.h | 2 + sys/net/if_ethersubr.c | 8 ++++ sys/netinet/in_promisc.h | 2 + 5 files changed, 45 insertions(+), 21 deletions(-) diff --git a/lib/libuinet/api_include/uinet_api_types.h b/lib/libuinet/api_include/uinet_api_types.h index 67645a4..cf3fdc6 100644 --- a/lib/libuinet/api_include/uinet_api_types.h +++ b/lib/libuinet/api_include/uinet_api_types.h @@ -424,6 +424,8 @@ struct uinet_in_l2tagstack { #define UINET_INL2I_TAG_ANY 0x01 +#define UINET_INL2I_TAG_SRCIF_EXT 0x02 +#define UINET_INL2I_TAG_SRCIF_INT 0x04 struct uinet_in_l2info { uint8_t inl2i_local_addr[UINET_IN_L2INFO_ADDR_MAX]; diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index 4720363..4c24737 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -61,10 +61,12 @@ extern int (*bridge_output_p)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); struct if_bridge_member; +struct if_bridge_softc; struct if_bridge_member { LIST_ENTRY(if_bridge_member) bif_next; struct ifnet *ifp; + struct if_bridge_softc *br_softc; int is_inside; int is_outside; }; @@ -90,11 +92,12 @@ static struct mbuf * if_bridge_input(struct ifnet *ifp, struct mbuf *m) { struct if_bridge_softc *sc; - struct if_bridge_member *bif; + struct if_bridge_member *bif, *bif_m; struct ifnet *bifp; struct mbuf *mc2; - sc = ifp->if_bridge; + bif = ifp->if_bridge; + sc = bif->br_softc; bifp = sc->sc_ifp; // printf("%s: m=%p: called\n", __func__, m); @@ -117,6 +120,13 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) * ie, from is_input? send to only is_output. 
*/ + /* Tag the mbuf with the correct direction information */ + if (bif->is_inside) { + m->m_flags |= M_BRIDGEIF_DIR_INT; + } else if (bif->is_outside) { + m->m_flags |= M_BRIDGEIF_DIR_EXT; + } + /* * XXX TODO: don't hold the lock across sending to the two * (or more) ports - it's highly inefficient and effectively @@ -124,37 +134,33 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) * LOCK2REF/etc stuff to do this without holding a lock. */ mtx_lock(&sc->sc_mtx); - LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { - if (bif->ifp == ifp) + LIST_FOREACH(bif_m, &sc->sc_iflist, bif_next) { + /* Don't send traffic back to the same interface */ + if (bif_m->ifp == ifp) + continue; + + /* Don't send traffic to the same interface type */ + if (bif_m->is_inside && bif->is_inside) + continue; + if (bif_m->is_outside && bif->is_outside) continue; + //mc2 = m_copypacket(m, M_DONTWAIT); mc2 = m_dup(m, M_DONTWAIT); /* XXX count failure */ if (mc2 == NULL) continue; /* XXX count failure */ - (void) bif->ifp->if_transmit(bif->ifp, mc2); + (void) bif_m->ifp->if_transmit(bif_m->ifp, mc2); } mtx_unlock(&sc->sc_mtx); - /* We don't do local processing; just punt to the bridge */ - + /* We don't do local processing; just punt to the stack as if_bridge */ m->m_pkthdr.rcvif = bifp; (*bifp->if_input)(bifp, m); - return (NULL); -#if 0 - /* Duplicate; pass up to the stack */ - mc2 = m_copypacket(m, M_DONTWAIT); - /* XXX count failure */ - if (mc2 != NULL) { - mc2->m_pkthdr.rcvif = bifp; - (*bifp->if_input)(bifp, mc2); - } - - /* Return the original packet for local processing. 
*/ - return (m); -#endif + /* We're not passing this up the stack for local processing */ + return (NULL); } /* @@ -284,8 +290,12 @@ if_bridge_addm(struct if_bridge_softc *sc, const char *ifname, int isin) /* Add to list; link back from the ifnet to the parent bridge */ bif->ifp = nifp; + /* And a link back to the bridge softc */ + bif->br_softc = sc; + /* Add to the member list */ LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next); - nifp->if_bridge = sc; + + nifp->if_bridge = bif; mtx_unlock(&sc->sc_mtx); diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h index 9bdc6bd..179d13f 100644 --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -51,6 +51,8 @@ * Ethernet-specific mbuf flags. */ #define M_HASFCS M_PROTO5 /* FCS included at end of frame */ +#define M_BRIDGEIF_DIR_EXT M_PROTO6 /* Bridge traffic; from external interface */ +#define M_BRIDGEIF_DIR_INT M_PROTO7 /* Bridge traffic; from internal interface */ /* * Ethernet CRC32 polynomials (big- and little-endian verions). diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 7f8347f..9b5cef1 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -793,6 +793,14 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) ETHER_ADDR_COPY(l2info->inl2i_foreign_addr, eh->ether_shost); l2ts->inl2t_cnt = 0; + /* + * Handle the bridge mbuf flags to set interface directionality. 
+ */ + if (m->m_flags & M_BRIDGEIF_DIR_INT) + l2info->inl2i_flags |= INL2I_TAG_SRCIF_INT; + else if (m->m_flags & M_BRIDGEIF_DIR_EXT) + l2info->inl2i_flags |= INL2I_TAG_SRCIF_EXT; + /* * If the interface is in IFF_PROMISCINET mode and the hardware * processed an 802.1Q tag, copy it to the l2info mbuf tag and clear diff --git a/sys/netinet/in_promisc.h b/sys/netinet/in_promisc.h index f587b90..d1e3a1e 100644 --- a/sys/netinet/in_promisc.h +++ b/sys/netinet/in_promisc.h @@ -66,6 +66,8 @@ struct in_l2tagstack { /* flags for inl2i_flags */ #define INL2I_TAG_ANY 0x01 +#define INL2I_TAG_SRCIF_EXT 0x02 +#define INL2I_TAG_SRCIF_INT 0x04 struct in_l2info { uint8_t inl2i_local_addr[IN_L2INFO_ADDR_MAX]; From c0ccc142f0d39db07a923b5e2a833ca53b04b64c Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 30 Sep 2014 10:53:47 -0700 Subject: [PATCH 145/148] Name the sysctl thread. --- lib/libuinet/uinet_host_sysctl_api.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/libuinet/uinet_host_sysctl_api.c b/lib/libuinet/uinet_host_sysctl_api.c index b943f1a..7b4939f 100644 --- a/lib/libuinet/uinet_host_sysctl_api.c +++ b/lib/libuinet/uinet_host_sysctl_api.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -471,6 +472,13 @@ uinet_host_sysctl_listener_thread(void *arg) (void) unlink(path); + /* Set thread title */ +#if defined(__FreeBSD__) + pthread_set_name_np(pthread_self(), "sysctl thread"); +#elif defined(__linux__) + pthread_setname_np(pthread_self(), "sysctl thread"); +#endif + bzero(&sun, sizeof(sun)); strcpy(sun.sun_path, path); sun.sun_len = 0; From 1ff3d1eda7c02c08304fc2afcae2b944073b0405 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Tue, 30 Sep 2014 11:12:24 -0700 Subject: [PATCH 146/148] Make it build again. 
--- bin/passive/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/passive/Makefile b/bin/passive/Makefile index e1d06ba..7e26e97 100644 --- a/bin/passive/Makefile +++ b/bin/passive/Makefile @@ -11,7 +11,7 @@ UINET_LIBES+=uinetnv endif -CFLAGS+= -I${TOPDIR}/lib/libuinetnv +CFLAGS+= -I${TOPDIR}/lib/libuinetnv -I${TOPDIR}/lib/libev LDADD= -L${UINET_DESTDIR}/lib/ ${TOPDIR}/lib/libev/.libs/libev.a ${TOPDIR}/lib/libuinetnv/libuinetnv.a -lm -lpcap ifndef NO_EXTRACT From 89fc9bbbd50a16619546b75eea6dd08e1e066607 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 1 Oct 2014 09:04:25 -0700 Subject: [PATCH 147/148] The new tag allocator doesn't seem to zero its allocations. Clear the l2info flags before we use them. --- sys/net/if_ethersubr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 9b5cef1..923234b 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -792,6 +792,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) ETHER_ADDR_COPY(l2info->inl2i_local_addr, eh->ether_dhost); ETHER_ADDR_COPY(l2info->inl2i_foreign_addr, eh->ether_shost); l2ts->inl2t_cnt = 0; + l2info->inl2i_flags = 0; /* * Handle the bridge mbuf flags to set interface directionality. From efb8dd4743ab1294f8be9a0ef29074c7a48c4958 Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Wed, 1 Oct 2014 09:04:44 -0700 Subject: [PATCH 148/148] Be defensive - clear direction flags before we set one. --- lib/libuinet/uinet_if_bridge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/libuinet/uinet_if_bridge.c b/lib/libuinet/uinet_if_bridge.c index 4c24737..00c4b6f 100644 --- a/lib/libuinet/uinet_if_bridge.c +++ b/lib/libuinet/uinet_if_bridge.c @@ -121,6 +121,7 @@ if_bridge_input(struct ifnet *ifp, struct mbuf *m) */ /* Tag the mbuf with the correct direction information */ + m->m_flags &= ~(M_BRIDGEIF_DIR_INT | M_BRIDGEIF_DIR_EXT); if (bif->is_inside) { m->m_flags |= M_BRIDGEIF_DIR_INT; } else if (bif->is_outside) {