Vitezslav Samel (3): introduce packet capturing abstraction capt.c: add capturing using recvmmsg() capt.c: add capturing using mmap()ed PACKET_RX_RING memory
Makefile | 15 +++++ src/capt-mmap-v2.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++ src/capt-mmap-v2.h | 6 ++ src/capt-recvmmsg.c | 138 ++++++++++++++++++++++++++++++++++++++++++++ src/capt-recvmmsg.h | 6 ++ src/capt-recvmsg.c | 89 ++++++++++++++++++++++++++++ src/capt-recvmsg.h | 6 ++ src/capt.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++ src/capt.h | 31 ++++++++++ src/capture-pkt.c | 18 +++--- src/detstats.c | 27 ++++----- src/hostmon.c | 26 ++++----- src/ifstats.c | 21 ++++--- src/iptraf-ng-compat.h | 1 + src/itrafmon.c | 23 +++----- src/packet.c | 92 +---------------------------- src/packet.h | 13 +---- src/pktsize.c | 25 ++++---- src/serv.c | 26 ++++----- 19 files changed, 663 insertions(+), 200 deletions(-) create mode 100644 src/capt-mmap-v2.c create mode 100644 src/capt-mmap-v2.h create mode 100644 src/capt-recvmmsg.c create mode 100644 src/capt-recvmmsg.h create mode 100644 src/capt-recvmsg.c create mode 100644 src/capt-recvmsg.h create mode 100644 src/capt.c create mode 100644 src/capt.h
... and use it where appropriate.
Signed-off-by: Vitezslav Samel <vitezslav@samel.cz> --- Makefile | 4 ++ src/capt-recvmsg.c | 89 ++++++++++++++++++++++++++++++++++ src/capt-recvmsg.h | 6 +++ src/capt.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/capt.h | 31 ++++++++++++ src/capture-pkt.c | 18 +++---- src/detstats.c | 27 ++++------- src/hostmon.c | 26 ++++------ src/ifstats.c | 21 ++++---- src/itrafmon.c | 23 +++------ src/packet.c | 92 ++--------------------------------- src/packet.h | 13 +---- src/pktsize.c | 25 ++++------ src/serv.c | 26 ++++------ 14 files changed, 340 insertions(+), 200 deletions(-) create mode 100644 src/capt-recvmsg.c create mode 100644 src/capt-recvmsg.h create mode 100644 src/capt.c create mode 100644 src/capt.h
diff --git a/Makefile b/Makefile index 59b7c6f..64ca213 100644 --- a/Makefile +++ b/Makefile @@ -115,6 +115,8 @@ iptraf-h += src/counters.h iptraf-h += src/rate.h iptraf-h += src/built-in.h iptraf-h += src/sockaddr.h +iptraf-h += src/capt.h +iptraf-h += src/capt-recvmsg.h
iptraf-o += src/tui/input.o iptraf-o += src/tui/labels.o @@ -157,6 +159,8 @@ iptraf-o += src/counters.o iptraf-o += src/rate.o iptraf-o += src/capture-pkt.o iptraf-o += src/sockaddr.o +iptraf-o += src/capt.o +iptraf-o += src/capt-recvmsg.o
rvnamed-o += src/rvnamed.o rvnamed-o += src/getpath.o diff --git a/src/capt-recvmsg.c b/src/capt-recvmsg.c new file mode 100644 index 0000000..a9ad343 --- /dev/null +++ b/src/capt-recvmsg.c @@ -0,0 +1,89 @@ +/* For terms of usage/redistribution/modification see the LICENSE file */ +/* For authors and contributors see the AUTHORS file */ + +#include "iptraf-ng-compat.h" + +#include "packet.h" +#include "capt.h" + +struct capt_data_recvmsg { + char *buf; + + struct iovec iov; + struct msghdr *msg; + struct sockaddr_ll *from; +}; + +static unsigned int capt_have_packet_recvmsg(struct capt *capt __unused) +{ + return 0; +} + +static int capt_get_packet_recvmsg(struct capt *capt, struct pkt_hdr *pkt) +{ + struct capt_data_recvmsg *data = capt->priv; + + /* these are set upon return from recvmsg() so clean */ + /* them beforehand */ + data->msg->msg_controllen = 0; + data->msg->msg_flags = 0; + + ssize_t len = recvmsg(capt->fd, data->msg, MSG_TRUNC | MSG_DONTWAIT); + if (len > 0) { + pkt->pkt_len = len; + pkt->pkt_caplen = len; + if (pkt->pkt_caplen > MAX_PACKET_SIZE) + pkt->pkt_caplen = MAX_PACKET_SIZE; + pkt->pkt_buf = data->buf; + pkt->from = data->from; + pkt->pkt_payload = NULL; + pkt->pkt_protocol = ntohs(pkt->from->sll_protocol); + } + return len; +} + +static void capt_cleanup_recvmsg(struct capt *capt) +{ + struct capt_data_recvmsg *data = capt->priv; + + capt->cleanup = NULL; + capt->put_packet = NULL; + capt->get_packet = NULL; + capt->have_packet = NULL; + + free(data->from); + data->from = NULL; + free(data->msg); + data->msg = NULL; + + free(data->buf); + data->buf = NULL; + free(capt->priv); + capt->priv = NULL; +} + +int capt_setup_recvmsg(struct capt *capt) +{ + struct capt_data_recvmsg *data = xmallocz(sizeof(struct capt_data_recvmsg)); + + data->buf = xmallocz(MAX_PACKET_SIZE); + data->iov.iov_len = MAX_PACKET_SIZE; + data->iov.iov_base = data->buf; + + data->msg = xmallocz(sizeof(*data->msg)); + data->from = xmallocz(sizeof(*data->from)); + + 
data->msg->msg_name = data->from; + data->msg->msg_namelen = sizeof(*data->from); + data->msg->msg_iov = &data->iov; + data->msg->msg_iovlen = 1; + data->msg->msg_control = NULL; + + capt->priv = data; + capt->have_packet = capt_have_packet_recvmsg; + capt->get_packet = capt_get_packet_recvmsg; + capt->put_packet = NULL; + capt->cleanup = capt_cleanup_recvmsg; + + return 0; +} diff --git a/src/capt-recvmsg.h b/src/capt-recvmsg.h new file mode 100644 index 0000000..243761e --- /dev/null +++ b/src/capt-recvmsg.h @@ -0,0 +1,6 @@ +#ifndef IPTRAF_NG_CAPT_RECVMSG_H +#define IPTRAF_NG_CAPT_RECVMSG_H + +int capt_setup_recvmsg(struct capt *capt); + +#endif /* IPTRAF_NG_CAPT_RECVMSG_H */ diff --git a/src/capt.c b/src/capt.c new file mode 100644 index 0000000..9c0d6d4 --- /dev/null +++ b/src/capt.c @@ -0,0 +1,139 @@ +/* For terms of usage/redistribution/modification see the LICENSE file */ +/* For authors and contributors see the AUTHORS file */ + +#include "iptraf-ng-compat.h" + +#include "error.h" +#include "ifaces.h" +#include "packet.h" +#include "capt.h" +#include "capt-recvmsg.h" + +static int capt_set_recv_timeout(int fd, unsigned int msec) +{ + struct timeval timeout; + socklen_t len = sizeof(timeout); + + timeout.tv_sec = msec / 1000; + timeout.tv_usec = (msec % 1000) * 1000; + if(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, len) != 0) + return -1; + else + return 0; +} + +int capt_init(struct capt *capt, char *ifname) +{ + capt->have_packet = NULL; + capt->get_packet = NULL; + capt->put_packet = NULL; + capt->cleanup = NULL; + + capt->dropped = 0UL; + + /* initialize socket */ + int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); + if (fd == -1) + return fd; + capt->fd = fd; + + /* bind socket to interface */ + if (ifname && dev_bind_ifname(capt->fd, ifname) == -1) + goto out; + + /* set socket receive timeout */ + if (capt_set_recv_timeout(capt->fd, 250) == -1) + goto out; + + /* try packet recvmsg() */ + if (capt_setup_recvmsg(capt) == 0) + return 0; 
+out: + close(capt->fd); + capt->fd = -1; + + return -1; +} + +void capt_destroy(struct capt *capt) +{ + if (capt->cleanup) + capt->cleanup(capt); + + close(capt->fd); + capt->fd = -1; +} + +unsigned long capt_get_dropped(struct capt *capt) +{ + struct tpacket_stats stats; + socklen_t len = sizeof(stats); + + memset(&stats, 0, len); + int err = getsockopt(capt->fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len); + if (err < 0) + die_errno("%s(): getsockopt(PACKET_STATISTICS)", __func__); + + capt->dropped += stats.tp_drops; + + return capt->dropped; +} + +int capt_get_packet(struct capt *capt, struct pkt_hdr *pkt, int *ch, WINDOW *win) +{ + struct pollfd pfds[2]; + nfds_t nfds = 0; + int pfd_packet = -1; + int pfd_key = -1; + int ss = 0; + int poll_packet = !capt->have_packet(capt); + int timeout = DEFAULT_UPDATE_DELAY; + + /* Monitor raw socket */ + if (poll_packet) { + pfds[nfds].fd = capt->fd; + pfds[nfds].events = POLLIN; + pfd_packet = nfds; + nfds++; + } + + /* Monitor stdin only if in interactive, not daemon mode. 
*/ + if (ch && !daemonized) { + pfds[nfds].fd = 0; + pfds[nfds].events = POLLIN; + pfd_key = nfds; + nfds++; + if (!poll_packet) + timeout = 0; + } + + if (nfds > 0) + do { + ss = poll(pfds, nfds, timeout); + } while ((ss == -1) && (errno == EINTR)); + + /* no packet ready yet */ + pkt->pkt_len = 0; + + if (!poll_packet || ((pfd_packet != -1) && (ss > 0) && ((pfds[pfd_packet].revents & POLLIN) != 0))) { + int ret = capt->get_packet(capt, pkt); + if (ret <= 0) + ss = ret; + } + + if (ch) { + *ch = ERR; /* signalize we have no key ready */ + if (!daemonized && (((pfd_key != -1) && ((ss > 0) && ((pfds[pfd_key].revents & POLLIN) != 0))))) + *ch = wgetch(win); + } + + return ss; +} + +int capt_put_packet(struct capt *capt, struct pkt_hdr *pkt) +{ + if (capt->put_packet) + capt->put_packet(capt, pkt); + + return 0; +} diff --git a/src/capt.h b/src/capt.h new file mode 100644 index 0000000..cec57ba --- /dev/null +++ b/src/capt.h @@ -0,0 +1,31 @@ +#ifndef IPTRAF_NG_CAPT_H +#define IPTRAF_NG_CAPT_H + +/* + * Number of bytes from captured packet to move into a buffer. + * 96 bytes should be enough for the IP header, TCP/UDP/ICMP/whatever header + * with reasonable numbers of options. 
+ * + * NOTE: keep it aligned to multiple of 16 + */ +#define MAX_PACKET_SIZE 96 + +struct capt { + int fd; + unsigned long dropped; + + void *priv; + + unsigned int (*have_packet)(struct capt *capt); + int (*get_packet)(struct capt *capt, struct pkt_hdr *pkt); + int (*put_packet)(struct capt *capt, struct pkt_hdr *pkt); + void (*cleanup)(struct capt *capt); +}; + +int capt_init(struct capt *capt, char *ifname); +void capt_destroy(struct capt *capt); +unsigned long capt_get_dropped(struct capt *capt); +int capt_get_packet(struct capt *capt, struct pkt_hdr *pkt, int *ch, WINDOW *win); +int capt_put_packet(struct capt *capt, struct pkt_hdr *pkt); + +#endif /* IPTRAF_NG_CAPT_H */ diff --git a/src/capture-pkt.c b/src/capture-pkt.c index 93f0b58..1faccec 100644 --- a/src/capture-pkt.c +++ b/src/capture-pkt.c @@ -7,6 +7,7 @@ #include "parse-options.h" #include "ifaces.h" #include "packet.h" +#include "capt.h"
static const char *const capture_usage[] = { IPTRAF_NAME " capture [-c] <device>", @@ -33,12 +34,9 @@ int cmd_capture(int argc, char **argv)
char *dev = argv[0];
- int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if (fd < 0) - die_errno("Unable to obtain monitoring socket"); - - if (dev_bind_ifname(fd, dev) < 0) - perror("Unable to bind device on the socket"); + struct capt capt; + if (capt_init(&capt, dev) == -1) + die_errno("Unable to initialize packet capture interface");
FILE *fp = NULL; if (ofilename) { @@ -52,7 +50,7 @@ int cmd_capture(int argc, char **argv)
int captured = 0; for (;;) { - if (packet_get(fd, &pkt, NULL, NULL) == -1) + if (capt_get_packet(&capt, &pkt, NULL, NULL) == -1) die_errno("fail to get packet");
if (!pkt.pkt_len) @@ -64,6 +62,8 @@ int cmd_capture(int argc, char **argv) if (fp) fwrite(&pkt, sizeof(pkt), 1, fp);
+ capt_put_packet(&capt, &pkt); + if (++captured == cap_nr_pkt) break; } @@ -71,10 +71,10 @@ int cmd_capture(int argc, char **argv)
packet_destroy(&pkt);
- close(fd); - if (fp) fclose(fp);
+ capt_destroy(&capt); + return 0; } diff --git a/src/detstats.c b/src/detstats.c index fd1b99a..9315055 100644 --- a/src/detstats.c +++ b/src/detstats.c @@ -27,6 +27,7 @@ detstats.c - the interface statistics module #include "error.h" #include "detstats.h" #include "rate.h" +#include "capt.h"
struct ifcounts { struct proto_counter total; @@ -517,12 +518,10 @@ void detstats(char *iface, time_t facilitytime)
int ch;
- int fd; + struct capt capt;
struct pkt_hdr pkt;
- unsigned long dropped = 0UL; - if (!dev_up(iface)) { err_iface_down(); return; @@ -551,15 +550,10 @@ void detstats(char *iface, time_t facilitytime) promisc_set_list(&promisc); }
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if(fd == -1) { - write_error("Unable to obtain monitoring socket"); + if (capt_init(&capt, iface) == -1) { + write_error("Unable to initialize packet capture interface"); goto err; } - if(dev_bind_ifname(fd, iface) == -1) { - write_error("Unable to bind interface on the socket"); - goto err_close; - }
ifcounts_init(&ifcounts); ifrates_init(&ifrates); @@ -623,8 +617,7 @@ void detstats(char *iface, time_t facilitytime) wattrset(statwin, BOXATTR); printelapsedtime(now.tv_sec - starttime, 1, statwin);
- dropped += packet_get_dropped(fd); - print_packet_drops(dropped, statwin, 49); + print_packet_drops(capt_get_dropped(&capt), statwin, 49);
if (now.tv_sec > endtime) exitloop = 1; @@ -647,7 +640,7 @@ void detstats(char *iface, time_t facilitytime) last_update = now; }
- if (packet_get(fd, &pkt, &ch, statwin) == -1) { + if (capt_get_packet(&capt, &pkt, &ch, statwin) == -1) { write_error("Packet receive failed"); exitloop = 1; break; @@ -656,8 +649,10 @@ void detstats(char *iface, time_t facilitytime) if (ch != ERR) detstats_process_key(ch);
- if (pkt.pkt_len > 0) + if (pkt.pkt_len > 0) { detstats_process_packet(&ifcounts, &pkt); + capt_put_packet(&capt, &pkt); + }
} packet_destroy(&pkt); @@ -674,9 +669,7 @@ void detstats(char *iface, time_t facilitytime)
ifrates_destroy(&ifrates); ifcounts_destroy(&ifcounts); - -err_close: - close(fd); + capt_destroy(&capt); err: if (options.promisc) { promisc_restore_list(&promisc); diff --git a/src/hostmon.c b/src/hostmon.c index 8711513..49d8da5 100644 --- a/src/hostmon.c +++ b/src/hostmon.c @@ -28,6 +28,7 @@ Discovers LAN hosts and displays packet statistics for them #include "promisc.h" #include "error.h" #include "rate.h" +#include "capt.h"
#define SCROLLUP 0 #define SCROLLDOWN 1 @@ -905,12 +906,10 @@ void hostmon(time_t facilitytime, char *ifptr)
FILE *logfile = NULL;
- int fd; + struct capt capt;
struct pkt_hdr pkt;
- unsigned long dropped = 0UL; - if (ifptr && !dev_up(ifptr)) { err_iface_down(); return; @@ -924,15 +923,10 @@ void hostmon(time_t facilitytime, char *ifptr) promisc_set_list(&promisc); }
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if(fd == -1) { - write_error("Unable to obtain monitoring socket"); + if (capt_init(&capt, ifptr) == -1) { + write_error("Unable to initialize packet capture interface"); goto err; } - if(ifptr && dev_bind_ifname(fd, ifptr) == -1) { - write_error("Unable to bind interface on the socket"); - goto err_close; - }
if (logging) { if (strcmp(current_logfile, "") == 0) { @@ -986,8 +980,7 @@ void hostmon(time_t facilitytime, char *ifptr)
printelapsedtime(now.tv_sec - starttime, 15, table.borderwin);
- dropped += packet_get_dropped(fd); - print_packet_drops(dropped, table.borderwin, 49); + print_packet_drops(capt_get_dropped(&capt), table.borderwin, 49);
if (logging && (now.tv_sec > log_next)) { check_rotate_flag(&logfile); @@ -1009,7 +1002,7 @@ void hostmon(time_t facilitytime, char *ifptr) last_update = now; }
- if (packet_get(fd, &pkt, &ch, table.tabwin) == -1) { + if (capt_get_packet(&capt, &pkt, &ch, table.tabwin) == -1) { write_error("Packet receive failed"); exitloop = 1; break; @@ -1018,8 +1011,10 @@ void hostmon(time_t facilitytime, char *ifptr) if (ch != ERR) hostmon_process_key(&table, ch);
- if (pkt.pkt_len > 0) + if (pkt.pkt_len > 0) { hostmon_process_packet(&table, &pkt, ifptr); + capt_put_packet(&capt, &pkt); + }
}
@@ -1034,8 +1029,7 @@ void hostmon(time_t facilitytime, char *ifptr) } strcpy(current_logfile, "");
-err_close: - close(fd); + capt_destroy(&capt); err: if (options.promisc) { promisc_restore_list(&promisc); diff --git a/src/ifstats.c b/src/ifstats.c index f095b08..ac0b0ee 100644 --- a/src/ifstats.c +++ b/src/ifstats.c @@ -29,6 +29,7 @@ ifstats.c - the interface statistics module #include "error.h" #include "ifstats.h" #include "rate.h" +#include "capt.h"
#define SCROLLUP 0 #define SCROLLDOWN 1 @@ -500,12 +501,10 @@ void ifstats(time_t facilitytime)
int ch;
- int fd; + struct capt capt;
struct pkt_hdr pkt;
- unsigned long dropped = 0UL; - initiflist(&(table.head)); if (!table.head) { no_ifaces_error(); @@ -520,9 +519,8 @@ void ifstats(time_t facilitytime) promisc_set_list(&promisc); }
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if(fd == -1) { - write_error("Unable to obtain monitoring socket"); + if (capt_init(&capt, NULL) == -1) { + write_error("Unable to initialize packet capture interface"); goto err; }
@@ -584,8 +582,7 @@ void ifstats(time_t facilitytime)
printelapsedtime(now.tv_sec - starttime, 1, table.borderwin);
- dropped += packet_get_dropped(fd); - print_packet_drops(dropped, table.borderwin, 49); + print_packet_drops(capt_get_dropped(&capt), table.borderwin, 49);
if (logging && (now.tv_sec > log_next)) { check_rotate_flag(&logfile); @@ -606,7 +603,7 @@ void ifstats(time_t facilitytime) last_update = now; }
- if (packet_get(fd, &pkt, &ch, table.statwin) == -1) { + if (capt_get_packet(&capt, &pkt, &ch, table.statwin) == -1) { write_error("Packet receive failed"); exitloop = 1; break; @@ -615,8 +612,10 @@ void ifstats(time_t facilitytime) if (ch != ERR) ifstats_process_key(&table, ch);
- if (pkt.pkt_len > 0) + if (pkt.pkt_len > 0) { ifstats_process_packet(&table, &pkt); + capt_put_packet(&capt, &pkt); + }
} packet_destroy(&pkt); @@ -630,7 +629,7 @@ void ifstats(time_t facilitytime) } strcpy(current_logfile, "");
- close(fd); + capt_destroy(&capt); err: if (options.promisc) { promisc_restore_list(&promisc); diff --git a/src/itrafmon.c b/src/itrafmon.c index f19eb61..eb5f54b 100644 --- a/src/itrafmon.c +++ b/src/itrafmon.c @@ -31,6 +31,7 @@ itrafmon.c - the IP traffic monitor module #include "logvars.h" #include "itrafmon.h" #include "sockaddr.h" +#include "capt.h"
#define SCROLLUP 0 #define SCROLLDOWN 1 @@ -848,12 +849,10 @@ void ipmon(time_t facilitytime, char *ifptr)
struct othptable othptbl;
- int fd; + struct capt capt;
struct pkt_hdr pkt;
- unsigned long dropped = 0UL; - int ch;
int rvnfd = 0; @@ -871,15 +870,10 @@ void ipmon(time_t facilitytime, char *ifptr) promisc_set_list(&promisc); }
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if(fd == -1) { - write_error("Unable to obtain monitoring socket"); + if (capt_init(&capt, ifptr) == -1) { + write_error("Unable to initialize packet capture interface"); goto err; } - if(ifptr && dev_bind_ifname(fd, ifptr) == -1) { - write_error("Unable to bind interface on the socket"); - goto err_close; - }
if (revlook) { if (checkrvnamed()) @@ -977,8 +971,7 @@ void ipmon(time_t facilitytime, char *ifptr) /* print timer at bottom of screen */ printelapsedtime(now.tv_sec - starttime, 15, othptbl.borderwin);
- dropped += packet_get_dropped(fd); - print_packet_drops(dropped, othptbl.borderwin, 40); + print_packet_drops(capt_get_dropped(&capt), othptbl.borderwin, 40);
mark_timeouted_entries(&table, logging, logfile);
@@ -1005,7 +998,8 @@ void ipmon(time_t facilitytime, char *ifptr) last_time = now; }
- if (packet_get(fd, &pkt, &ch, table.tcpscreen) == -1) { + capt_put_packet(&capt, &pkt); + if (capt_get_packet(&capt, &pkt, &ch, table.tcpscreen) == -1) { write_error("Packet receive failed"); exitloop = 1; break; @@ -1043,8 +1037,7 @@ void ipmon(time_t facilitytime, char *ifptr) killrvnamed(); close_rvn_socket(rvnfd);
-err_close: - close(fd); + capt_destroy(&capt); err: if (options.promisc) { promisc_restore_list(&promisc); diff --git a/src/packet.c b/src/packet.c index eed02dd..2e4188f 100644 --- a/src/packet.c +++ b/src/packet.c @@ -121,59 +121,6 @@ static void packet_set_l3_hdrp(struct pkt_hdr *pkt) } }
-/* IPTraf input function; reads both keystrokes and network packets. */ -int packet_get(int fd, struct pkt_hdr *pkt, int *ch, WINDOW *win) -{ - struct pollfd pfds[2]; - nfds_t nfds = 0; - int ss; - - /* Monitor raw socket */ - pfds[0].fd = fd; - pfds[0].events = POLLIN; - nfds++; - - /* Monitor stdin only if in interactive, not daemon mode. */ - if (ch && !daemonized) { - pfds[1].fd = 0; - pfds[1].events = POLLIN; - nfds++; - } - do { - ss = poll(pfds, nfds, DEFAULT_UPDATE_DELAY); - } while ((ss == -1) && (errno == EINTR)); - - /* no packet ready yet */ - pkt->pkt_len = 0; - - if ((ss > 0) && (pfds[0].revents & POLLIN) != 0) { - - /* these are set upon return from recvmsg() so clean */ - /* them beforehand */ - pkt->msg->msg_controllen = 0; - pkt->msg->msg_flags = 0; - - ssize_t len = recvmsg(fd, pkt->msg, MSG_TRUNC | MSG_DONTWAIT); - if (len > 0) { - pkt->pkt_len = len; - pkt->pkt_caplen = len; - if (pkt->pkt_caplen > pkt->pkt_bufsize) - pkt->pkt_caplen = pkt->pkt_bufsize; - pkt->pkt_payload = NULL; - pkt->pkt_protocol = ntohs(pkt->from->sll_protocol); - } else - ss = len; - } - - if (ch) { - *ch = ERR; /* signalize we have no key ready */ - if (!daemonized && (ss > 0) && ((pfds[1].revents & POLLIN) != 0)) - *ch = wgetch(win); - } - - return ss; -} - int packet_process(struct pkt_hdr *pkt, unsigned int *total_br, in_port_t *sport, in_port_t *dport, int match_opposite, int v6inv4asv6) @@ -319,8 +266,6 @@ again:
int packet_init(struct pkt_hdr *pkt) { - pkt->pkt_buf = xmallocz(MAX_PACKET_SIZE); - pkt->pkt_bufsize = MAX_PACKET_SIZE; pkt->pkt_payload = NULL; pkt->ethhdr = NULL; pkt->fddihdr = NULL; @@ -328,48 +273,17 @@ int packet_init(struct pkt_hdr *pkt) pkt->ip6_hdr = NULL; pkt->pkt_len = 0; /* signalize we have no packet prepared */
- pkt->iov.iov_len = pkt->pkt_bufsize; - pkt->iov.iov_base = pkt->pkt_buf; - - pkt->from = xmallocz(sizeof(*pkt->from)); - pkt->msg = xmallocz(sizeof(*pkt->msg)); - - pkt->msg->msg_name = pkt->from; - pkt->msg->msg_namelen = sizeof(*pkt->from); - pkt->msg->msg_iov = &pkt->iov; - pkt->msg->msg_iovlen = 1; - pkt->msg->msg_control = NULL; + pkt->pkt_buf = NULL; + pkt->from = NULL;
return 0; /* all O.K. */ }
-void packet_destroy(struct pkt_hdr *pkt) +void packet_destroy(struct pkt_hdr *pkt __unused) { - free(pkt->msg); - pkt->msg = NULL; - - free(pkt->from); - pkt->from = NULL; - - free(pkt->pkt_buf); - pkt->pkt_buf = NULL; - destroyfraglist(); }
-unsigned int packet_get_dropped(int fd) -{ - struct tpacket_stats stats; - socklen_t len = sizeof(stats); - - memset(&stats, 0, len); - int err = getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len); - if (err < 0) - die_errno("%s(): getsockopt(PACKET_STATISTICS)", __func__); - - return stats.tp_drops; -} - int packet_is_first_fragment(struct pkt_hdr *pkt) { switch (pkt->pkt_protocol) { diff --git a/src/packet.h b/src/packet.h index 1f01336..2ab69ce 100644 --- a/src/packet.h +++ b/src/packet.h @@ -7,13 +7,6 @@ packet.h - external declarations for packet.c
***/
-/* - * Number of bytes from captured packet to move into a buffer. - * 96 bytes should be enough for the IP header, TCP/UDP/ICMP/whatever header - * with reasonable numbers of options. - */ -#define MAX_PACKET_SIZE 96 - #define INVALID_PACKET 0 #define PACKET_OK 1 #define CHECKSUM_ERROR 2 @@ -22,18 +15,16 @@ packet.h - external declarations for packet.c
struct pkt_hdr { char *pkt_buf; - size_t pkt_bufsize; char *pkt_payload; size_t pkt_caplen; /* bytes captured */ size_t pkt_len; /* bytes on-the-wire */ unsigned short pkt_protocol; /* Physical layer protocol: ETH_P_* */
- struct iovec iov; struct sockaddr_ll *from; - struct msghdr *msg;
struct ethhdr *ethhdr; struct fddihdr *fddihdr; + struct iphdr *iphdr; struct ip6_hdr *ip6_hdr; }; @@ -61,13 +52,11 @@ static inline __u8 pkt_ip_protocol(const struct pkt_hdr *p) return 0; }
-int packet_get(int fd, struct pkt_hdr *pkt, int *ch, WINDOW *win); int packet_process(struct pkt_hdr *pkt, unsigned int *total_br, in_port_t *sport, in_port_t *dport, int match_opposite, int v6inv4asv6); int packet_init(struct pkt_hdr *pkt); void packet_destroy(struct pkt_hdr *pkt); -unsigned int packet_get_dropped(int fd); int packet_is_first_fragment(struct pkt_hdr *pkt);
#endif /* IPTRAF_NG_PACKET_H */ diff --git a/src/pktsize.c b/src/pktsize.c index 9e57a17..27384d2 100644 --- a/src/pktsize.c +++ b/src/pktsize.c @@ -24,6 +24,7 @@ pktsize.c - the packet size breakdown facility #include "log.h" #include "logvars.h" #include "promisc.h" +#include "capt.h"
#define SIZES 20
@@ -232,12 +233,10 @@ void packet_size_breakdown(char *ifname, time_t facilitytime)
struct psizetab table;
- int fd; + struct capt capt;
struct pkt_hdr pkt;
- unsigned long dropped = 0UL; - if (!dev_up(ifname)) { err_iface_down(); return; @@ -251,15 +250,10 @@ void packet_size_breakdown(char *ifname, time_t facilitytime) promisc_set_list(&promisc); }
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if(fd == -1) { - write_error("Unable to obtain monitoring socket"); + if (capt_init(&capt, ifname) == -1) { + write_error("Unable to initialize packet capture interface"); goto err; } - if(dev_bind_ifname(fd, ifname) == -1) { - write_error("Unable to bind interface on the socket"); - goto err_close; - }
int mtu = dev_get_mtu(ifname); if (mtu < 0) { @@ -321,8 +315,7 @@ void packet_size_breakdown(char *ifname, time_t facilitytime) if (now.tv_sec > last_time.tv_sec) { printelapsedtime(now.tv_sec - starttime, 1, table.borderwin);
- dropped += packet_get_dropped(fd); - print_packet_drops(dropped, table.borderwin, 49); + print_packet_drops(capt_get_dropped(&capt), table.borderwin, 49);
if (logging && (now.tv_sec > log_next)) { check_rotate_flag(&logfile); @@ -346,7 +339,7 @@ void packet_size_breakdown(char *ifname, time_t facilitytime) last_update = now; }
- if (packet_get(fd, &pkt, &ch, table.win) == -1) { + if (capt_get_packet(&capt, &pkt, &ch, table.win) == -1) { write_error("Packet receive failed"); exitloop = 1; break; @@ -355,8 +348,10 @@ void packet_size_breakdown(char *ifname, time_t facilitytime) if (ch != ERR) psize_process_key(ch);
- if (pkt.pkt_len > 0) + if (pkt.pkt_len > 0) { psize_process_packet(&table, &pkt); + capt_put_packet(&capt, &pkt); + } }
packet_destroy(&pkt); @@ -371,7 +366,7 @@ void packet_size_breakdown(char *ifname, time_t facilitytime) strcpy(current_logfile, "");
err_close: - close(fd); + capt_destroy(&capt); err: if (options.promisc) { promisc_restore_list(&promisc); diff --git a/src/serv.c b/src/serv.c index 6bd89fa..a3442d1 100644 --- a/src/serv.c +++ b/src/serv.c @@ -32,6 +32,7 @@ serv.c - TCP/UDP port statistics module #include "error.h" #include "counters.h" #include "rate.h" +#include "capt.h"
#define SCROLLUP 0 #define SCROLLDOWN 1 @@ -899,9 +900,7 @@ void servmon(char *ifname, time_t facilitytime)
FILE *logfile = NULL;
- int fd; - - unsigned long dropped = 0UL; + struct capt capt;
struct porttab *ports;
@@ -921,15 +920,10 @@ void servmon(char *ifname, time_t facilitytime) promisc_set_list(&promisc); }
- fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); - if(fd == -1) { - write_error("Unable to obtain monitoring socket"); + if (capt_init(&capt, ifname) == -1) { + write_error("Unable to initialize packet capture interface"); goto err; } - if(dev_bind_ifname(fd, ifname) == -1) { - write_error("Unable to bind interface on the socket"); - goto err_close; - }
if (logging) { if (strcmp(current_logfile, "") == 0) { @@ -987,8 +981,7 @@ void servmon(char *ifname, time_t facilitytime)
printelapsedtime(now.tv_sec - starttime, 20, list.borderwin);
- dropped += packet_get_dropped(fd); - print_packet_drops(dropped, list.borderwin, 49); + print_packet_drops(capt_get_dropped(&capt), list.borderwin, 49);
if (now.tv_sec > endtime) exitloop = 1; @@ -1012,7 +1005,7 @@ void servmon(char *ifname, time_t facilitytime) last_update = now; }
- if (packet_get(fd, &pkt, &ch, list.win) == -1) { + if (capt_get_packet(&capt, &pkt, &ch, list.win) == -1) { write_error("Packet receive failed"); exitloop = 1; break; @@ -1021,8 +1014,10 @@ void servmon(char *ifname, time_t facilitytime) if (ch != ERR) serv_process_key(&list, ch);
- if (pkt.pkt_len > 0) + if (pkt.pkt_len > 0) { serv_process_packet(&list, &pkt, ports); + capt_put_packet(&capt, &pkt); + } } packet_destroy(&pkt);
@@ -1038,8 +1033,7 @@ void servmon(char *ifname, time_t facilitytime) } strcpy(current_logfile, "");
-err_close: - close(fd); + capt_destroy(&capt); err: if (options.promisc) { promisc_restore_list(&promisc);
Since recvmmsg() was only introduced in Linux 2.6.33 and glibc 2.12, it is not enabled by default; you need to turn it on explicitly in the Makefile by defining USE_RECVMMSG.
Signed-off-by: Vitezslav Samel <vitezslav@samel.cz> --- Makefile | 9 ++++ src/capt-recvmmsg.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/capt-recvmmsg.h | 6 +++ src/capt.c | 9 ++++ 4 files changed, 162 insertions(+) create mode 100644 src/capt-recvmmsg.c create mode 100644 src/capt-recvmmsg.h
diff --git a/Makefile b/Makefile index 64ca213..71439a7 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,9 @@ all:: # Define NEEDS_NCURSES6 if you need linking with ncurses6. # # Define NEEDS_NCURSESW6 if you need linking with ncursesw6. +# +# Define USE_RECVMMSG if you want to use recvmmsg() packet capturing interface. +#USE_RECVMMSG := 1
VERSION-FILE: FORCE @@ -276,6 +279,12 @@ ifndef NCURSES_LDFLAGS endif endif
+ifdef USE_RECVMMSG + CFLAGS += -D_USE_GNU -DUSE_RECVMMSG + iptraf-h += src/capt-recvmmsg.h + iptraf-o += src/capt-recvmmsg.o +endif + QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir QUIET_SUBDIR1 =
diff --git a/src/capt-recvmmsg.c b/src/capt-recvmmsg.c new file mode 100644 index 0000000..8bcf1a8 --- /dev/null +++ b/src/capt-recvmmsg.c @@ -0,0 +1,138 @@ +/* For terms of usage/redistribution/modification see the LICENSE file */ +/* For authors and contributors see the AUTHORS file */ + +#include "iptraf-ng-compat.h" + +#include "packet.h" +#include "capt.h" + +#define FRAMES 128 + +struct capt_data_recvmmsg { + char *buf; + + struct mmsghdr *msgvec; + struct iovec *iov; + struct sockaddr_ll *from; + + unsigned int lastslot; + unsigned int slot; +}; + +static unsigned int capt_recvmmsg_find_filled_slot(struct capt_data_recvmmsg *data) +{ + for (unsigned int slot = data->lastslot; slot < FRAMES; slot++) + if (data->msgvec[slot].msg_len != 0) + return slot; + + return FRAMES; +} + +static unsigned int capt_have_packet_recvmmsg(struct capt *capt) +{ + struct capt_data_recvmmsg *data = capt->priv; + + return capt_recvmmsg_find_filled_slot(data) != FRAMES; +} + +static int capt_get_packet_recvmmsg(struct capt *capt, struct pkt_hdr *pkt) +{ + struct capt_data_recvmmsg *data = capt->priv; + int ret = 0; + + unsigned int slot = capt_recvmmsg_find_filled_slot(data); + if (slot == FRAMES) { + /* these are set upon return from recvmsg() so clean */ + /* them beforehand */ + for (unsigned int i = 0; i < FRAMES; i++) { + data->msgvec[i].msg_hdr.msg_controllen = 0; + data->msgvec[i].msg_hdr.msg_flags = 0; + data->msgvec[i].msg_len = 0; + } + + int received = recvmmsg(capt->fd, data->msgvec, FRAMES, MSG_TRUNC | MSG_DONTWAIT, NULL); + if (received <= 0) + return received; + slot = 0; + } + pkt->pkt_len = data->msgvec[slot].msg_len; + pkt->pkt_caplen = data->msgvec[slot].msg_len; + if (pkt->pkt_caplen > MAX_PACKET_SIZE) + pkt->pkt_caplen = MAX_PACKET_SIZE; + pkt->pkt_buf = data->buf + slot * MAX_PACKET_SIZE; + pkt->from = &data->from[slot]; + pkt->pkt_payload = NULL; + pkt->pkt_protocol = ntohs(pkt->from->sll_protocol); + + data->slot = slot; + + return ret; +} + +static int 
capt_put_packet_recvmmsg(struct capt *capt, struct pkt_hdr *pkt __unused) +{ + struct capt_data_recvmmsg *data = capt->priv; + + /* hand out processed slot to kernel */ + if (data->slot < FRAMES) { + data->msgvec[data->slot].msg_len = 0; + data->lastslot = data->slot; + } else + data->slot = FRAMES; + + return 0; +} + +static void capt_cleanup_recvmmsg(struct capt *capt) +{ + struct capt_data_recvmmsg *data = capt->priv; + + capt->cleanup = NULL; + capt->put_packet = NULL; + capt->get_packet = NULL; + capt->have_packet = NULL; + + free(data->from); + data->from = NULL; + free(data->iov); + data->iov = NULL; + free(data->msgvec); + data->msgvec = NULL; + free(data->buf); + data->buf = NULL; + + free(capt->priv); + capt->priv = NULL; +} + +int capt_setup_recvmmsg(struct capt *capt) +{ + struct capt_data_recvmmsg *data; + + data = xmallocz(sizeof(struct capt_data_recvmmsg)); + data->buf = xmallocz(FRAMES * MAX_PACKET_SIZE); + data->msgvec = xmallocz(FRAMES * sizeof(*data->msgvec)); + data->iov = xmallocz(FRAMES * sizeof(*data->iov)); + data->from = xmallocz(FRAMES * sizeof(*data->from)); + + for (unsigned int i = 0; i < FRAMES; i++) { + data->iov[i].iov_len = MAX_PACKET_SIZE; + data->iov[i].iov_base = data->buf + i * MAX_PACKET_SIZE; + + data->msgvec[i].msg_hdr.msg_name = &data->from[i]; + data->msgvec[i].msg_hdr.msg_namelen = sizeof(data->from[i]); + data->msgvec[i].msg_hdr.msg_iov = &data->iov[i]; + data->msgvec[i].msg_hdr.msg_iovlen = 1; + data->msgvec[i].msg_hdr.msg_control = NULL; + } + data->slot = FRAMES; + data->lastslot = 0; + + capt->priv = data; + capt->have_packet = capt_have_packet_recvmmsg; + capt->get_packet = capt_get_packet_recvmmsg; + capt->put_packet = capt_put_packet_recvmmsg; + capt->cleanup = capt_cleanup_recvmmsg; + + return 0; +} diff --git a/src/capt-recvmmsg.h b/src/capt-recvmmsg.h new file mode 100644 index 0000000..8461eed --- /dev/null +++ b/src/capt-recvmmsg.h @@ -0,0 +1,6 @@ +#ifndef IPTRAF_NG_CAPT_RECVMMSG_H +#define 
IPTRAF_NG_CAPT_RECVMMSG_H + +int capt_setup_recvmmsg(struct capt *capt); + +#endif /* IPTRAF_NG_CAPT_RECVMMSG_H */ diff --git a/src/capt.c b/src/capt.c index 9c0d6d4..b7a75ec 100644 --- a/src/capt.c +++ b/src/capt.c @@ -8,6 +8,9 @@ #include "packet.h" #include "capt.h" #include "capt-recvmsg.h" +#ifdef USE_RECVMMSG +#include "capt-recvmmsg.h" +#endif
static int capt_set_recv_timeout(int fd, unsigned int msec) { @@ -45,6 +48,12 @@ int capt_init(struct capt *capt, char *ifname) if (capt_set_recv_timeout(capt->fd, 250) == -1) goto out;
+#ifdef USE_RECVMMSG + /* try packet recvmmsg() */ + if (capt_setup_recvmmsg(capt) == 0) + return 0; +#endif + /* try packet recvmsg() */ if (capt_setup_recvmsg(capt) == 0) return 0;
Signed-off-by: Vitezslav Samel vitezslav@samel.cz --- Makefile | 2 + src/capt-mmap-v2.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++ src/capt-mmap-v2.h | 6 ++ src/capt.c | 5 ++ src/iptraf-ng-compat.h | 1 + 5 files changed, 161 insertions(+) create mode 100644 src/capt-mmap-v2.c create mode 100644 src/capt-mmap-v2.h
diff --git a/Makefile b/Makefile index 71439a7..7326c3b 100644 --- a/Makefile +++ b/Makefile @@ -120,6 +120,7 @@ iptraf-h += src/built-in.h iptraf-h += src/sockaddr.h iptraf-h += src/capt.h iptraf-h += src/capt-recvmsg.h +iptraf-h += src/capt-mmap-v2.h
iptraf-o += src/tui/input.o iptraf-o += src/tui/labels.o @@ -164,6 +165,7 @@ iptraf-o += src/capture-pkt.o iptraf-o += src/sockaddr.o iptraf-o += src/capt.o iptraf-o += src/capt-recvmsg.o +iptraf-o += src/capt-mmap-v2.o
rvnamed-o += src/rvnamed.o rvnamed-o += src/getpath.o diff --git a/src/capt-mmap-v2.c b/src/capt-mmap-v2.c new file mode 100644 index 0000000..ebbc125 --- /dev/null +++ b/src/capt-mmap-v2.c @@ -0,0 +1,147 @@ +/* For terms of usage/redistribution/modification see the LICENSE file */ +/* For authors and contributors see the AUTHORS file */ + +#include "iptraf-ng-compat.h" + +#include "packet.h" +#include "capt.h" + +struct capt_data_mmap_v2 { + void *mmap; + size_t mmap_size; + struct tpacket2_hdr **hdr; + struct sockaddr_ll **sll; + unsigned int lastslot; + unsigned int slot; +}; + +#define FRAMES 512 + +static unsigned int capt_mmap_find_filled_slot(struct capt_data_mmap_v2 *data) +{ + for (unsigned int i = data->lastslot; i < data->lastslot + FRAMES; i++) { + unsigned int slot = i >= FRAMES ? i - FRAMES : i; + + if (data->hdr[slot]->tp_status & TP_STATUS_USER) + return slot; + } + return FRAMES; +} + +static unsigned int capt_have_packet_mmap_v2(struct capt *capt) +{ + struct capt_data_mmap_v2 *data = capt->priv; + + return capt_mmap_find_filled_slot(data) != FRAMES; +} + +static int capt_get_packet_mmap_v2(struct capt *capt, struct pkt_hdr *pkt) +{ + struct capt_data_mmap_v2 *data = capt->priv; + int ss = 0; + + unsigned int slot = capt_mmap_find_filled_slot(data); + if (slot < FRAMES) { + struct tpacket2_hdr *hdr = data->hdr[slot]; + struct sockaddr_ll *sll = data->sll[slot]; + + pkt->pkt_buf = (char *)hdr + hdr->tp_mac; + pkt->pkt_payload = NULL; + pkt->pkt_caplen = hdr->tp_snaplen; + pkt->pkt_len = hdr->tp_len; + pkt->from = sll; + pkt->pkt_protocol = ntohs(sll->sll_protocol); + + data->slot = slot; + ss = hdr->tp_len; + } + return ss; +} + +static int capt_put_packet_mmap_v2(struct capt *capt, struct pkt_hdr *pkt __unused) +{ + struct capt_data_mmap_v2 *data = capt->priv; + + /* hand out processed slot to kernel */ + if (data->slot < FRAMES) { + data->hdr[data->slot]->tp_status = TP_STATUS_KERNEL; + data->lastslot = data->slot; + } else + data->slot = FRAMES; 
+ + return 0; +} + +static void capt_cleanup_mmap_v2(struct capt *capt) +{ + struct capt_data_mmap_v2 *data = capt->priv; + + free(data->sll); + data->sll = NULL; + + free(data->hdr); + data->hdr = NULL; + + munmap(data->mmap, data->mmap_size); + data->mmap = NULL; + data->mmap_size = 0; + + free(capt->priv); + capt->priv = NULL; +} + +int capt_setup_mmap_v2(struct capt *capt) +{ + int version = TPACKET_V2; + if (setsockopt(capt->fd, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)) != 0) + return -1; + + int hdrlen = version; + socklen_t socklen = sizeof(hdrlen); + if (getsockopt(capt->fd, SOL_PACKET, PACKET_HDRLEN, &hdrlen, &socklen) != 0) + return -1; + + /* computed not exactly, but almost there */ + size_t frame_size = TPACKET_ALIGN(hdrlen) + + TPACKET_ALIGN(sizeof(struct sockaddr_ll)) + + TPACKET_ALIGN(MAX_PACKET_SIZE); + + struct tpacket_req req; + + req.tp_block_nr = 1; + req.tp_frame_nr = FRAMES; + /* frame_size must be a multiple of TPACKET_ALIGNMENT */ + req.tp_frame_size = frame_size; + /* block_size must be a multiple of PAGE_SIZE */ + /* TODO: check for this condition (incidently we are OK now) */ + req.tp_block_size = req.tp_frame_nr * req.tp_frame_size; + + if(setsockopt(capt->fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) != 0) + return -1; + + size_t size = req.tp_block_size * req.tp_block_nr; + void *map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, capt->fd, 0); + if (map == MAP_FAILED) + return -1; + + struct capt_data_mmap_v2 *data = xmallocz(sizeof(struct capt_data_mmap_v2)); + + data->mmap = map; + data->mmap_size = size; + data->hdr = xmallocz(FRAMES * sizeof(*data->hdr)); + data->sll = xmallocz(FRAMES * sizeof(*data->sll)); + for (int i = 0; i < FRAMES; i++) { + data->hdr[i] = (struct tpacket2_hdr *)((char *)map + i * frame_size); + data->sll[i] = (struct sockaddr_ll *)((char *)data->hdr[i] + TPACKET_ALIGN(hdrlen)); + } + data->lastslot = 0; + data->slot = FRAMES; + + capt->priv = data; + capt->have_packet = 
capt_have_packet_mmap_v2; + capt->get_packet = capt_get_packet_mmap_v2; + capt->put_packet = capt_put_packet_mmap_v2; + capt->cleanup = capt_cleanup_mmap_v2; + + return 0; /* All O.K. */ +} diff --git a/src/capt-mmap-v2.h b/src/capt-mmap-v2.h new file mode 100644 index 0000000..daa92e7 --- /dev/null +++ b/src/capt-mmap-v2.h @@ -0,0 +1,6 @@ +#ifndef IPTRAF_NG_CAPT_MMAP_V2_H +#define IPTRAF_NG_CAPT_MMAP_V2_H + +int capt_setup_mmap_v2(struct capt *capt); + +#endif /* IPTRAF_NG_CAPT_MMAP_V2_H */ diff --git a/src/capt.c b/src/capt.c index b7a75ec..f704e13 100644 --- a/src/capt.c +++ b/src/capt.c @@ -11,6 +11,7 @@ #ifdef USE_RECVMMSG #include "capt-recvmmsg.h" #endif +#include "capt-mmap-v2.h"
static int capt_set_recv_timeout(int fd, unsigned int msec) { @@ -48,6 +49,10 @@ int capt_init(struct capt *capt, char *ifname) if (capt_set_recv_timeout(capt->fd, 250) == -1) goto out;
+ /* try packet mmap() TPACKET_V2 */ + if (capt_setup_mmap_v2(capt) == 0) + return 0; + #ifdef USE_RECVMMSG /* try packet recvmmsg() */ if (capt_setup_recvmmsg(capt) == 0) diff --git a/src/iptraf-ng-compat.h b/src/iptraf-ng-compat.h index aaa2807..8fe5154 100644 --- a/src/iptraf-ng-compat.h +++ b/src/iptraf-ng-compat.h @@ -26,6 +26,7 @@ #include <sys/ioctl.h> #include <sys/wait.h> #include <sys/un.h> +#include <sys/mman.h>
#include <netinet/in.h> #include <netinet/udp.h>
Vitezslav Samel vitezslav@samel.cz writes:
Vitezslav Samel (3): introduce packet capturing abstraction capt.c: add capturing using recvmmsg() capt.c: add capturing using mmap()ed PACKET_RX_RING memory
Why are we now using recvmsg, recvmmsg and mmap? Which one is faster? Please elaborate in detail, because you don't have a useful commit message.
Makefile | 15 +++++ src/capt-mmap-v2.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++ src/capt-mmap-v2.h | 6 ++ src/capt-recvmmsg.c | 138 ++++++++++++++++++++++++++++++++++++++++++++ src/capt-recvmmsg.h | 6 ++ src/capt-recvmsg.c | 89 ++++++++++++++++++++++++++++ src/capt-recvmsg.h | 6 ++ src/capt.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++ src/capt.h | 31 ++++++++++ src/capture-pkt.c | 18 +++--- src/detstats.c | 27 ++++----- src/hostmon.c | 26 ++++----- src/ifstats.c | 21 ++++--- src/iptraf-ng-compat.h | 1 + src/itrafmon.c | 23 +++----- src/packet.c | 92 +---------------------------- src/packet.h | 13 +---- src/pktsize.c | 25 ++++---- src/serv.c | 26 ++++----- 19 files changed, 663 insertions(+), 200 deletions(-) create mode 100644 src/capt-mmap-v2.c create mode 100644 src/capt-mmap-v2.h create mode 100644 src/capt-recvmmsg.c create mode 100644 src/capt-recvmmsg.h create mode 100644 src/capt-recvmsg.c create mode 100644 src/capt-recvmsg.h create mode 100644 src/capt.c create mode 100644 src/capt.h
On Sun, Jul 13, 2014 at 06:02:19PM +1200, Nikola Pajkovsky wrote:
Vitezslav Samel vitezslav@samel.cz writes:
Vitezslav Samel (3): introduce packet capturing abstraction capt.c: add capturing using recvmmsg() capt.c: add capturing using mmap()ed PACKET_RX_RING memory
why do we using now recvmsg, recvmmsg and mmap? Which one is faster? Please elaborate deeply, because you don't have useful commit message.
It's all about speed:
- in the recvmsg() case there are 2 syscalls per packet (poll(), and then recvmsg() gives us one packet);
- in the recvmmsg() case there are 2 syscalls per batch (poll(), and then recvmmsg() can give us several packets if available);
- in the mmap()ed case there is a syscall (poll()) only when there is no packet waiting.
On my workloads, going from recvmsg() to the mmap-style receiver lowers the number of dropped packets 100x (from tens of thousands to hundreds). And this is still with one thread.
The packet capturing abstraction was chosen to allow modular packet receiving techniques: in our case recvmsg(), recvmmsg() and mmap. recvmsg() is available (almost) always, recvmmsg() is available only with Linux 2.6.33+ and glibc 2.12+, and the mmap-style receiver can be turned off in the kernel. The capturing interface tries mmap-style first, then tries recvmmsg() (if configured in), and recvmsg() is the slowest fallback.
Hope this helps.
Cheers, Vita
Makefile | 15 +++++ src/capt-mmap-v2.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++ src/capt-mmap-v2.h | 6 ++ src/capt-recvmmsg.c | 138 ++++++++++++++++++++++++++++++++++++++++++++ src/capt-recvmmsg.h | 6 ++ src/capt-recvmsg.c | 89 ++++++++++++++++++++++++++++ src/capt-recvmsg.h | 6 ++ src/capt.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++++ src/capt.h | 31 ++++++++++ src/capture-pkt.c | 18 +++--- src/detstats.c | 27 ++++----- src/hostmon.c | 26 ++++----- src/ifstats.c | 21 ++++--- src/iptraf-ng-compat.h | 1 + src/itrafmon.c | 23 +++----- src/packet.c | 92 +---------------------------- src/packet.h | 13 +---- src/pktsize.c | 25 ++++---- src/serv.c | 26 ++++----- 19 files changed, 663 insertions(+), 200 deletions(-) create mode 100644 src/capt-mmap-v2.c create mode 100644 src/capt-mmap-v2.h create mode 100644 src/capt-recvmmsg.c create mode 100644 src/capt-recvmmsg.h create mode 100644 src/capt-recvmsg.c create mode 100644 src/capt-recvmsg.h create mode 100644 src/capt.c create mode 100644 src/capt.h
-- Nikola
Vitezslav Samel vitezslav@samel.cz writes:
On Sun, Jul 13, 2014 at 06:02:19PM +1200, Nikola Pajkovsky wrote:
Vitezslav Samel vitezslav@samel.cz writes:
Vitezslav Samel (3): introduce packet capturing abstraction capt.c: add capturing using recvmmsg() capt.c: add capturing using mmap()ed PACKET_RX_RING memory
why do we using now recvmsg, recvmmsg and mmap? Which one is faster? Please elaborate deeply, because you don't have useful commit message.
It's all about speed:
- in recvmsg() case there are 2 syscalls per packet (poll() and then recvmsg() gives us one packet);
- in recvmmsg() case there are 2 syscalls (poll() and then recvmmsg() can give us more packets if available);
- in mmaped case there is syscall only in case there's no packet (poll())
On my workloads going from recvmsg() to mmap-style receiver lowers number of dropped packets 100x (from ten thousands to hundreds). And this is still with one thread.
The packet capturing abstraction was chosen to have modular packet receiving techniques: in our case recvmsg(), recvmmsg() and mmap. recvmsg() is available (almost) always, recvmmsg() is available only in linux-2.6.34+ and glibc-2.12+ and mmap-style receiver can be turned off in the kernel. The capturing interface tries mmap-style first, then tries recvmmsg() (if configured in) and recvmsg() is the slowest fallback.
To be honest, it took me a while to figure out what the patches are all about. Now I have a better picture: you want to implement *zero copy* for rx.
linux/Documentation/networking/packet_mmap.txt
Since commit 889b8f964f2f ("packet: Kill CONFIG_PACKET_MMAP.") removed CONFIG_PACKET_MMAP and enabled struct packet_ring_buffer rx_ring and tx_ring by default, recvmmsg() becomes not that interesting. I'm not saying that we should not implement it, but I would rather go with mmap as the default and recvmsg() as the fallback. I haven't checked whether RHEL6 has CONFIG_PACKET_MMAP enabled, but it would not surprise me if it has.
Since we are doing this to speed things up, we should avoid *trying* (as with recvmsg()) and then falling back to continue, like in

[PATCH 2/3] capt.c: add capturing using recvmmsg()

and rather just *do* it. The trying will become a huge bottleneck and a waste of time.
I'm still reading your code over and over and over. It's making more and more sense ;).
One way of implementing it is via weak functions, where the weak functions are recv*() and the strong ones are mmaped/whatever (chosen during build). Another option is that we build all of them (recvmsg(), recvmmsg(), mmap(), ...) and set one as the default, which can be overridden via a command-line option like --recv recvmsg/recvmmsg/mmap/... (or enabled/disabled during build via [NO_]MMAP=YesPlease).
Or have it like in the Linux kernel: have module_init/module_exit and a pkt_ops struct, which will hold function pointers like you have, plus some config file with
CONFIG_MMAP=y # for enabled # CONFIG_MMAP not set
and build it according to config.
I have been wondering how modules work in the Linux kernel and how the heck they can call static functions from modules via module_init/module_exit.
So what do you think?
On Wed, Jul 16, 2014 at 09:32:51PM +1200, Nikola Pajkovsky wrote:
Vitezslav Samel vitezslav@samel.cz writes:
On Sun, Jul 13, 2014 at 06:02:19PM +1200, Nikola Pajkovsky wrote:
Vitezslav Samel vitezslav@samel.cz writes:
Vitezslav Samel (3): introduce packet capturing abstraction capt.c: add capturing using recvmmsg() capt.c: add capturing using mmap()ed PACKET_RX_RING memory
why do we using now recvmsg, recvmmsg and mmap? Which one is faster? Please elaborate deeply, because you don't have useful commit message.
It's all about speed:
- in recvmsg() case there are 2 syscalls per packet (poll() and then recvmsg() gives us one packet);
- in recvmmsg() case there are 2 syscalls (poll() and then recvmmsg() can give us more packets if available);
- in mmaped case there is syscall only in case there's no packet (poll())
On my workloads going from recvmsg() to mmap-style receiver lowers number of dropped packets 100x (from ten thousands to hundreds). And this is still with one thread.
The packet capturing abstraction was chosen to have modular packet receiving techniques: in our case recvmsg(), recvmmsg() and mmap. recvmsg() is available (almost) always, recvmmsg() is available only in linux-2.6.34+ and glibc-2.12+ and mmap-style receiver can be turned off in the kernel. The capturing interface tries mmap-style first, then tries recvmmsg() (if configured in) and recvmsg() is the slowest fallback.
To be honest, it took me awhile to find out, what the patches all about. Now, I have better picture, that you want to implement *zero copy* for rx.
linux/Documentation/networking/packet_mmap.txt
Since commit 889b8f964f2f ("packet: Kill CONFIG_PACKET_MMAP."), kills CONFIG_PACKET_MMAP and have enabled struct packet_ring_buffer rx_ring and tx_ring by default, the recvmmsg() becames not that interesting. I'm not
Didn't know about that.
saying, that we should not implement it, but I would rather go with mmap as default and recvmsg() as fallback. I haven't check if RHEL6 has CONFIG_PACKET_MMAP enabled, but it would not surprise me, if it has.
Since we are doing because of speeding things up, we should avoid *trying* like in recvmsg() and then do fallback to continue like in
[PATCH 2/3] capt.c: add capturing using recvmmsg()
and rather *doing* it. The trying things will became huge bottleneck and waste of time.
Trying is done only at packet capturing initialization; after that, only the capturing function that was initialized is called, without any trying in the fast path.
I'm still reading your code over and over and over. It's making more and more sense ;).
One way of implementing it as via weak functions, where weak functions are recv*() and strong are mmaped/whatever (choose during build).
I don't think weak functions are the best solution for us. With weak functions the choice is made at compile time, but I want to make iptraf-ng versatile and make the choice at run time.
Another option is, that we will build all of them (recvmsg(),
recvmmsg(), mmap(), ...) and set one as default, which one can be override via cmd like --recv recvmsg/recvmmsg/mmap/... (or enable/disable during build via [NO_]MMAP=YesPlease)
Could be. The default one would be mmap and could be overridden with something else. But ...
Or have it like in linux kernel. Have module_init/module_exit and pkt_ops struct, which will hold pointer functions like you have, some config file with
CONFIG_MMAP=y # for enabled # CONFIG_MMAP not set
and build it according to config.
I have been wondering how modules work in linux kernel and who the heck they can call static functions from modules via module_init/module_exit.
So what do you think?
... I think my approach is best: when initializing, try the best one, then try the others when the best one isn't available/is buggy/..., with recvmsg() as a sane fallback/default. Then in the hot path just use what was initialized.
What could be added is an override from the command line. Should I add it?
Cheers,
Vita
On Wed, Jul 16, 2014 at 12:41:39PM +0200, Vitezslav Samel wrote:
On Wed, Jul 16, 2014 at 09:32:51PM +1200, Nikola Pajkovsky wrote:
Vitezslav Samel vitezslav@samel.cz writes:
On Sun, Jul 13, 2014 at 06:02:19PM +1200, Nikola Pajkovsky wrote:
Vitezslav Samel vitezslav@samel.cz writes:
Vitezslav Samel (3): introduce packet capturing abstraction capt.c: add capturing using recvmmsg() capt.c: add capturing using mmap()ed PACKET_RX_RING memory
why do we using now recvmsg, recvmmsg and mmap? Which one is faster? Please elaborate deeply, because you don't have useful commit message.
It's all about speed:
- in recvmsg() case there are 2 syscalls per packet (poll() and then recvmsg() gives us one packet);
- in recvmmsg() case there are 2 syscalls (poll() and then recvmmsg() can give us more packets if available);
- in mmaped case there is syscall only in case there's no packet (poll())
On my workloads going from recvmsg() to mmap-style receiver lowers number of dropped packets 100x (from ten thousands to hundreds). And this is still with one thread.
The packet capturing abstraction was chosen to have modular packet receiving techniques: in our case recvmsg(), recvmmsg() and mmap. recvmsg() is available (almost) always, recvmmsg() is available only in linux-2.6.34+ and glibc-2.12+ and mmap-style receiver can be turned off in the kernel. The capturing interface tries mmap-style first, then tries recvmmsg() (if configured in) and recvmsg() is the slowest fallback.
To be honest, it took me awhile to find out, what the patches all about. Now, I have better picture, that you want to implement *zero copy* for rx.
linux/Documentation/networking/packet_mmap.txt
Since commit 889b8f964f2f ("packet: Kill CONFIG_PACKET_MMAP."), kills CONFIG_PACKET_MMAP and have enabled struct packet_ring_buffer rx_ring and tx_ring by default, the recvmmsg() becames not that interesting. I'm not
Didn't know about that.
saying, that we should not implement it, but I would rather go with mmap as default and recvmsg() as fallback. I haven't check if RHEL6 has CONFIG_PACKET_MMAP enabled, but it would not surprise me, if it has.
Since we are doing because of speeding things up, we should avoid *trying* like in recvmsg() and then do fallback to continue like in
[PATCH 2/3] capt.c: add capturing using recvmmsg()
and rather *doing* it. The trying things will became huge bottleneck and waste of time.
Trying is done only at packet capturing initialization; then only initialized capturing function is done without any trying in the fast path.
I'm still reading your code over and over and over. It's making more and more sense ;).
One way of implementing it as via weak functions, where weak functions are recv*() and strong are mmaped/whatever (choose during build).
I don't think weak functions are the best solution for us. The choice is done in compile time, but I want to make iptraf-ng versatile and to make the choice at run time.
Another option is, that we will build all of them (recvmsg(),
recvmmsg(), mmap(), ...) and set one as default, which one can be override via cmd like --recv recvmsg/recvmmsg/mmap/... (or enable/disable during build via [NO_]MMAP=YesPlease)
Could be. The default one would be mmap and can be overriden to something else. But ...
Or have it like in linux kernel. Have module_init/module_exit and pkt_ops struct, which will hold pointer functions like you have, some config file with
CONFIG_MMAP=y # for enabled # CONFIG_MMAP not set
and build it according to config.
I have been wondering how modules work in linux kernel and who the heck they can call static functions from modules via module_init/module_exit.
So what do you think?
... I think my approach is best: when initializing try the best one, then try others when the best one isn't available/buggy/... with recvmsg() as sane fallback/default. Then in hot path just use what was initialized.
What could be added is override from command line. Should I add it?
Ping?
Is there anything I should do to have this series included?
Cheers, Vita
On Thu, Aug 21, 2014 at 09:37:43AM +0200, Vitezslav Samel wrote:
... I think my approach is best: when initializing try the best one, then try others when the best one isn't available/buggy/... with recvmsg() as sane fallback/default. Then in hot path just use what was initialized.
What could be added is override from command line. Should I add it?
Ping?
Is there anything I should do to have this series included?
Ping 2?
Are you alive?
Cheers, Vita
iptraf-ng@lists.fedorahosted.org