The code avoids, as much as possible, using BPF scratch memory,
doing arithmetic or moving data, for performance reasons.  Also, it
is not possible to jump backwards, so the packet information is
implicit in each branch taken.

The BPF code gets quite long when all protocols are enabled, but that
should not be a problem: the largest jump offset, ~120 instructions
(goto vlan branch), is well below the sock_filter jump offset limit
of 255, a long jump costs the same as a jump to the next instruction,
and the CPU always takes the shortest, quickest path possible for any
scenario.
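
For reference, the hashing step emitted for each packet field follows
the pattern below.  This is only a minimal sketch mirroring
bpf_eth_hash() and the bpf_calc_hash() macro (the array name is just
for the example); it illustrates why the common path needs no scratch
memory or arithmetic instructions:

	struct sock_filter fold_field[] = {
		/* A = 4 bytes of the destination MAC (packet offset 2) */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, 2),
		/* ancillary load: A = A ^ X */
		BPF_STMT(BPF_LD + BPF_B + BPF_ABS,
			 SKF_AD_OFF + SKF_AD_ALU_XOR_X),
		/* X now holds the running hash */
		BPF_STMT(BPF_MISC + BPF_TAX, 0),
	};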
Signed-off-by: Flavio Leitner <fbl@redhat.com>
---
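A note on how the forward jumps are resolved: bpf_cmp()/bpf_jump()
record the target label in jt/jf/k together with a FIX_JT/FIX_JF/FIX_K
flag, and stack_resolve_offsets() later rewrites the field with the
relative offset, rejecting anything outside 0..255.  A minimal worked
example (values chosen only for illustration):

	/* a jump emitted at instruction index 5 targeting a label
	 * pushed at index 9 must skip the next three instructions */
	unsigned int jump_addr = 5, label_addr = 9;
	int offset = label_addr - jump_addr - 1;	/* = 3 */
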
man/teamd.conf.5 | 21 ++
teamd/teamd_bpf_chef.c | 938 +++++++++++++++++++++++++++++++++++++++++++++---
teamd/teamd_bpf_chef.h | 30 +-
teamd/teamd_hash_func.c | 133 +++----
4 files changed, 965 insertions(+), 157 deletions(-)
diff --git a/man/teamd.conf.5 b/man/teamd.conf.5
index 24eb011..2453b8b 100644
--- a/man/teamd.conf.5
+++ b/man/teamd.conf.5
@@ -103,11 +103,32 @@ List of fragment types (strings) which should be used for packet Tx hash computa
.BR "eth "\(em
Uses source and destination MAC addresses.
.PP
+.BR "vlan "\(em
+Uses VLAN id.
+.PP
.BR "ipv4 "\(em
Uses source and destination IPv4 addresses.
.PP
.BR "ipv6 "\(em
Uses source and destination IPv6 addresses.
+.PP
+.BR "ip "\(em
+Uses source and destination IPv4 and IPv6 addresses.
+.PP
+.BR "l3 "\(em
+Uses source and destination IPv4 and IPv6 addresses.
+.PP
+.BR "tcp "\(em
+Uses source and destination TCP ports.
+.PP
+.BR "udp "\(em
+Uses source and destination UDP ports.
+.PP
+.BR "sctp "\(em
+Uses source and destination SCTP ports.
+.PP
+.BR "l4 "\(em
+Uses source and destination TCP, UDP and SCTP ports.
.RE
.TP
.BR "runner.tx_balancer.name " (string)
diff --git a/teamd/teamd_bpf_chef.c b/teamd/teamd_bpf_chef.c
index a38513f..c6a2429 100644
--- a/teamd/teamd_bpf_chef.c
+++ b/teamd/teamd_bpf_chef.c
@@ -19,10 +19,38 @@
#include <stdlib.h>
#include <errno.h>
+#include <stdbool.h>
#include <linux/filter.h>
#include "teamd_bpf_chef.h"
+/* protocol offsets */
+#define ETH_TYPE_OFFSET 12
+#define IPV4_FLAGS_OFFSET 20
+#define IPV4_PROTO_OFFSET 23
+#define IPV4_FRAG_BITS 0x1fff
+#define IPV6_NEXTHEADER_OFFSET 20
+
+
+/* protocol codes */
+#define PROTOID_IPV4 0x800
+#define PROTOID_IPV6 0x86dd
+#define PROTOID_TCP 0x6
+#define PROTOID_UDP 0x11
+#define PROTOID_SCTP 0x84
+
+/* jump stack flags */
+#define FIX_JT 0x1
+#define FIX_JF 0x2
+#define FIX_K 0x4
+
+
+#define VLAN_HEADER_SIZE 2
+static int vlan_hdr_shift(unsigned int offset)
+{
+ return offset + VLAN_HEADER_SIZE;
+}
+
static int __add_inst(struct sock_fprog *fprog, struct sock_filter *inst)
{
ssize_t newsize;
@@ -46,86 +74,888 @@ static int __add_inst(struct sock_fprog *fprog, struct sock_filter *inst)
goto err_add_inst; \
}
-static void __compile_init(struct sock_fprog *fprog)
+#define bpf_load_byte(pos) \
+ add_inst(fprog, BPF_STMT(BPF_LD + BPF_B + BPF_ABS, pos))
+
+#define bpf_load_half(pos) \
+ add_inst(fprog, BPF_STMT(BPF_LD + BPF_H + BPF_ABS, pos))
+
+#define bpf_load_word(pos) \
+ add_inst(fprog, BPF_STMT(BPF_LD + BPF_W + BPF_ABS, pos))
+
+#define bpf_push_a() \
+ add_inst(fprog, BPF_STMT(BPF_ST, 0))
+
+#define bpf_push_x() \
+ add_inst(fprog, BPF_STMT(BPF_STX, 0))
+
+#define bpf_pop_x() \
+ add_inst(fprog, BPF_STMT(BPF_LDX + BPF_W + BPF_MEM, 0))
+
+#define bpf_calc_hash() \
+ add_inst(fprog, BPF_STMT(BPF_LD + BPF_B + BPF_ABS, \
+ SKF_AD_OFF + SKF_AD_ALU_XOR_X))
+
+#define bpf_move_to_x() \
+ add_inst(fprog, BPF_STMT(BPF_MISC + BPF_TAX, 0));
+
+#define bpf_move_to_a() \
+ add_inst(fprog, BPF_STMT(BPF_MISC + BPF_TXA, 0));
+
+#define bpf_return_a() \
+ add_inst(fprog, BPF_STMT(BPF_RET + BPF_A, 0));
+
+#define bpf_cmp(jt, jf, k, flags) \
+ do { \
+ add_inst(fprog, BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, \
+ k, jt, jf)); \
+ push_addr(fprog, flags); \
+ } while (0)
+
+#define bpf_jump(k) \
+ do { \
+ add_inst(fprog, BPF_JUMP(BPF_JMP + BPF_JA, k, 0, 0)); \
+ push_addr(fprog, FIX_K); \
+ } while (0)
+
+
+#define bpf_and(jt, jf, k, flags) \
+ do { \
+ add_inst(fprog, BPF_JUMP(BPF_JMP + BPF_JSET + BPF_K, \
+ k, jt, jf)); \
+ push_addr(fprog, flags); \
+ } while (0)
+
+
+#define bpf_vlan_tag_present() \
+ add_inst(fprog, BPF_STMT(BPF_LD + BPF_B + BPF_ABS, \
+ SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT))
+
+#define bpf_vlan_tag_id() \
+ add_inst(fprog, BPF_STMT(BPF_LD + BPF_B + BPF_ABS, \
+ SKF_AD_OFF + SKF_AD_VLAN_TAG))
+
+#define bpf_ipv4_len_to_x(pos) \
+ add_inst(fprog, BPF_STMT(BPF_LDX + BPF_B + BPF_MSH, pos))
+
+#define bpf_l4v4_port_to_a(pos) \
+ add_inst(fprog, BPF_STMT(BPF_LD + BPF_H + BPF_IND, pos))
+
+#define bpf_hash_return() \
+ do { \
+ bpf_move_to_a(); \
+ bpf_return_a(); \
+ } while(0)
+
+
+enum bpf_labels {
+ LABEL_VLAN_BRANCH,
+ LABEL_NOVLAN_IPV6,
+ LABEL_NOVLAN_L4v4_OUT,
+ LABEL_NOVLAN_TRY_UDP4,
+ LABEL_NOVLAN_L4v4_HASH,
+ LABEL_NOVLAN_TRY_STCP4,
+ LABEL_NOVLAN_IPV6_CONTINUE,
+ LABEL_NOVLAN_TRY_UDP6,
+ LABEL_NOVLAN_L4v6_OUT,
+ LABEL_NOVLAN_L4v6_HASH,
+ LABEL_NOVLAN_TRY_STCP6,
+ LABEL_VLAN_IPV6,
+ LABEL_VLAN_L4v4_OUT,
+ LABEL_VLAN_TRY_UDP4,
+ LABEL_VLAN_L4v4_HASH,
+ LABEL_VLAN_TRY_STCP4,
+ LABEL_VLAN_IPV6_CONTINUE,
+ LABEL_VLAN_TRY_UDP6,
+ LABEL_VLAN_L4v6_OUT,
+ LABEL_VLAN_L4v6_HASH,
+ LABEL_VLAN_TRY_STCP6,
+};
+
+/* stack */
+struct stack_entry {
+ struct stack_entry *next;
+ unsigned int addr;
+ union {
+ enum bpf_labels label;
+ int flags;
+ };
+};
+
+static struct stack_entry *stack_labels;
+static struct stack_entry *stack_addrs;
+
+enum hashing_flags {
+ HASH_ETH,
+ HASH_VLAN,
+ HASH_VLAN_IPV4,
+ HASH_VLAN_IPV6,
+ HASH_VLAN_UDP4,
+ HASH_VLAN_TCP4,
+ HASH_VLAN_SCTP4,
+ HASH_VLAN_UDP6,
+ HASH_VLAN_TCP6,
+ HASH_VLAN_SCTP6,
+ HASH_NOVLAN_IPV4,
+ HASH_NOVLAN_IPV6,
+ HASH_NOVLAN_UDP4,
+ HASH_NOVLAN_TCP4,
+ HASH_NOVLAN_SCTP4,
+ HASH_NOVLAN_UDP6,
+ HASH_NOVLAN_TCP6,
+ HASH_NOVLAN_SCTP6,
+};
+
+struct hash_flags {
+ unsigned int required;
+ unsigned int built;
+};
+
+static struct hash_flags hflags;
+
+static void hash_flags_init(struct hash_flags *flags)
{
- fprog->len = 0;
- fprog->filter = NULL;
+ flags->required = 0;
+ flags->built = 0;
}
-void teamd_bpf_desc_compile_start(struct sock_fprog *fprog)
+static int hash_test_and_set_flag(struct hash_flags *flags,
+ enum hashing_flags hflag)
{
- __compile_init(fprog);
+ int flag = 1 << hflag;
+ int ret;
+
+ ret = 0;
+ if (flags->required & flag) {
+ flags->built |= flag;
+ ret = 1;
+ }
+
+ return ret;
}
-void teamd_bpf_desc_compile_release(struct sock_fprog *fprog)
+static int hash_is_complete(struct hash_flags *flags)
{
- free(fprog->filter);
- __compile_init(fprog);
+ return flags->built == flags->required;
}
-int teamd_bpf_desc_compile_finish(struct sock_fprog *fprog)
+static int hash_set_enable(struct hash_flags *flags, enum hashing_flags hflag)
+{
+ flags->required |= (1 << hflag);
+ return 0;
+}
+
+static int hash_is_enabled(struct hash_flags *flags, enum hashing_flags hflag)
+{
+ return (flags->required & (1 << hflag));
+}
+
+static int hash_is_l4v4_enabled(struct hash_flags *flags)
+{
+ if (hash_is_enabled(flags, HASH_NOVLAN_TCP4) ||
+ hash_is_enabled(flags, HASH_NOVLAN_UDP4) ||
+ hash_is_enabled(flags, HASH_NOVLAN_SCTP4) ||
+ hash_is_enabled(flags, HASH_VLAN_TCP4) ||
+ hash_is_enabled(flags, HASH_VLAN_UDP4) ||
+ hash_is_enabled(flags, HASH_VLAN_SCTP4))
+ return 1;
+
+ return 0;
+}
+
+static int hash_is_l4v6_enabled(struct hash_flags *flags)
+{
+ if (hash_is_enabled(flags, HASH_NOVLAN_TCP6) ||
+ hash_is_enabled(flags, HASH_NOVLAN_UDP6) ||
+ hash_is_enabled(flags, HASH_NOVLAN_SCTP6) ||
+ hash_is_enabled(flags, HASH_VLAN_TCP6) ||
+ hash_is_enabled(flags, HASH_VLAN_UDP6) ||
+ hash_is_enabled(flags, HASH_VLAN_SCTP6))
+ return 1;
+
+ return 0;
+}
+
+static int hash_is_novlan_l3l4_enabled(struct hash_flags *flags)
+{
+
+ if (hash_is_enabled(flags, HASH_NOVLAN_IPV4) ||
+ hash_is_enabled(flags, HASH_NOVLAN_IPV6) ||
+ hash_is_enabled(flags, HASH_NOVLAN_TCP4) ||
+ hash_is_enabled(flags, HASH_NOVLAN_UDP4) ||
+ hash_is_enabled(flags, HASH_NOVLAN_SCTP4) ||
+ hash_is_enabled(flags, HASH_NOVLAN_TCP6) ||
+ hash_is_enabled(flags, HASH_NOVLAN_UDP6) ||
+ hash_is_enabled(flags, HASH_NOVLAN_SCTP6))
+ return 1;
+
+ return 0;
+}
+
+static void stack_init(void)
+{
+ stack_labels = NULL;
+ stack_addrs = NULL;
+}
+
+static void __stack_release(struct stack_entry *root)
+{
+ struct stack_entry *p;
+ struct stack_entry *pn;
+
+ p = root;
+ while (p) {
+ pn = p->next;
+ free(p);
+ p = pn;
+ }
+}
+
+static void stack_release(void)
+{
+ __stack_release(stack_labels);
+ __stack_release(stack_addrs);
+ stack_init();
+}
+
+static int push_addr(struct sock_fprog *fprog, int flags)
+{
+ struct stack_entry *pa;
+
+ pa = malloc(sizeof(struct stack_entry));
+ if (!pa)
+ return -ENOMEM;
+
+ pa->addr = fprog->len - 1;
+ pa->flags = flags;
+
+ pa->next = stack_addrs;
+ stack_addrs = pa;
+
+ return 0;
+}
+
+static struct stack_entry *__find_label(enum bpf_labels label)
+{
+ struct stack_entry *p;
+
+ p = stack_labels;
+ while (p) {
+ if (p->label == label)
+ return p;
+
+ p = p->next;
+ }
+
+ return NULL;
+}
+
+static int push_label(struct sock_fprog *fprog, enum bpf_labels label)
+{
+ struct stack_entry *pl;
+
+ if (__find_label(label))
+ return -EEXIST;
+
+ pl = malloc(sizeof(struct stack_entry));
+ if (!pl)
+ return -ENOMEM;
+
+ pl->addr = fprog->len;
+ pl->label = label;
+
+ pl->next = stack_labels;
+ stack_labels = pl;
+
+ return 0;
+}
+
+static int stack_resolve_offsets(struct sock_fprog *fprog)
+{
+ struct stack_entry *paddr;
+ struct stack_entry *naddr;
+ struct stack_entry *plabel;
+ struct stack_entry *nlabel;
+ struct sock_filter *sf;
+ int offset;
+
+ paddr = stack_addrs;
+ while (paddr) {
+ sf = fprog->filter + paddr->addr;
+
+ if (paddr->flags & ~(FIX_K|FIX_JT|FIX_JF))
+ return -EINVAL;
+
+ if (paddr->flags & FIX_K) {
+ plabel = __find_label(sf->k);
+ if (!plabel)
+ return -ENOENT;
+
+ offset = plabel->addr - paddr->addr - 1;
+ if (offset < 0 || offset > 255)
+ return -EINVAL;
+ sf->k = offset;
+ }
+
+ if (paddr->flags & FIX_JT) {
+ plabel = __find_label(sf->jt);
+ if (!plabel)
+ return -ENOENT;
+
+ offset = plabel->addr - paddr->addr - 1;
+ if (offset < 0 || offset > 255)
+ return -EINVAL;
+ sf->jt = offset;
+ }
+
+ if (paddr->flags & FIX_JF) {
+ plabel = __find_label(sf->jf);
+ if (!plabel)
+ return -ENOENT;
+
+ offset = plabel->addr - paddr->addr - 1;
+ if (offset < 0 || offset > 255)
+ return -EINVAL;
+ sf->jf = offset;
+ }
+
+ naddr = paddr->next;
+ free(paddr);
+ paddr = naddr;
+ }
+
+ plabel = stack_labels;
+ while (plabel) {
+ nlabel = plabel->next;
+ free(plabel);
+ plabel = nlabel;
+ }
+
+ stack_init();
+ return 0;
+}
+
+static int bpf_eth_hash(struct sock_fprog *fprog)
{
int err;
- /*
- * Return hash which is in X. Note that in case of no pattern match,
- * X will have value 0.
- */
- add_inst(fprog, BPF_STMT(BPF_MISC + BPF_TXA, 0));
- add_inst(fprog, BPF_STMT(BPF_RET + BPF_A, 0));
+ /* hash dest mac addr */
+ bpf_load_word(2);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_half(0);
+ bpf_calc_hash();
+ bpf_move_to_x();
+
+ /* hash source mac addr */
+ bpf_load_word(8);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_half(6);
+ bpf_calc_hash();
+ bpf_move_to_x();
return 0;
+
err_add_inst:
return err;
}
-int teamd_bpf_desc_compile_frag(struct sock_fprog *fprog,
- const struct teamd_bpf_desc_frag *frag)
+static int bpf_vlan_hash(struct sock_fprog *fprog)
{
int err;
- int i;
- unsigned short start_index = fprog->len;
-
- /*
- * Put pattern matching first. Patterns are checked sequentially,
- * if one pattern match fails, jump to the end is going to be done.
- * Note that end of frag is not known atm so put __JMP_END there for
- * now. Last loop in this function will correct this.
- */
-#define __JMP_END 0xFF
-
- for (i = 0; i < frag->pattern_count; i++) {
- const struct teamd_bpf_pattern *pattern = &frag->pattern[i];
-
- add_inst(fprog, BPF_STMT(BPF_LD + pattern->type + BPF_ABS,
- pattern->offset));
- add_inst(fprog, BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K,
- pattern->value, 0, __JMP_END));
- }
- /*
- * If patterns matches (no JUMP_TO_END), compute hash from specified
- * memory fields. Store the hash in X.
- */
- for (i = 0; i < frag->hash_field_count; i++) {
- const struct teamd_bpf_hash_field *hfield = &frag->hash_field[i];
-
- add_inst(fprog, BPF_STMT(BPF_LD + hfield->type + BPF_ABS,
- hfield->offset));
- add_inst(fprog, BPF_STMT(BPF_LD + BPF_B + BPF_ABS,
- SKF_AD_OFF + SKF_AD_ALU_XOR_X));
- add_inst(fprog, BPF_STMT(BPF_MISC + BPF_TAX, 0));
+ bpf_vlan_tag_id();
+ bpf_calc_hash();
+ bpf_move_to_x();
+
+err_add_inst:
+ return err;
+}
+
+static int __bpf_ipv4_hash(struct sock_fprog *fprog, bool vlan)
+{
+ int vlan_shift = vlan ? vlan_hdr_shift(0) : 0;
+ int err;
+
+ bpf_load_word(26 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_word(30 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ return 0;
+
+err_add_inst:
+ return err;
+}
+
+static int bpf_vlan_ipv4_hash(struct sock_fprog *fprog)
+{
+ return __bpf_ipv4_hash(fprog, true);
+}
+
+static int bpf_novlan_ipv4_hash(struct sock_fprog *fprog)
+{
+ return __bpf_ipv4_hash(fprog, false);
+}
+
+static int __bpf_ipv6_hash(struct sock_fprog *fprog, bool vlan)
+{
+ int vlan_shift = vlan ? vlan_hdr_shift(0) : 0;
+ int err;
+
+ bpf_load_word(22 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_word(26 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_word(30 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_word(34 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_word(38 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_word(42 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_word(46 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ bpf_load_word(50 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ return 0;
+
+err_add_inst:
+ return err;
+}
+
+static int bpf_vlan_ipv6_hash(struct sock_fprog *fprog)
+{
+ return __bpf_ipv6_hash(fprog, true);
+}
+
+static int bpf_novlan_ipv6_hash(struct sock_fprog *fprog)
+{
+ return __bpf_ipv6_hash(fprog, false);
+}
+
+static int __bpf_l4v4_hash(struct sock_fprog *fprog, bool vlan)
+{
+ int vlan_shift = vlan ? vlan_hdr_shift(0) : 0;
+ int err;
+
+ bpf_push_x();
+ bpf_ipv4_len_to_x(14 + vlan_shift);
+ /* source port offset */
+ bpf_l4v4_port_to_a(14 + vlan_shift);
+ bpf_pop_x();
+ bpf_calc_hash();
+ bpf_push_a();
+ bpf_ipv4_len_to_x(14 + vlan_shift);
+ /* dest port offset */
+ bpf_l4v4_port_to_a(16 + vlan_shift);
+ bpf_pop_x();
+ bpf_calc_hash();
+ bpf_move_to_x();
+ return 0;
+
+err_add_inst:
+ return err;
+}
+
+static int bpf_vlan_l4v4_hash(struct sock_fprog *fprog)
+{
+ return __bpf_l4v4_hash(fprog, true);
+}
+
+static int bpf_novlan_l4v4_hash(struct sock_fprog *fprog)
+{
+ return __bpf_l4v4_hash(fprog, false);
+}
+
+static int __bpf_l4v6_hash(struct sock_fprog *fprog, bool vlan)
+{
+ int vlan_shift = vlan ? vlan_hdr_shift(0) : 0;
+ int err;
+
+ bpf_load_half(54 + vlan_shift);
+	bpf_calc_hash();
+	bpf_move_to_x();
+	bpf_load_half(56 + vlan_shift);
+ bpf_calc_hash();
+ bpf_move_to_x();
+ return 0;
+
+err_add_inst:
+ return err;
+}
+
+static int bpf_vlan_l4v6_hash(struct sock_fprog *fprog)
+{
+ return __bpf_l4v6_hash(fprog, true);
+}
+
+static int bpf_novlan_l4v6_hash(struct sock_fprog *fprog)
+{
+ return __bpf_l4v6_hash(fprog, false);
+}
+
+/* bpf_create_code:
+ * This function creates the entire bpf hashing code and follows
+ * this scheme:
+ *
+ * start
+ * V
+ * handle ethernet
+ * V
+ *     check vlan tag presence
+ * |yes | no
+ * handle |
+ * vlan |
+ * V V
+ * +----- ipv4/ipv6? +-- ipv4/ipv6?--+
+ * |ipv4 |ipv6 | ipv4 |ipv6
+ * +--frag?-+ | +-frag?-+ |
+ * |yes |no | |yes |no |
+ * return V V return V V
+ * handle handle handle handle
+ * ipv4 ipv6 ipv4 ipv6
+ * | | | |
+ * proto? proto? proto? proto?
+ *
+ * for each branch above:
+ * |tcp |udp |sctp |none
+ * | | | |
+ * handle |
+ * return <------+
+ */
+static int bpf_create_code(struct sock_fprog *fprog, struct hash_flags *flags)
+{
+ int err;
+
+ /* generate the ethernet hashing code */
+ if (hash_test_and_set_flag(flags, HASH_ETH))
+ bpf_eth_hash(fprog);
+
+ if (hash_is_complete(flags)) {
+ bpf_hash_return();
+ /* there is no need to keep going, all done */
+ return 0;
}
- /* Correct jump offsets */
- for (i = start_index; i < fprog->len; i++) {
- struct sock_filter *filter = &fprog->filter[i];
+ bpf_vlan_tag_present();
+ bpf_cmp(0, LABEL_VLAN_BRANCH, 0, FIX_JF);
+ if (!hash_is_novlan_l3l4_enabled(flags))
+ bpf_hash_return();
+
+ /* no vlan branch */
+ bpf_load_half(ETH_TYPE_OFFSET);
+ bpf_cmp(0, LABEL_NOVLAN_IPV6, PROTOID_IPV4, FIX_JF);
+
+ /* no vlan ipv4 branch */
+ if (hash_test_and_set_flag(flags, HASH_NOVLAN_IPV4))
+ bpf_novlan_ipv4_hash(fprog);
+
+ if (!hash_is_l4v4_enabled(flags))
+ bpf_hash_return();
+
+ /* no vlan ipv4 L4 */
+ /* ignore IP frags */
+ bpf_load_half(IPV4_FLAGS_OFFSET);
+ bpf_and(LABEL_NOVLAN_L4v4_OUT, 0, IPV4_FRAG_BITS, FIX_JT);
+
+ /* L4 protocol check */
+ bpf_load_byte(IPV4_PROTO_OFFSET);
+ bpf_cmp(0, LABEL_NOVLAN_TRY_UDP4, PROTOID_TCP, FIX_JF);
+
+ if (hash_test_and_set_flag(flags, HASH_NOVLAN_TCP4))
+ bpf_jump(LABEL_NOVLAN_L4v4_HASH);
+ else
+ bpf_jump(LABEL_NOVLAN_L4v4_OUT);
+
+ /* no vlan try udp4 */
+ push_label(fprog, LABEL_NOVLAN_TRY_UDP4);
+ bpf_cmp(0, LABEL_NOVLAN_TRY_STCP4, PROTOID_UDP, FIX_JF);
+ if (hash_test_and_set_flag(flags, HASH_NOVLAN_UDP4))
+ bpf_jump(LABEL_NOVLAN_L4v4_HASH);
+ else
+ bpf_jump(LABEL_NOVLAN_L4v4_OUT);
+
+ /* no vlan try sctp4 */
+ push_label(fprog, LABEL_NOVLAN_TRY_STCP4);
+ bpf_cmp(0, LABEL_NOVLAN_L4v4_OUT, PROTOID_SCTP, FIX_JF);
+ if (!hash_test_and_set_flag(flags, HASH_NOVLAN_SCTP4))
+ bpf_jump(LABEL_NOVLAN_L4v4_OUT);
+
+ /* no vlan L4v4 hashing */
+ push_label(fprog, LABEL_NOVLAN_L4v4_HASH);
+ bpf_novlan_l4v4_hash(fprog);
+
+ /* no vlan L4v4 out: */
+ push_label(fprog, LABEL_NOVLAN_L4v4_OUT);
+ bpf_hash_return();
+
+ /* no vlan ipv6 branch */
+ push_label(fprog, LABEL_NOVLAN_IPV6);
+ bpf_cmp(LABEL_NOVLAN_IPV6_CONTINUE, 0, PROTOID_IPV6, FIX_JT);
+ bpf_hash_return();
- if (filter->code == BPF_JMP + BPF_JEQ + BPF_K &&
- filter->jf == __JMP_END)
- filter->jf = fprog->len - i - 1;
+ /* no vlan ipv6 continue */
+ push_label(fprog, LABEL_NOVLAN_IPV6_CONTINUE);
+ if (hash_test_and_set_flag(flags, HASH_NOVLAN_IPV6))
+ bpf_novlan_ipv6_hash(fprog);
+
+ if (!hash_is_l4v6_enabled(flags))
+ bpf_hash_return();
+
+ /* no vlan ipv6 l4 branch */
+ /* L4 protocol check (Next Header) */
+ bpf_load_byte(IPV6_NEXTHEADER_OFFSET);
+ bpf_cmp(0, LABEL_NOVLAN_TRY_UDP6, PROTOID_TCP, FIX_JF);
+
+ if (hash_test_and_set_flag(flags, HASH_NOVLAN_TCP6))
+ bpf_jump(LABEL_NOVLAN_L4v6_HASH);
+ else
+ bpf_jump(LABEL_NOVLAN_L4v6_OUT);
+
+ /* no vlan check udp6 */
+ push_label(fprog, LABEL_NOVLAN_TRY_UDP6);
+ bpf_cmp(0, LABEL_NOVLAN_TRY_STCP6, PROTOID_UDP, FIX_JF);
+ if (hash_test_and_set_flag(flags, HASH_NOVLAN_UDP6))
+ bpf_jump(LABEL_NOVLAN_L4v6_HASH);
+ else
+ bpf_jump(LABEL_NOVLAN_L4v6_OUT);
+
+ /* no vlan check sctp6 */
+ push_label(fprog, LABEL_NOVLAN_TRY_STCP6);
+ bpf_cmp(0, LABEL_NOVLAN_L4v6_OUT, PROTOID_SCTP, FIX_JF);
+	if (!hash_test_and_set_flag(flags, HASH_NOVLAN_SCTP6))
+ bpf_jump(LABEL_NOVLAN_L4v6_OUT);
+
+ /* no vlan l4v6 hashing */
+ push_label(fprog, LABEL_NOVLAN_L4v6_HASH);
+ bpf_novlan_l4v6_hash(fprog);
+
+ /* no vlan l4v6 out */
+ push_label(fprog, LABEL_NOVLAN_L4v6_OUT);
+ bpf_hash_return();
+
+ /* vlan branch */
+ push_label(fprog, LABEL_VLAN_BRANCH);
+ if (hash_test_and_set_flag(flags, HASH_VLAN))
+ bpf_vlan_hash(fprog);
+
+ if (hash_is_complete(flags)) {
+ bpf_hash_return();
+ return 0;
}
+ bpf_load_half(vlan_hdr_shift(ETH_TYPE_OFFSET));
+ bpf_cmp(0, LABEL_VLAN_IPV6, PROTOID_IPV4, FIX_JF);
+ /* vlan ipv4 branch */
+ if (hash_test_and_set_flag(flags, HASH_VLAN_IPV4))
+ bpf_vlan_ipv4_hash(fprog);
+
+ if (!hash_is_l4v4_enabled(flags))
+ bpf_hash_return();
+
+ /* vlan ipv4 L4 */
+ /* ignore IP frags */
+ bpf_load_half(vlan_hdr_shift(IPV4_FLAGS_OFFSET));
+ bpf_and(LABEL_VLAN_L4v4_OUT, 0, IPV4_FRAG_BITS, FIX_JT);
+
+ /* L4 protocol check */
+ bpf_load_byte(vlan_hdr_shift(IPV4_PROTO_OFFSET));
+ bpf_cmp(0, LABEL_VLAN_TRY_UDP4, PROTOID_TCP, FIX_JF);
+
+ if (hash_test_and_set_flag(flags, HASH_VLAN_TCP4))
+ bpf_jump(LABEL_VLAN_L4v4_HASH);
+ else
+ bpf_jump(LABEL_VLAN_L4v4_OUT);
+
+ /* vlan try udp4 */
+ push_label(fprog, LABEL_VLAN_TRY_UDP4);
+ bpf_cmp(0, LABEL_VLAN_TRY_STCP4, PROTOID_UDP, FIX_JF);
+ if (hash_test_and_set_flag(flags, HASH_VLAN_UDP4))
+ bpf_jump(LABEL_VLAN_L4v4_HASH);
+ else
+ bpf_jump(LABEL_VLAN_L4v4_OUT);
+
+ /* vlan try sctp4 */
+ push_label(fprog, LABEL_VLAN_TRY_STCP4);
+ bpf_cmp(0, LABEL_VLAN_L4v4_OUT, PROTOID_SCTP, FIX_JF);
+ if (!hash_test_and_set_flag(flags, HASH_VLAN_SCTP4))
+ bpf_jump(LABEL_VLAN_L4v4_OUT);
+
+ /* vlan L4v4 hashing */
+ push_label(fprog, LABEL_VLAN_L4v4_HASH);
+ bpf_vlan_l4v4_hash(fprog);
+
+ /* vlan L4v4 out: */
+ push_label(fprog, LABEL_VLAN_L4v4_OUT);
+ bpf_hash_return();
+
+ /* vlan ipv6 branch */
+ push_label(fprog, LABEL_VLAN_IPV6);
+ bpf_cmp(LABEL_VLAN_IPV6_CONTINUE, 0, PROTOID_IPV6, FIX_JT);
+ bpf_hash_return();
+
+ /* vlan ipv6 continue */
+ push_label(fprog, LABEL_VLAN_IPV6_CONTINUE);
+ if (hash_test_and_set_flag(flags, HASH_VLAN_IPV6))
+ bpf_vlan_ipv6_hash(fprog);
+
+ if (!hash_is_l4v6_enabled(flags))
+ bpf_hash_return();
+
+ /* vlan ipv6 l4 branch */
+ /* L4 protocol check (Next Header) */
+ bpf_load_byte(vlan_hdr_shift(IPV6_NEXTHEADER_OFFSET));
+ bpf_cmp(0, LABEL_VLAN_TRY_UDP6, PROTOID_TCP, FIX_JF);
+
+ if (hash_test_and_set_flag(flags, HASH_VLAN_TCP6))
+ bpf_jump(LABEL_VLAN_L4v6_HASH);
+ else
+ bpf_jump(LABEL_VLAN_L4v6_OUT);
+
+ /* vlan check udp6 */
+ push_label(fprog, LABEL_VLAN_TRY_UDP6);
+ bpf_cmp(0, LABEL_VLAN_TRY_STCP6, PROTOID_UDP, FIX_JF);
+ if (hash_test_and_set_flag(flags, HASH_VLAN_UDP6))
+ bpf_jump(LABEL_VLAN_L4v6_HASH);
+ else
+ bpf_jump(LABEL_VLAN_L4v6_OUT);
+
+ /* vlan check sctp6 */
+ push_label(fprog, LABEL_VLAN_TRY_STCP6);
+ bpf_cmp(0, LABEL_VLAN_L4v6_OUT, PROTOID_SCTP, FIX_JF);
+	if (!hash_test_and_set_flag(flags, HASH_VLAN_SCTP6))
+ bpf_jump(LABEL_VLAN_L4v6_OUT);
+
+ /* vlan l4v6 hashing */
+ push_label(fprog, LABEL_VLAN_L4v6_HASH);
+ bpf_vlan_l4v6_hash(fprog);
+
+ /* vlan l4v6 out */
+ push_label(fprog, LABEL_VLAN_L4v6_OUT);
+ bpf_hash_return();
return 0;
+
err_add_inst:
return err;
}
+
+int teamd_bpf_desc_add_frag(struct sock_fprog *fprog,
+ const struct teamd_bpf_desc_frag *frag)
+{
+ switch (frag->hproto) {
+ case PROTO_ETH:
+ hash_set_enable(&hflags, HASH_ETH);
+ break;
+
+ case PROTO_VLAN:
+ hash_set_enable(&hflags, HASH_VLAN);
+ break;
+
+ case PROTO_IP:
+ case PROTO_L3:
+ hash_set_enable(&hflags, HASH_VLAN_IPV4);
+ hash_set_enable(&hflags, HASH_VLAN_IPV6);
+ hash_set_enable(&hflags, HASH_NOVLAN_IPV4);
+ hash_set_enable(&hflags, HASH_NOVLAN_IPV6);
+ break;
+
+ case PROTO_IPV4:
+ hash_set_enable(&hflags, HASH_VLAN_IPV4);
+ hash_set_enable(&hflags, HASH_NOVLAN_IPV4);
+ break;
+
+ case PROTO_IPV6:
+ hash_set_enable(&hflags, HASH_VLAN_IPV6);
+ hash_set_enable(&hflags, HASH_NOVLAN_IPV6);
+ break;
+
+ case PROTO_L4:
+ hash_set_enable(&hflags, HASH_VLAN_TCP4);
+ hash_set_enable(&hflags, HASH_VLAN_TCP6);
+ hash_set_enable(&hflags, HASH_VLAN_UDP4);
+ hash_set_enable(&hflags, HASH_VLAN_UDP6);
+ hash_set_enable(&hflags, HASH_VLAN_SCTP4);
+ hash_set_enable(&hflags, HASH_VLAN_SCTP6);
+ hash_set_enable(&hflags, HASH_NOVLAN_TCP4);
+ hash_set_enable(&hflags, HASH_NOVLAN_TCP6);
+ hash_set_enable(&hflags, HASH_NOVLAN_UDP4);
+ hash_set_enable(&hflags, HASH_NOVLAN_UDP6);
+ hash_set_enable(&hflags, HASH_NOVLAN_SCTP4);
+ hash_set_enable(&hflags, HASH_NOVLAN_SCTP6);
+ break;
+
+ case PROTO_TCP:
+ hash_set_enable(&hflags, HASH_VLAN_TCP4);
+ hash_set_enable(&hflags, HASH_VLAN_TCP6);
+ hash_set_enable(&hflags, HASH_NOVLAN_TCP4);
+ hash_set_enable(&hflags, HASH_NOVLAN_TCP6);
+ break;
+
+ case PROTO_UDP:
+ hash_set_enable(&hflags, HASH_VLAN_UDP4);
+ hash_set_enable(&hflags, HASH_VLAN_UDP6);
+ hash_set_enable(&hflags, HASH_NOVLAN_UDP4);
+ hash_set_enable(&hflags, HASH_NOVLAN_UDP6);
+ break;
+
+ case PROTO_SCTP:
+ hash_set_enable(&hflags, HASH_VLAN_SCTP4);
+ hash_set_enable(&hflags, HASH_VLAN_SCTP6);
+ hash_set_enable(&hflags, HASH_NOVLAN_SCTP4);
+ hash_set_enable(&hflags, HASH_NOVLAN_SCTP6);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void __compile_init(struct sock_fprog *fprog)
+{
+ fprog->len = 0;
+ fprog->filter = NULL;
+ stack_init();
+ hash_flags_init(&hflags);
+}
+
+void teamd_bpf_desc_compile_start(struct sock_fprog *fprog)
+{
+ __compile_init(fprog);
+}
+
+void teamd_bpf_desc_compile_release(struct sock_fprog *fprog)
+{
+ free(fprog->filter);
+ __compile_init(fprog);
+ stack_release();
+ hash_flags_init(&hflags);
+}
+
+int teamd_bpf_desc_compile_finish(struct sock_fprog *fprog)
+{
+ return 0;
+}
+
+int teamd_bpf_desc_compile(struct sock_fprog *fprog)
+{
+ int err;
+
+ err = bpf_create_code(fprog, &hflags);
+ if (err)
+ return err;
+
+ err = stack_resolve_offsets(fprog);
+ return err;
+}
diff --git a/teamd/teamd_bpf_chef.h b/teamd/teamd_bpf_chef.h
index f86405c..3b7d373 100644
--- a/teamd/teamd_bpf_chef.h
+++ b/teamd/teamd_bpf_chef.h
@@ -23,15 +23,17 @@
#include <stdint.h>
#include <sys/types.h>
-struct teamd_bpf_pattern {
- size_t offset;
- uint8_t type; /* BPF_B / BPF_H / BPF_W */
- uint32_t value;
-};
-
-struct teamd_bpf_hash_field {
- size_t offset; /* offset of element in header */
- uint8_t type; /* BPF_B / BPF_H / BPF_W */
+enum hashing_protos {
+ PROTO_ETH,
+ PROTO_VLAN,
+ PROTO_IP,
+ PROTO_IPV4,
+ PROTO_IPV6,
+ PROTO_L3,
+ PROTO_TCP,
+ PROTO_UDP,
+ PROTO_SCTP,
+ PROTO_L4,
};
/*
@@ -41,16 +43,14 @@ struct teamd_bpf_hash_field {
*/
struct teamd_bpf_desc_frag {
char * name;
- const struct teamd_bpf_pattern * pattern;
- unsigned int pattern_count;
- const struct teamd_bpf_hash_field * hash_field;
- unsigned int hash_field_count;
+ enum hashing_protos hproto;
};
void teamd_bpf_desc_compile_start(struct sock_fprog *fprog);
void teamd_bpf_desc_compile_release(struct sock_fprog *fprog);
+int teamd_bpf_desc_compile(struct sock_fprog *fprog);
int teamd_bpf_desc_compile_finish(struct sock_fprog *fprog);
-int teamd_bpf_desc_compile_frag(struct sock_fprog *fprog,
- const struct teamd_bpf_desc_frag *frag);
+int teamd_bpf_desc_add_frag(struct sock_fprog *fprog,
+ const struct teamd_bpf_desc_frag *frag);
#endif /* _TEAMD_BPF_CHEF_H_ */
diff --git a/teamd/teamd_hash_func.c b/teamd/teamd_hash_func.c
index ff4094a..e7d57ae 100644
--- a/teamd/teamd_hash_func.c
+++ b/teamd/teamd_hash_func.c
@@ -26,113 +26,65 @@
#include "teamd_config.h"
#include "teamd_bpf_chef.h"
-static const struct teamd_bpf_hash_field eth_hdr_hash_field[] = {
- { /* First 4 bytes of src addr */
- .offset = 0,
- .type = BPF_W,
- },
- { /* Last 2 bytes of src addr */
- .offset = 4,
- .type = BPF_H,
- },
- { /* First 4 bytes of dst addr */
- .offset = 6,
- .type = BPF_W,
- },
- { /* Last 2 bytes of dst addr */
- .offset = 10,
- .type = BPF_H,
- },
-};
-
static const struct teamd_bpf_desc_frag eth_hdr_frag = {
.name = "eth",
- .hash_field = eth_hdr_hash_field,
- .hash_field_count = ARRAY_SIZE(eth_hdr_hash_field),
+ .hproto = PROTO_ETH,
};
-static const struct teamd_bpf_pattern ipv4_hdr_pattern[] = {
- { /* type IPv4 */
- .offset = 12,
- .type = BPF_H,
- .value = 0x0800,
- },
-};
-
-static const struct teamd_bpf_hash_field ipv4_hdr_hash_field[] = {
- { /* 4 bytes of src addr */
- .offset = 26,
- .type = BPF_W,
- },
- { /* 4 bytes of dst addr */
- .offset = 30,
- .type = BPF_W,
- },
+static const struct teamd_bpf_desc_frag vlan_hdr_frag = {
+ .name = "vlan",
+ .hproto = PROTO_VLAN,
};
static const struct teamd_bpf_desc_frag ipv4_hdr_frag = {
.name = "ipv4",
- .pattern = ipv4_hdr_pattern,
- .pattern_count = ARRAY_SIZE(ipv4_hdr_pattern),
- .hash_field = ipv4_hdr_hash_field,
- .hash_field_count = ARRAY_SIZE(ipv4_hdr_hash_field),
+ .hproto = PROTO_IPV4,
};
-static const struct teamd_bpf_pattern ipv6_hdr_pattern[] = {
- { /* type IPv6 */
- .offset = 12,
- .type = BPF_H,
- .value = 0x86dd,
- },
+static const struct teamd_bpf_desc_frag ipv6_hdr_frag = {
+ .name = "ipv6",
+ .hproto = PROTO_IPV6,
};
-static const struct teamd_bpf_hash_field ipv6_hdr_hash_field[] = {
- { /* first 4 bytes of src addr */
- .offset = 22,
- .type = BPF_W,
- },
- { /* second 4 bytes of src addr */
- .offset = 26,
- .type = BPF_W,
- },
- { /* third 4 bytes of src addr */
- .offset = 30,
- .type = BPF_W,
- },
- { /* fourth 4 bytes of src addr */
- .offset = 34,
- .type = BPF_W,
- },
- { /* first 4 bytes of dst addr */
- .offset = 38,
- .type = BPF_W,
- },
- { /* second 4 bytes of dst addr */
- .offset = 42,
- .type = BPF_W,
- },
- { /* third 4 bytes of dst addr */
- .offset = 46,
- .type = BPF_W,
- },
- { /* fourth 4 bytes of dst addr */
- .offset = 50,
- .type = BPF_W,
- },
+static const struct teamd_bpf_desc_frag ip_hdr_frag = {
+ .name = "ip",
+ .hproto = PROTO_IP,
};
-static const struct teamd_bpf_desc_frag ipv6_hdr_frag = {
- .name = "ipv6",
- .pattern = ipv6_hdr_pattern,
- .pattern_count = ARRAY_SIZE(ipv6_hdr_pattern),
- .hash_field = ipv6_hdr_hash_field,
- .hash_field_count = ARRAY_SIZE(ipv6_hdr_hash_field),
+static const struct teamd_bpf_desc_frag l3_hdr_frag = {
+ .name = "l3",
+ .hproto = PROTO_L3,
+};
+
+static const struct teamd_bpf_desc_frag l4_hdr_frag = {
+ .name = "l4",
+ .hproto = PROTO_L4,
+};
+
+static const struct teamd_bpf_desc_frag tcp_hdr_frag = {
+ .name = "tcp",
+ .hproto = PROTO_TCP,
+};
+static const struct teamd_bpf_desc_frag udp_hdr_frag = {
+ .name = "udp",
+ .hproto = PROTO_UDP,
+};
+static const struct teamd_bpf_desc_frag sctp_hdr_frag = {
+ .name = "sctp",
+ .hproto = PROTO_SCTP,
};
static const struct teamd_bpf_desc_frag *frags[] = {
ð_hdr_frag,
+ &vlan_hdr_frag,
&ipv4_hdr_frag,
&ipv6_hdr_frag,
+ &ip_hdr_frag,
+ &l3_hdr_frag,
+ &l4_hdr_frag,
+ &tcp_hdr_frag,
+ &udp_hdr_frag,
+ &sctp_hdr_frag,
};
static const size_t frags_count = ARRAY_SIZE(frags);
@@ -169,10 +121,15 @@ static int teamd_hash_func_init(struct teamd_context *ctx, struct sock_fprog *fp
frag_name);
continue;
}
- err = teamd_bpf_desc_compile_frag(fprog, frag);
+ err = teamd_bpf_desc_add_frag(fprog, frag);
if (err)
goto release;
}
+
+ err = teamd_bpf_desc_compile(fprog);
+ if (err)
+ goto release;
+
err = teamd_bpf_desc_compile_finish(fprog);
if (err)
goto release;
--
1.8.1.4