vimer linux kernel 爱好者

xdp基本使用案例1

2019-03-02
xdp

前言

xdp作为一个新生事物,目前好多东西都在探索。

基本组成

这个https://github.com/xdp-project/xdp-tutorial/tree/master/basic01-xdp-pass为例,基本上eBPF的程序由两部分组成,一个位于内核,一个位于用户空间。

xdp_pass_kern.c:

#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("xdp")
int  xdp_prog_simple(struct xdp_md *ctx)
{
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

xdp_pass_user.c :

/* SPDX-License-Identifier: GPL-2.0 */
static const char *__doc__ = "Simple XDP prog doing XDP_PASS\n";

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <getopt.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include <net/if.h>
#include <linux/if_link.h> /* depend on kernel-headers installed */

#include "../common/common_params.h"

static const struct option_wrapper long_options[] = {
	{
		{"help",        no_argument,		NULL, 'h' },
		"Show help", false},

	{
		{"dev",         required_argument,	NULL, 'd' },
		"Operate on device <ifname>", "<ifname>", true},

	{
		{"skb-mode",    no_argument,		NULL, 'S' },
		 "Install XDP program in SKB (AKA generic) mode"},

	{
		{"native-mode", no_argument,		NULL, 'N' },
		"Install XDP program in native mode"},

	{
		{"auto-mode",   no_argument,		NULL, 'A' },
		"Auto-detect SKB or native mode"},

	{
		{"force",       no_argument,		NULL, 'F' },
		"Force install, replacing existing program on interface"},

	{
		{"unload",      no_argument,		NULL, 'U' },
		"Unload XDP program instead of loading"},

	{
		{0, 0, NULL,  0 }, NULL, false}
};

int load_bpf_object_file__simple(const char *filename)
{
	int first_prog_fd = -1;
	struct bpf_object *obj;
	int err;

	/* Use libbpf for extracting BPF byte-code from BPF-ELF object, and
	 * loading this into the kernel via bpf-syscall
	 */
	err = bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &first_prog_fd);
	if (err) {
		fprintf(stderr, "ERR: loading BPF-OBJ file(%s) (%d): %s\n",
			filename, err, strerror(-err));
		return -1;
	}

	/* Simply return the first program file descriptor.
	 * (Hint: This will get more advanced later)
	 */
	return first_prog_fd;
}

static int xdp_link_detach(int ifindex, __u32 xdp_flags)
{
	/* Next assignment this will move into ../common/
	 * (in more generic version)
	 */
	int err;

	if ((err = bpf_set_link_xdp_fd(ifindex, -1, xdp_flags)) < 0) {
		fprintf(stderr, "ERR: link set xdp unload failed (err=%d):%s\n",
			err, strerror(-err));
		return EXIT_FAIL_XDP;
	}
	return EXIT_OK;
}

int xdp_link_attach(int ifindex, __u32 xdp_flags, int prog_fd)
{
	/* Next assignment this will move into ../common/ */
	int err;

	/* libbpf provide the XDP net_device link-level hook attach helper */
	err = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
	if (err < 0) {
		fprintf(stderr, "ERR: "
			"ifindex(%d) link set xdp fd failed (%d): %s\n",
			ifindex, -err, strerror(-err));

		switch (-err) {
		case EBUSY:
			fprintf(stderr, "Hint: XDP already loaded on device"
				" use --force to swap/replace\n");
			break;
		case EOPNOTSUPP:
			fprintf(stderr, "Hint: Native-XDP not supported"
				" use --skb-mode or --auto-mode\n");
			break;
		default:
			break;
		}
		return EXIT_FAIL_XDP;
	}

	return EXIT_OK;
}

int main(int argc, char **argv)
{
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	char filename[256] = "xdp_pass_kern.o";
	int prog_fd, err;

	struct config cfg = {
		.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE,
		.ifindex   = -1,
		.do_unload = false,
	};

	parse_cmdline_args(argc, argv, long_options, &cfg, __doc__);
	/* Required option */
	if (cfg.ifindex == -1) {
		fprintf(stderr, "ERR: required option --dev missing\n");
		usage(argv[0], __doc__, long_options, (argc == 1));
		return EXIT_FAIL_OPTION;
	}
	if (cfg.do_unload)
		return xdp_link_detach(cfg.ifindex, cfg.xdp_flags);

	/* Load the BPF-ELF object file and get back first BPF_prog FD */
	prog_fd = load_bpf_object_file__simple(filename);
	if (prog_fd <= 0) {
		fprintf(stderr, "ERR: loading file: %s\n", filename);
		return EXIT_FAIL_BPF;
	}

	/* At this point: BPF-prog is (only) loaded by the kernel, and prog_fd
	 * is our file-descriptor handle. Next step is attaching this FD to a
	 * kernel hook point, in this case XDP net_device link-level hook.
	 * Fortunately libbpf have a helper for this:
	 */
	err = xdp_link_attach(cfg.ifindex, cfg.xdp_flags, prog_fd);
	if (err)
		return err;

        /* This step is not really needed , BPF-info via bpf-syscall */
	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
	if (err) {
		fprintf(stderr, "ERR: can't get prog info - %s\n",
			strerror(errno));
		return err;
	}

	printf("Success: Loading "
	       "XDP prog name:%s(id:%d) on device:%s(ifindex:%d)\n",
	       info.name, info.id, cfg.ifname, cfg.ifindex);
	return EXIT_OK;
}

Makefile:

XDP_TARGETS := xdp_pass_kern
USER_TARGETS := xdp_pass_user

LLC ?= llc
CLANG ?= clang
CC := gcc

LIBBPF_DIR = ../libbpf/src/
COMMON_DIR = ../common/

include $(COMMON_DIR)/common.mk
COMMON_OBJS := $(COMMON_DIR)/common_params.o

Makefile文件因为使用了libbpf库文件(或者说这个git仓库),思路都差不多的。

  1. 首先使用llvm编译xdp_pass_kern.o,这个文件是要注入到内核中去的,内核怎么保证这个代码的安全咱们这里无法展开详细说,怎么使用这个东西是咱们的重点。

  2. 加载。这里就是两种方式,第一种就是使用用户程序:xdp_pass_user.c,去生成可执行文件,具体来讲,The libbpf library provides both an ELF loader and several XDP helper functions.就是借助 libelf-dev库文件的某些机制去加载到内核。第二种方式就是使用iproute2命令.

BPF-ELF文件

Run: llvm-objdump -S xdp_pass_kern.o

xdp_pass_kern.o:	file format ELF64-BPF

Disassembly of section xdp:
xdp_prog_simple:
; {
       0:	b7 00 00 00 02 00 00 00 	r0 = 2
; return XDP_PASS;
       1:	95 00 00 00 00 00 00 00 	exit

## 用户空间加载 在libbpf/include/uapi/linux/bpf.h中,有以下定义。

	This is a mirror of bpf-next linux tree
(https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next)
bpf-next/tools/lib/bpf directory plus its supporting header files.

The following files will by sync'ed with bpf-next repo:
  src/ <-> bpf-next/tools/lib/bpf/
  include/uapi/linux/bpf_common.h <-> bpf-next/tools/include/uapi/linux/bpf_common.h
  include/uapi/linux/bpf.h <-> bpf-next/tools/include/uapi/linux/bpf.h
  include/uapi/linux/btf.h <-> bpf-next/tools/include/uapi/linux/btf.h
  include/uapi/linux/if_link.h <-> bpf-next/tools/include/uapi/linux/if_link.h
  include/uapi/linux/if_xdp.h <-> bpf-next/tools/include/uapi/linux/if_xdp.h
  include/uapi/linux/netlink.h <-> bpf-next/tools/include/uapi/linux/netlink.h
  include/tools/libc_compat.h <-> bpf-next/tools/include/tools/libc_compat.h

Other header files at this repo (include/linux/*.h) are reduced versions of
their counterpart files at bpf-next/tools/include/linux/*.h to make compilation
successful.

bpf_prog_info来自bpf.h

struct bpf_prog_info {
	__u32 type;
	__u32 id;
	__u8  tag[BPF_TAG_SIZE];
	__u32 jited_prog_len;
	__u32 xlated_prog_len;
	__aligned_u64 jited_prog_insns;
	__aligned_u64 xlated_prog_insns;
	__u64 load_time;	/* ns since boottime */
	__u32 created_by_uid;
	__u32 nr_map_ids;
	__aligned_u64 map_ids;
	char name[BPF_OBJ_NAME_LEN];
	__u32 ifindex;
	__u32 gpl_compatible:1;
	__u64 netns_dev;
	__u64 netns_ino;
	__u32 nr_jited_ksyms;
	__u32 nr_jited_func_lens;
	__aligned_u64 jited_ksyms;
	__aligned_u64 jited_func_lens;
	__u32 btf_id;
	__u32 func_info_rec_size;
	__aligned_u64 func_info;
	__u32 nr_func_info;
	__u32 nr_line_info;
	__aligned_u64 line_info;
	__aligned_u64 jited_line_info;
	__u32 nr_jited_line_info;
	__u32 line_info_rec_size;
	__u32 jited_line_info_rec_size;
	__u32 nr_prog_tags;
	__aligned_u64 prog_tags;
	__u64 run_time_ns;
	__u64 run_cnt;
} __attribute__((aligned(8)));

还有一个config来自哪里?来自common/common_defines.h

struct config {
	__u32 xdp_flags;
	int ifindex;
	char *ifname;
	char ifname_buf[IF_NAMESIZE];
	bool do_unload;
	char filename[512];
	char progsec[32];
};
parse_cmdline_args(argc, argv, long_options, &cfg, __doc__);

很明显来自一个库文件。

prog_fd = load_bpf_object_file__simple(filename);
/* At this point: BPF-prog is (only) loaded by the kernel, and prog_fd
 * is our file-descriptor handle. Next step is attaching this FD to a
 * kernel hook point, in this case XDP net_device link-level hook.
 * Fortunately libbpf have a helper for this:
 */

也许xdp可以指定在哪个设备上。

2 loading a program by name

The C structs corresponding to the libbpf objects are:

struct bpf_object
struct bpf_program
struct bpf_map

xdp_prog_kern.c:

#include <linux/bpf.h>
#include "bpf_helpers.h"

/* Notice how this XDP/BPF-program contains several programs in the same source
 * file. These will each get their own section in the ELF file, and via libbpf
 * they can be selected individually, and via their file-descriptor attached to
 * a given kernel BPF-hook.
 *
 * The libbpf bpf_object__find_program_by_title() refers to SEC names below.
 * The iproute2 utility also use section name.
 *
 * Slightly confusing, the names that gets listed by "bpftool prog" are the
 * C-function names (below the SEC define).
 */

SEC("xdp_pass")
int  xdp_pass_func(struct xdp_md *ctx)
{
	return XDP_PASS;
}

SEC("xdp_drop")
int  xdp_drop_func(struct xdp_md *ctx)
{
	return XDP_DROP;
}

/* Assignment#2: Add new XDP program section that use XDP_ABORTED */

char _license[] SEC("license") = "GPL";

/* Hint the avail XDP action return codes are:
enum xdp_action {
        XDP_ABORTED = 0,
        XDP_DROP,
        XDP_PASS,
        XDP_TX,
        XDP_REDIRECT,
};
*/

xdp_loader.c:

static const char *default_filename = "xdp_prog_kern.o";
static const char *default_progsec = "xdp_pass";

static const struct option_wrapper long_options[] = {
	{
		{"help",        no_argument,		NULL, 'h' },
	 "Show help", false},

	{
		{"dev",         required_argument,	NULL, 'd' },
	 "Operate on device <ifname>", "<ifname>", true},

	{
		{"skb-mode",    no_argument,		NULL, 'S' },
	 "Install XDP program in SKB (AKA generic) mode"},

	{
		{"native-mode", no_argument,		NULL, 'N' },
	 "Install XDP program in native mode"},

	{
		{"auto-mode",   no_argument,		NULL, 'A' },
	 "Auto-detect SKB or native mode"},

	{
		{"offload-mode",no_argument,		NULL, 3 },
	 "Hardware offload XDP program to NIC"},

	{
		{"force",       no_argument,		NULL, 'F' },
	 "Force install, replacing existing program on interface"},

	{
		{"unload",      no_argument,		NULL, 'U' },
	 "Unload XDP program instead of loading"},

	{
		{"quiet",       no_argument,		NULL, 'q' },
	 "Quiet mode (no output)"},

	{
		{"filename",    required_argument,	NULL,  1  },
	 "Load program from <file>", "<file>"},

	{
		{"progsec",    required_argument,	NULL,  2  },
	 "Load program in <section> of the ELF file", "<section>"},

	{
		{0, 0, NULL,  0 }, NULL, false}
};

/* Lesson#1: More advanced load_bpf_object_file and bpf_object */
struct bpf_object *__load_bpf_object_file(const char *filename, int ifindex)
{
	/* In next assignment this will be moved into ../common/ */
	int first_prog_fd = -1;
	struct bpf_object *obj;
	int err;

	/* Lesson#3: This struct allow us to set ifindex, this features is used
	 * for hardware offloading XDP programs.
	 */
	struct bpf_prog_load_attr prog_load_attr = {
		.prog_type	= BPF_PROG_TYPE_XDP,
		.ifindex	= ifindex,
	};
	prog_load_attr.file = filename;

	/* Use libbpf for extracting BPF byte-code from BPF-ELF object, and
	 * loading this into the kernel via bpf-syscall
	 */
	err = bpf_prog_load_xattr(&prog_load_attr, &obj, &first_prog_fd);
	if (err) {
		fprintf(stderr, "ERR: loading BPF-OBJ file(%s) (%d): %s\n",
			filename, err, strerror(-err));
		return NULL;
	}

	/* Notice how a pointer to a libbpf bpf_object is returned */
	return obj;
}

/* Lesson#2: This is a central piece of this lesson:
 * - Notice how BPF-ELF obj can have several programs
 * - Find by sec name via: bpf_object__find_program_by_title()
 */
struct bpf_object *__load_bpf_and_xdp_attach(struct config *cfg)
{
	/* In next assignment this will be moved into ../common/ */
	struct bpf_program *bpf_prog;
	struct bpf_object *bpf_obj;
	int offload_ifindex = 0;
	int prog_fd = -1;
	int err;

	/* If flags indicate hardware offload, supply ifindex */
	if (cfg->xdp_flags & XDP_FLAGS_HW_MODE)
		offload_ifindex = cfg->ifindex;

	/* Load the BPF-ELF object file and get back libbpf bpf_object */
	bpf_obj = __load_bpf_object_file(cfg->filename, offload_ifindex);
	if (!bpf_obj) {
		fprintf(stderr, "ERR: loading file: %s\n", cfg->filename);
		exit(EXIT_FAIL_BPF);
	}
	/* At this point: All XDP/BPF programs from the cfg->filename have been
	 * loaded into the kernel, and evaluated by the verifier. Only one of
	 * these gets attached to XDP hook, the others will get freed once this
	 * process exit.
	 */

	/* Find a matching BPF prog section name */
	bpf_prog = bpf_object__find_program_by_title(bpf_obj, cfg->progsec);
	if (!bpf_prog) {
		fprintf(stderr, "ERR: finding progsec: %s\n", cfg->progsec);
		exit(EXIT_FAIL_BPF);
	}

	prog_fd = bpf_program__fd(bpf_prog);
	if (prog_fd <= 0) {
		fprintf(stderr, "ERR: bpf_program__fd failed\n");
		exit(EXIT_FAIL_BPF);
	}

	/* At this point: BPF-progs are (only) loaded by the kernel, and prog_fd
	 * is our select file-descriptor handle. Next step is attaching this FD
	 * to a kernel hook point, in this case XDP net_device link-level hook.
	 */
	err = xdp_link_attach(cfg->ifindex, cfg->xdp_flags, prog_fd);
	if (err)
		exit(err);

	return bpf_obj;
}

static void list_avail_progs(struct bpf_object *obj)
{
	struct bpf_program *pos;

	printf("BPF object (%s) listing avail --progsec names\n",
	       bpf_object__name(obj));

	bpf_object__for_each_program(pos, obj) {
		if (bpf_program__is_xdp(pos))
			printf(" %s\n", bpf_program__title(pos, false));
	}
}

int main(int argc, char **argv)
{
	struct bpf_object *bpf_obj;

	struct config cfg = {
		.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE,
		.ifindex   = -1,
		.do_unload = false,
	};
	/* Set default BPF-ELF object file and BPF program name */
	strncpy(cfg.filename, default_filename, sizeof(cfg.filename));
	strncpy(cfg.progsec,  default_progsec,  sizeof(cfg.progsec));
	/* Cmdline options can change these */
	parse_cmdline_args(argc, argv, long_options, &cfg, __doc__);

	/* Required option */
	if (cfg.ifindex == -1) {
		fprintf(stderr, "ERR: required option --dev missing\n");
		usage(argv[0], __doc__, long_options, (argc == 1));
		return EXIT_FAIL_OPTION;
	}
	if (cfg.do_unload)
		return xdp_link_detach(cfg.ifindex, cfg.xdp_flags, 0);

	bpf_obj = __load_bpf_and_xdp_attach(&cfg);
	if (!bpf_obj)
		return EXIT_FAIL_BPF;

	if (verbose)
		list_avail_progs(bpf_obj);

	if (verbose) {
		printf("Success: Loaded BPF-object(%s) and used section(%s)\n",
		       cfg.filename, cfg.progsec);
		printf(" - XDP prog attached on device:%s(ifindex:%d)\n",
		       cfg.ifname, cfg.ifindex);
	}
	/* Other BPF section programs will get freed on exit */
	return EXIT_OK;
}

In xdp_loader.c : Basic bpf_object usage In xdp_loader.c the function __load_bpf_object_file() now returns a libbpf struct bpf_object pointer (while in the basic01 lesson we just returned the file descriptor to the first BPF prog).

The struct bpf_object represents an ELF object, which can be of various types.

the function __load_bpf_and_xdp_attach() uses bpf_object__find_program_by_title() on the bpf_object, which finds a program by the “SEC” name (not the C-function name). The return type is a struct bpf_program object, and we can use the function bpf_program__fd() for getting the file descriptor that we want to attach to the XDP hook.

Tutorial: Basic03 - counting with BPF maps

In this lesson we will only cover two simple maps types:

BPF_MAP_TYPE_ARRAY and BPF_MAP_TYPE_PERCPU_ARRAY.

xdp_prog_kern.c:

#include "common_kern_user.h" /* defines: struct datarec; */

/* Lesson#1: See how a map is defined.
 * - Here an array with XDP_ACTION_MAX (max_)entries are created.
 * - The idea is to keep stats per (enum) xdp_action
 */
struct bpf_map_def SEC("maps") xdp_stats_map = {
	.type        = BPF_MAP_TYPE_ARRAY,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(struct datarec),
	.max_entries = XDP_ACTION_MAX,
};

/* LLVM maps __sync_fetch_and_add() as a built-in function to the BPF atomic add
 * instruction (that is BPF_STX | BPF_XADD | BPF_W for word sizes)
 */
#ifndef lock_xadd
#define lock_xadd(ptr, val)	((void) __sync_fetch_and_add(ptr, val))
#endif

SEC("xdp_stats1")
int  xdp_stats1_func(struct xdp_md *ctx)
{
	// void *data_end = (void *)(long)ctx->data_end;
	// void *data     = (void *)(long)ctx->data;
	struct datarec *rec;
	__u32 key = XDP_PASS; /* XDP_PASS = 2 */

	/* Lookup in kernel BPF-side return pointer to actual data record */
	rec = bpf_map_lookup_elem(&xdp_stats_map, &key);
	/* BPF kernel-side verifier will reject program if the NULL pointer
	 * check isn't performed here. Even-though this is a static array where
	 * we know key lookup XDP_PASS always will succeed.
	 */
	if (!rec)
		return XDP_ABORTED;

	/* Multiple CPUs can access data record. Thus, the accounting needs to
	 * use an atomic operation.
	 */
	lock_xadd(&rec->rx_packets, 1);
        /* Assignment#1: Add byte counters
         * - Hint look at struct xdp_md *ctx (copied below)
         *
         * Assignment#3: Avoid the atomic operation
         * - Hint there is a map type named BPF_MAP_TYPE_PERCPU_ARRAY
         */

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

/* Copied from: $KERNEL/include/uapi/linux/bpf.h
 *
 * User return codes for XDP prog type.
 * A valid XDP program must return one of these defined values. All other
 * return codes are reserved for future use. Unknown return codes will
 * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
 *
enum xdp_action {
	XDP_ABORTED = 0,
	XDP_DROP,
	XDP_PASS,
	XDP_TX,
	XDP_REDIRECT,
};
 * user accessible metadata for XDP packet hook
 * new fields must be added to the end of this structure
 *
struct xdp_md {
	// (Note: type __u32 is NOT the real-type)
	__u32 data;
	__u32 data_end;
	__u32 data_meta;
	// Below access go through struct xdp_rxq_info
	__u32 ingress_ifindex; // rxq->dev->ifindex
	__u32 rx_queue_index;  // rxq->queue_index
};
*/

定义一个map

在这个教程中,bpf_map_dev定义一个SEC(“map”).

struct bpf_map_def SEC("maps") xdp_stats_map = {
	.type        = BPF_MAP_TYPE_ARRAY,
	.key_size    = sizeof(__u32),
	.value_size  = sizeof(struct datarec),
	.max_entries = XDP_ACTION_MAX,
};

有关map的核心就在这里了


下一篇 gcc的一些用法

Comments

Content