vimer linux kernel 爱好者

icmp分析

2017-05-06

ICMP是layer 4协议,用户可以使用icmp来得到自己想要的东西。大名鼎鼎的ping就是利用ICMP的api写成的。

这个协议主要是用来处理(L3)层的错误信息与控制信息的,

ICMPv4

这里主要是两类: 错误报文和消息报文。著名的ip(来自于iputils包)就是打开 raw socket并且发送一个ICMP_ECHO报文、产生ICMP_REPLY报文作为回应。

icmpv4初始化

首先,在/net/ipv4/icmp.c中首先呼叫inet_init()(在boot阶段),方法,此方法激活icmp_init()方法,然后就会呼叫icmp_sk_init()产生ICMP包。

在 net/ipv4/af_inet.c 中有各种协议的注册。

static int __init inet_init(void)
{
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	sock_skb_cb_check_size(sizeof(struct inet_skb_parm));

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		pr_crit("%s: Cannot add ICMP protocol\n", __func__);
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		pr_crit("%s: Cannot add UDP protocol\n", __func__);
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

------->-------

接下来看看 icmp_protocal的结构

static const struct net_protocol icmp_protocol = {
	.handler =	icmp_rcv,
	.err_handler =	icmp_err,
	.no_policy =	1,
	.netns_ok =	1,
};

handler

首先看看handler, 当接受到由外面发来的包,只要在ip头部等于IPPROTO_ICMP(0x1), icmp_rcv就会触发。

no_policy

当设置为1,意味着不需要表现IPsec策略检查(policy check),

netns_ok

设置为1时提醒协议是network命名空间

icmp_sk_init() 位于net/ipv4/icmp.c 中

static int __net_init icmp_sk_init(struct net *net)
{
	int i, err;

	net->ipv4.icmp_sk = alloc_percpu(struct sock *);
	if (!net->ipv4.icmp_sk)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		struct sock *sk;

		err = inet_ctl_sock_create(&sk, PF_INET,
					   SOCK_RAW, IPPROTO_ICMP, net);
		if (err < 0)
			goto fail;

		*per_cpu_ptr(net->ipv4.icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff/skb_shared_info struct overhead.
		 */
		sk->sk_sndbuf =	2 * SKB_TRUESIZE(64 * 1024);

		/*
		 * Speedup sock_wfree()
		 */
		sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
		inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT;
	}

	/* Control parameters for ECHO replies. */
	net->ipv4.sysctl_icmp_echo_ignore_all = 0;
	net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;

	/* Control parameter - ignore bogus broadcast responses? */
	net->ipv4.sysctl_icmp_ignore_bogus_error_responses = 1;

	/*
	 * 	Configurable global rate limit.
	 *
	 *	ratelimit defines tokens/packet consumed for dst->rate_token
	 *	bucket ratemask defines which icmp types are ratelimited by
	 *	setting	it's bit position.
	 *
	 *	default:
	 *	dest unreachable (3), source quench (4),
	 *	time exceeded (11), parameter problem (12)
	 */

	net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
	net->ipv4.sysctl_icmp_ratemask = 0x1818;
	net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;

	return 0;

ICMPv4 Header

主要参考下面的这幅图片就可以了:

icmp-v4-header.png

前64 bits是属于头文件的部分,后面的Payload包括原始包IPv4头文件和它的的负载。

头部的定义在include/uapi/linux/icmp.h

struct icmphdr {
  __u8		type;
  __u8		code;
  __sum16	checksum;
  union {
	struct {
		__be16	id;
		__be16	sequence;
	} echo;
	__be32	gateway;
	struct {
		__be16	__unused;
		__be16	mtu;
	} frag;
	__u8	reserved[4];
  } un;
};

通过这张表,是不是可以得出这样一个结论:在网络协议中,它的bits size可以通过它的的类型的大小推断出来。

然后ICMPv4模块定义了icmp_control对象, 命名为 icmp_pointer,这个对象被ICMpv4消息的类型索引。

该定义位于 include/uapi/linux/icmp.c

/*
 *	ICMP control array. This specifies what to do with each ICMP.
 */

struct icmp_control {
	bool (*handler)(struct sk_buff *skb);
	short   error;		/* This ICMP is classed as an error message */
};

static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];

其中, error是1,代表着错误消息(ICMP_DEST_UNREACH), 为0代表着正常消息(ICMP_ECHO)。 而handler有时会被赋值超过一种类型。

ping_rcv()接受一个ping的回应(ICMP_ECHOREPLY)。 位于net/ipv4/icmp.c

bool ping_rcv(struct sk_buff *skb)
{
	struct sock *sk;
	struct net *net = dev_net(skb->dev);
	struct icmphdr *icmph = icmp_hdr(skb);

	/* We assume the packet has already been checked by icmp_rcv */

	pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n",
		 skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));

	/* Push ICMP header back */
	skb_push(skb, skb->data - (u8 *)icmph);

	sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id));
	if (sk) {
		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

		pr_debug("rcv on socket %p\n", sk);
		if (skb2)
			ping_queue_rcv_skb(sk, skb2);
		sock_put(sk);
		return true;
	}
	pr_debug("no socket, dropping\n");

	return false;
}
EXPORT_SYMBOL_GPL(ping_rcv);

在3.0以前的内核版本中,你需要在用户空间程序中创建一个raw socket,当接收到一个(ICMP_ECHOREPLY)消息, 原来的raw socket处理它(理解不准确)。为了详细理解这个过程,先看一个函数。

在 /net/ipv4/in_input.c中。

static int ip_local_deliver_finish(struct net *net,
		struct sock *sk, struct sk_buff *skb)
{
	__skb_pull(skb, skb_network_header_len(skb));

	rcu_read_lock();
	{
		int protocol = ip_hdr(skb)->protocol;
		const struct net_protocol *ipprot;
		int raw;

	resubmit:
		raw = raw_local_deliver(skb, protocol);

		ipprot = rcu_dereference(inet_protos[protocol]);
		if (ipprot) {
			int ret;

			if (!ipprot->no_policy) {
				if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
					kfree_skb(skb);
					goto out;
				}
				nf_reset(skb);
			}
			ret = ipprot->handler(skb);
			if (ret < 0) {
				protocol = -ret;
				goto resubmit;
			}
			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
		} else {
			if (!raw) {
				if (xfrm4_policy_check(NULL,
						XFRM_POLICY_IN, skb)) {
					__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
					icmp_send(skb, ICMP_DEST_UNREACH,
						  ICMP_PROT_UNREACH, 0);
				}
				kfree_skb(skb);
			} else {
				__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
				consume_skb(skb);
			}
		}
	}
 out:
	rcu_read_unlock();

	return 0;
}

上一篇 netlink嵌套字

Comments

Content