netfilter 簡介
netfilter是在Linux 2.4.X內核引入的一個子系統,它提供了一個抽象的、通用的框架,這個框架提供了一整套鉤子函數的管理機制,包括鉤子函數的原型定義、注冊、注銷等。下面將基于Linux 3.14.77 的內核代碼簡要介紹一下netfilter框架的實現。
netfilter 支持的協議
既然是通用的框架,那么netfilter就需要在數據包流經的關鍵位置放置鉤子。因此,在內核中netfilter支持了多種協議,當數據包經過這些協議的處理流程時就可以執行相應的鉤子函數,從而完成對數據包的處理。netfilter支持的協議如下:
/*
 * Protocol families supported by netfilter.  NFPROTO_NUMPROTO is not a
 * protocol: it is one past the largest value listed here and is used as the
 * size of the first dimension of the nf_hooks array.
 */
enum {
	NFPROTO_UNSPEC = 0,
	NFPROTO_INET = 1,
	NFPROTO_IPV4 = 2,	/* IPv4 protocol; corresponds to iptables */
	NFPROTO_ARP = 3,	/* ARP protocol; corresponds to arptables */
	NFPROTO_BRIDGE = 7,	/* bridge protocol; corresponds to ebtables */
	NFPROTO_IPV6 = 10,	/* IPv6 protocol; corresponds to ip6tables */
	NFPROTO_DECNET = 12,
	NFPROTO_NUMPROTO,
};
支持的協議定義在 include/uapi/linux/netfilter.h
netfilter 模塊初始化
/*
 * Initialise the netfilter core.
 *
 * Every slot of the global nf_hooks table is set up as an empty list, then
 * the per-network-namespace operations and the logging part are registered.
 *
 * Returns 0 on success or the negative value reported by the step that
 * failed.  If netfilter_log_init() fails, the per-namespace registration is
 * undone before returning.
 */
int __init netfilter_init(void)
{
	int proto, hooknum, status;

	for (proto = 0; proto < ARRAY_SIZE(nf_hooks); proto++)
		for (hooknum = 0; hooknum < NF_MAX_HOOKS; hooknum++)
			INIT_LIST_HEAD(&nf_hooks[proto][hooknum]);

	status = register_pernet_subsys(&netfilter_net_ops);
	if (status < 0)
		return status;

	status = netfilter_log_init();
	if (status < 0) {
		/* Roll back the only step that succeeded so far. */
		unregister_pernet_subsys(&netfilter_net_ops);
		return status;
	}

	return 0;
}
netfilter_init 函數是netfilter內核模塊的初始化函數,我們從中可以看到,該函數主要做的工作就是對 extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] 全局二維鏈表數組進行了初始化,nf_hooks二維鏈表是鉤子函數實際掛接的地方,這點將在下文的注冊函數中看出來。
netfilter 鉤子函數的注冊
/*
 * Descriptor for one netfilter hook.  A caller fills in the fields below the
 * list member and passes the descriptor to nf_register_hook() or
 * nf_register_hooks(), which link it into nf_hooks[pf][hooknum].
 */
struct nf_hook_ops {
/* List linkage into nf_hooks[pf][hooknum]; set up by nf_register_hook(). */
struct list_head list;
/* User fills in from here down. */
/* The hook function. */
nf_hookfn *hook;
/* NOTE(review): presumably the module that provides the hook - confirm. */
struct module *owner;
/* NOTE(review): presumably opaque data for the hook's owner - confirm. */
void *priv;
/* Protocol family; first index into nf_hooks. */
u_int8_t pf;
/* Hook point; second index into nf_hooks. */
unsigned int hooknum;
/* Hooks are ordered in ascending priority. */
int priority;
};
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
unsigned int i;
int err = 0;
for (i = 0; i < n; i++) {
err = nf_register_hook(®[i]);
if (err)
goto err;
}
return err;
err:
if (i > 0)
nf_unregister_hooks(reg, i);
return err;
}
/*
 * Register a single hook descriptor.
 *
 * @reg is linked into the list nf_hooks[reg->pf][reg->hooknum] in front of
 * the first existing entry whose priority is strictly greater than
 * reg->priority, so the list stays sorted by ascending priority and a new
 * entry lands behind existing entries of equal priority.
 *
 * Returns 0 on success, or the negative value from
 * mutex_lock_interruptible() if nf_hook_mutex could not be taken.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
	struct nf_hook_ops *elem;
	int err;

	err = mutex_lock_interruptible(&nf_hook_mutex);
	if (err < 0)
		return err;

	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
		if (reg->priority < elem->priority)
			break;
	}
	/*
	 * Insert right before elem.  NOTE(review): when the loop finishes
	 * without a break, elem presumably refers to the list head, which makes
	 * this an append at the tail - confirm against list_for_each_entry().
	 */
	list_add_rcu(&reg->list, elem->list.prev);
	mutex_unlock(&nf_hook_mutex);
#if defined(CONFIG_JUMP_LABEL)
	static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
	return 0;
}
在使用netfilter框架注冊鉤子函數時,我們需要首先定義一個 struct nf_hook_ops 的實例,然后調用 nf_register_hooks 函數注冊自定義的鉤子函數。
接下來我們重點分析一下自定義的鉤子函數是怎么注冊到netfilter框架上的。通過上述代碼我們可以看到函數的調用關系是 nf_register_hooks —> nf_register_hook,所以最終注冊的動作是在nf_register_hook函數中完成的。
我們再來看下注冊的關鍵流程
int nf_register_hook(struct nf_hook_ops *reg)
{
struct nf_hook_ops *elem;
.....................
list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
if (reg->priority < elem->priority)
break;
}
list_add_rcu(®->list, elem->list.prev);
.....................
return 0;
}
從關鍵代碼,我們可以看到 nf_hooks[reg->pf][reg->hooknum] 這段代表的是根據struct nf_hook_ops的pf 和 hooknum項找到具體協議掛接點的鏈表,然后遍歷鏈表,按優先級遞增的順序插入鏈表,完成注冊。
netfilter 鉤子函數的注銷
void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
{
while (n-- > 0)
nf_unregister_hook(®[n]);
}
void nf_unregister_hook(struct nf_hook_ops *reg)
{
mutex_lock(&nf_hook_mutex);
list_del_rcu(®->list);
mutex_unlock(&nf_hook_mutex);
#if defined(CONFIG_JUMP_LABEL)
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
}
從代碼很容易可以看出,注銷一個鉤子函數,就是把該鉤子函數從相應的鉤子函數鏈表中刪除就行了。其中nf_unregister_hooks 調用 nf_unregister_hook 完成實際的刪除操作。
鉤子的放置 NF_HOOK 函數
/*
 * Run the hooks registered for protocol family @pf at hook point @hook on
 * @skb, then continue with @okfn if the packet is allowed through.
 *
 * This is NF_HOOK_THRESH() with the threshold fixed to INT_MIN.
 * NOTE(review): presumably INT_MIN means no hook is skipped because of its
 * priority - confirm against nf_iterate().
 */
static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb,
			  struct net_device *in, struct net_device *out,
			  int (*okfn)(struct sk_buff *))
{
	const int lowest_thresh = INT_MIN;

	return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, lowest_thresh);
}
通過一個實例我們可以看出該函數如何使用
/*
 * Main IP Receive routine.
 *
 * Abridged excerpt: the dotted lines stand for code left out of this listing.
 */
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
const struct iphdr *iph;
u32 len;
.................
/*
 * Hand the packet to the IPv4 NF_INET_PRE_ROUTING hooks.  dev is passed as
 * the input device, there is no output device (NULL), and ip_rcv_finish is
 * the okfn that runs when the hooks let the packet through.
 */
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,
ip_rcv_finish);
.................
/* Exit path that reports the packet as dropped. */
out:
return NET_RX_DROP;
}
在IPV4的接收函數 ip_rcv 中,我們可以看到 NF_HOOK 函數的用法。對比實例,介紹一下NF_HOOK 函數的形參意義。
NF_HOOK(
uint8_t pf, // associated protocol family
unsigned int hook, // hook point
struct sk_buff *skb, // the packet
struct net_device *in, // receiving interface
struct net_device *out, // sending interface
int (*okfn)(struct sk_buff *) // callback executed when the packet is accepted after passing all hook functions at this hook point
)
// example call
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);
接下來是鉤子函數如何被執行的代碼,從函數調用流程以及注釋我們很容易理解這些鉤子函數是怎么執行的。
/*
 * Run the hooks for (@pf, @hook) on @skb via nf_hook_thresh(), forwarding
 * @thresh unchanged.
 *
 * When nf_hook_thresh() returns 1 the packet continues with @okfn and the
 * result of okfn(skb) is returned; any other value is returned as is and
 * @okfn is not called.
 */
static inline int
NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb,
	       struct net_device *in, struct net_device *out,
	       int (*okfn)(struct sk_buff *), int thresh)
{
	int hook_result = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh);

	if (hook_result != 1)
		return hook_result;

	return okfn(skb);
}
/**
 *	nf_hook_thresh - call a netfilter hook
 *
 *	Returns 1 if the hook has allowed the packet to pass; the caller is
 *	then responsible for invoking okfn.  Any other return value means the
 *	packet has been consumed by the hook.
 *
 *	When no hooks are active for (pf, hook) the function returns 1
 *	immediately without calling nf_hook_slow().
 */
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
				 struct sk_buff *skb,
				 struct net_device *indev,
				 struct net_device *outdev,
				 int (*okfn)(struct sk_buff *), int thresh)
{
	if (!nf_hooks_active(pf, hook))
		return 1;

	return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
}
/*
 * Report whether at least one hook is registered for protocol family @pf at
 * hook point @hook, i.e. whether nf_hooks[pf][hook] is a non-empty list.
 */
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
{
	if (list_empty(&nf_hooks[pf][hook]))
		return false;

	return true;
}
/*
 * Run the hooks registered in nf_hooks[pf][hook] on skb and act on the
 * resulting verdict.
 *
 * Returns 1 if okfn() needs to be executed by the caller (verdict NF_ACCEPT
 * or NF_STOP).  For NF_DROP the skb is freed and the value of
 * NF_DROP_GETERR(verdict) is returned, or -EPERM when that value is 0.
 * Returns 0 otherwise, including when the packet was handed to nf_queue().
 */
int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
struct nf_hook_ops *elem;
unsigned int verdict;
int ret = 0;
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
/*
 * Start with elem derived from the list head itself.  NOTE(review):
 * nf_iterate() receives &elem and presumably advances it to the hook that
 * produced the verdict - confirm.
 */
elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list);
/* Re-entered when queueing fails in a way that lets the packet continue. */
next_hook:
verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
outdev, &elem, okfn, hook_thresh);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
kfree_skb(skb);
/* Use the error carried in the verdict; fall back to -EPERM if none. */
ret = NF_DROP_GETERR(verdict);
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
/* The bits above NF_VERDICT_QBITS are passed to nf_queue() as its last argument. */
int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_QBITS);
if (err < 0) {
/* -ECANCELED: resume iteration with the same elem. */
if (err == -ECANCELED)
goto next_hook;
/* -ESRCH with the bypass flag set: resume iteration instead of dropping. */
if (err == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
goto next_hook;
/* Any other queueing failure: free the packet; ret stays 0. */
kfree_skb(skb);
}
}
rcu_read_unlock();
return ret;
}
總結
至此,我們大概了解了內核 netfilter 模塊的整個結構是怎么樣的了。它首先定義一個全局的二維數組nf_hooks,用于保存支持的協議類型以及各個協議支持的掛接點,然后其他基于netfilter 架構的模塊通過定義 struct nf_hook_ops 實例并填充里面的各個選項,調用 nf_register_hook 函數注冊該鉤子到相應的位置,在數據包流經各個掛接點時,NF_HOOK 函數會遍歷該掛接點注冊的所有鉤子函數完成對數據包的操作,依賴netfilter 的模塊在卸載時,通過調用 nf_unregister_hook 來注銷掛接的鉤子。
