Linux netfilter 架構

netfilter 簡介

netfilter是在Linux 2.4.X內核引入的一個子系統,它提供了一個抽象的、通用的框架,這個框架提供了一整套鉤子函數的管理機制,包括鉤子函數的原型定義、注冊、注銷等。下面將基于Linux 3.14.77 的內核代碼簡要介紹一下netfilter框架的實現。

netfilter 支持的協議

既然是通用的框架,那么netfilter就需要在數據包流經的關鍵位置放置鉤子。因此,在內核中netfilter支持了多種協議,當數據包經過這些協議的處理流程時就可以執行相應的鉤子函數,從而完成對數據包的處理。netfilter支持的協議如下:

/*
 * Protocol families known to netfilter. NFPROTO_NUMPROTO is one past the
 * largest value listed and is used as the first dimension of nf_hooks.
 */
enum {
    NFPROTO_UNSPEC =  0,
    NFPROTO_INET   =  1,
    NFPROTO_IPV4   =  2, /* IPv4, corresponds to iptables */
    NFPROTO_ARP    =  3, /* ARP, corresponds to arptables */
    NFPROTO_BRIDGE =  7, /* bridge, corresponds to ebtables */
    NFPROTO_IPV6   = 10, /* IPv6, corresponds to ip6tables */
    NFPROTO_DECNET = 12,
    NFPROTO_NUMPROTO,    /* implicit value 13 */
};

支持的協議定義在 include/uapi/linux/netfilter.h

netfilter 模塊初始化

/*
 * Set up the netfilter core.
 *
 * Initialises every list head in the nf_hooks table, registers
 * netfilter_net_ops as a per-net subsystem and then initialises the
 * netfilter log code.
 *
 * Returns 0 on success, otherwise the negative value reported by the step
 * that failed. If the log initialisation fails, the per-net registration is
 * undone before returning.
 */
int __init netfilter_init(void)
{
    int proto, hooknum, rc;

    for (proto = 0; proto < ARRAY_SIZE(nf_hooks); proto++)
        for (hooknum = 0; hooknum < NF_MAX_HOOKS; hooknum++)
            INIT_LIST_HEAD(&nf_hooks[proto][hooknum]);

    rc = register_pernet_subsys(&netfilter_net_ops);
    if (rc < 0)
        return rc;

    rc = netfilter_log_init();
    if (rc < 0) {
        /* Roll back the only step that has succeeded so far. */
        unregister_pernet_subsys(&netfilter_net_ops);
        return rc;
    }

    return 0;
}

netfilter_init 函數是netfilter內核模塊的初始化函數,我們從中可以看到,該函數主要做的工作就是對 extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] 全局二維鏈表數組進行了初始化,nf_hooks二維鏈表數組是鉤子函數實際掛接的地方,這點將在下文的注冊函數中看出來。

netfilter 鉤子函數的注冊

/*
 * Describes one hook to attach to netfilter. Instances are handed to
 * nf_register_hook() / nf_unregister_hook(), which link and unlink them
 * through the embedded list node.
 */
struct nf_hook_ops {
    /* Node linking this entry into the list nf_hooks[pf][hooknum]. */
    struct list_head list;

    /* User fills in from here down. */
    /* The hook function to run. */
    nf_hookfn   *hook;
    /* NOTE(review): presumably the module that provides the hook - confirm. */
    struct module   *owner;
    /* NOTE(review): looks like opaque per-hook private data - confirm. */
    void        *priv;
    /* Protocol family; first index into nf_hooks. */
    u_int8_t    pf;
    /* Hook point within the family; second index into nf_hooks. */
    unsigned int    hooknum;
    /* Hooks are ordered in ascending priority. */
    int     priority;
};

/*
 * Register the first n entries of the array reg, in array order.
 *
 * If registering an entry fails, every entry registered earlier in this call
 * is unregistered again and the failing entry's error code is returned.
 * Returns 0 when all n entries were registered (including n == 0).
 */
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
    unsigned int done;

    for (done = 0; done < n; done++) {
        int rc = nf_register_hook(&reg[done]);

        if (!rc)
            continue;

        /* Undo the entries [0, done) that were already registered. */
        if (done > 0)
            nf_unregister_hooks(reg, done);
        return rc;
    }

    return 0;
}

/*
 * Attach a single hook to the list selected by reg->pf and reg->hooknum.
 *
 * The new entry is placed in front of the first existing entry whose
 * priority is strictly greater than reg->priority, so entries of equal
 * priority stay in registration order.
 *
 * Returns 0 on success, or the negative value from
 * mutex_lock_interruptible() when nf_hook_mutex could not be taken.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
    struct nf_hook_ops *pos;
    int rc = mutex_lock_interruptible(&nf_hook_mutex);

    if (rc < 0)
        return rc;

    /* Find the insertion point while holding nf_hook_mutex. */
    list_for_each_entry(pos, &nf_hooks[reg->pf][reg->hooknum], list) {
        if (pos->priority > reg->priority)
            break;
    }
    list_add_rcu(&reg->list, pos->list.prev);

    mutex_unlock(&nf_hook_mutex);

#ifdef CONFIG_JUMP_LABEL
    /* Done after the mutex has been released, as in the unregister path. */
    static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif

    return 0;
}

在使用netfilter框架注冊鉤子函數時,我們需要首先定義一個 struct nf_hook_ops 的實例,然后調用 nf_register_hooks 函數注冊自定義的鉤子函數。
接下來我們重點分析一下自定義的鉤子函數是怎么注冊到netfilter框架上的。通過上述代碼我們可以看到函數的調用關系是 nf_register_hooks —> nf_register_hook,所以最終注冊的動作是在nf_register_hook函數中完成的。

我們再來看下注冊的關鍵流程

/*
 * Abridged view of nf_register_hook(): the dotted lines stand for code that
 * has been left out of this excerpt.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
    struct nf_hook_ops *elem;
    .....................

    /*
     * Walk the list for this protocol family and hook point and stop at the
     * first entry whose priority is strictly greater than the new one.
     */
    list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
        if (reg->priority < elem->priority)
            break;
    }
    /* Link the new entry in directly before the entry the walk stopped at. */
    list_add_rcu(&reg->list, elem->list.prev);
    .....................
    return 0;
}

從關鍵代碼,我們可以看到 nf_hooks[reg->pf][reg->hooknum] 這段代表的是根據struct nf_hook_ops的pf 和 hooknum項找到具體協議掛接點的鏈表,然后遍歷鏈表,按優先級遞增的順序插入鏈表,完成注冊。

netfilter 鉤子函數的注銷

/*
 * Unregister the first n entries of the array reg, walking from the last of
 * them back to reg[0]. Does nothing when n is 0.
 */
void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
{
    unsigned int remaining;

    for (remaining = n; remaining > 0; remaining--)
        nf_unregister_hook(&reg[remaining - 1]);
}

/*
 * Detach a single hook from the list it was registered on.
 *
 * The entry is unlinked while holding nf_hook_mutex; with CONFIG_JUMP_LABEL
 * the static key for reg->pf / reg->hooknum is then decremented, mirroring
 * the increment done at registration.
 */
void nf_unregister_hook(struct nf_hook_ops *reg)
{
    mutex_lock(&nf_hook_mutex);
    list_del_rcu(&reg->list);
    mutex_unlock(&nf_hook_mutex);
#if defined(CONFIG_JUMP_LABEL)
    static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
    /*
     * NOTE(review): presumably waits until readers that may still be
     * traversing the list (see rcu_read_lock() in nf_hook_slow) are done,
     * so the caller can safely release reg afterwards - confirm.
     */
    synchronize_net();
}

從代碼很容易可以看出,注銷一個鉤子函數,就是把該鉤子函數從相應的鉤子函數鏈表中刪除就行了。其中nf_unregister_hooks 調用 nf_unregister_hook 完成實際的刪除操作。

鉤子的放置 NF_HOOK 函數

 static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb,
    struct net_device *in, struct net_device *out,
    int (*okfn)(struct sk_buff *))
{
    /*
     * Convenience wrapper: forwards all arguments to NF_HOOK_THRESH() and
     * supplies INT_MIN as the threshold, returning whatever it returns.
     * NOTE(review): INT_MIN presumably means "do not skip any hook on
     * priority" - confirm against nf_iterate().
     */
    return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN);
}

通過一個實例我們可以看出該函數如何使用

/*
 *  Main IP Receive routine.
 *
 *  Abridged excerpt: the dotted lines stand for code that has been left out.
 */
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
    const struct iphdr *iph;
    u32 len;
.................

    /*
     * Run the NFPROTO_IPV4 hooks registered at NF_INET_PRE_ROUTING on skb,
     * with dev as the input device and no output device. ip_rcv_finish is
     * passed as okfn, the continuation invoked when the hooks let the packet
     * through.
     */
    return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,
               ip_rcv_finish);
.................

    /* Exit path of the omitted code: reports the packet as dropped. */
out:
    return NET_RX_DROP;
}

在IPV4的接收函數 ip_rcv 中,我們可以看到 NF_HOOK 函數的用法。對比實例,介紹一下NF_HOOK 函數的形參意義。

NF_HOOK(
uint8_t pf,                           // associated protocol family
unsigned int hook,             // hook point
struct sk_buff *skb,           // the packet
struct net_device *in,       // receiving interface
struct net_device *out,    // sending interface
int (*okfn)(struct sk_buff *) // callback run when the packet is accepted after passing all hook functions at this hook point
)
// example call
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish);

接下來是鉤子函數如何被執行的代碼,從函數調用流程以及注釋我們很容易理解這些鉤子函數是怎么執行的。

/*
 * Run the hooks for (pf, hook) on skb through nf_hook_thresh() and, when
 * that reports 1 (packet allowed to pass), continue with okfn(skb).
 *
 * Returns okfn's result in that case; otherwise the value reported by
 * nf_hook_thresh() is returned unchanged and okfn is not called.
 */
static inline int
NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb,
           struct net_device *in, struct net_device *out,
           int (*okfn)(struct sk_buff *), int thresh)
{
    int verdict = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh);

    if (verdict != 1)
        return verdict;

    return okfn(skb);
}

/**
 *  nf_hook_thresh - call a netfilter hook
 *
 *  Fast path in front of nf_hook_slow(): when nf_hooks_active() reports no
 *  hooks for (pf, hook), the packet passes without further work.
 *
 *  Returns 1 if the packet is allowed to pass; the caller must then invoke
 *  okfn itself. Any other return value means the packet has been consumed
 *  by the hook.
 */
static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
                 struct sk_buff *skb,
                 struct net_device *indev,
                 struct net_device *outdev,
                 int (*okfn)(struct sk_buff *), int thresh)
{
    if (!nf_hooks_active(pf, hook))
        return 1;

    return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
}

/*
 * Returns true when at least one hook is linked into nf_hooks[pf][hook],
 * i.e. when that list is not empty.
 */
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
{
    return !list_empty(&nf_hooks[pf][hook]);
}

/*
 * Run the hooks registered for (pf, hook) on skb via nf_iterate() and act on
 * the resulting verdict.
 *
 * Returns 1 if okfn() needs to be executed by the caller. For NF_DROP the
 * skb is freed and the result is NF_DROP_GETERR(verdict), or -EPERM when
 * that is 0. Returns 0 otherwise (for example after the packet was handed
 * to nf_queue(), or freed because queueing failed).
 */
int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
         struct net_device *indev,
         struct net_device *outdev,
         int (*okfn)(struct sk_buff *),
         int hook_thresh)
{
    struct nf_hook_ops *elem;
    unsigned int verdict;
    int ret = 0;

    /* We may already have this, but read-locks nest anyway */
    rcu_read_lock();

    /*
     * Start from the list head. nf_iterate() receives &elem.
     * NOTE(review): presumably it advances elem as it walks the list, so a
     * jump back to next_hook resumes after the hook that produced the
     * previous verdict - confirm against nf_iterate().
     */
    elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list);
next_hook:
    verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
                 outdev, &elem, okfn, hook_thresh);
    if (verdict == NF_ACCEPT || verdict == NF_STOP) {
        /* Packet passes: tell the caller to run okfn(). */
        ret = 1;
    } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
        /* Packet dropped: free it and report an error code. */
        kfree_skb(skb);
        ret = NF_DROP_GETERR(verdict);
        if (ret == 0)
            ret = -EPERM;
    } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
        /* The bits above NF_VERDICT_QBITS are passed on to nf_queue(). */
        int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
                        verdict >> NF_VERDICT_QBITS);
        if (err < 0) {
            /*
             * Queueing failed. Keep running hooks on -ECANCELED, or on
             * -ESRCH when the verdict carries the QUEUE_BYPASS flag;
             * for any other error the packet is freed.
             */
            if (err == -ECANCELED)
                goto next_hook;
            if (err == -ESRCH &&
               (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
                goto next_hook;
            kfree_skb(skb);
        }
    }
    rcu_read_unlock();
    return ret;
}

總結

至此,我們大概了解了內核 netfilter 模塊的整個結構是怎么樣的了。它首先定義一個全局的二維數組nf_hooks,用于保存支持的協議類型以及各個協議支持的掛接點,然后其他基于netfilter 架構的模塊通過定義 struct nf_hook_ops 實例并填充里面的各個選項,調用 nf_register_hook 函數注冊該鉤子到相應的位置;在數據包流經各個掛接點時,NF_HOOK 函數會遍歷該掛接點注冊的所有鉤子函數完成對數據包的操作;依賴netfilter 的模塊在卸載時,通過調用 nf_unregister_hook 來注銷掛接的鉤子。

packet flow and netfilter
最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容