本文整理下之前的學習筆記,基于DPDK17.11版本源碼分析。主要分析一下igb_uio驅動源碼。
總線-設備-驅動
首先簡單介紹一下kernel中的總線-設備-驅動模型,以pci總線為例,pci總線上有兩個表,一個用于保存系統中的pci設備,一個用于保存pci設備對應的驅動。每當加載pci設備驅動時,就會遍歷pci總線上的pci設備進行匹配,每當插入pci設備到系統中時,熱插拔機制就會自動遍歷pci總線上的pci設備驅動進行匹配,如果匹配成功則使用此驅動初始化設備。
注冊pci總線
可以調用bus_register注冊總線。比如下面的pci總線,平臺總線和usb總線等。
// Register the PCI bus
struct bus_type pci_bus_type = {
.name = "pci",
.match = pci_bus_match,
.uevent = pci_uevent,
.probe = pci_device_probe,
.remove = pci_device_remove,
.shutdown = pci_device_shutdown,
.dev_groups = pci_dev_groups,
.bus_groups = pci_bus_groups,
.drv_groups = pci_drv_groups,
.pm = PCI_PM_OPS_PTR,
};
bus_register(&pci_bus_type);
// Register the platform bus
struct bus_type platform_bus_type = {
.name = "platform",
.dev_groups = platform_dev_groups,
.match = platform_match,
.uevent = platform_uevent,
.pm = &platform_dev_pm_ops,
};
bus_register(&platform_bus_type);
// Register the USB bus
struct bus_type usb_bus_type = {
.name = "usb",
.match = usb_device_match,
.uevent = usb_uevent,
};
bus_register(&usb_bus_type);
// Register the virtio bus
static struct bus_type virtio_bus = {
.name = "virtio",
.match = virtio_dev_match,
.dev_groups = virtio_dev_groups,
.uevent = virtio_uevent,
.probe = virtio_dev_probe,
.remove = virtio_dev_remove,
};
bus_register(&virtio_bus);
注冊總線后,會在 /sys/bus 下生成總線目錄,比如 pci 總線會生成目錄 /sys/bus/pci
/**
* bus_register - register a driver-core subsystem
* @bus: bus to register
*
* Once we have that, we register the bus with the kobject
* infrastructure, then register the children subsystems it has:
* the devices and drivers that belong to the subsystem.
*/
// NOTE: abridged excerpt -- the function braces are omitted and no return
// values are checked in the lines shown here.
int bus_register(struct bus_type *bus)
struct subsys_private *priv;
struct lock_class_key *key = &bus->lock_key;
priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL);
priv->bus = bus;
bus->p = priv;
kobject_set_name(&priv->subsys.kobj, "%s", bus->name);
priv->subsys.kobj.kset = bus_kset;
priv->subsys.kobj.ktype = &bus_ktype;
kset_register(&priv->subsys);
// With drivers_autoprobe set to 1, loading a driver automatically probes
// the bus's devices for a match
priv->drivers_autoprobe = 1;
bus_create_file(bus, &bus_attr_uevent);
// Create the "devices" subdirectory under the bus directory; the
// per-device subdirectories live beneath it
priv->devices_kset = kset_create_and_add("devices", NULL,
&priv->subsys.kobj);
// Create the "drivers" subdirectory under the bus directory; the
// per-driver subdirectories live beneath it
priv->drivers_kset = kset_create_and_add("drivers", NULL,
&priv->subsys.kobj);
// This list holds the devices that have been added to the bus
klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put);
// This list holds the drivers that have been registered on the bus
klist_init(&priv->klist_drivers, NULL, NULL);
// Create the drivers_probe and drivers_autoprobe files in sysfs
// (the two bus_create_file calls below appear to be the body of
// add_probe_files, shown inline)
add_probe_files(bus);
bus_create_file(bus, &bus_attr_drivers_probe);
bus_create_file(bus, &bus_attr_drivers_autoprobe);
bus_add_groups(bus, bus->bus_groups);
注冊總線后,會生成文件/sys/bus/pci/drivers_autoprobe,寫此文件時在kernel中會調用如下函數,如果寫入的值為1,表示 bus 支持自動探測 device,則加載驅動時,自動遍歷所有pci設備進行匹配。
store_drivers_autoprobe
/*
 * sysfs store handler behind the bus's drivers_autoprobe file.
 *
 * Only the first byte of buf is inspected: '0' turns autoprobing off,
 * any other character turns it on. The whole write is always reported
 * as consumed by returning count.
 */
static ssize_t store_drivers_autoprobe(struct bus_type *bus,
				       const char *buf, size_t count)
{
	bus->p->drivers_autoprobe = (buf[0] == '0') ? 0 : 1;

	return count;
}
注冊驅動到pci總線
結構體struct pci_driver表示一個pci設備驅動,其中id_table和dynids用來保存此驅動支持的設備id等信息,如果有匹配的設備,則調用probe函數。
/* Describes one PCI device driver, including the ids it can bind to. */
struct pci_driver {
struct list_head node;
const char *name;
// Static table holding the device ids this driver supports
const struct pci_device_id *id_table; /* must be non-NULL for probe to be called */
int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */
void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */
int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */
int (*suspend_late) (struct pci_dev *dev, pm_message_t state);
int (*resume_early) (struct pci_dev *dev);
int (*resume) (struct pci_dev *dev); /* Device woken up */
void (*shutdown) (struct pci_dev *dev);
int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */
const struct pci_error_handlers *err_handler;
struct device_driver driver;
// Dynamic table; ids are added at runtime by writing the driver's new_id file
struct pci_dynids dynids;
};
調用函數pci_register_driver注冊pci設備驅動。
/* The igb_uio driver: registered with no static id table. */
static struct pci_driver igbuio_pci_driver = {
.name = "igb_uio",
.id_table = NULL, // id_table is empty by default for the drivers DPDK uses (igb_uio, vfio-pci, ...)
.probe = igbuio_pci_probe,
.remove = igbuio_pci_remove,
};
pci_register_driver(&igbuio_pci_driver);
/* For contrast: the in-kernel igb driver, which ships a static id table (abridged). */
static const struct pci_device_id igb_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
...
}
static struct pci_driver igb_driver = {
.name = igb_driver_name,
.id_table = igb_pci_tbl, // an ordinary kernel driver carries a static id_table
.probe = igb_probe,
.remove = igb_remove,
#ifdef CONFIG_PM
.driver.pm = &igb_pm_ops,
#endif
.shutdown = igb_shutdown,
.sriov_configure = igb_pci_sriov_configure,
.err_handler = &igb_err_handler
};
pci_register_driver(&igb_driver);
注冊驅動后,會在/sys/bus/pci/drivers目錄下創建以驅動名字命名的目錄,并在此目錄下創建new_id, bind和unbind等sys文件,可以通過這些文件動態修改驅動信息。
/*
* pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded
*/
#define pci_register_driver(driver) \
__pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
// NOTE: abridged excerpt. The statements that follow a call (driver_register,
// bus_add_driver, driver_attach, ...) appear to be that callee's body shown
// inline, so this is a walk-through rather than compilable code.
int __pci_register_driver(struct pci_driver *drv, struct module *owner,
const char *mod_name)
{
/* initialize common driver fields */
drv->driver.name = drv->name;
// The bus is always pci_bus_type
drv->driver.bus = &pci_bus_type;
drv->driver.owner = owner;
drv->driver.mod_name = mod_name;
spin_lock_init(&drv->dynids.lock);
INIT_LIST_HEAD(&drv->dynids.list);
/* register with core */
driver_register(&drv->driver);
bus_add_driver(drv);
struct bus_type *bus;
struct driver_private *priv;
bus = bus_get(drv->bus);
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
klist_init(&priv->klist_devices, NULL, NULL);
priv->driver = drv;
drv->p = priv;
priv->kobj.kset = bus->p->drivers_kset;
kobject_init_and_add(&priv->kobj, &driver_ktype, NULL, "%s", drv->name);
// Add the driver to the bus's list of drivers
klist_add_tail(&priv->knode_bus, &bus->p->klist_drivers);
// If autoprobe is enabled on the bus, walk every device on it and try to
// match as soon as the driver is loaded
if (drv->bus->p->drivers_autoprobe) {
driver_attach(drv);
// Walk all devices on the bus and match each one against drv
bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
// Match the device against the driver
driver_match_device(drv, dev)
// On a match, and only if the device has no driver bound yet, probe it with drv
if (!dev->driver)
driver_probe_device(drv, dev);
}
module_add_driver(drv->owner, drv);
driver_create_file(drv, &driver_attr_uevent);
// bus->drv_groups is pci_drv_groups here;
// this creates the new_id and remove_id files in sysfs
driver_add_groups(drv, bus->drv_groups);
// Create the bind and unbind files in sysfs, used to bind the driver to a
// device and to unbind it again
if (!drv->suppress_bind_attrs) {
add_bind_files(drv);
driver_create_file(drv, &driver_attr_unbind);
driver_create_file(drv, &driver_attr_bind);
}
}
向new_id寫入"0x8086 0x1521"信息(0x8086表示vendor id,即Intel,0x1521為device id)時,會調用kernel中的store_new_id,解析相關字段后,保存到動態鏈表dynids,然后遍歷當前所有的pci設備進行匹配。
// Defines struct driver_attribute driver_attr_new_id (write handler: store_new_id)
static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
// Defines struct driver_attribute driver_attr_remove_id (write handler: store_remove_id)
static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id);
// Attribute array from which ATTRIBUTE_GROUPS(pci_drv) below builds pci_drv_groups
static struct attribute *pci_drv_attrs[] = {
&driver_attr_new_id.attr,
&driver_attr_remove_id.attr,
NULL,
};
ATTRIBUTE_GROUPS(pci_drv);
// Write handler for a driver's new_id file (abridged; braces and local
// declarations omitted). Parses up to seven fields from buf, requires at
// least vendor and device, records them as a dynamic id, then re-attaches.
// The lines after the pci_add_dynid call appear to be its body shown inline.
static ssize_t store_new_id(struct device_driver *driver, const char *buf,size_t count)
fields = sscanf(buf, "%x %x %x %x %x %x %lx",
&vendor, &device, &subvendor, &subdevice,
&class, &class_mask, &driver_data);
if (fields < 2)
return -EINVAL;
pci_add_dynid(pdrv, vendor, device, subvendor, subdevice, class, class_mask, driver_data);
struct pci_dynid *dynid;
dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
dynid->id.vendor = vendor;
dynid->id.device = device;
dynid->id.subvendor = subvendor;
dynid->id.subdevice = subdevice;
dynid->id.class = class;
dynid->id.class_mask = class_mask;
dynid->id.driver_data = driver_data;
spin_lock(&drv->dynids.lock);
list_add_tail(&dynid->node, &drv->dynids.list);
spin_unlock(&drv->dynids.lock);
// Adding a new id also triggers an automatic match against the devices
return driver_attach(&drv->driver);
向bind文件寫入網卡的pci地址時,會調用kernel中的bind_store,將此網卡綁定到此驅動。
向unbind文件寫入網卡的pci地址時,會調用kernel中的unbind_store,將此網卡和此驅動解綁。
// Defines struct driver_attribute driver_attr_bind; writing the file calls bind_store
static DRIVER_ATTR_WO(bind);
// Defines struct driver_attribute driver_attr_unbind; writing the file calls unbind_store
static DRIVER_ATTR_WO(unbind);
/*
* Manually attach a device to a driver.
* Note: the driver must want to bind to the device,
* it is not possible to override the driver's id table.
*/
// NOTE: abridged excerpt -- the opening brace, local declarations and the
// final cleanup/return are not shown.
static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count)
dev = bus_find_device_by_name(bus, NULL, buf);
// Only proceed when the device exists, has no driver yet, and matches drv
if (dev && dev->driver == NULL && driver_match_device(drv, dev)) {
if (dev->parent) /* Needed for USB */
device_lock(dev->parent);
device_lock(dev);
err = driver_probe_device(drv, dev);
device_unlock(dev);
if (dev->parent)
device_unlock(dev->parent);
if (err > 0) {
/* success */
err = count;
} else if (err == 0) {
/* driver didn't accept device */
err = -ENODEV;
}
}
/*
 * sysfs "unbind" store handler: manually detach a device from its driver.
 *
 * buf names a device on drv's bus. When that device exists and is currently
 * bound to drv, its driver is released (with the parent locked around the
 * release if there is one) and count is returned. Otherwise the result is
 * -ENODEV.
 */
static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count)
{
	struct bus_type *bus = bus_get(drv->bus);
	struct device *dev = bus_find_device_by_name(bus, NULL, buf);
	int err = -ENODEV;

	if (!dev || dev->driver != drv)
		goto out;

	if (dev->parent)	/* Needed for USB */
		device_lock(dev->parent);
	device_release_driver(dev);
	if (dev->parent)
		device_unlock(dev->parent);
	err = count;

out:
	/* Both puts run on every path, whether or not a device matched. */
	put_device(dev);
	bus_put(bus);
	return err;
}
發現pci設備
系統啟動時會掃描所有的pci設備,以它們的pci地址為名字創建目錄,并在此目錄下創建相關的sys文件。并且會遍歷所有的pci設備驅動進行匹配。
// Call-tree outline of PCI device discovery (not compilable code): each call
// is followed by the callees / inlined statements the article walks through.
pci_scan_root_bus
pci_scan_child_bus(b);
pci_scan_slot
pci_scan_single_device
pci_scan_device
pci_device_add
device_add(&dev->dev);
bus_add_device(dev);
// bus->dev_groups is pci_dev_groups here; this creates the vendor, device, ...
// attribute entries under /sys/bus/pci/devices/'pci address'/
device_add_groups(dev, bus->dev_groups);
// Add the device to the bus's list of devices
klist_add_tail(&dev->p->knode_bus, &bus->p->klist_devices);
pci_bus_add_devices
pci_bus_add_device
pci_create_sysfs_dev_files(dev);
// If the PCI config space is larger than PCI_CFG_SPACE_SIZE (256 bytes),
// create /sys/bus/pci/devices/0000:81:00.0/config with a size of 4096 bytes
if (pdev->cfg_size > PCI_CFG_SPACE_SIZE)
retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
else // otherwise the config file is 256 bytes
retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
// Create the resource files; user space can mmap resource0 to access the
// NIC's registers
pci_create_resource_files(pdev);
// Creates /sys/bus/pci/devices/0000:81:00.0/resource0 and so on
/* Expose the PCI resources from this device as files */
for (i = 0; i < PCI_ROM_RESOURCE; i++) {
/* skip empty resources */
if (!pci_resource_len(pdev, i))
continue;
retval = pci_create_attr(pdev, i, 0);
struct bin_attribute *res_attr;
res_attr = kzalloc(sizeof(*res_attr) + name_len, GFP_ATOMIC);
sysfs_bin_attr_init(res_attr);
if (write_combine) {
pdev->res_attr_wc[num] = res_attr;
sprintf(res_attr_name, "resource%d_wc", num);
res_attr->mmap = pci_mmap_resource_wc;
} else {
pdev->res_attr[num] = res_attr;
sprintf(res_attr_name, "resource%d", num);
res_attr->mmap = pci_mmap_resource_uc;
}
if (pci_resource_flags(pdev, num) & IORESOURCE_IO) {
res_attr->read = pci_read_resource_io;
res_attr->write = pci_write_resource_io;
}
res_attr->attr.name = res_attr_name;
res_attr->attr.mode = S_IRUSR | S_IWUSR;
res_attr->size = pci_resource_len(pdev, num);
res_attr->private = &pdev->resource[num];
// Create the sysfs binary file for this resource
sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
/* for prefetchable resources, create a WC mappable file */
if (!retval && pdev->resource[i].flags & IORESOURCE_PREFETCH)
retval = pci_create_attr(pdev, i, 1);
}
// Try to find a matching driver
device_attach(&dev->dev);
// Walk all drivers on the bus looking for one that matches this device
bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
// Check whether the driver and the device match
driver_match_device
// drv->bus->match is pci_bus_match for the PCI bus
drv->bus->match
pci_match_device(pci_drv, pci_dev);
// On a match, call the driver's probe function
driver_probe_device
really_probe(dev, drv);
// dev->bus->probe is pci_device_probe for the PCI bus
dev->bus->probe
__pci_device_probe
pci_call_probe
local_pci_probe
pci_drv->probe(pci_dev, ddi->id);
向設備的driver_override文件寫入驅動名字,表示此設備只能綁定到此驅動。
// Write handler for a device's driver_override file (abridged; braces,
// declarations and the return are omitted). Duplicates the written string
// and stores it in pdev->driver_override.
static ssize_t driver_override_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
struct pci_dev *pdev = to_pci_dev(dev);
driver_override = kstrndup(buf, count, GFP_KERNEL);
pdev->driver_override = driver_override;
如何匹配?
前面多次提到設備和驅動進行匹配,究竟如何匹配呢?
先看一下用來表示一個pci設備的結構體pci_dev,其中如下幾個成員變量表示此pci設備的類型,一般vendor和device就足夠,vendor表示此設備是哪個廠商的,device表示此設備的類型。
/* Excerpt of struct pci_dev: only the identification fields compared during driver matching. */
struct pci_dev {
...
unsigned short vendor;
unsigned short device;
unsigned short subsystem_vendor;
unsigned short subsystem_device;
unsigned int class; /* 3 bytes: (base,sub,prog-if) */
...
}
再看一下用來表示設備驅動的pci_driver,其中id_table和dynids用來保存此驅動支持的設備類型,前者是靜態值,后者可以通過驅動目錄下的new_id動態添加。設備類型使用pci_device_id結構體來表示,其成員變量也是vendor,device等信息,和pci_dev中的信息是一樣的,所以可以使用這幾個字段進行匹配。
/* One id entry describing a device a driver supports; PCI_ANY_ID in a field acts as a wildcard. */
struct pci_device_id {
__u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/
__u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */
__u32 class, class_mask; /* (class,subclass,prog-if) triplet */
kernel_ulong_t driver_data; /* Data private to the driver */
};
struct pci_driver {
struct pci_device_id *id_table
struct pci_dynids dynids;
...
}
最終使用函數pci_match_device進行驅動和設備的匹配。
/* Wildcard id with every vendor/device field set to PCI_ANY_ID; handed back as a dummy id when driver_override forces a match. */
static const struct pci_device_id pci_device_id_any = {
.vendor = PCI_ANY_ID,
.device = PCI_ANY_ID,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
};
// NOTE: abridged excerpt -- braces and local declarations are omitted, and the
// while loop after the pci_match_id call appears to be that callee's body
// shown inline.
static const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_dev *dev)
// If the device has driver_override set it may only bind to the driver named
// there; for any other driver return NULL straight away.
/* When driver_override is set, only bind to the matching driver */
if (dev->driver_override && strcmp(dev->driver_override, drv->name))
return NULL;
// First try the driver's dynamic id list
/* Look at the dynamic ids first, before the static ones */
spin_lock(&drv->dynids.lock);
list_for_each_entry(dynid, &drv->dynids.list, node) {
if (pci_match_one_device(&dynid->id, dev)) {
found_id = &dynid->id;
break;
}
}
spin_unlock(&drv->dynids.lock);
// Nothing found there: fall back to the driver's static table
if (!found_id)
found_id = pci_match_id(drv->id_table, dev);
while (ids->vendor || ids->subvendor || ids->class_mask) {
if (pci_match_one_device(ids, dev))
return ids;
ids++;
}
// Still no match, but a driver was forced via driver_override: treat it as a
// match and return pci_device_id_any
/* driver_override will always match, send a dummy id */
if (!found_id && dev->driver_override)
found_id = &pci_device_id_any;
return found_id;
/*
 * The actual matching rule for a single id entry.
 *
 * Each of vendor, device, subvendor and subdevice must either be PCI_ANY_ID
 * in the id or equal the corresponding field of dev, and the class bits
 * selected by id->class_mask must agree. Returns id on a match, NULL
 * otherwise.
 */
static inline const struct pci_device_id *
pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
	if (id->vendor != PCI_ANY_ID && id->vendor != dev->vendor)
		return NULL;
	if (id->device != PCI_ANY_ID && id->device != dev->device)
		return NULL;
	if (id->subvendor != PCI_ANY_ID && id->subvendor != dev->subsystem_vendor)
		return NULL;
	if (id->subdevice != PCI_ANY_ID && id->subdevice != dev->subsystem_device)
		return NULL;
	/* Any differing bit inside the mask means the class does not match. */
	if ((id->class ^ dev->class) & id->class_mask)
		return NULL;

	return id;
}
綁定到 igb_uio 驅動
網卡如何綁定到igb_uio驅動呢?這里拿DPDK提供的腳本文件dpdk-devbind.py中的函數bind_one進行分析。
def bind_one(dev_id, driver, force):
    '''Bind the device given by "dev_id" to the driver "driver". If the device
    is already bound to a different driver, it will be unbound first'''
    dev = devices[dev_id]
    saved_driver = None  # used to rollback any unbind in case of failure

    # If the NIC is already bound: return when it is bound to the requested
    # driver, otherwise unbind it from its current driver first. (Per the
    # article, unbind_one does this by writing the NIC's PCI address to the
    # driver's unbind file.)
    # unbind any existing drivers we don't want
    if has_driver(dev_id):
        if dev["Driver_str"] == driver:
            print("%s already bound to driver %s, skipping\n"
                  % (dev_id, driver))
            return
        else:
            saved_driver = dev["Driver_str"]
            unbind_one(dev_id, force)
            dev["Driver_str"] = ""  # clear driver string

    # Step one depends on what the kernel offers. When the device exposes a
    # driver_override file (kernels >= 3.15), write the driver name there to
    # pin the device to this driver. Otherwise write the NIC's vendor and
    # device id to the driver's new_id file. new_id is avoided where possible
    # because it also makes the kernel attach every unbound device of that
    # type, not just the one NIC being bound here.
    # For kernels >= 3.15 driver_override can be used to specify the driver
    # for a device rather than relying on the driver to provide a positive
    # match of the device.  The existing process of looking up
    # the vendor and device ID, adding them to the driver new_id,
    # will erroneously bind other devices too which has the additional burden
    # of unbinding those devices
    if driver in dpdk_drivers:
        filename = "/sys/bus/pci/devices/%s/driver_override" % dev_id
        if os.path.exists(filename):
            try:
                f = open(filename, "w")
            except:
                print("Error: bind failed for %s - Cannot open %s"
                      % (dev_id, filename))
                return
            try:
                f.write("%s" % driver)
                f.close()
            except:
                print("Error: bind failed for %s - Cannot write driver %s to "
                      "PCI ID " % (dev_id, driver))
                return
        # For kernels < 3.15 use new_id to add PCI id's to the driver
        else:
            filename = "/sys/bus/pci/drivers/%s/new_id" % driver
            try:
                f = open(filename, "w")
            except:
                print("Error: bind failed for %s - Cannot open %s"
                      % (dev_id, filename))
                return
            try:
                # Convert Device and Vendor Id to int to write to new_id
                f.write("%04x %04x" % (int(dev["Vendor"], 16),
                                       int(dev["Device"], 16)))
                f.close()
            except:
                print("Error: bind failed for %s - Cannot write new PCI ID to "
                      "driver %s" % (dev_id, driver))
                return

    # Step two: write the NIC's PCI address to the driver's
    # /sys/bus/pci/drivers/<driver>/bind file, which binds the two together.
    # do the bind by writing to /sys
    filename = "/sys/bus/pci/drivers/%s/bind" % driver
    try:
        f = open(filename, "a")
    except:
        print("Error: bind failed for %s - Cannot open %s"
              % (dev_id, filename))
        if saved_driver is not None:  # restore any previous driver
            bind_one(dev_id, saved_driver, force)
        return
    try:
        f.write(dev_id)
        f.close()
    except:
        # for some reason, closing dev_id after adding a new PCI ID to new_id
        # results in IOError. however, if the device was successfully bound,
        # we don't care for any errors and can safely ignore IOError
        tmp = get_pci_device_details(dev_id, True)
        if "Driver_str" in tmp and tmp["Driver_str"] == driver:
            return
        print("Error: bind failed for %s - Cannot bind to driver %s"
              % (dev_id, driver))
        if saved_driver is not None:  # restore any previous driver
            bind_one(dev_id, saved_driver, force)
        return

    # Where driver_override exists, clear it again afterwards so the device
    # can later be bound to some other driver.
    # For kernels > 3.15 driver_override is used to bind a device to a driver.
    # Before unbinding it, overwrite driver_override with empty string so that
    # the device can be bound to any other driver
    filename = "/sys/bus/pci/devices/%s/driver_override" % dev_id
    if os.path.exists(filename):
        try:
            f = open(filename, "w")
        except:
            print("Error: unbind failed for %s - Cannot open %s"
                  % (dev_id, filename))
            sys.exit(1)
        try:
            f.write("\00")
            f.close()
        except:
            print("Error: unbind failed for %s - Cannot open %s"
                  % (dev_id, filename))
            sys.exit(1)
igb_uio驅動的id_table為空,則在加載此驅動時,是不會匹配到任何設備的。
/* The igb_uio driver definition again: note the NULL id_table. */
static struct pci_driver igbuio_pci_driver = {
.name = "igb_uio",
.id_table = NULL, // id_table is empty by default for the drivers DPDK uses (igb_uio, vfio-pci, ...)
.probe = igbuio_pci_probe,
.remove = igbuio_pci_remove,
};
經過上面的分析,有三種方法可以將網卡綁定到驅動igb_uio
a. 如果kernel版本大于等于3.15,先向網卡的文件 /sys/bus/pci/devices/'pci address'/driver_override 寫入驅動名字igb_uio,再向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/bind寫入網卡的pci地址即可。
b. 如果kernel版本大于等于3.15,向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/new_id寫入網卡的vendor和device id,則會自動將所有此類型并且沒有綁定到任何驅動的網卡綁定到igb_uio。
c. 如果kernel版本小于3.15,先向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/new_id寫入網卡的vendor和device id,再向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/bind寫入網卡的pci地址即可。注意低版本的kernel,在向new_id寫入值時,只會將設備類型添加到此驅動的動態鏈表,而不會自動探測設備。
igb_uio probe
經過前面的分析,網卡綁定到了igb_uio驅動后,會調用驅動的probe函數igbuio_pci_probe,主要做了如下幾個事情:
a. 調用pci_enable_device使能pci設備
b. 設置DMA mask
c. 填充struct uio_info信息,注冊uio設備
d. 注冊中斷處理函數
// NOTE: abridged excerpt of the igb_uio probe routine -- braces, error
// handling and the return are omitted.
static int
igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
struct rte_uio_pci_dev *udev;
dma_addr_t map_dma_addr;
void *map_addr;
udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
// Enable the PCI device
/*
* enable device: ask low-level code to enable I/O and
* memory
*/
pci_enable_device(dev);
/* enable bus mastering on the device */
pci_set_master(dev);
// Per the article: records the device's memory-type BARs in
// struct uio_info->mem and its I/O-type BARs in struct uio_info->port
/* remap IO memory */
igbuio_setup_bars(dev, &udev->info);
/* set 64-bit DMA mask */
pci_set_dma_mask(dev, DMA_BIT_MASK(64));
pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
// Fill in the remaining struct uio_info fields
/* fill uio infos */
udev->info.name = "igb_uio";
udev->info.version = "0.1";
udev->info.irqcontrol = igbuio_pci_irqcontrol;
udev->info.open = igbuio_pci_open;
udev->info.release = igbuio_pci_release;
udev->info.priv = udev;
udev->pdev = dev;
// Creates the /sys/bus/pci/devices/'pci address'/max_vf file. Writing it
// creates VFs, so VFs can still be created while the NIC is bound to the
// igb_uio driver.
sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
// Register with uio: this produces the /dev/uiox character device and the
// directory /sys/bus/pci/devices/'pci address'/uio/uiox
/* register uio driver */
uio_register_device(&dev->dev, &udev->info);
// Store the struct rte_uio_pci_dev in the device's driver_data (the two lines
// after pci_set_drvdata appear to show what that call expands to)
pci_set_drvdata(dev, udev);
dev_set_drvdata(&pdev->dev, data);
dev->driver_data = data;
宏uio_register_device用來注冊uio設備。
/* use a define to avoid include chaining to get THIS_MODULE */
#define uio_register_device(parent, info) \
__uio_register_device(THIS_MODULE, parent, info)
// NOTE: abridged excerpt -- braces, error handling and the return are omitted.
int __uio_register_device(struct module *owner,
struct device *parent,
struct uio_info *info)
// Build a uio_device from the uio_info
struct uio_device *idev;
idev = devm_kzalloc(parent, sizeof(*idev), GFP_KERNEL);
idev->owner = owner;
idev->info = info;
init_waitqueue_head(&idev->wait);
atomic_set(&idev->event, 0);
// Allocate the smallest unused id and store it in idev->minor
uio_get_minor(idev);
// Create the character device /dev/uiox
idev->dev = device_create(&uio_class, parent, MKDEV(uio_major, idev->minor), idev, "uio%d", idev->minor);
// Creates a maps directory under /sys/class/uio/uiox/. Based on
// struct uio_info->mem and ->port it gets mapx and portx subdirectories holding
// the matching details (start address, name, offset and size).
// User space could mmap through the mapx entries to reach the NIC's registers,
// but DPDK does not do that: it mmaps
// /sys/bus/pci/devices/'pci address'/resource0 directly instead.
uio_dev_add_attributes(idev);
info->uio_dev = idev;
// Register the interrupt handler. In newer DPDK versions info->irq is not set
// when the uio device is registered; the interrupt is registered in
// igbuio_pci_open instead, when user space opens /dev/uiox.
if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) {
devm_request_irq(idev->dev, info->irq, uio_interrupt, info->irq_flags, info->name, idev);
}
簡單總結一下,igb_uio是DPDK使用網卡的一個通用驅動,不只intel網卡可以用,其他廠商的網卡也可以用(有一個例外,mellanox的網卡不用綁定到igb_uio就能被DPDK使用),因為它只使能了pci設備,注冊uio,和注冊中斷處理函數,這些工作是不區分網卡類型的。
加載igb_uio時,不會自動探測pci設備,而是需要寫sys文件將設備綁定到igb_uio。
igb_uio依賴uio驅動,注冊uio設備后,會生成/dev/uiox,和網卡一一對應,用戶態可以poll /dev/uiox監聽中斷是否到來。
同時uio設備還會將網卡的BAR地址通過sys文件系統暴露出去,用戶態可以mmap sys文件后操作網卡寄存器。但是DPDK沒有采用這種方式,而是直接mmap網卡自身暴露出去的sys文件 /sys/bus/pci/devices/'pci address'/resource0。
參考
https://www.cnblogs.com/jungle1996/p/12398915.html
https://www.cnblogs.com/jungle1996/p/12452636.html