Android存儲系統源碼走讀(一):StorageManagerService
前言
上文走讀了StorageManagerService啟動部分的主要邏輯,本篇將對vold部分的代碼進行分析。
本文涉及代碼:
- system/vold/Android.bp
- system/vold/vold.rc
- system/vold/main.cpp
- system/vold/VoldNativeService.cpp
- system/vold/NetlinkManager.cpp
- system/vold/NetlinkHandler.cpp
- system/vold/VolumeManager.cpp
- system/vold/fs/Ext4.cpp
- system/vold/model/PublicVolume.cpp
存儲服務的代碼框架
在此之前,我們先看下目前版本(android11)存儲服務的代碼框架。
與Android O之前的版本相比,主要優化了StorageManagerService與vold通信的方式:移除了CommandListener,改為binder通信。舊版本的代碼框架可以參考此文:http://gityuan.com/2016/07/23/android-io-arch/
Android沒有使用Linux平臺下的udev來處理磁盤,于是Google寫了一個類似udev功能的vold,充當了kernel與framework之間的橋梁。
vold的啟動
Android.bp將vold.rc打包進init.rc
// init scripts listed for the vold module (vold.rc is the one that declares the vold service)
init_rc: [
"vold.rc",
"wait_for_keymaster.rc",
],
vold.rc啟動/system/bin/vold
# Declares the "vold" service: init launches /system/bin/vold and passes the
# blkid/fsck context strings below as command-line arguments.
service vold /system/bin/vold \
--blkid_context=u:r:blkid:s0 --blkid_untrusted_context=u:r:blkid_untrusted:s0 \
--fsck_context=u:r:fsck:s0 --fsck_untrusted_context=u:r:fsck_untrusted:s0
class core
ioprio be 2
writepid /dev/cpuset/foreground/tasks
shutdown critical
group root reserved_disk
Vold的主程序在/system/vold目錄中,vold(main.cpp)將會啟動VoldNativeService以及創建VolumeManager和NetlinkManager
- VolumeManager是VoldNativeService功能的執行者
- NetlinkManager用來監聽內核的熱插拔事件,通知vold進程USB設備已經接入了
// Excerpt of vold's entry point; "..." marks code omitted from this listing.
// Visible sequence: obtain the VolumeManager and NetlinkManager instances, start
// VolumeManager, read the configuration, start VoldNativeService, start
// NetlinkManager, then coldboot /sys/block. Every failure except the
// configuration read terminates the process with exit(1).
int main(int argc, char** argv) {
...
VolumeManager* vm;
NetlinkManager* nm;
// Obtain the VolumeManager instance, which carries out the work behind VoldNativeService.
if (!(vm = VolumeManager::Instance())) {
LOG(ERROR) << "Unable to create VolumeManager";
exit(1);
}
// Obtain the NetlinkManager instance, used to listen for kernel hotplug events
// (e.g. a USB device being attached) on behalf of the vold process.
if (!(nm = NetlinkManager::Instance())) {
LOG(ERROR) << "Unable to create NetlinkManager";
exit(1);
}
// A non-zero return from start() is treated as failure.
if (vm->start()) {
PLOG(ERROR) << "Unable to start VolumeManager";
exit(1);
}
/*
 * Per the article's original note: process_config parses the /etc/vold.fstab
 * configuration file, whose fields are separated by spaces and tabs; at boot the
 * file is parsed and the corresponding partitions are mounted, similar to
 * /etc/fstab on Linux.
 * NOTE(review): process_config is not shown here; confirm which file it reads.
 * A failure is only logged and startup continues.
 */
bool has_adoptable;
bool has_quota;
bool has_reserved;
if (process_config(vm, &has_adoptable, &has_quota, &has_reserved)) {
PLOG(ERROR) << "Error reading configuration... continuing anyways";
}
// Start VoldNativeService (the binder service StorageManagerService connects to).
ATRACE_BEGIN("VoldNativeService::start");
if (android::vold::VoldNativeService::start() != android::OK) {
LOG(ERROR) << "Unable to start VoldNativeService";
exit(1);
}
ATRACE_END();
// Start NetlinkManager so that kernel uevents begin to be received.
ATRACE_BEGIN("NetlinkManager::start");
if (nm->start()) {
PLOG(ERROR) << "Unable to start NetlinkManager";
exit(1);
}
ATRACE_END();
// Per the article's original note: user space writes "add\n" to the uevent files
// under /sys/block, which makes the kernel send uevents again so that the current
// state of already-present devices is picked up.
// NOTE(review): coldboot() is not shown here; confirm against its definition.
coldboot("/sys/block");
...
}
StorageManager與vold的通信
前文的StorageManagerService在connect時獲取的就是VoldNativeService的binder proxy
StorageManager與vold建立通信是通過IVoldListener
// Obtain the client-side (proxy) IVold interface used to talk to vold
mVold = IVold.Stub.asInterface(binder);
try {
// Key step: register our listener with vold so it can call back into this service
mVold.setListener(mListener);
} catch (RemoteException e) {
// Drop the proxy when the call fails; the log message indicates a retry follows
mVold = null;
Slog.w(TAG, "vold listener rejected; trying again", e);
}
在VoldNativeService.cpp中是通過setListener函數實現
/**
 * Binder call that registers the IVoldListener callback interface.
 *
 * The listener is handed to VolumeManager, which stores it for later callbacks.
 * ENFORCE_SYSTEM_OR_ROOT and ACQUIRE_LOCK are macros defined elsewhere
 * (presumably a caller-permission check and a lock acquisition; confirm
 * against their definitions).
 *
 * @param listener callback interface supplied by the caller
 * @return Ok() once the listener has been forwarded
 */
binder::Status VoldNativeService::setListener(
        const android::sp<android::os::IVoldListener>& listener) {
    ENFORCE_SYSTEM_OR_ROOT;
    ACQUIRE_LOCK;

    VolumeManager* const volumeManager = VolumeManager::Instance();
    volumeManager->setListener(listener);
    return Ok();
}
到此IVoldListener這個Binder回調就已經建立好了,VoldNativeService就可以像一般回調一樣調用IVoldListener來通知java層。
vold與內核的通信
vold通過NetlinkManager來建立socket通道,監聽內核上報的uevent事件。
Netlink是Linux下用戶進程和kernel進行信息交互的一種機制,借助這種機制,用戶進程(如Vold/Netd)可以接收來自kernel的一些消息,同時也可以向kernel發送一些控制命令。NetlinkManager就是基于此設計的。Uevent也跟Linux系統有關,它與Linux的設備文件系統有一定關系;這里,我們可以簡單地認為,Uevent就是一個字符串,它描述了外部存儲設備插入/拔出、掛載/卸載的狀態信息。Vold通過Netlink機制,可以得到這些信息,并進行外部存儲設備的管理、控制。
int NetlinkManager::start() {
struct sockaddr_nl nladdr;
int sz = 64 * 1024;
int on = 1;
memset(&nladdr, 0, sizeof(nladdr));
nladdr.nl_family = AF_NETLINK;
nladdr.nl_pid = getpid();
nladdr.nl_groups = 0xffffffff;
// 關(guān)鍵代碼: 創(chuàng)建地址族為PF_NETLINK的socket,與Kernel進(jìn)行通信
if ((mSock = socket(PF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_KOBJECT_UEVENT)) < 0) {
PLOG(ERROR) << "Unable to create uevent socket";
return -1;
}
// When running in a net/user namespace, SO_RCVBUFFORCE will fail because
// it will check for the CAP_NET_ADMIN capability in the root namespace.
// Try using SO_RCVBUF if that fails.
if ((setsockopt(mSock, SOL_SOCKET, SO_RCVBUFFORCE, &sz, sizeof(sz)) < 0) &&
(setsockopt(mSock, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz)) < 0)) {
PLOG(ERROR) << "Unable to set uevent socket SO_RCVBUF/SO_RCVBUFFORCE option";
goto out;
}
if (setsockopt(mSock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on)) < 0) {
PLOG(ERROR) << "Unable to set uevent socket SO_PASSCRED option";
goto out;
}
if (bind(mSock, (struct sockaddr*)&nladdr, sizeof(nladdr)) < 0) {
PLOG(ERROR) << "Unable to bind uevent socket";
goto out;
}
// 通過NetlinkerHandler處理socket的回調(diào)
mHandler = new NetlinkHandler(mSock);
if (mHandler->start()) {
PLOG(ERROR) << "Unable to start NetlinkHandler";
goto out;
}
return 0;
out:
close(mSock);
return -1;
}
通過NetlinkHandler這個類處理socket的回調
/**
 * Callback invoked for each received netlink event.
 *
 * Events without a subsystem are logged and dropped. Events whose subsystem is
 * "block" are forwarded to VolumeManager::handleBlockEvent; all others are
 * ignored.
 *
 * @param evt the received netlink event
 */
void NetlinkHandler::onEvent(NetlinkEvent* evt) {
    VolumeManager* const volumeManager = VolumeManager::Instance();

    const char* const subsystem = evt->getSubsystem();
    if (subsystem == nullptr) {
        LOG(WARNING) << "No subsystem found in netlink event";
        return;
    }

    const std::string subsystemName(subsystem);
    if (subsystemName != "block") {
        return;
    }

    volumeManager->handleBlockEvent(evt);
}
由VolumeManager處理block相關的event.
驅動設備分為字符設備、塊設備、網絡設備。字符設備按照字符流的方式被有序訪問,字符設備也稱為裸設備,可以直接讀取物理磁盤,不經過系統緩存,例如鍵盤直接產生中斷。而塊設備是指系統中能夠隨機(不需要按順序)訪問固定大小數據片(chunks)的設備,例如硬盤;塊設備則是通過系統緩存進行讀取。
內核上報的事件由字符串構成,示例如下:
ACTION=add
DEVPATH=/devices/platform/msm_sdcc.2/mmc_host/mmc1/mmc1:c9f2/block/mmcblk0
SUBSYSTEM=block
MAJOR=179
MINOR=0
DEVNAME=mmcblk0
DEVTYPE=disk
NPARTS=3
SEQNUM=1357
/**
 * Handles a uevent from the "block" subsystem.
 *
 * Only events whose DEVTYPE is "disk" are processed. The device is identified
 * by its MAJOR/MINOR numbers; events missing either parameter are logged and
 * ignored (previously a missing parameter was passed to std::stoi as a null
 * pointer).
 *
 *  - add:    the first matching entry in mDiskSources yields a Disk object that
 *            is passed to handleDiskAdded().
 *  - change: forwarded to handleDiskChanged().
 *  - remove: forwarded to handleDiskRemoved().
 *
 * Holds mLock for the duration of the call.
 *
 * @param evt the netlink event to process
 */
void VolumeManager::handleBlockEvent(NetlinkEvent* evt) {
    std::lock_guard<std::mutex> lock(mLock);

    if (mDebug) {
        LOG(DEBUG) << "----------------";
        LOG(DEBUG) << "handleBlockEvent with action " << (int)evt->getAction();
        evt->dump();
    }

    // Read DEVPATH and DEVTYPE from the event; an absent parameter becomes "".
    const char* devPathParam = evt->findParam("DEVPATH");
    const char* devTypeParam = evt->findParam("DEVTYPE");
    std::string eventPath(devPathParam ? devPathParam : "");  // device path
    std::string devType(devTypeParam ? devTypeParam : "");    // device type

    if (devType != "disk") return;

    // Major and minor numbers together identify one specific device. Guard
    // against missing parameters: constructing a std::string from a null
    // pointer is undefined behaviour.
    const char* majorParam = evt->findParam("MAJOR");
    const char* minorParam = evt->findParam("MINOR");
    if (!majorParam || !minorParam) {
        LOG(WARNING) << "Block event for " << eventPath << " is missing MAJOR/MINOR; ignoring";
        return;
    }
    int major = std::stoi(majorParam);
    int minor = std::stoi(minorParam);
    dev_t device = makedev(major, minor);

    switch (evt->getAction()) {
        // Device inserted
        case NetlinkEvent::Action::kAdd: {
            for (const auto& source : mDiskSources) {
                if (source->matches(eventPath)) {
                    // Classify the disk as SD card or USB storage.
                    // For now, assume that MMC and virtio-blk (the latter is
                    // specific to virtual platforms; see Utils.cpp for details)
                    // devices are SD, and that everything else is USB
                    int flags = source->getFlags();
                    if (major == kMajorBlockMmc || IsVirtioBlkDevice(major)) {
                        flags |= android::vold::Disk::Flags::kSd;
                    } else {
                        flags |= android::vold::Disk::Flags::kUsb;
                    }

                    // Hand the new disk over to handleDiskAdded().
                    handleDiskAdded(std::make_shared<android::vold::Disk>(
                            eventPath, device, source->getNickname(), flags));
                    break;  // only the first matching source is used
                }
            }
            break;
        }
        case NetlinkEvent::Action::kChange: {
            LOG(DEBUG) << "Disk at " << major << ":" << minor << " changed";
            handleDiskChanged(device);
            break;
        }
        // Device removed
        case NetlinkEvent::Action::kRemove: {
            handleDiskRemoved(device);
            break;
        }
        default: {
            LOG(WARNING) << "Unexpected block event action " << (int)evt->getAction();
            break;
        }
    }
}
識別完是U盤還是SD卡后調用handleDiskAdded進行掛載
有兩個場景需要暫時等待:
- 鎖屏未解鎖,需要等待用戶解鎖
- 需要等待user0啟動,因為我們需要在mount Fuse daemon處理磁盤前啟用用戶。
/**
 * Registers a newly detected disk, or defers it when it cannot be scanned yet.
 *
 * The disk is queued in mPendingDisks when either
 *   1. a secure keyguard is showing (wait for the user to unlock), or
 *   2. user 0 has not been started yet (the user must be up before a FUSE
 *      daemon can be mounted to handle the disk).
 * Otherwise the disk is created immediately and tracked in mDisks.
 *
 * @param disk the disk that was just detected
 */
void VolumeManager::handleDiskAdded(const std::shared_ptr<android::vold::Disk>& disk) {
    // For security reasons, if secure keyguard is showing, wait
    // until the user unlocks the device to actually touch it
    // Additionally, wait until user 0 is actually started, since we need
    // the user to be up before we can mount a FUSE daemon to handle the disk.
    bool userZeroStarted = mStartedUsers.find(0) != mStartedUsers.end();
    if (mSecureKeyguardShowing) {
        LOG(INFO) << "Found disk at " << disk->getEventPath()
                  << " but delaying scan due to secure keyguard";
        mPendingDisks.push_back(disk);
    } else if (!userZeroStarted) {
        LOG(INFO) << "Found disk at " << disk->getEventPath()
                  << " but delaying scan due to user zero not having started";
        mPendingDisks.push_back(disk);
    } else {
        disk->create();
        mDisks.push_back(disk);
    }
}
Disk::create()
/**
 * Marks the disk as created, reports it to the registered listener, and then
 * populates its volumes.
 *
 * Must only be called once per disk (enforced by CHECK). A stub disk only gets
 * a stub volume; any other disk has its metadata and partition table read.
 *
 * @return OK in all cases
 */
status_t Disk::create() {
    CHECK(!mCreated);
    mCreated = true;

    // Report the new disk through the listener registered on VolumeManager, if any.
    auto listener = VolumeManager::Instance()->getListener();
    if (listener) {
        listener->onDiskCreated(getId(), mFlags);
    }

    if (!isStub()) {
        readMetadata();    // disk metadata
        readPartitions();  // partition information
    } else {
        createStubVolume();
    }
    return OK;
}
Disk::readPartitions()
這里就是調用/system/bin/sgdisk工具讀取分區信息
sgdisk是Linux下操作GPT分區的工具,就像fdisk是操作MBR分區的工具。
Disk對象創建完后開始創建volume對象:SD卡、USB設備是PublicVolume,內置存儲是PrivateVolume
// Path of the sgdisk tool used to dump the partition table.
static const char* kSgdiskPath = "/system/bin/sgdisk";

/**
 * Reads the disk's partition table and creates a volume for each supported
 * partition.
 *
 * Existing volumes are destroyed first. The table is obtained by running
 * `sgdisk --android-dump <device>` and parsing its output line by line:
 *   - "DISK <mbr|gpt>" selects the table type;
 *   - "PART <n> ..." describes partition n. For MBR, a fixed set of partition
 *     type codes yields a public volume. For GPT, the type GUID selects a
 *     public volume (kGptBasicData) or a private volume (kGptAndroidExpand).
 * If no table or no partitions are found, the whole device is tried as a
 * single public volume.
 *
 * The listener's onDiskScanned() is called and mJustPartitioned is cleared on
 * both the sgdisk-failure path and the normal path.
 *
 * @return -ENOTSUP if getMaxMinors() reports an error, the ForkExecvp status if
 *         sgdisk fails, OK otherwise
 */
status_t Disk::readPartitions() {
    int maxMinors = getMaxMinors();
    if (maxMinors < 0) {
        return -ENOTSUP;
    }

    destroyAllVolumes();

    // Parse partition table
    std::vector<std::string> cmd;
    cmd.push_back(kSgdiskPath);
    cmd.push_back("--android-dump");
    cmd.push_back(mDevPath);

    std::vector<std::string> output;
    // Run /system/bin/sgdisk to read the partition information.
    status_t res = ForkExecvp(cmd, &output);
    if (res != OK) {
        LOG(WARNING) << "sgdisk failed to scan " << mDevPath;

        auto listener = VolumeManager::Instance()->getListener();
        if (listener) listener->onDiskScanned(getId());

        mJustPartitioned = false;
        return res;
    }

    Table table = Table::kUnknown;
    bool foundParts = false;
    for (const auto& line : output) {
        auto split = android::base::Split(line, kSgdiskToken);
        auto it = split.begin();
        if (it == split.end()) continue;

        if (*it == "DISK") {
            if (++it == split.end()) continue;
            if (*it == "mbr") {
                table = Table::kMbr;
            } else if (*it == "gpt") {
                table = Table::kGpt;
            } else {
                LOG(WARNING) << "Invalid partition table " << *it;
                continue;
            }
        } else if (*it == "PART") {
            foundParts = true;

            if (++it == split.end()) continue;
            int i = 0;
            if (!android::base::ParseInt(*it, &i, 1, maxMinors)) {
                LOG(WARNING) << "Invalid partition number " << *it;
                continue;
            }
            // The partition's device number is the disk's minor plus the partition index.
            dev_t partDevice = makedev(major(mDevice), minor(mDevice) + i);

            if (table == Table::kMbr) {
                if (++it == split.end()) continue;
                int type = 0;
                // The type field is parsed as hexadecimal (hence the "0x" prefix).
                if (!android::base::ParseInt("0x" + *it, &type)) {
                    LOG(WARNING) << "Invalid partition type " << *it;
                    continue;
                }

                switch (type) {
                    case 0x06:  // FAT16
                    case 0x07:  // HPFS/NTFS/exFAT
                    case 0x0b:  // W95 FAT32
                    case 0x0c:  // W95 FAT32 (LBA)
                    case 0x0e:  // W95 FAT16 (LBA)
                    case 0x83:  // Linux EXT4/F2FS/...
                        createPublicVolume(partDevice);
                        break;
                }
            } else if (table == Table::kGpt) {
                if (++it == split.end()) continue;
                auto typeGuid = *it;
                if (++it == split.end()) continue;
                auto partGuid = *it;

                if (android::base::EqualsIgnoreCase(typeGuid, kGptBasicData)) {
                    createPublicVolume(partDevice);
                } else if (android::base::EqualsIgnoreCase(typeGuid, kGptAndroidExpand)) {
                    createPrivateVolume(partDevice, partGuid);
                }
            }
        }
    }

    // Ugly last ditch effort, treat entire disk as partition
    if (table == Table::kUnknown || !foundParts) {
        LOG(WARNING) << mId << " has unknown partition table; trying entire device";

        std::string fsType;
        std::string unused;
        if (ReadMetadataUntrusted(mDevPath, &fsType, &unused, &unused) == OK) {
            createPublicVolume(mDevice);
        } else {
            LOG(WARNING) << mId << " failed to identify, giving up";
        }
    }

    // Tell the listener that this disk has been scanned.
    auto listener = VolumeManager::Instance()->getListener();
    if (listener) listener->onDiskScanned(getId());

    mJustPartitioned = false;
    return OK;
}
Disk::createPublicVolume
void Disk::createPublicVolume(dev_t device) {
auto vol = std::shared_ptr<VolumeBase>(new PublicVolume(device));
if (mJustPartitioned) {
LOG(DEBUG) << "Device just partitioned; silently formatting";
vol->setSilent(true);
vol->create();
vol->format("auto");
vol->destroy();
vol->setSilent(false);
}
mVolumes.push_back(vol);
vol->setDiskId(getId());
vol->create();
}
在StorageManagerService接收到onVolumeCreated回調后,會調用VoldNativeService::mount進行真正的掛載。掛載前會調用不同文件系統對應的fsck工具進行磁盤校驗。