源碼基于 Android 11.0/R/sdk-30。
Broadcast ANR
// BroadcastQueue.java
/**
 * Abridged excerpt of the broadcast dispatch routine. Non-serialized (parallel)
 * broadcasts are drained first; the statements after that loop illustrate the
 * timeout bookkeeping around delivering a broadcast. Most of the real method
 * body is elided, so names such as {@code r} are not declared here.
 */
final void processNextBroadcast(boolean fromMsg) {
// Ask AMS (mService) to update its CPU usage statistics.
mService.updateCpuStats();
// First, deliver any non-serialized broadcasts right away.
while (mParallelBroadcasts.size() > 0) {
//...
}
// NOTE(review): the original note on this line read "send the
// BROADCAST_TIMEOUT_MSG message", but the call shown is the forcible-finish
// path; the call that schedules the timeout appears to be elided from this
// excerpt -- confirm against BroadcastQueue.java.
broadcastTimeoutLocked(false); // forcibly finish this broadcast
// Deliver the broadcast.
performReceiveLocked(r.callerApp, r.resultTo,...);
// Remove the pending BROADCAST_TIMEOUT_MSG message.
cancelBroadcastTimeoutLocked();
}
- 先處理并行廣播,因為是單向通知,不需要等待反饋,所以并行廣播沒有 ANR。
- 再處理串行廣播。
- 首先判斷是否已經有一個廣播超時消息;
- 然后,根據目標進程優先級,分別在前臺隊列和后臺隊列(超時時限不同)中排隊處理;
- 接下來,根據不同的隊列,發出不同延時的 ANR 消息;如果處理及時,取消延時消息;如果處理超時,觸發 ANR;
廣播的 ANR 處理相對簡單,主要是再次判斷是否超時、記錄日志,記錄 ANR 次數等。然后就繼續調用 processNextBroadcast 函數,處理下一條廣播了。
// ActivityManagerService.java
// How long we allow a receiver to run before giving up on it.
// Limit for the foreground queue: 10*1000 (presumably milliseconds, i.e. 10 s).
static final int BROADCAST_FG_TIMEOUT = 10*1000;
// Limit for the background queue: 60*1000 (presumably milliseconds, i.e. 60 s).
static final int BROADCAST_BG_TIMEOUT = 60*1000;
Service ANR
/**
 * Abridged excerpt of ActiveServices showing how the service-execution timeout
 * message is scheduled, handled and cancelled. Method bodies are shortened and
 * some locals they use are not declared in this excerpt.
 */
public final class ActiveServices {
// Delay used by scheduleServiceTimeoutLocked when proc.execServicesFg is true.
static final int SERVICE_TIMEOUT = 20*1000;
// Delay used otherwise: ten times SERVICE_TIMEOUT.
static final int SERVICE_BACKGROUND_TIMEOUT = SERVICE_TIMEOUT * 10;
// When a service is started, work out whether the caller counts as foreground.
ComponentName startServiceLocked(IApplicationThread caller, ...){
final boolean callerFg;
if (caller != null) {
final ProcessRecord callerApp = mAm.getRecordForAppLocked(caller);
// Foreground unless the caller's scheduling group is the background group.
callerFg = callerApp.setSchedGroup != ProcessList.SCHED_GROUP_BACKGROUND;
} else {
// No caller supplied: treated as foreground.
callerFg = true;
}
}
// Posts the delayed SERVICE_TIMEOUT_MSG for proc; the delay depends on
// proc.execServicesFg. (Original note: called when a service is started.)
void scheduleServiceTimeoutLocked(ProcessRecord proc) {
Message msg = mAm.mHandler.obtainMessage(ActivityManagerService.SERVICE_TIMEOUT_MSG);
msg.obj = proc;
mAm.mHandler.sendMessageDelayed(msg,
proc.execServicesFg ? SERVICE_TIMEOUT : SERVICE_BACKGROUND_TIMEOUT);
}
// Handles the service timeout message and reports an ANR when a service has
// really timed out.
void serviceTimeout(ProcessRecord proc) {
String anrMessage = null;
// NOTE(review): `timeout` is not declared in this excerpt; the code that
// selects the timed-out service record is elided.
if (timeout != null && mAm.mProcessList.mLruProcesses.contains(proc)) {
Slog.w(TAG, "Timeout executing service: " + timeout);
//...
anrMessage = "executing service " + timeout.shortInstanceName;
} else {
// Not timed out yet: send the timeout message again (code elided).
}
if (anrMessage != null) {
mAm.mAnrHelper.appNotResponding(proc, anrMessage);
}
}
// Once the process has no executing services left, removes its pending
// SERVICE_TIMEOUT_MSG.
private void serviceDoneExecutingLocked(ServiceRecord r, boolean inDestroying,
boolean finishing) {
if (r.app.executingServices.size() == 0) {
mAm.mHandler.removeMessages(ActivityManagerService.SERVICE_TIMEOUT_MSG, r.app);
}
}
}
- 啟動服務時判斷是否是前臺服務,決定超時時間;
- 發送 SERVICE_TIMEOUT_MSG 服務超時消息;
- 如果服務沒有超時,移除消息。否則處理服務超時邏輯,記錄日志等。
ContentProvider ANR
ContentProvider 超時為 CONTENT_PROVIDER_PUBLISH_TIMEOUT_MILLIS = 10s
// Excerpt: only the provider-publish timeout constant is shown.
public abstract class ContentResolver implements ContentInterface {
/**
* How long we wait for an attached process to publish its content providers
* before we decide it must be hung. The value is 10 * 1000, in milliseconds
* per the _MILLIS suffix.
*/
public static final int CONTENT_PROVIDER_PUBLISH_TIMEOUT_MILLIS = 10 * 1000;
}
/**
 * Abridged excerpt of ActivityManagerService showing the content-provider
 * publish timeout: scheduled on attach, removed on successful publish, and
 * handled by removing the process when it fires. Locals such as
 * {@code providers}, {@code app}, {@code r} and {@code wasInLaunchingProviders}
 * are not declared in this excerpt.
 */
public class ActivityManagerService{
private boolean attachApplicationLocked(@NonNull IApplicationThread thread, int pid, ...) {
// If the app being started has providers, post CONTENT_PROVIDER_PUBLISH_TIMEOUT_MSG
// with a delay of ContentResolver.CONTENT_PROVIDER_PUBLISH_TIMEOUT_MILLIS (10 s).
if (providers != null && checkAppInLaunchingProvidersLocked(app)) {
Message msg = mHandler.obtainMessage(CONTENT_PROVIDER_PUBLISH_TIMEOUT_MSG);
msg.obj = app;
mHandler.sendMessageDelayed(msg, ContentResolver.CONTENT_PROVIDER_PUBLISH_TIMEOUT_MILLIS);
}
}
// Called once the app process has published its providers; removes the timeout message.
public final void publishContentProviders(IApplicationThread caller, List<ContentProviderHolder> providers) {
// Publish succeeded, so remove the pending timeout message.
if (wasInLaunchingProviders) {
mHandler.removeMessages(CONTENT_PROVIDER_PUBLISH_TIMEOUT_MSG, r);
}
}
// Publishing the providers failed (timed out): clean up the launching-provider
// state for the app and remove the process.
private final void processContentProviderPublishTimedOutLocked(ProcessRecord app) {
cleanupAppInLaunchingProvidersLocked(app, true);
mProcessList.removeProcessLocked(app, false, true,
ApplicationExitInfo.REASON_INITIALIZATION_FAILURE,
ApplicationExitInfo.SUBREASON_UNKNOWN,
"timeout publishing content providers");
}
}
Activity ANR
Activity 的 ANR 是相對最復雜的,也只有 Activity 中出現的 ANR 會彈出 ANR 提示框。
最終的表現形式是:彈出一個對話框,告訴用戶當前某個程序無響應,輸出一大堆與 ANR 相關的日志,便于開發者解決問題。
InputDispatching:
Activity 最主要的功能之一是交互,為了方便交互,Android 中的 InputDispatcher 會發出操作事件,最終在 InputManagerService 中發出事件,通過 InputChannel,向 Activity 分發事件。交互事件必須得到響應,如果不能及時處理,IMS 就會報出 ANR,交給 AMS 去彈出 ANR 提示框。
KeyDispatching:
如果輸入是個 Key 事件,會從 IMS 進入 ActivityRecord.Token.keyDispatchingTimeOut,然后進入 AMS 處理,不同的是,在 ActivityRecord 中,會先截留一次 Key 的不響應,只有當 Key 連續第二次處理超時,才會彈出 ANR 提示框。
窗口焦點:
Activity 總是需要有一個當前窗口來響應事件的,但如果遲遲沒有當前窗口(獲得焦點),比如在 Activity 切換時,舊 Activity 已經 onPause,新的 Activity 一直沒有 onResume,持續超過 5 秒,就會 ANR。
App 的生命周期太慢,或 CPU 資源不足,或 WMS 異常,都可能導致窗口遲遲無法獲得焦點。
1. 判斷是否有 focused 組件以及 focused Application:
這種一般是在應用啟動時觸發,比如啟動時間過長,在這過程中觸發了 keyevent 或者 trackball motionevent 就會出現。
// ~/frameworks/native/services/inputflinger/dispatcher/InputDispatcher.cpp
// Appends the dispatcher's state to `dump`: the enabled/frozen flags, then the
// focused application's name and dispatching timeout, or "<null>" when there is
// no focused application. The remainder of the dump is elided in this excerpt.
void InputDispatcher::dumpDispatchStateLocked(String8& dump) {
dump.appendFormat(INDENT "DispatchEnabled: %d\n", mDispatchEnabled);
dump.appendFormat(INDENT "DispatchFrozen: %d\n", mDispatchFrozen);
if (mFocusedApplicationHandle != NULL) {
// The timeout is divided by 1000000.0 and printed with an "ms" suffix
// (presumably a nanosecond value -- confirm).
dump.appendFormat(INDENT "FocusedApplication: name='%s', dispatchingTimeout=%0.3fms\n",
mFocusedApplicationHandle->getName().string(),
mFocusedApplicationHandle->getDispatchingTimeout(
DEFAULT_INPUT_DISPATCHING_TIMEOUT) / 1000000.0);
} else {
dump.append(INDENT "FocusedApplication: <null>\n");
}
...
}
對應于
Reason: Input dispatching timed out (Waiting because no window has focus but there is a focused application that may eventually add a window when it finishes starting up.)
// ActivityManagerService.java
/**
* Handle input dispatching timeouts.
* Builds the ANR annotation from {@code reason}. For a non-null process it
* returns false if the process is being debugged, finishes any active
* instrumentation and returns true, and otherwise hands the ANR to mAnrHelper.
* Parameters other than {@code proc} are elided in this excerpt.
* @return whether input dispatching should be aborted or not.
*/
boolean inputDispatchingTimedOut(ProcessRecord proc, ...) {
final String annotation;
// Append the reason in parentheses when one was supplied.
if (reason == null) {
annotation = "Input dispatching timed out";
} else {
annotation = "Input dispatching timed out (" + reason + ")";
}
if (proc != null) {
synchronized (this) {
// A process under a debugger is not reported; false is returned.
if (proc.isDebugging()) {
return false;
}
// An instrumented process gets its instrumentation finished with the
// timeout details instead of going through the ANR helper.
if (proc.getActiveInstrumentation() != null) {
Bundle info = new Bundle();
info.putString("shortMsg", "keyDispatchingTimedOut");
info.putString("longMsg", annotation);
finishInstrumentationLocked(proc, Activity.RESULT_CANCELED, info);
return true;
}
}
// Hand the ANR over to the helper; this call is outside the synchronized block.
mAnrHelper.appNotResponding(proc, activityShortComponentName, aInfo,
parentShortComponentName, parentProcess, aboveSystem, annotation);
}
return true;
}
2. 判斷前面的事件是否及時完成:
對應于
Reason: Input dispatching timed out (Waiting to send non-key event because the touched window has not finished processing certain input events that were delivered to it over 500.0ms ago. Wait queue length: 10. Wait queue head age: 5591.3ms.)
出現這種問題意味著主線程正在執行其他的事件但是比較耗時導致輸入事件無法及時處理。


InputDispatcher 超時是最常見的 ANR 類型,而且其類型也比較多。
當用戶觸摸屏幕或者按鍵操作,首次觸發的是硬件驅動,驅動收到事件后,將該相應事件寫入到輸入設備節點, 這便產生了最原生態的內核事件。接著,輸入系統取出原生態的事件,經過層層封裝后成為 KeyEvent 或者 MotionEvent ;最后,交付給相應的目標窗口(Window)來消費該輸入事件。可見,輸入系統在整個過程起到承上啟下的銜接作用。
Input 模塊的主要組成:
- Native 層的 InputReader 負責從 EventHub 取出事件并處理,再交給 InputDispatcher;
- Native 層的 InputDispatcher 接收來自 InputReader 的輸入事件,并記錄 WMS 的窗口信息,用于派發事件到合適的窗口;
- Java 層的 InputManagerService 跟 WMS 交互,WMS 記錄所有窗口信息,并同步更新到 IMS,為 InputDispatcher 正確派發事件到 ViewRootImpl 提供保障;
生成 ANR 信息
// ProcessRecord.java
/**
 * Abridged excerpt of the ANR reporting routine: samples CPU usage, writes the
 * event log, builds the "ANR in ..." report, dumps stack traces, logs the
 * report, forwards it to DropBox, and finally either kills the process (silent
 * ANR) or posts the message that shows the ANR dialog. Several parameters and
 * locals (annotation, parent, firstPids, offsets, aInfo, ...) are elided here.
 */
final void appNotResponding(ProcessRecord app, ActivityRecord activity,...) {
long anrTime = SystemClock.uptimeMillis();
// Defaults to true (per the original note).
if (isMonitorCpuUsage()) {
// Refresh CPU usage. This is the first CPU sample taken for the ANR; per the
// original note the sample is stored in mProcessStats.
mService.updateCpuStatsNow();
}
final boolean isSilentAnr;
// Elided: return immediately when the process is shutting down, crashing,
// already killed, or already inside the ANR flow.
// Record the ANR in the event log.
EventLog.writeEvent(EventLogTags.AM_ANR, app.userId, app.pid,...;
// Elided: choose the processes to dump -- the system process, LRU processes
// and native processes.
// Collect the report text that is written to the main log.
StringBuilder info = new StringBuilder();
info.setLength(0);
info.append("ANR in ").append(app.processName);
if (activity != null && activity.shortComponentName != null) {
info.append(" (").append(activity.shortComponentName).append(")");
}
info.append("\n");
info.append("PID: ").append(app.pid).append("\n");
if (annotation != null) {
info.append("Reason: ").append(annotation).append("\n");
}
if (parent != null && parent != activity) {
info.append("Parent: ").append(parent.shortComponentName).append("\n");
}
StringBuilder report = new StringBuilder();
report.append(MemoryPressureUtil.currentPsiState());
ProcessCpuTracker processCpuTracker = new ProcessCpuTracker(true);
// Write the stack traces to a file under /data/anr.
File tracesFile = ActivityManagerService.dumpStackTraces(true, firstPids,...);
// Defaults to true (per the original note).
if (isMonitorCpuUsage()) {
// Refresh CPU usage again: the second CPU sample for the ANR. Per the original
// note, the two samples describe CPU usage before and after the ANR.
mService.updateCpuStatsNow();
synchronized (mService.mProcessCpuTracker) {
// Per-process CPU usage for the period leading up to the ANR.
report.append(mService.mProcessCpuTracker.printCurrentState(anrTime));
}
// CPU load.
info.append(processCpuTracker.printCurrentLoad());
info.append(report);
}
// Per-process CPU usage for the period after the ANR.
info.append(processCpuTracker.printCurrentState(anrTime));
// Print the ANR report to the log.
Slog.e(TAG, info.toString());
if (tracesFile == null) {
// There is no trace file, so dump (only) the alleged culprit's threads to the log
// Send signal 3 (SIGNAL_QUIT) to make the process dump its stacks.
Process.sendSignal(app.pid, Process.SIGNAL_QUIT);
} else if (offsets[1] > 0) {
// We've dumped into the trace file successfully
mService.mProcessList.mAppExitInfoTracker.scheduleLogAnrTrace(
pid, uid, getPackageList(), tracesFile, offsets[0], offsets[1]);
}
// Also send the ANR information to DropBox.
mService.addErrorToDropBox("anr", app, app.processName, ...);
// A silent (background) ANR kills the process instead of showing a dialog,
// unless it is being debugged. Per the original note, background ANR dialogs
// can be enabled in developer options.
if (isSilentAnr() && !isDebugging()) {
kill("bg anr", ApplicationExitInfo.REASON_ANR, true);
return;
}
// Bring up the infamous App Not Responding dialog
// Post SHOW_NOT_RESPONDING_UI_MSG to the UI handler to show the ANR dialog.
Message msg = Message.obtain();
msg.what = ActivityManagerService.SHOW_NOT_RESPONDING_UI_MSG;
msg.obj = new AppNotRespondingDialog.Data(this, aInfo, aboveSystem);
mService.mUiHandler.sendMessage(msg);
}
/**
 * Abridged excerpt of ActivityManagerService showing how the ANR trace file is
 * created under ANR_TRACE_DIR and filled with stack dumps. Loop bodies and some
 * locals (lastPids, nativePids, extraPids) are elided in this excerpt.
 */
public class ActivityManagerService{
// Directory that receives the ANR trace files.
public static final String ANR_TRACE_DIR = "/data/anr";
// Creates the trace file and delegates the actual dumping to the static overload.
File dumpStackTraces(ArrayList<Integer> firstPids, ...){
// Written to the main log.
Slog.i(TAG, "dumpStackTraces pids=" + lastPids + " nativepids=" + nativePids);
final File tracesDir = new File(ANR_TRACE_DIR);
// Create the ANR file; this may fail. The file is named using the
// yyyy-MM-dd-HH-mm-ss-SSS format (per the original note).
// NOTE(review): no failure check is visible before tracesFile is dereferenced
// below -- presumably elided from this excerpt; confirm.
File tracesFile = createAnrDumpFile(tracesDir);
Pair<Long, Long> offsets = dumpStackTraces(
tracesFile.getAbsolutePath(), firstPids, nativePids, extraPids);
return tracesFile;
}
/**
* Dumps stacks into {@code tracesFile} in three passes: firstPids, then
* nativePids, then extraPids. The pass bodies and the return statement are
* elided in this excerpt.
* @return The start/end offset of the trace of the very first PID
*/
public static Pair<Long, Long> dumpStackTraces(String tracesFile, ArrayList<Integer> firstPids,
ArrayList<Integer> nativePids, ArrayList<Integer> extraPids) {
// Written to the main log.
Slog.i(TAG, "Dumping to " + tracesFile);
// We must complete all stack dumps within 20 seconds.
long remainingTime = 20 * 1000;
// First collect all of the stacks of the most important pids.
if (firstPids != null) {
// Elided: collect the stacks of the important processes first.
}
// Next collect the stacks of the native pids
if (nativePids != null) {
// Elided: then collect the native process stacks.
}
// Lastly, dump stacks for all extra PIDs from the CPU tracker.
if (extraPids != null) {
// Elided: finally collect the stacks of the remaining processes.
}
}
}
- 收集 ANR 信息最長 20 秒;
- 調用 Debug.dumpJavaBacktraceToFileTimeout() native 方法,按進程重要程度 dump 堆棧信息。
// Abridged excerpt: asks a process to dump its stacks by queueing a signal to
// it. SIGQUIT is used for Java backtraces and BIONIC_SIGNAL_DEBUGGER otherwise;
// the signal's value is 1 for a native backtrace and 0 for any other dump type.
// Returns false when sigqueue() fails.
// NOTE(review): `pid` is not declared in this excerpt (the parameter is `tid`)
// and there is no return after the final log line -- presumably both are
// elided; confirm against the debuggerd client source.
bool debuggerd_trigger_dump(pid_t tid, DebuggerdDumpType dump_type, unsigned int timeout_ms, unique_fd output_fd) {
LOG(INFO) << TAG "started dumping process " << pid;
// Send the signal.
const int signal = (dump_type == kDebuggerdJavaBacktrace) ? SIGQUIT : BIONIC_SIGNAL_DEBUGGER;
sigval val = {.sival_int = (dump_type == kDebuggerdNativeBacktrace) ? 1 : 0};
if (sigqueue(pid, signal, val) != 0) {
log_error(output_fd, errno, "failed to send signal to pid %d", pid);
return false;
}
LOG(INFO) << TAG "done dumping process " << pid;
}
每一個應用進程都會有一個 SignalCatcher 線程,專門處理 SIGQUIT,來到 art/runtime/signal_catcher.cc:
void* SignalCatcher::Run(void* arg) {
SignalCatcher* signal_catcher = reinterpret_cast<SignalCatcher*>(arg);
CHECK(signal_catcher != nullptr);
Runtime* runtime = Runtime::Current();
// ...
// Set up mask with signals we want to handle.
SignalSet signals;
signals.Add(SIGQUIT);
signals.Add(SIGUSR1);
while (true) {
int signal_number = signal_catcher->WaitForSignal(self, signals);
if (signal_catcher->ShouldHalt()) {
runtime->DetachCurrentThread();
return nullptr;
}
switch (signal_number) {
case SIGQUIT:
signal_catcher->HandleSigQuit();
break;
case SIGUSR1:
signal_catcher->HandleSigUsr1();
break;
default:
LOG(ERROR) << "Unexpected signal %d" << signal_number;
break;
}
}
}
當應用發生 ANR 之后,系統會收集許多進程,來 dump 堆棧,從而生成 ANR Trace 文件。收集的第一個,也是一定會被收集到的進程,就是發生 ANR 的進程。接著系統開始向這些應用進程發送 SIGQUIT 信號,應用進程收到 SIGQUIT 后開始 dump 堆棧。
參考
[1] developer ANRs
[2] Android ANR 分析詳解
[3] 看完這篇 Android ANR 分析,就可以和面試官裝逼了!
[4] 微信 Android 團隊手把手教你高效監控 ANR
[5] Input 系統—ANR 原理分析 - Gityuan
[6] 徹底理解安卓應用無響應機制 - Gityuan
[7] 理解 Android ANR 的觸發原理 - Gityuan