注意:之后的講解通通以div > p + .aaron[type="checkbox"], #id:first-child這個選擇器為例
詞法解析
拿到這個選擇器之后首先要做的事情是把它分解成一個個單元,單元結(jié)構(gòu)如下
{
value: '匹配到的字符串',
type: '對應(yīng)的Token類型',
matches: '正則匹配到的一個結(jié)構(gòu)'
}
上面的選擇器拆分之后就是
[
[
{ value: "div", type: "TAG", matches: ["div"] },
{ value: " > ", type: ">" },
{ value: "p", type: "TAG", matches: ["p"] },
{ value: " + ", type: "+" },
{ value: ".aaron", type: "CLASS", matches: ["aaron"] },
{ value: "[type='checkbox']", type: "ATTR", matches: ["type", "=", "checkbox"] }
],
[
{ value: "#id", type: "ID", matches: ["id"] },
{ value: ":first-child", type: "CHILD", matches: Array }
]
]
這一個過程叫做詞法解析
Sizzle.tokenize
Sizzle.tokenize()就是詞法解析的接口,將css選擇器傳入就能得到上面的Token序列,接下來用代碼去實現(xiàn)。
整體架構(gòu)
首先搭建一個大體的框架,和jQuery整體架構(gòu)一樣,外部用一個即時函數(shù)包裹
(function(window, undefined) {
var Sizzle = function() {
}
window.Sizzle = Sizzle;
})(window);
主體
注釋已經(jīng)做了很詳細的講解了,剩下的就是在debug模式下一步步的調(diào)試,看代碼是如何執(zhí)行的。其實詞法解析這一篇內(nèi)容大可以跳過,因為它的任務(wù)只是將css選擇器解析成Token序列,不理解Sizzle.tokenize的源碼也不影響接下來的代碼學(xué)習(xí),只需要有解析這個步驟罷了。至于怎么解析,要是能力很強的話可以自己寫一個,像我這種菜鳥選擇照搬jQuery源碼
Sizzle.tokenize = function (selector, parseOnly) {
var matched, // 用于判斷每執(zhí)行一次while循環(huán)是否解析成功,如果為false說明css選擇器有錯誤,直接終止循環(huán)
match, // 正則匹配結(jié)果
tokens, // Token序列
soFar = selector, // 表示字符串未解析的部分
groups = [], // 存放已經(jīng)解析好的Token序列
preFilters = Expr.preFilter, // 預(yù)處理用
cached = tokenCache[selector]; // 緩存
if (cached) {
return parseOnly ? 0 : cached;
}
while (soFar) {
// 檢查是否有逗號,有逗號的話就是多個Token序列
// 比如像'div > p + .aaron[type="checkbox"], #id:first-child'這個選擇器就是兩個Token序列
// groups = [
// [序列一], // div > p + .aaron[type="checkbox"]
// [序列二] // #id:first-child
// ]
if (!matched || (match = rcomma.exec(soFar))) {
if (match) {
// 清除逗號
soFar = soFar.slice(match[0].length);
}
// 每有一個逗號,都要往groups壓入一個Token序列
// 然后就是最開始的時候也要往groups壓入一個Token
groups.push(tokens = []);
}
matched = false;
// 處理特殊的Token:>, +, 空格, ~
if (match = rcombinators.exec(soFar)) {
matched = match.shift();
tokens.push({
value: matched,
type: match[0].replace(rtrim, " ")
});
// 處理完之后將其從待處理的字符中刪除
soFar = soFar.slice(matched.length);
}
// 這里開始分析這幾種Token:TAG, ID, CLASS, ATTR, CHILD, PSEUDO, NAME
// 如果通過正則匹配到了Token格式:match = matchExpr[ type ].exec( soFar )
// 需要預(yù)處理的Token:ATTR, CHILD, PSEUDO
// 交給預(yù)處理器處理:match = preFilters[ type ]( match )
for (var type in Expr.filter) {
if ( match = matchExpr[type].exec(soFar) ) {
// 預(yù)處理
if (preFilters[type]) {
match = preFilters[type](match);
}
matched = match.shift();
tokens.push({
value: matched,
type: type,
matches: match
})
// 處理完之后將其從待處理的字符中刪除
soFar = soFar.slice(matched.length);
}
}
// 如果到了這里都還沒matched到,那么說明這個選擇器在這里有錯誤
// 直接中斷詞法分析過程
// 這就是Sizzle對詞法分析的異常處理
if (!matched) {
break;
}
}
// 如果只需要這個接口檢查選擇器的合法性,直接就返回soFar的剩余長度,因為soFar長度大于零說明選擇器不合法
// 其余情況,如果soFar不等于"",拋出異常;否則把groups記錄在cache里邊并返回
return parseOnly ?
soFar.length :
soFar ?
Sizzle.error(selector) :
// 這里對選擇器(selector)做一個緩存
tokenCache(selector, groups);
}
其余代碼
如果只有上面的代碼是運行不了的,因為上面代碼所用到的一些變量和方法是未定義的,接下來把那些缺失的變量和方法添上
定義變量
這些變量幾乎都是正則表達式,用來匹配不同Token(TAG, ID, CLASS, ATTR, CHILD, PSEUDO, NAME)的。表達式不用去研究了,只需要明白每個表達式是匹配哪種Token的就行了。我之前嘗試去弄懂為什么表達式要那樣寫,然而到現(xiàn)在我都沒弄懂,而且研究源碼也不是每個細節(jié)都需要弄懂,只要把整體思路理解了就行了。
var
// 選擇器緩存
tokenCache = createCache(),
booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|" +
"ismap|loop|multiple|open|readonly|required|scoped",
// 空白字符
whitespace = "[\\x20\\t\\r\\n\\f]",
// ClassName或ID Name
identifier = "(?:\\\\[\\da-fA-F]{1,6}" + whitespace +
"?|\\\\[^\\r\\n\\f]|[\\w-]|[^\0-\\x7f])+",
// css選擇器屬性,如[type="phone"]
attributes = "\\[" + whitespace + "*(" + identifier + ")(?:" + whitespace +
// Operator (capture 2)
"*([*^$|!~]?=)" + whitespace +
// "Attribute values must be CSS identifiers [capture 5]
// or strings [capture 3 or capture 4]"
"*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" + identifier + "))|)" +
whitespace + "*\\]",
pseudos = ":(" + identifier + ")(?:\\((" +
// To reduce the number of selectors needing tokenize in the preFilter, prefer arguments:
// 1. quoted (capture 3; capture 4 or capture 5)
"('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|" +
// 2. simple (capture 6)
"((?:\\\\.|[^\\\\()[\\]]|" + attributes + ")*)|" +
// 3. anything else (capture 2)
".*" +
")\\)|)",
// Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter
rwhitespace = new RegExp( whitespace + "+", "g" ),
// 去除左右空格用
rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" +
whitespace + "+$", "g" ),
// 逗號
rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ),
// >, +, 空格, ~
rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace +
"*" ),
rdescend = new RegExp( whitespace + "|>" ),
rpseudo = new RegExp( pseudos ),
ridentifier = new RegExp( "^" + identifier + "$" ),
matchExpr = {
"ID": new RegExp( "^#(" + identifier + ")" ),
"CLASS": new RegExp( "^\\.(" + identifier + ")" ),
"TAG": new RegExp( "^(" + identifier + "|[*])" ),
"ATTR": new RegExp( "^" + attributes ),
"PSEUDO": new RegExp( "^" + pseudos ),
"CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" +
whitespace + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" +
whitespace + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ),
"bool": new RegExp( "^(?:" + booleans + ")$", "i" ),
// For use in libraries implementing .is()
// We use this for POS matching in `select`
"needsContext": new RegExp( "^" + whitespace +
"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + whitespace +
"*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" )
},
rhtml = /HTML$/i,
rinputs = /^(?:input|select|textarea|button)$/i,
rheader = /^h\d$/i,
rnative = /^[^{]+\{\s*\[native \w/,
// Easily-parseable/retrievable ID or TAG or CLASS selectors
rquickExpr = /^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,
rsibling = /[+~]/,
// CSS escapes
// http://www.w3.org/TR/CSS21/syndata.html#escaped-characters
runescape = new RegExp( "\\\\[\\da-fA-F]{1,6}" + whitespace + "?|\\\\([^\\r\\n\\f])", "g" ),
funescape = function( escape, nonHex ) {
var high = "0x" + escape.slice( 1 ) - 0x10000;
return nonHex ?
// Strip the backslash prefix from a non-hex escape sequence
nonHex :
// Replace a hexadecimal escape sequence with the encoded Unicode code point
// Support: IE <=11+
// For values outside the Basic Multilingual Plane (BMP), manually construct a
// surrogate pair
high < 0 ?
String.fromCharCode( high + 0x10000 ) :
String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 );
};
定義方法
var Expr = Sizzle.selectors = {
// 選擇器緩存長度
cacheLength: 50,
// 預(yù)處理
preFilter: {
"ATTR": function( match ) {
match[ 1 ] = match[ 1 ].replace( runescape, funescape );
// Move the given value to match[3] whether quoted or unquoted
match[ 3 ] = ( match[ 3 ] || match[ 4 ] ||
match[ 5 ] || "" ).replace( runescape, funescape );
if ( match[ 2 ] === "~=" ) {
match[ 3 ] = " " + match[ 3 ] + " ";
}
return match.slice( 0, 4 );
},
"CHILD": function( match ) {
/* matches from matchExpr["CHILD"]
1 type (only|nth|...)
2 what (child|of-type)
3 argument (even|odd|\d*|\d*n([+-]\d+)?|...)
4 xn-component of xn+y argument ([+-]?\d*n|)
5 sign of xn-component
6 x of xn-component
7 sign of y-component
8 y of y-component
*/
match[ 1 ] = match[ 1 ].toLowerCase();
if ( match[ 1 ].slice( 0, 3 ) === "nth" ) {
// nth-* requires argument
if ( !match[ 3 ] ) {
Sizzle.error( match[ 0 ] );
}
// numeric x and y parameters for Expr.filter.CHILD
// remember that false/true cast respectively to 0/1
match[ 4 ] = +( match[ 4 ] ?
match[ 5 ] + ( match[ 6 ] || 1 ) :
2 * ( match[ 3 ] === "even" || match[ 3 ] === "odd" ) );
match[ 5 ] = +( ( match[ 7 ] + match[ 8 ] ) || match[ 3 ] === "odd" );
// other types prohibit arguments
} else if ( match[ 3 ] ) {
Sizzle.error( match[ 0 ] );
}
return match;
},
"PSEUDO": function( match ) {
var excess,
unquoted = !match[ 6 ] && match[ 2 ];
if ( matchExpr[ "CHILD" ].test( match[ 0 ] ) ) {
return null;
}
// Accept quoted arguments as-is
if ( match[ 3 ] ) {
match[ 2 ] = match[ 4 ] || match[ 5 ] || "";
// Strip excess characters from unquoted arguments
} else if ( unquoted && rpseudo.test( unquoted ) &&
// Get excess from tokenize (recursively)
( excess = tokenize( unquoted, true ) ) &&
// advance to the next closing parenthesis
( excess = unquoted.indexOf( ")", unquoted.length - excess ) - unquoted.length ) ) {
// excess is a negative index
match[ 0 ] = match[ 0 ].slice( 0, excess );
match[ 2 ] = unquoted.slice( 0, excess );
}
// Return only captures needed by the pseudo filter method (type and argument)
return match.slice( 0, 3 );
}
},
filter: {
"TAG": function( nodeNameSelector ) {
var nodeName = nodeNameSelector.replace( runescape, funescape ).toLowerCase();
return nodeNameSelector === "*" ?
function() {
return true;
} :
function( elem ) {
return elem.nodeName && elem.nodeName.toLowerCase() === nodeName;
};
},
"CLASS": function( className ) {
var pattern = classCache[ className + " " ];
return pattern ||
( pattern = new RegExp( "(^|" + whitespace +
")" + className + "(" + whitespace + "|$)" ) ) && classCache(
className, function( elem ) {
return pattern.test(
typeof elem.className === "string" && elem.className ||
typeof elem.getAttribute !== "undefined" &&
elem.getAttribute( "class" ) ||
""
);
} );
},
"ATTR": function( name, operator, check ) {
return function( elem ) {
var result = Sizzle.attr( elem, name );
if ( result == null ) {
return operator === "!=";
}
if ( !operator ) {
return true;
}
result += "";
/* eslint-disable max-len */
return operator === "=" ? result === check :
operator === "!=" ? result !== check :
operator === "^=" ? check && result.indexOf( check ) === 0 :
operator === "*=" ? check && result.indexOf( check ) > -1 :
operator === "$=" ? check && result.slice( -check.length ) === check :
operator === "~=" ? ( " " + result.replace( rwhitespace, " " ) + " " ).indexOf( check ) > -1 :
operator === "|=" ? result === check || result.slice( 0, check.length + 1 ) === check + "-" :
false;
/* eslint-enable max-len */
};
},
"CHILD": function( type, what, _argument, first, last ) {
var simple = type.slice( 0, 3 ) !== "nth",
forward = type.slice( -4 ) !== "last",
ofType = what === "of-type";
return first === 1 && last === 0 ?
// Shortcut for :nth-*(n)
function( elem ) {
return !!elem.parentNode;
} :
function( elem, _context, xml ) {
var cache, uniqueCache, outerCache, node, nodeIndex, start,
dir = simple !== forward ? "nextSibling" : "previousSibling",
parent = elem.parentNode,
name = ofType && elem.nodeName.toLowerCase(),
useCache = !xml && !ofType,
diff = false;
if ( parent ) {
// :(first|last|only)-(child|of-type)
if ( simple ) {
while ( dir ) {
node = elem;
while ( ( node = node[ dir ] ) ) {
if ( ofType ?
node.nodeName.toLowerCase() === name :
node.nodeType === 1 ) {
return false;
}
}
// Reverse direction for :only-* (if we haven't yet done so)
start = dir = type === "only" && !start && "nextSibling";
}
return true;
}
start = [ forward ? parent.firstChild : parent.lastChild ];
// non-xml :nth-child(...) stores cache data on `parent`
if ( forward && useCache ) {
// Seek `elem` from a previously-cached index
// ...in a gzip-friendly way
node = parent;
outerCache = node[ expando ] || ( node[ expando ] = {} );
// Support: IE <9 only
// Defend against cloned attroperties (jQuery gh-1709)
uniqueCache = outerCache[ node.uniqueID ] ||
( outerCache[ node.uniqueID ] = {} );
cache = uniqueCache[ type ] || [];
nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ];
diff = nodeIndex && cache[ 2 ];
node = nodeIndex && parent.childNodes[ nodeIndex ];
while ( ( node = ++nodeIndex && node && node[ dir ] ||
// Fallback to seeking `elem` from the start
( diff = nodeIndex = 0 ) || start.pop() ) ) {
// When found, cache indexes on `parent` and break
if ( node.nodeType === 1 && ++diff && node === elem ) {
uniqueCache[ type ] = [ dirruns, nodeIndex, diff ];
break;
}
}
} else {
// Use previously-cached element index if available
if ( useCache ) {
// ...in a gzip-friendly way
node = elem;
outerCache = node[ expando ] || ( node[ expando ] = {} );
// Support: IE <9 only
// Defend against cloned attroperties (jQuery gh-1709)
uniqueCache = outerCache[ node.uniqueID ] ||
( outerCache[ node.uniqueID ] = {} );
cache = uniqueCache[ type ] || [];
nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ];
diff = nodeIndex;
}
// xml :nth-child(...)
// or :nth-last-child(...) or :nth(-last)?-of-type(...)
if ( diff === false ) {
// Use the same loop as above to seek `elem` from the start
while ( ( node = ++nodeIndex && node && node[ dir ] ||
( diff = nodeIndex = 0 ) || start.pop() ) ) {
if ( ( ofType ?
node.nodeName.toLowerCase() === name :
node.nodeType === 1 ) &&
++diff ) {
// Cache the index of each encountered element
if ( useCache ) {
outerCache = node[ expando ] ||
( node[ expando ] = {} );
// Support: IE <9 only
// Defend against cloned attroperties (jQuery gh-1709)
uniqueCache = outerCache[ node.uniqueID ] ||
( outerCache[ node.uniqueID ] = {} );
uniqueCache[ type ] = [ dirruns, diff ];
}
if ( node === elem ) {
break;
}
}
}
}
}
// Incorporate the offset, then check against cycle size
diff -= last;
return diff === first || ( diff % first === 0 && diff / first >= 0 );
}
};
},
"PSEUDO": function( pseudo, argument ) {
// pseudo-class names are case-insensitive
// http://www.w3.org/TR/selectors/#pseudo-classes
// Prioritize by case sensitivity in case custom pseudos are added with uppercase letters
// Remember that setFilters inherits from pseudos
var args,
fn = Expr.pseudos[ pseudo ] || Expr.setFilters[ pseudo.toLowerCase() ] ||
Sizzle.error( "unsupported pseudo: " + pseudo );
// The user may use createPseudo to indicate that
// arguments are needed to create the filter function
// just as Sizzle does
if ( fn[ expando ] ) {
return fn( argument );
}
// But maintain support for old signatures
if ( fn.length > 1 ) {
args = [ pseudo, pseudo, "", argument ];
return Expr.setFilters.hasOwnProperty( pseudo.toLowerCase() ) ?
markFunction( function( seed, matches ) {
var idx,
matched = fn( seed, argument ),
i = matched.length;
while ( i-- ) {
idx = indexOf( seed, matched[ i ] );
seed[ idx ] = !( matches[ idx ] = matched[ i ] );
}
} ) :
function( elem ) {
return fn( elem, 0, args );
};
}
return fn;
},
"ID": function( id ) {
var attrId = id.replace( runescape, funescape );
return function( elem ) {
return elem.getAttribute( "id" ) === attrId;
};
}
},
}
// 異常
Sizzle.error = function( msg ) {
throw new Error( "Syntax error, unrecognized expression: " + msg );
}
// 選擇器緩存
function createCache() {
var keys = [];
function cache(key, value) {
if (keys.push(key) > Expr.cacheLength) {
delete cache[keys.shift()];
}
return (cache[key] = value);
}
return cache;
}