jQuery源碼二周目#10 Sizzle 詞法解析

注意:之后的講解通通以div > p + .aaron[type="checkbox"], #id:first-child這個選擇器為例

詞法解析

拿到這個選擇器之后首先要做的事情是把它分解成一個個單元,單元結(jié)構(gòu)如下

{
    value: '匹配到的字符串',
    type: '對應(yīng)的Token類型',
    matches: '正則匹配到的一個結(jié)構(gòu)'
}

上面的選擇器拆分之后就是

[
  [
    { value: "div", type: "TAG", matches: ["div"] },
    { value: " > ", type: ">" },
    { value: "p", type: "TAG", matches: ["p"] },
    { value: " + ", type: "+" },
    { value: ".aaron", type: "CLASS", matches: ["aaron"] },
    { value: "[type='checkbox']", type: "ATTR", matches: ["type", "=", "checkbox"] }
  ],
  [
    { value: "#id", type: "ID", matches: ["id"] },
    { value: ":first-child", type: "CHILD", matches: Array }
  ]
]

這一個過程叫做詞法解析

Sizzle.tokenize

Sizzle.tokenize()就是詞法解析的接口,將css選擇器傳入就能得到上面的Token序列,接下來用代碼去實現(xiàn)。

整體架構(gòu)
首先搭建一個大體的框架,和jQuery整體架構(gòu)一樣,外部用一個即時函數(shù)包裹

(function(window, undefined) {

    var Sizzle = function() {

    }

    window.Sizzle = Sizzle;
})(window);

主體
注釋已經(jīng)做了很詳細的講解了,剩下的就是在debug模式下一步步的調(diào)試,看代碼是如何執(zhí)行的。其實詞法解析這一篇內(nèi)容大可以跳過,因為它的任務(wù)只是將css選擇器解析成Token序列,不理解Sizzle.tokenize的源碼也不影響接下來的代碼學(xué)習(xí),只需要有解析這個步驟罷了。至于怎么解析,要是能力很強的話可以自己寫一個,像我這種菜鳥選擇照搬jQuery源碼

Sizzle.tokenize = function (selector, parseOnly) {
        var matched,                        // 用于判斷每執(zhí)行一次while循環(huán)是否解析成功,如果為false說明css選擇器有錯誤,直接終止循環(huán)
            match,                          // 正則匹配結(jié)果
            tokens,                         // Token序列
            soFar = selector,               // 表示字符串未解析的部分
            groups = [],                    // 存放已經(jīng)解析好的Token序列
            preFilters = Expr.preFilter,    // 預(yù)處理用
            cached = tokenCache[selector];  // 緩存

        if (cached) {
            return parseOnly ? 0 : cached;
        }

        while (soFar) {

            // 檢查是否有逗號,有逗號的話就是多個Token序列
            // 比如像'div > p + .aaron[type="checkbox"], #id:first-child'這個選擇器就是兩個Token序列
            // groups = [
            //     [序列一],   // div > p + .aaron[type="checkbox"]
            //     [序列二]    // #id:first-child
            // ]
            if (!matched || (match = rcomma.exec(soFar))) {
                if (match) {
                    // 清除逗號
                    soFar = soFar.slice(match[0].length);
                }

                // 每有一個逗號,都要往groups壓入一個Token序列
                // 然后就是最開始的時候也要往groups壓入一個Token
                groups.push(tokens = []);
            }

            matched = false;

            // 處理特殊的Token:>, +, 空格, ~
            if (match = rcombinators.exec(soFar)) {
                matched = match.shift();
                tokens.push({
                    value: matched,
                    type: match[0].replace(rtrim, " ")
                });

                // 處理完之后將其從待處理的字符中刪除
                soFar = soFar.slice(matched.length);
            }

            // 這里開始分析這幾種Token:TAG, ID, CLASS, ATTR, CHILD, PSEUDO, NAME
            // 如果通過正則匹配到了Token格式:match = matchExpr[ type ].exec( soFar )
            // 需要預(yù)處理的Token:ATTR, CHILD, PSEUDO
            // 交給預(yù)處理器處理:match = preFilters[ type ]( match )
            for (var type in Expr.filter) {
                if ( match = matchExpr[type].exec(soFar) ) {
                    // 預(yù)處理
                    if (preFilters[type]) {
                        match = preFilters[type](match);
                    }

                    matched = match.shift();
                    tokens.push({
                        value: matched,
                        type: type,
                        matches: match
                    })

                    // 處理完之后將其從待處理的字符中刪除
                    soFar = soFar.slice(matched.length);
                }
            }

            // 如果到了這里都還沒matched到,那么說明這個選擇器在這里有錯誤
            // 直接中斷詞法分析過程
            // 這就是Sizzle對詞法分析的異常處理
            if (!matched) {
                break;
            }
        }

        // 如果只需要這個接口檢查選擇器的合法性,直接就返回soFar的剩余長度,因為soFar長度大于零說明選擇器不合法
        // 其余情況,如果soFar不等于"",拋出異常;否則把groups記錄在cache里邊并返回
        return parseOnly ?
            soFar.length :
            soFar ?
                Sizzle.error(selector) :

                // 這里對選擇器(selector)做一個緩存
                tokenCache(selector, groups);
    }

其余代碼

如果只有上面的代碼是運行不了的,因為上面代碼所用到的一些變量和方法是未定義的,接下來把那些缺失的變量和方法添上

定義變量
這些變量幾乎都是正則表達式,用來匹配不同Token(TAG, ID, CLASS, ATTR, CHILD, PSEUDO, NAME)的。表達式不用去研究了,只需要明白每個表達式是匹配哪種Token的就行了。我之前嘗試去弄懂為什么表達式要那樣寫,然而到現(xiàn)在我都沒弄懂,而且研究源碼也不是每個細節(jié)都需要弄懂,只要把整體思路理解了就行了。

var
    // 選擇器緩存
    tokenCache = createCache(),

    booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|" +
        "ismap|loop|multiple|open|readonly|required|scoped",


    // 空白字符
    whitespace = "[\\x20\\t\\r\\n\\f]",

    // ClassName或ID Name
    identifier = "(?:\\\\[\\da-fA-F]{1,6}" + whitespace +
        "?|\\\\[^\\r\\n\\f]|[\\w-]|[^\0-\\x7f])+",

    // css選擇器屬性,如[type="phone"]
    attributes = "\\[" + whitespace + "*(" + identifier + ")(?:" + whitespace +

        // Operator (capture 2)
        "*([*^$|!~]?=)" + whitespace +

        // "Attribute values must be CSS identifiers [capture 5]
        // or strings [capture 3 or capture 4]"
        "*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" + identifier + "))|)" +
        whitespace + "*\\]",

    pseudos = ":(" + identifier + ")(?:\\((" +

        // To reduce the number of selectors needing tokenize in the preFilter, prefer arguments:
        // 1. quoted (capture 3; capture 4 or capture 5)
        "('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|" +

        // 2. simple (capture 6)
        "((?:\\\\.|[^\\\\()[\\]]|" + attributes + ")*)|" +

        // 3. anything else (capture 2)
        ".*" +
        ")\\)|)",

    // Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter
    rwhitespace = new RegExp( whitespace + "+", "g" ),

    // 去除左右空格用
    rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" +
        whitespace + "+$", "g" ),

    // 逗號
    rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ),

    // >, +, 空格, ~
    rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace +
        "*" ),
    rdescend = new RegExp( whitespace + "|>" ),

    rpseudo = new RegExp( pseudos ),
    ridentifier = new RegExp( "^" + identifier + "$" ),

    matchExpr = {
        "ID": new RegExp( "^#(" + identifier + ")" ),
        "CLASS": new RegExp( "^\\.(" + identifier + ")" ),
        "TAG": new RegExp( "^(" + identifier + "|[*])" ),
        "ATTR": new RegExp( "^" + attributes ),
        "PSEUDO": new RegExp( "^" + pseudos ),
        "CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" +
            whitespace + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" +
            whitespace + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ),
        "bool": new RegExp( "^(?:" + booleans + ")$", "i" ),

        // For use in libraries implementing .is()
        // We use this for POS matching in `select`
        "needsContext": new RegExp( "^" + whitespace +
            "*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + whitespace +
            "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" )
    },

    rhtml = /HTML$/i,
    rinputs = /^(?:input|select|textarea|button)$/i,
    rheader = /^h\d$/i,

    rnative = /^[^{]+\{\s*\[native \w/,

    // Easily-parseable/retrievable ID or TAG or CLASS selectors
    rquickExpr = /^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,

    rsibling = /[+~]/,

    // CSS escapes
    // http://www.w3.org/TR/CSS21/syndata.html#escaped-characters
    runescape = new RegExp( "\\\\[\\da-fA-F]{1,6}" + whitespace + "?|\\\\([^\\r\\n\\f])", "g" ),
    
    funescape = function( escape, nonHex ) {
        var high = "0x" + escape.slice( 1 ) - 0x10000;

        return nonHex ?

            // Strip the backslash prefix from a non-hex escape sequence
            nonHex :

            // Replace a hexadecimal escape sequence with the encoded Unicode code point
            // Support: IE <=11+
            // For values outside the Basic Multilingual Plane (BMP), manually construct a
            // surrogate pair
            high < 0 ?
                String.fromCharCode( high + 0x10000 ) :
                String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 );
    };

定義方法

var Expr = Sizzle.selectors = {

    // 選擇器緩存長度
    cacheLength: 50,

    // 預(yù)處理
    preFilter: {
        "ATTR": function( match ) {
            match[ 1 ] = match[ 1 ].replace( runescape, funescape );

            // Move the given value to match[3] whether quoted or unquoted
            match[ 3 ] = ( match[ 3 ] || match[ 4 ] ||
                match[ 5 ] || "" ).replace( runescape, funescape );

            if ( match[ 2 ] === "~=" ) {
                match[ 3 ] = " " + match[ 3 ] + " ";
            }

            return match.slice( 0, 4 );
        },

        "CHILD": function( match ) {

            /* matches from matchExpr["CHILD"]
                1 type (only|nth|...)
                2 what (child|of-type)
                3 argument (even|odd|\d*|\d*n([+-]\d+)?|...)
                4 xn-component of xn+y argument ([+-]?\d*n|)
                5 sign of xn-component
                6 x of xn-component
                7 sign of y-component
                8 y of y-component
            */
            match[ 1 ] = match[ 1 ].toLowerCase();

            if ( match[ 1 ].slice( 0, 3 ) === "nth" ) {

                // nth-* requires argument
                if ( !match[ 3 ] ) {
                    Sizzle.error( match[ 0 ] );
                }

                // numeric x and y parameters for Expr.filter.CHILD
                // remember that false/true cast respectively to 0/1
                match[ 4 ] = +( match[ 4 ] ?
                    match[ 5 ] + ( match[ 6 ] || 1 ) :
                    2 * ( match[ 3 ] === "even" || match[ 3 ] === "odd" ) );
                match[ 5 ] = +( ( match[ 7 ] + match[ 8 ] ) || match[ 3 ] === "odd" );

                // other types prohibit arguments
            } else if ( match[ 3 ] ) {
                Sizzle.error( match[ 0 ] );
            }

            return match;
        },

        "PSEUDO": function( match ) {
            var excess,
                unquoted = !match[ 6 ] && match[ 2 ];

            if ( matchExpr[ "CHILD" ].test( match[ 0 ] ) ) {
                return null;
            }

            // Accept quoted arguments as-is
            if ( match[ 3 ] ) {
                match[ 2 ] = match[ 4 ] || match[ 5 ] || "";

                // Strip excess characters from unquoted arguments
            } else if ( unquoted && rpseudo.test( unquoted ) &&

                // Get excess from tokenize (recursively)
                ( excess = tokenize( unquoted, true ) ) &&

                // advance to the next closing parenthesis
                ( excess = unquoted.indexOf( ")", unquoted.length - excess ) - unquoted.length ) ) {

                // excess is a negative index
                match[ 0 ] = match[ 0 ].slice( 0, excess );
                match[ 2 ] = unquoted.slice( 0, excess );
            }

            // Return only captures needed by the pseudo filter method (type and argument)
            return match.slice( 0, 3 );
        }
    },

    filter: {

        "TAG": function( nodeNameSelector ) {
            var nodeName = nodeNameSelector.replace( runescape, funescape ).toLowerCase();
            return nodeNameSelector === "*" ?
                function() {
                    return true;
                } :
                function( elem ) {
                    return elem.nodeName && elem.nodeName.toLowerCase() === nodeName;
                };
        },

        "CLASS": function( className ) {
            var pattern = classCache[ className + " " ];

            return pattern ||
                ( pattern = new RegExp( "(^|" + whitespace +
                    ")" + className + "(" + whitespace + "|$)" ) ) && classCache(
                    className, function( elem ) {
                        return pattern.test(
                            typeof elem.className === "string" && elem.className ||
                            typeof elem.getAttribute !== "undefined" &&
                            elem.getAttribute( "class" ) ||
                            ""
                        );
                    } );
        },

        "ATTR": function( name, operator, check ) {
            return function( elem ) {
                var result = Sizzle.attr( elem, name );

                if ( result == null ) {
                    return operator === "!=";
                }
                if ( !operator ) {
                    return true;
                }

                result += "";

                /* eslint-disable max-len */

                return operator === "=" ? result === check :
                    operator === "!=" ? result !== check :
                        operator === "^=" ? check && result.indexOf( check ) === 0 :
                            operator === "*=" ? check && result.indexOf( check ) > -1 :
                                operator === "$=" ? check && result.slice( -check.length ) === check :
                                    operator === "~=" ? ( " " + result.replace( rwhitespace, " " ) + " " ).indexOf( check ) > -1 :
                                        operator === "|=" ? result === check || result.slice( 0, check.length + 1 ) === check + "-" :
                                            false;
                /* eslint-enable max-len */

            };
        },

        "CHILD": function( type, what, _argument, first, last ) {
            var simple = type.slice( 0, 3 ) !== "nth",
                forward = type.slice( -4 ) !== "last",
                ofType = what === "of-type";

            return first === 1 && last === 0 ?

                // Shortcut for :nth-*(n)
                function( elem ) {
                    return !!elem.parentNode;
                } :

                function( elem, _context, xml ) {
                    var cache, uniqueCache, outerCache, node, nodeIndex, start,
                        dir = simple !== forward ? "nextSibling" : "previousSibling",
                        parent = elem.parentNode,
                        name = ofType && elem.nodeName.toLowerCase(),
                        useCache = !xml && !ofType,
                        diff = false;

                    if ( parent ) {

                        // :(first|last|only)-(child|of-type)
                        if ( simple ) {
                            while ( dir ) {
                                node = elem;
                                while ( ( node = node[ dir ] ) ) {
                                    if ( ofType ?
                                        node.nodeName.toLowerCase() === name :
                                        node.nodeType === 1 ) {

                                        return false;
                                    }
                                }

                                // Reverse direction for :only-* (if we haven't yet done so)
                                start = dir = type === "only" && !start && "nextSibling";
                            }
                            return true;
                        }

                        start = [ forward ? parent.firstChild : parent.lastChild ];

                        // non-xml :nth-child(...) stores cache data on `parent`
                        if ( forward && useCache ) {

                            // Seek `elem` from a previously-cached index

                            // ...in a gzip-friendly way
                            node = parent;
                            outerCache = node[ expando ] || ( node[ expando ] = {} );

                            // Support: IE <9 only
                            // Defend against cloned attroperties (jQuery gh-1709)
                            uniqueCache = outerCache[ node.uniqueID ] ||
                                ( outerCache[ node.uniqueID ] = {} );

                            cache = uniqueCache[ type ] || [];
                            nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ];
                            diff = nodeIndex && cache[ 2 ];
                            node = nodeIndex && parent.childNodes[ nodeIndex ];

                            while ( ( node = ++nodeIndex && node && node[ dir ] ||

                                // Fallback to seeking `elem` from the start
                                ( diff = nodeIndex = 0 ) || start.pop() ) ) {

                                // When found, cache indexes on `parent` and break
                                if ( node.nodeType === 1 && ++diff && node === elem ) {
                                    uniqueCache[ type ] = [ dirruns, nodeIndex, diff ];
                                    break;
                                }
                            }

                        } else {

                            // Use previously-cached element index if available
                            if ( useCache ) {

                                // ...in a gzip-friendly way
                                node = elem;
                                outerCache = node[ expando ] || ( node[ expando ] = {} );

                                // Support: IE <9 only
                                // Defend against cloned attroperties (jQuery gh-1709)
                                uniqueCache = outerCache[ node.uniqueID ] ||
                                    ( outerCache[ node.uniqueID ] = {} );

                                cache = uniqueCache[ type ] || [];
                                nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ];
                                diff = nodeIndex;
                            }

                            // xml :nth-child(...)
                            // or :nth-last-child(...) or :nth(-last)?-of-type(...)
                            if ( diff === false ) {

                                // Use the same loop as above to seek `elem` from the start
                                while ( ( node = ++nodeIndex && node && node[ dir ] ||
                                    ( diff = nodeIndex = 0 ) || start.pop() ) ) {

                                    if ( ( ofType ?
                                        node.nodeName.toLowerCase() === name :
                                        node.nodeType === 1 ) &&
                                        ++diff ) {

                                        // Cache the index of each encountered element
                                        if ( useCache ) {
                                            outerCache = node[ expando ] ||
                                                ( node[ expando ] = {} );

                                            // Support: IE <9 only
                                            // Defend against cloned attroperties (jQuery gh-1709)
                                            uniqueCache = outerCache[ node.uniqueID ] ||
                                                ( outerCache[ node.uniqueID ] = {} );

                                            uniqueCache[ type ] = [ dirruns, diff ];
                                        }

                                        if ( node === elem ) {
                                            break;
                                        }
                                    }
                                }
                            }
                        }

                        // Incorporate the offset, then check against cycle size
                        diff -= last;
                        return diff === first || ( diff % first === 0 && diff / first >= 0 );
                    }
                };
        },

        "PSEUDO": function( pseudo, argument ) {

            // pseudo-class names are case-insensitive
            // http://www.w3.org/TR/selectors/#pseudo-classes
            // Prioritize by case sensitivity in case custom pseudos are added with uppercase letters
            // Remember that setFilters inherits from pseudos
            var args,
                fn = Expr.pseudos[ pseudo ] || Expr.setFilters[ pseudo.toLowerCase() ] ||
                    Sizzle.error( "unsupported pseudo: " + pseudo );

            // The user may use createPseudo to indicate that
            // arguments are needed to create the filter function
            // just as Sizzle does
            if ( fn[ expando ] ) {
                return fn( argument );
            }

            // But maintain support for old signatures
            if ( fn.length > 1 ) {
                args = [ pseudo, pseudo, "", argument ];
                return Expr.setFilters.hasOwnProperty( pseudo.toLowerCase() ) ?
                    markFunction( function( seed, matches ) {
                        var idx,
                            matched = fn( seed, argument ),
                            i = matched.length;
                        while ( i-- ) {
                            idx = indexOf( seed, matched[ i ] );
                            seed[ idx ] = !( matches[ idx ] = matched[ i ] );
                        }
                    } ) :
                    function( elem ) {
                        return fn( elem, 0, args );
                    };
            }

            return fn;
        },

        "ID": function( id ) {
            var attrId = id.replace( runescape, funescape );
            return function( elem ) {
                return elem.getAttribute( "id" ) === attrId;
            };

        }
    },
}

// 異常
Sizzle.error = function( msg ) {
    throw new Error( "Syntax error, unrecognized expression: " + msg );
}

// 選擇器緩存
function createCache() {
    var keys = [];

    function cache(key, value) {
        if (keys.push(key) > Expr.cacheLength) {
            delete cache[keys.shift()];
        }
        return (cache[key] = value);
    }
    return cache;
}

代碼下載

傳送門

?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點,簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

友情鏈接更多精彩內(nèi)容