Home > Article > Web Front-end > Talk about the problem of implementing AST abstract syntax tree in JS
Free learning recommendation: javascript learning tutorial
AST abstract syntax tree problem in the front end
Four Arithmetic Operations
First of all, it is clear that the code this time is based on LL syntax analysis. It implements the function of the four arithmetic mixed operations. Let’s look at the definition first:
TokenNumber: ·
1
2
3
4
5
6
7
8
9
0
combination
Operator:
-
*
/
One of
WhiteSpace:<sp></sp>
LineTerminator:
<lf></lf>
<cr></cr>
Look at the production:
##Regular Expression
We first implement the matching principle of regular expressions:<script> var regexp = /([0-9\.]+)|([ \t]+)|([\r\n]+)|(\*)|(\/)|(\+)|(\-)/g var dictionary = ["Number", "Whitespace", "LineTerminator", "*", "/", "+", "-"]; function tokenize(source) { var result = null; while(true) { result = regexp.exec(source); if(!result) break; for(var i = 1; i <= dictionary.length; i ++) { if(result[i]) console.log(dictionary[i - 1]); } console.log(result); } } tokenize("1024 + 10 * 25");</script>At this time we take a look at the running print results of the page:
It is worth mentioning that The exec method is used here. The exec() method is used to retrieve regular expression matches in a string.
Let's take a look at its syntax:
RegExpObject.exec(string)
Lexical analysis
We optimize the above code in this part. First of all, what was just mentioned:
When RegExpObject is a global regular expression, the behavior of exec() is slightly more complicated. It starts retrieving the string string at the character specified by the RegExpObject's lastIndex property. When exec() finds text that matches an expression, it sets the RegExpObject's lastIndex property to the position next to the last character of the matching text after the match. Then we have to consider the situation where there is no matching character and make a judgment:
<script> var regexp = /([0-9\.]+)|([ \t]+)|([\r\n]+)|(\*)|(\/)|(\+)|(\-)/g var dictionary = ["Number", "Whitespace", "LineTerminator", "*", "/", "+", "-"]; function* tokenize(source) { var result = null; var lastIndex = 0; while(true) { lastIndex = regexp.lastIndex; result = regexp.exec(source); if(!result) break; if(regexp.lastIndex - lastIndex > result[0].length) break; let token = { type: null, value: null } for(var i = 1; i <= dictionary.length; i ++) { if(result[i]) token.type = dictionary[i - 1]; } token.value = result[0]; yield token } yield { type: 'EOF' } } for (let token of tokenize("1024 + 10 * 25")) { console.log(token) }</script>As above, we have
regexp.lastIndex - lastIndex and
The length of result[0] is compared to determine whether any string does not match.
Change the entire function into the form of a generator function. Let’s look at the results of the operation:
Syntax analysis
First write the chunks Production, let's take a look at the overall code structure:<script> var regexp = /([0-9\.]+)|([ \t]+)|([\r\n]+)|(\*)|(\/)|(\+)|(\-)/g var dictionary = ["Number", "Whitespace", "LineTerminator", "*", "/", "+", "-"]; function* tokenize(source) { var result = null; var lastIndex = 0; while(true) { lastIndex = regexp.lastIndex; result = regexp.exec(source); if(!result) break; if(regexp.lastIndex - lastIndex > result[0].length) break; let token = { type: null, value: null } for(var i = 1; i <= dictionary.length; i ++) { if(result[i]) token.type = dictionary[i - 1]; } token.value = result[0]; yield token } yield { type: 'EOF' } } let source = []; for(let token of tokenize("10 * 25")) { if (token.type !== "Whitespace" && token.type !== "LineTerminator") source.push(token); } function Expression(tokens) { } function AdditiveExpression(source){ } function MultiplicativeExpresson(source) { console.log(source); } MultiplicativeExpresson("10 * 25")</script>Let's study it first from
MultiplicativeExpresson, which is divided into four situations:
function MultiplicativeExpresson(source) { //如果是数字则进行封装 if(source[0].type === "Number") { let node = { type: "MultiplicativeExpresson", children:[source[0]] } source[0] = node; return MultiplicativeExpresson(source) } //如果是乘号或者除号,则将三项出栈,进行重组 if(source[0].type === "MultiplicativeExpresson" && source[1] && source[1].type === "*") { let node = { type: "MultiplicativeExpresson", operator: "*", children: [] } node.children.push(source.shift()); node.children.push(source.shift()); node.children.push(source.shift()); source.unshift(node); return MultiplicativeExpresson(source) } if(source[0].type === "MultiplicativeExpresson" && source[1] && source[1].type === "/") { let node = { type: "MultiplicativeExpresson", operator: "*", children: [] } node.children.push(source.shift()); node.children.push(source.shift()); node.children.push(source.shift()); source.unshift(node); return MultiplicativeExpresson(source) } //递归结束的条件 if(source[0].type === "MultiplicativeExpresson") return source[0]; return MultiplicativeExpresson(source); }Let's take a look When the source is
"10 * 25 / 2", call
console.log(MultiplicativeExpresson(source))The final running result:
Let’s see next
AdditiveExpression is essentially the same as
MultiplicativeExpresson. The differences have been marked in the code:
function AdditiveExpression(source){ if(source[0].type === "MultiplicativeExpresson") { let node = { type: "AdditiveExpression", children:[source[0]] } source[0] = node; return AdditiveExpression(source) } //如果是乘号或者除号,则将三项出栈,进行重组 if(source[0].type === "AdditiveExpression" && source[1] && source[1].type === "+") { let node = { type: "AdditiveExpression", operator: "+", children: [] } node.children.push(source.shift()); node.children.push(source.shift()); //考虑到第三个数可能时Number 需要在这里再次调用一下 MultiplicativeExpresson 做处理 MultiplicativeExpresson(source); node.children.push(source.shift()); source.unshift(node); return AdditiveExpression(source) } if(source[0].type === "AdditiveExpression" && source[1] && source[1].type === "-") { let node = { type: "AdditiveExpression", operator: "-", children: [] } node.children.push(source.shift()); node.children.push(source.shift()); MultiplicativeExpresson(source); node.children.push(source.shift()); source.unshift(node); return AdditiveExpression(source) } //递归结束的条件 if(source[0].type === "AdditiveExpression") return source[0]; //第一次进循环 调用 MultiplicativeExpresson(source); return AdditiveExpression(source); }
我们看一下当source为"10 * 25 / 2"
时调用console.log(AdditiveExpression(source))
最后运行的结果:
那么Expression
的代码逻辑就很好表达了:
function Expression(tokens) { if(source[0].type === "AdditiveExpression" && source[1] && source[1].type === "EOF") { let node = { type: "Expression", children: [source.shift(), source.shift()] } source.unshift(node); return node; } AdditiveExpression(source); return Expression(source); }
看下运行后的结果:
以上就是所有的js解析抽象语法树的代码。
完整代码
<script> var regexp = /([0-9\.]+)|([ \t]+)|([\r\n]+)|(\*)|(\/)|(\+)|(\-)/g var dictionary = ["Number", "Whitespace", "LineTerminator", "*", "/", "+", "-"]; function* tokenize(source) { var result = null; var lastIndex = 0; while(true) { lastIndex = regexp.lastIndex; result = regexp.exec(source); if(!result) break; if(regexp.lastIndex - lastIndex > result[0].length) break; let token = { type: null, value: null } for(var i = 1; i <= dictionary.length; i ++) { if(result[i]) token.type = dictionary[i - 1]; } token.value = result[0]; yield token } yield { type: 'EOF' } } let source = []; for(let token of tokenize("10 * 25 / 2")) { if (token.type !== "Whitespace" && token.type !== "LineTerminator") source.push(token); } function Expression(tokens) { if(source[0].type === "AdditiveExpression" && source[1] && source[1].type === "EOF") { let node = { type: "Expression", children: [source.shift(), source.shift()] } source.unshift(node); return node; } AdditiveExpression(source); return Expression(source); } function AdditiveExpression(source){ if(source[0].type === "MultiplicativeExpresson") { let node = { type: "AdditiveExpression", children:[source[0]] } source[0] = node; return AdditiveExpression(source) } //如果是乘号或者除号,则将三项出栈,进行重组 if(source[0].type === "AdditiveExpression" && source[1] && source[1].type === "+") { let node = { type: "AdditiveExpression", operator: "+", children: [] } node.children.push(source.shift()); node.children.push(source.shift()); //考虑到第三个数可能时Number 需要在这里再次调用一下 MultiplicativeExpresson 做处理 MultiplicativeExpresson(source); node.children.push(source.shift()); source.unshift(node); return AdditiveExpression(source) } if(source[0].type === "AdditiveExpression" && source[1] && source[1].type === "-") { let node = { type: "AdditiveExpression", operator: "-", children: [] } node.children.push(source.shift()); node.children.push(source.shift()); MultiplicativeExpresson(source); node.children.push(source.shift()); source.unshift(node); return AdditiveExpression(source) } //递归结束的条件 if(source[0].type === "AdditiveExpression") return source[0]; //第一次进循环 调用 MultiplicativeExpresson(source); return AdditiveExpression(source); } function MultiplicativeExpresson(source) { if(source[0].type === "Number") { let node = { type: "MultiplicativeExpresson", children:[source[0]] } source[0] = node; return MultiplicativeExpresson(source) } //如果是乘号或者除号,则将三项出栈,进行重组 if(source[0].type === "MultiplicativeExpresson" && source[1] && source[1].type === "*") { let node = { type: "MultiplicativeExpresson", operator: "*", children: [] } node.children.push(source.shift()); node.children.push(source.shift()); node.children.push(source.shift()); source.unshift(node); return MultiplicativeExpresson(source) } if(source[0].type === "MultiplicativeExpresson" && source[1] && source[1].type === "/") { let node = { type: "MultiplicativeExpresson", operator: "*", children: [] } node.children.push(source.shift()); node.children.push(source.shift()); node.children.push(source.shift()); source.unshift(node); return MultiplicativeExpresson(source) } //递归结束的条件 if(source[0].type === "MultiplicativeExpresson") return source[0]; return MultiplicativeExpresson(source); } console.log(Expression(source))</script>
相关免费学习推荐:javascript(视频)
The above is the detailed content of Talk about the problem of implementing AST abstract syntax tree in JS. For more information, please follow other related articles on the PHP Chinese website!