notes for php-parser
Background
最近根据需求,学习php代码的静态分析,希望能提取php代码的AST,采用php-parser库
https://github.com/nikic/PHP-Parser
所以这篇其实应该叫做"结合php-parser学习污点分析"
并且本篇基本不对"将AST还原为原始代码"这一部分做过多的研究
basic operation
根据文档,编写小demo记录所需的用法
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
| <?php
// Demo: parse test/1.php with nikic/PHP-Parser and print a readable dump of its AST.
require_once 'vendor/autoload.php';

use PhpParser\Error;
use PhpParser\NodeDumper;
use PhpParser\ParserFactory;

$code = file_get_contents('test/1.php');
if ($code === false) {
    // Without this guard, `false` would be handed to the parser below.
    exit('Cannot read test/1.php');
}

$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);

try {
    $ast = $parser->parse($code);
    $nodeDump = new NodeDumper();
    echo $nodeDump->dump($ast);
} catch (Error $e) {
    // `Error` must be PhpParser\Error (imported above). Without the import the
    // name resolves to PHP's built-in \Error, which the parser does not throw,
    // so syntax errors in the parsed file would escape this handler.
    echo 'Parse Error: ', $e->getMessage();
}
?>
|
得结果:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| array( 0: Stmt_Expression( expr: Expr_ErrorSuppress( expr: Expr_FuncCall( name: Name( parts: array( 0: assert ) ) args: array( 0: Arg( name: null value: Expr_ArrayDimFetch( var: Expr_Variable( name: _POST ) dim: Scalar_String( value: shell ) ) byRef: false unpack: false ) ) ) ) ) )
|
这里包含了Stmt_Expression
节点,对应的类为PhpParser\Node\Stmt\Expression
这里做了三种类别:
PhpParser\Node\Stmt
语句节点,不返回值,且不能出现在一个表达式中,例如类定义class A
PhpParser\Node\Expr
表达式节点,有返回值的语言结构,如变量$a
、函数func()
PhpParser\Node\Scalar
标量节点,如字符串'string'
、魔术常量__FILE__
- 其他节点如
PhpParser\Node\Name
、PhpParser\Node\Arg
上述一句话木马只包含一条函数调用语句,因此AST顶层仅有一个Stmt_Expression
节点
所以也可以针对这棵语法树对节点部分做分析
加一个循环来输出看内部结构
1 2 3
| foreach ($ast as $item) {
    // Dump every top-level statement node with its full object structure.
    var_dump($item);
    // Blank separator line. The original passed '\n' (single-quoted, so a
    // literal backslash + n) as a second var_dump() argument, which printed
    // string(2) "\n" instead of a line break.
    echo "\n";
}
|
可以看到更具体的类型和格式
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
| object(PhpParser\Node\Stmt\Expression)#1185 (2) { ["expr"]=> object(PhpParser\Node\Expr\ErrorSuppress)#1184 (2) { ["expr"]=> object(PhpParser\Node\Expr\FuncCall)#1183 (3) { ["name"]=> object(PhpParser\Node\Name)#1178 (2) { ["parts"]=> array(1) { [0]=> string(6) "assert" } ["attributes":protected]=> array(2) { ["startLine"]=> int(1) ["endLine"]=> int(1) } } ["args"]=> array(1) { [0]=> object(PhpParser\Node\Arg)#1182 (5) { ["name"]=> NULL ["value"]=> object(PhpParser\Node\Expr\ArrayDimFetch)#1181 (3) { ["var"]=> object(PhpParser\Node\Expr\Variable)#1179 (2) { ["name"]=> string(5) "_POST" ["attributes":protected]=> array(2) { ["startLine"]=> int(1) ["endLine"]=> int(1) } } ["dim"]=> object(PhpParser\Node\Scalar\String_)#1180 (2) { ["value"]=> string(5) "shell" ["attributes":protected]=> array(3) { ["startLine"]=> int(1) ["endLine"]=> int(1) ["kind"]=> int(1) } } ["attributes":protected]=> array(2) { ["startLine"]=> int(1) ["endLine"]=> int(1) } } ["byRef"]=> bool(false) ["unpack"]=> bool(false) ["attributes":protected]=> array(2) { ["startLine"]=> int(1) ["endLine"]=> int(1) } } } ["attributes":protected]=> array(2) { ["startLine"]=> int(1) ["endLine"]=> int(1) } } ["attributes":protected]=> array(2) { ["startLine"]=> int(1) ["endLine"]=> int(1) } } ["attributes":protected]=> array(2) { ["startLine"]=> int(1) ["endLine"]=> int(1) } }
|
节点对象的getType()
方法可以返回节点类型(如Expr_FuncCall);结合对具体节点的访问,这里想拿到一句话木马里调用的函数名,可以这么写(注意:下面代码中对字符串调用的getType()实际是PHP内置的gettype(),输出的是变量类型string,而不是节点类型)
1 2
| $funcName = $ast[0]->expr->expr->name->parts[0]; // first part of the called function's Name node
// PHP function names are case-insensitive, so the original getType(...) call is
// the built-in gettype(); applied to this name part it reports the PHP value
// type, and the second print_r() then prints the name itself.
print_r(gettype($funcName));
print_r($funcName);
|
Node traversal
use PhpParser\NodeTraverser
基本用法
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
| ( [0] => PhpParser\Node\Stmt\Expression Object ( [expr] => PhpParser\Node\Expr\ErrorSuppress Object ( [expr] => PhpParser\Node\Expr\FuncCall Object ( [name] => PhpParser\Node\Name Object ( [parts] => Array ( [0] => assert )
[attributes:protected] => Array ( [startLine] => 1 [endLine] => 1 )
)
[args] => Array ( [0] => PhpParser\Node\Arg Object ( [name] => [value] => PhpParser\Node\Expr\ArrayDimFetch Object ( [var] => PhpParser\Node\Expr\Variable Object ( [name] => _POST [attributes:protected] => Array ( [startLine] => 1 [endLine] => 1 )
)
[dim] => PhpParser\Node\Scalar\String_ Object ( [value] => shell [attributes:protected] => Array ( [startLine] => 1 [endLine] => 1 [kind] => 1 )
)
[attributes:protected] => Array ( [startLine] => 1 [endLine] => 1 )
)
[byRef] => [unpack] => [attributes:protected] => Array ( [startLine] => 1 [endLine] => 1 )
)
)
[attributes:protected] => Array ( [startLine] => 1 [endLine] => 1 )
)
[attributes:protected] => Array ( [startLine] => 1 [endLine] => 1 )
)
[attributes:protected] => Array ( [startLine] => 1 [endLine] => 1 )
)
)
|
自定义的访问者需要实现PhpParser\NodeVisitor
接口,该接口包含以下四个方法:
1 2 3 4
| public function beforeTraverse(array $nodes); public function enterNode(\PhpParser\Node $node); public function leaveNode(\PhpParser\Node $node); public function afterTraverse(array $nodes);
|