From 699a0b6deb76021e31d7fb3a3d57e40bb072679f Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 03:43:59 -0800 Subject: [PATCH 01/35] fix: update entrypoint collector and enhance FastAPI route detection logic --- .../entrypoint-collector/python-entrypoint.ts | 9 + .../fastapi-entrypoint.ts | 322 ++++++++++++++++++ 2 files changed, 331 insertions(+) create mode 100644 src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts diff --git a/src/engine/analyzer/python/common/entrypoint-collector/python-entrypoint.ts b/src/engine/analyzer/python/common/entrypoint-collector/python-entrypoint.ts index 909f9bc8..56d4383c 100644 --- a/src/engine/analyzer/python/common/entrypoint-collector/python-entrypoint.ts +++ b/src/engine/analyzer/python/common/entrypoint-collector/python-entrypoint.ts @@ -1,4 +1,5 @@ const { findFlaskEntryPointAndSource } = require('../../flask/entrypoint-collector/flask-default-entrypoint') +const { findFastApiEntryPointAndSource } = require('../../fastapi/entrypoint-collector/fastapi-entrypoint') const { findInferenceAiStudioTplEntryPointAndSource, findInferenceTritonEntryPointAndSource, @@ -37,6 +38,14 @@ function findPythonFcEntryPointAndSource(dir: string, fileManager: FileManager): pyFcEntryPointSourceArray.push(...flaskEntryPointSourceArray) } + const { fastApiEntryPointArray, fastApiEntryPointSourceArray } = findFastApiEntryPointAndSource(filenameAstObj, dir) + if (fastApiEntryPointArray) { + pyFcEntryPointArray.push(...fastApiEntryPointArray) + } + if (fastApiEntryPointSourceArray) { + pyFcEntryPointSourceArray.push(...fastApiEntryPointSourceArray) + } + const { inferenceAiStudioTplEntryPointArray, inferenceAiStudioTplEntryPointSourceArray } = findInferenceAiStudioTplEntryPointAndSource(filenameAstObj, dir) if (inferenceAiStudioTplEntryPointArray) { diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts new file mode 100644 index 00000000..eacc0fae --- /dev/null +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -0,0 +1,322 @@ +import { extractRelativePath } from '../../../../../util/file-util' +import * as Constant from '../../../../../util/constant' +import { EntryPoint } from '../../../common/entrypoint' + + +const config = require('../../../../../config') +const { entryPointAndSourceAtSameTime } = config + +const PythonEntrypointSource = require('../../common/entrypoint-collector/python-entrypoint-source') +const { findSourceOfFuncParam } = PythonEntrypointSource + +const EntryPointClass = require('../../../common/entrypoint') + +interface ASTObject { + body?: any[] + [key: string]: any +} + +interface FilenameAstMap { + [filename: string]: ASTObject +} + +interface ValidInstances { + validFastApiInstances: Set + validRouterInstances: Set +} + +interface EntryPointResult { + fastApiEntryPointArray: EntryPoint[] + fastApiEntryPointSourceArray: any[] +} + +const ROUTE_DECORATORS = new Set(['get', 'post', 'put', 'delete', 'patch', 'options', 'head', 'route']) + +/** + * + * @param node + * @returns + */ +function extractLiteralString(node: any): string | null { + if (!node) return null + if (node.type === 'Literal' && typeof node.value === 'string') { + return node.value + } + return null +} + +/** + * + * @param route + * @returns + */ +function extractRouteParams(route: string | null): string[] { + if (!route) return [] + const regex = /\{(.*?)\}/g + const params: string[] = [] + let match: RegExpExecArray | null + while ((match = regex.exec(route)) !== null) { + const name = match[1].split(':').pop() + if (name) params.push(name) + } + return params +} + +/** + * + * @param obj + * @returns + */ +function extractVarNameAndInit(obj: any): { varName?: string; init?: any } | null { + try { + if (obj.type === 'AssignmentExpression' && obj.operator === '=') { + if (obj.left?.type === 'Identifier') { + return { varName: obj.left.name, init: obj.right } + } + } + } catch (error) { + } + return null +} + +/** + * + * @param body + * @returns + */ +function analyzeImports(body: any[]): Map { + const map = new Map() + if (!Array.isArray(body)) return map + + for (const obj of body) { + if (!obj || typeof obj !== 'object') continue + + if (obj.type === 'VariableDeclaration' && obj.init?.type === 'ImportExpression') { + const importExpr = obj.init + const localName = obj.id?.name + if (!localName) continue + + const fromValue = extractLiteralString(importExpr.from) + const importedName = importExpr.imported?.name // Identifier + + if (fromValue) { + // from ... import ... + if (fromValue === 'fastapi' || fromValue.startsWith('fastapi.')) { + if (importedName) { + // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' + // (case: fastapi.applications) + map.set(localName, `fastapi.${importedName}`) + } + } + } else if (importedName === 'fastapi') { + // import fastapi + map.set(localName, 'fastapi') + } + } + } + return map +} + +/** + * + * @param node + * @param importMap + * @returns + */ +function resolveCanonicalName(node: any, importMap: Map): string | null { + if (!node) return null + if (node.type === 'Identifier') { + return importMap.get(node.name) || null + } + if (node.type === 'MemberAccess') { + const objectCanonical = resolveCanonicalName(node.object, importMap) + const propertyName = node.property?.name + if (objectCanonical && propertyName) { + return `${objectCanonical}.${propertyName}` + } + } + return null +} + +/** + * + * @param body + * @param importMap + * @returns + */ +function collectValidInstances(body: any[], importMap: Map): ValidInstances { + const validFastApiInstances = new Set() + const validRouterInstances = new Set() + + for (const obj of body) { + if (!obj || typeof obj !== 'object') continue + + // Only process AssignmentExpression + if (obj.type === 'AssignmentExpression' && obj.operator === '=') { + const varInfo = extractVarNameAndInit(obj) + if (!varInfo?.varName || !varInfo.init) continue + + if (varInfo.init.type === 'CallExpression') { + const canonical = resolveCanonicalName(varInfo.init.callee, importMap) + if (canonical === 'fastapi.FastAPI') { + validFastApiInstances.add(varInfo.varName) + } else if (canonical === 'fastapi.APIRouter') { + validRouterInstances.add(varInfo.varName) + } + } + } + } + return { validFastApiInstances, validRouterInstances } +} + +/** + * + * @param deco + * @param funcName + * @param obj + * @param relativeFile + * @param filename + * @param validInstances + * @param entryPoints + * @param entryPointSources + */ +function processDecorator( + deco: any, + funcName: string, + obj: any, + relativeFile: string, + filename: string, + validInstances: ValidInstances, + entryPoints: EntryPoint[], + entryPointSources: any[] +): void { + if (!deco || deco.type !== 'CallExpression') return + const { callee } = deco + + if (!callee || callee.type !== 'MemberAccess') return + + const methodName = callee.property?.name + if (!methodName || !ROUTE_DECORATORS.has(methodName)) return + + // Get router or app name + let routerName = '' + if (callee.object?.type === 'Identifier') { + routerName = callee.object.name + } + + // Validate router/app + const { validFastApiInstances, validRouterInstances } = validInstances + const isValidRouter = validFastApiInstances.has(routerName) || validRouterInstances.has(routerName) + + if (!isValidRouter) return + + // Create entrypoint + const routePath = extractLiteralString(deco.arguments?.[0]) + const params = extractRouteParams(routePath) + + const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL) + entryPoint.filePath = relativeFile + entryPoint.functionName = funcName + entryPoint.attribute = 'HTTP' + + entryPoints.push(entryPoint) + + if (entryPointAndSourceAtSameTime) { + const paramSources = findSourceOfFuncParam(relativeFile, funcName, obj, undefined) + + if (filename !== relativeFile) { + const extra = findSourceOfFuncParam(filename, funcName, obj, undefined) + if (extra?.length) entryPointSources.push(...extra) + } + + if (paramSources) { + entryPointSources.push(...paramSources) + const allScopeSources = paramSources.map((s: any) => ({ ...s, scopeFile: 'all' })) + entryPointSources.push(...allScopeSources) + } + + if (params.length && Array.isArray(obj.parameters)) { + for (const p of obj.parameters) { + const pn = p.id?.name + if (pn && params.includes(pn)) { + entryPointSources.push({ + introPoint: 4, + kind: 'PYTHON_INPUT', + path: pn, + scopeFile: 'all', + scopeFunc: funcName, + locStart: p.loc?.start?.line, + locEnd: p.loc?.end?.line, + locColumnStart: p.loc?.start?.column, + locColumnEnd: p.loc?.end?.column, + }) + } + } + } + } +} + +/** + * + * @param filenameAstObj + * @param dir + * @returns + */ +function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: string): EntryPointResult { + const entryPoints: EntryPoint[] = [] + const entryPointSources: any[] = [] + + for (const filename in filenameAstObj) { + if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) continue + const fileObj = filenameAstObj[filename] + if (!fileObj?.body) continue + + // Calculate relative path + const { body } = fileObj + const relativeFile = filename.startsWith(dir) ? extractRelativePath(filename, dir) : filename + + if (!relativeFile) continue + + const importMap = analyzeImports(body) + + let hasFastApiImport = false + for (const val of importMap.values()) { + if (val === 'fastapi' || val.startsWith('fastapi.')) { + hasFastApiImport = true + break + } + } + if (!hasFastApiImport) continue + + const validInstances = collectValidInstances(body, importMap) + + for (const obj of body) { + if (!obj || typeof obj !== 'object') continue + + if (obj.type === 'FunctionDefinition' && obj._meta?.decorators && obj.id?.name) { + const funcName = obj.id.name + const { decorators } = obj._meta + + for (const deco of decorators) { + processDecorator( + deco, + funcName, + obj, + relativeFile, + filename, + validInstances, + entryPoints, + entryPointSources + ) + } + } + } + } + + return { + fastApiEntryPointArray: entryPoints, + fastApiEntryPointSourceArray: entryPointSources, + } +} + +export = { findFastApiEntryPointAndSource } From 86f34736303a6717a7bb07a30884d54d987b58f3 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 05:13:47 -0800 Subject: [PATCH 02/35] fix: update fastapi entrypoint --- .../fastapi-entrypoint.ts | 323 ++++++++++-------- 1 file changed, 190 insertions(+), 133 deletions(-) diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts index eacc0fae..8b395de6 100644 --- a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -1,36 +1,44 @@ -import { extractRelativePath } from '../../../../../util/file-util' -import * as Constant from '../../../../../util/constant' -import { EntryPoint } from '../../../common/entrypoint' +import { extractRelativePath } from "../../../../../util/file-util"; +import * as Constant from "../../../../../util/constant"; +import { EntryPoint } from "../../../common/entrypoint"; +const config = require("../../../../../config"); +const { entryPointAndSourceAtSameTime } = config; -const config = require('../../../../../config') -const { entryPointAndSourceAtSameTime } = config +const PythonEntrypointSource = require("../../common/entrypoint-collector/python-entrypoint-source"); +const { findSourceOfFuncParam } = PythonEntrypointSource; -const PythonEntrypointSource = require('../../common/entrypoint-collector/python-entrypoint-source') -const { findSourceOfFuncParam } = PythonEntrypointSource - -const EntryPointClass = require('../../../common/entrypoint') +const EntryPointClass = require("../../../common/entrypoint"); interface ASTObject { - body?: any[] - [key: string]: any + body?: any[]; + [key: string]: any; } interface FilenameAstMap { - [filename: string]: ASTObject + [filename: string]: ASTObject; } interface ValidInstances { - validFastApiInstances: Set - validRouterInstances: Set + validFastApiInstances: Set; + validRouterInstances: Set; } interface EntryPointResult { - fastApiEntryPointArray: EntryPoint[] - fastApiEntryPointSourceArray: any[] + fastApiEntryPointArray: EntryPoint[]; + fastApiEntryPointSourceArray: any[]; } -const ROUTE_DECORATORS = new Set(['get', 'post', 'put', 'delete', 'patch', 'options', 'head', 'route']) +const ROUTE_DECORATORS = new Set([ + "get", + "post", + "put", + "delete", + "patch", + "options", + "head", + "route", +]); /** * @@ -38,11 +46,11 @@ const ROUTE_DECORATORS = new Set(['get', 'post', 'put', 'delete', 'patch', 'opti * @returns */ function extractLiteralString(node: any): string | null { - if (!node) return null - if (node.type === 'Literal' && typeof node.value === 'string') { - return node.value + if (!node) return null; + if (node.type === "Literal" && typeof node.value === "string") { + return node.value; } - return null + return null; } /** @@ -51,15 +59,15 @@ function extractLiteralString(node: any): string | null { * @returns */ function extractRouteParams(route: string | null): string[] { - if (!route) return [] - const regex = /\{(.*?)\}/g - const params: string[] = [] - let match: RegExpExecArray | null + if (!route) return []; + const regex = /\{(.*?)\}/g; + const params: string[] = []; + let match: RegExpExecArray | null; while ((match = regex.exec(route)) !== null) { - const name = match[1].split(':').pop() - if (name) params.push(name) + const name = match[1].split(":").pop(); + if (name) params.push(name); } - return params + return params; } /** @@ -67,16 +75,17 @@ function extractRouteParams(route: string | null): string[] { * @param obj * @returns */ -function extractVarNameAndInit(obj: any): { varName?: string; init?: any } | null { +function extractVarNameAndInit( + obj: any, +): { varName?: string; init?: any } | null { try { - if (obj.type === 'AssignmentExpression' && obj.operator === '=') { - if (obj.left?.type === 'Identifier') { - return { varName: obj.left.name, init: obj.right } + if (obj.type === "AssignmentExpression" && obj.operator === "=") { + if (obj.left?.type === "Identifier") { + return { varName: obj.left.name, init: obj.right }; } } - } catch (error) { - } - return null + } catch (error) { } + return null; } /** @@ -85,36 +94,51 @@ function extractVarNameAndInit(obj: any): { varName?: string; init?: any } | nul * @returns */ function analyzeImports(body: any[]): Map { - const map = new Map() - if (!Array.isArray(body)) return map + const map = new Map(); + if (!Array.isArray(body)) return map; for (const obj of body) { - if (!obj || typeof obj !== 'object') continue + if (!obj || typeof obj !== "object") continue; - if (obj.type === 'VariableDeclaration' && obj.init?.type === 'ImportExpression') { - const importExpr = obj.init - const localName = obj.id?.name - if (!localName) continue + if ( + obj.type === "VariableDeclaration" && + obj.init?.type === "ImportExpression" + ) { + const importExpr = obj.init; + const localName = obj.id?.name; + if (!localName) continue; - const fromValue = extractLiteralString(importExpr.from) - const importedName = importExpr.imported?.name // Identifier + const fromValue = extractLiteralString(importExpr.from); + const importedName = importExpr.imported?.name; // Identifier if (fromValue) { // from ... import ... - if (fromValue === 'fastapi' || fromValue.startsWith('fastapi.')) { + if (fromValue === "fastapi" || fromValue.startsWith("fastapi.")) { if (importedName) { // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' // (case: fastapi.applications) - map.set(localName, `fastapi.${importedName}`) + map.set(localName, `fastapi.${importedName}`); } } - } else if (importedName === 'fastapi') { - // import fastapi - map.set(localName, 'fastapi') + } else if ( + importedName === "fastapi" || + importedName === "fastapi.applications" || + importedName === "fastapi.routing" || + importedName?.startsWith("fastapi.") + ) { + // import fastapi or import fastapi.applications + if ( + importedName === localName || + importedName.startsWith(`${localName}.`) + ) { + map.set(localName, localName); + } else { + map.set(localName, importedName); + } } } } - return map + return map; } /** @@ -123,19 +147,22 @@ function analyzeImports(body: any[]): Map { * @param importMap * @returns */ -function resolveCanonicalName(node: any, importMap: Map): string | null { - if (!node) return null - if (node.type === 'Identifier') { - return importMap.get(node.name) || null +function resolveCanonicalName( + node: any, + importMap: Map, +): string | null { + if (!node) return null; + if (node.type === "Identifier") { + return importMap.get(node.name) || null; } - if (node.type === 'MemberAccess') { - const objectCanonical = resolveCanonicalName(node.object, importMap) - const propertyName = node.property?.name + if (node.type === "MemberAccess") { + const objectCanonical = resolveCanonicalName(node.object, importMap); + const propertyName = node.property?.name; if (objectCanonical && propertyName) { - return `${objectCanonical}.${propertyName}` + return `${objectCanonical}.${propertyName}`; } } - return null + return null; } /** @@ -144,29 +171,38 @@ function resolveCanonicalName(node: any, importMap: Map): string * @param importMap * @returns */ -function collectValidInstances(body: any[], importMap: Map): ValidInstances { - const validFastApiInstances = new Set() - const validRouterInstances = new Set() +function collectValidInstances( + body: any[], + importMap: Map, +): ValidInstances { + const validFastApiInstances = new Set(); + const validRouterInstances = new Set(); for (const obj of body) { - if (!obj || typeof obj !== 'object') continue + if (!obj || typeof obj !== "object") continue; // Only process AssignmentExpression - if (obj.type === 'AssignmentExpression' && obj.operator === '=') { - const varInfo = extractVarNameAndInit(obj) - if (!varInfo?.varName || !varInfo.init) continue - - if (varInfo.init.type === 'CallExpression') { - const canonical = resolveCanonicalName(varInfo.init.callee, importMap) - if (canonical === 'fastapi.FastAPI') { - validFastApiInstances.add(varInfo.varName) - } else if (canonical === 'fastapi.APIRouter') { - validRouterInstances.add(varInfo.varName) + if (obj.type === "AssignmentExpression" && obj.operator === "=") { + const varInfo = extractVarNameAndInit(obj); + if (!varInfo?.varName || !varInfo.init) continue; + + if (varInfo.init.type === "CallExpression") { + const canonical = resolveCanonicalName(varInfo.init.callee, importMap); + if ( + canonical === "fastapi.FastAPI" || + canonical === "fastapi.applications.FastAPI" + ) { + validFastApiInstances.add(varInfo.varName); + } else if ( + canonical === "fastapi.APIRouter" || + canonical === "fastapi.routing.APIRouter" + ) { + validRouterInstances.add(varInfo.varName); } } } } - return { validFastApiInstances, validRouterInstances } + return { validFastApiInstances, validRouterInstances }; } /** @@ -188,68 +224,72 @@ function processDecorator( filename: string, validInstances: ValidInstances, entryPoints: EntryPoint[], - entryPointSources: any[] + entryPointSources: any[], ): void { - if (!deco || deco.type !== 'CallExpression') return - const { callee } = deco + if (!deco || deco.type !== "CallExpression") return; + const { callee } = deco; - if (!callee || callee.type !== 'MemberAccess') return + if (!callee || callee.type !== "MemberAccess") return; - const methodName = callee.property?.name - if (!methodName || !ROUTE_DECORATORS.has(methodName)) return + const methodName = callee.property?.name; + if (!methodName || !ROUTE_DECORATORS.has(methodName)) return; // Get router or app name - let routerName = '' - if (callee.object?.type === 'Identifier') { - routerName = callee.object.name + let routerName = ""; + if (callee.object?.type === "Identifier") { + routerName = callee.object.name; } // Validate router/app - const { validFastApiInstances, validRouterInstances } = validInstances - const isValidRouter = validFastApiInstances.has(routerName) || validRouterInstances.has(routerName) + const { validFastApiInstances, validRouterInstances } = validInstances; + const isValidRouter = + validFastApiInstances.has(routerName) || + validRouterInstances.has(routerName); - if (!isValidRouter) return + if (!isValidRouter) return; // Create entrypoint - const routePath = extractLiteralString(deco.arguments?.[0]) - const params = extractRouteParams(routePath) + const routePath = extractLiteralString(deco.arguments?.[0]); + const params = extractRouteParams(routePath); - const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL) - entryPoint.filePath = relativeFile - entryPoint.functionName = funcName - entryPoint.attribute = 'HTTP' + const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL); + entryPoint.filePath = relativeFile; + entryPoint.functionName = funcName; + entryPoint.attribute = "HTTP"; - entryPoints.push(entryPoint) + entryPoints.push(entryPoint); if (entryPointAndSourceAtSameTime) { - const paramSources = findSourceOfFuncParam(relativeFile, funcName, obj, undefined) - - if (filename !== relativeFile) { - const extra = findSourceOfFuncParam(filename, funcName, obj, undefined) - if (extra?.length) entryPointSources.push(...extra) - } + const paramSources = findSourceOfFuncParam( + relativeFile, + funcName, + obj, + undefined, + ); if (paramSources) { - entryPointSources.push(...paramSources) - const allScopeSources = paramSources.map((s: any) => ({ ...s, scopeFile: 'all' })) - entryPointSources.push(...allScopeSources) + const allScopeSources = paramSources.map((s: any) => ({ + ...s, + scopeFile: "all", + })); + entryPointSources.push(...allScopeSources); } if (params.length && Array.isArray(obj.parameters)) { for (const p of obj.parameters) { - const pn = p.id?.name + const pn = p.id?.name; if (pn && params.includes(pn)) { entryPointSources.push({ introPoint: 4, - kind: 'PYTHON_INPUT', + kind: "PYTHON_INPUT", path: pn, - scopeFile: 'all', + scopeFile: "all", scopeFunc: funcName, locStart: p.loc?.start?.line, locEnd: p.loc?.end?.line, locColumnStart: p.loc?.start?.column, locColumnEnd: p.loc?.end?.column, - }) + }); } } } @@ -262,40 +302,57 @@ function processDecorator( * @param dir * @returns */ -function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: string): EntryPointResult { - const entryPoints: EntryPoint[] = [] - const entryPointSources: any[] = [] +function findFastApiEntryPointAndSource( + filenameAstObj: FilenameAstMap, + dir: string, +): EntryPointResult { + const entryPoints: EntryPoint[] = []; + const entryPointSources: any[] = []; for (const filename in filenameAstObj) { - if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) continue - const fileObj = filenameAstObj[filename] - if (!fileObj?.body) continue + if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) + continue; + const fileObj = filenameAstObj[filename]; + if (!fileObj?.body) continue; // Calculate relative path - const { body } = fileObj - const relativeFile = filename.startsWith(dir) ? extractRelativePath(filename, dir) : filename - - if (!relativeFile) continue - - const importMap = analyzeImports(body) - - let hasFastApiImport = false + const { body } = fileObj; + const relativeFile = filename.startsWith(dir) + ? extractRelativePath(filename, dir) + : filename; + + if (!relativeFile) continue; + + const importMap = analyzeImports(body); + + const validImports = new Set([ + "fastapi", + "fastapi.FastAPI", + "fastapi.APIRouter", + "fastapi.applications", + "fastapi.routing", + ]); + let hasFastApiImport = false; for (const val of importMap.values()) { - if (val === 'fastapi' || val.startsWith('fastapi.')) { - hasFastApiImport = true - break + if (validImports.has(val)) { + hasFastApiImport = true; + break; } } - if (!hasFastApiImport) continue + if (!hasFastApiImport) continue; - const validInstances = collectValidInstances(body, importMap) + const validInstances = collectValidInstances(body, importMap); for (const obj of body) { - if (!obj || typeof obj !== 'object') continue + if (!obj || typeof obj !== "object") continue; - if (obj.type === 'FunctionDefinition' && obj._meta?.decorators && obj.id?.name) { - const funcName = obj.id.name - const { decorators } = obj._meta + if ( + obj.type === "FunctionDefinition" && + obj._meta?.decorators && + obj.id?.name + ) { + const funcName = obj.id.name; + const { decorators } = obj._meta; for (const deco of decorators) { processDecorator( @@ -306,8 +363,8 @@ function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: str filename, validInstances, entryPoints, - entryPointSources - ) + entryPointSources, + ); } } } @@ -316,7 +373,7 @@ function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: str return { fastApiEntryPointArray: entryPoints, fastApiEntryPointSourceArray: entryPointSources, - } + }; } -export = { findFastApiEntryPointAndSource } +export = { findFastApiEntryPointAndSource }; From 812b3a0de4634b337848e4c3c77e626588f58bab Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 05:41:13 -0800 Subject: [PATCH 03/35] fix: update fastapi --- .../fastapi-entrypoint.ts | 517 +++++++++--------- 1 file changed, 259 insertions(+), 258 deletions(-) diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts index 8b395de6..30416a85 100644 --- a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -11,33 +11,34 @@ const { findSourceOfFuncParam } = PythonEntrypointSource; const EntryPointClass = require("../../../common/entrypoint"); interface ASTObject { - body?: any[]; - [key: string]: any; + body?: any[]; + + [key: string]: any; } interface FilenameAstMap { - [filename: string]: ASTObject; + [filename: string]: ASTObject; } interface ValidInstances { - validFastApiInstances: Set; - validRouterInstances: Set; + validFastApiInstances: Set; + validRouterInstances: Set; } interface EntryPointResult { - fastApiEntryPointArray: EntryPoint[]; - fastApiEntryPointSourceArray: any[]; + fastApiEntryPointArray: EntryPoint[]; + fastApiEntryPointSourceArray: any[]; } const ROUTE_DECORATORS = new Set([ - "get", - "post", - "put", - "delete", - "patch", - "options", - "head", - "route", + "get", + "post", + "put", + "delete", + "patch", + "options", + "head", + "route", ]); /** @@ -46,11 +47,11 @@ const ROUTE_DECORATORS = new Set([ * @returns */ function extractLiteralString(node: any): string | null { - if (!node) return null; - if (node.type === "Literal" && typeof node.value === "string") { - return node.value; - } - return null; + if (!node) return null; + if (node.type === "Literal" && typeof node.value === "string") { + return node.value; + } + return null; } /** @@ -59,15 +60,15 @@ function extractLiteralString(node: any): string | null { * @returns */ function extractRouteParams(route: string | null): string[] { - if (!route) return []; - const regex = /\{(.*?)\}/g; - const params: string[] = []; - let match: RegExpExecArray | null; - while ((match = regex.exec(route)) !== null) { - const name = match[1].split(":").pop(); - if (name) params.push(name); - } - return params; + if (!route) return []; + const regex = /\{(.*?)\}/g; + const params: string[] = []; + let match: RegExpExecArray | null; + while ((match = regex.exec(route)) !== null) { + const name = match[1].split(":").pop(); + if (name) params.push(name); + } + return params; } /** @@ -76,16 +77,16 @@ function extractRouteParams(route: string | null): string[] { * @returns */ function extractVarNameAndInit( - obj: any, + obj: any, ): { varName?: string; init?: any } | null { - try { - if (obj.type === "AssignmentExpression" && obj.operator === "=") { - if (obj.left?.type === "Identifier") { - return { varName: obj.left.name, init: obj.right }; - } - } - } catch (error) { } - return null; + try { + if (obj.type === "AssignmentExpression" && obj.operator === "=") { + if (obj.left?.type === "Identifier") { + return { varName: obj.left.name, init: obj.right }; + } + } + } catch (error) {} + return null; } /** @@ -94,51 +95,51 @@ function extractVarNameAndInit( * @returns */ function analyzeImports(body: any[]): Map { - const map = new Map(); - if (!Array.isArray(body)) return map; - - for (const obj of body) { - if (!obj || typeof obj !== "object") continue; - + const map = new Map(); + if (!Array.isArray(body)) return map; + + for (const obj of body) { + if (!obj || typeof obj !== "object") continue; + + if ( + obj.type === "VariableDeclaration" && + obj.init?.type === "ImportExpression" + ) { + const importExpr = obj.init; + const localName = obj.id?.name; + if (!localName) continue; + + const fromValue = extractLiteralString(importExpr.from); + const importedName = importExpr.imported?.name; // Identifier + + if (fromValue) { + // from ... import ... + if (fromValue === "fastapi" || fromValue.startsWith("fastapi.")) { + if (importedName) { + // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' + // (case: fastapi.applications) + map.set(localName, `fastapi.${importedName}`); + } + } + } else if ( + importedName === "fastapi" || + importedName === "fastapi.applications" || + importedName === "fastapi.routing" || + importedName?.startsWith("fastapi.") + ) { + // import fastapi or import fastapi.applications if ( - obj.type === "VariableDeclaration" && - obj.init?.type === "ImportExpression" + importedName === localName || + importedName.startsWith(`${localName}.`) ) { - const importExpr = obj.init; - const localName = obj.id?.name; - if (!localName) continue; - - const fromValue = extractLiteralString(importExpr.from); - const importedName = importExpr.imported?.name; // Identifier - - if (fromValue) { - // from ... import ... - if (fromValue === "fastapi" || fromValue.startsWith("fastapi.")) { - if (importedName) { - // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' - // (case: fastapi.applications) - map.set(localName, `fastapi.${importedName}`); - } - } - } else if ( - importedName === "fastapi" || - importedName === "fastapi.applications" || - importedName === "fastapi.routing" || - importedName?.startsWith("fastapi.") - ) { - // import fastapi or import fastapi.applications - if ( - importedName === localName || - importedName.startsWith(`${localName}.`) - ) { - map.set(localName, localName); - } else { - map.set(localName, importedName); - } - } + map.set(localName, localName); + } else { + map.set(localName, importedName); } + } } - return map; + } + return map; } /** @@ -148,21 +149,21 @@ function analyzeImports(body: any[]): Map { * @returns */ function resolveCanonicalName( - node: any, - importMap: Map, + node: any, + importMap: Map, ): string | null { - if (!node) return null; - if (node.type === "Identifier") { - return importMap.get(node.name) || null; + if (!node) return null; + if (node.type === "Identifier") { + return importMap.get(node.name) || null; + } + if (node.type === "MemberAccess") { + const objectCanonical = resolveCanonicalName(node.object, importMap); + const propertyName = node.property?.name; + if (objectCanonical && propertyName) { + return `${objectCanonical}.${propertyName}`; } - if (node.type === "MemberAccess") { - const objectCanonical = resolveCanonicalName(node.object, importMap); - const propertyName = node.property?.name; - if (objectCanonical && propertyName) { - return `${objectCanonical}.${propertyName}`; - } - } - return null; + } + return null; } /** @@ -172,37 +173,37 @@ function resolveCanonicalName( * @returns */ function collectValidInstances( - body: any[], - importMap: Map, + body: any[], + importMap: Map, ): ValidInstances { - const validFastApiInstances = new Set(); - const validRouterInstances = new Set(); + const validFastApiInstances = new Set(); + const validRouterInstances = new Set(); - for (const obj of body) { - if (!obj || typeof obj !== "object") continue; - - // Only process AssignmentExpression - if (obj.type === "AssignmentExpression" && obj.operator === "=") { - const varInfo = extractVarNameAndInit(obj); - if (!varInfo?.varName || !varInfo.init) continue; - - if (varInfo.init.type === "CallExpression") { - const canonical = resolveCanonicalName(varInfo.init.callee, importMap); - if ( - canonical === "fastapi.FastAPI" || - canonical === "fastapi.applications.FastAPI" - ) { - validFastApiInstances.add(varInfo.varName); - } else if ( - canonical === "fastapi.APIRouter" || - canonical === "fastapi.routing.APIRouter" - ) { - validRouterInstances.add(varInfo.varName); - } - } + for (const obj of body) { + if (!obj || typeof obj !== "object") continue; + + // Only process AssignmentExpression + if (obj.type === "AssignmentExpression" && obj.operator === "=") { + const varInfo = extractVarNameAndInit(obj); + if (!varInfo?.varName || !varInfo.init) continue; + + if (varInfo.init.type === "CallExpression") { + const canonical = resolveCanonicalName(varInfo.init.callee, importMap); + if ( + canonical === "fastapi.FastAPI" || + canonical === "fastapi.applications.FastAPI" + ) { + validFastApiInstances.add(varInfo.varName); + } else if ( + canonical === "fastapi.APIRouter" || + canonical === "fastapi.routing.APIRouter" + ) { + validRouterInstances.add(varInfo.varName); } + } } - return { validFastApiInstances, validRouterInstances }; + } + return { validFastApiInstances, validRouterInstances }; } /** @@ -217,83 +218,83 @@ function collectValidInstances( * @param entryPointSources */ function processDecorator( - deco: any, - funcName: string, - obj: any, - relativeFile: string, - filename: string, - validInstances: ValidInstances, - entryPoints: EntryPoint[], - entryPointSources: any[], + deco: any, + funcName: string, + obj: any, + relativeFile: string, + filename: string, + validInstances: ValidInstances, + entryPoints: EntryPoint[], + entryPointSources: any[], ): void { - if (!deco || deco.type !== "CallExpression") return; - const { callee } = deco; - - if (!callee || callee.type !== "MemberAccess") return; - - const methodName = callee.property?.name; - if (!methodName || !ROUTE_DECORATORS.has(methodName)) return; - - // Get router or app name - let routerName = ""; - if (callee.object?.type === "Identifier") { - routerName = callee.object.name; + if (!deco || deco.type !== "CallExpression") return; + const { callee } = deco; + + if (!callee || callee.type !== "MemberAccess") return; + + const methodName = callee.property?.name; + if (!methodName || !ROUTE_DECORATORS.has(methodName)) return; + + // Get router or app name + let routerName = ""; + if (callee.object?.type === "Identifier") { + routerName = callee.object.name; + } + + // Validate router/app + const { validFastApiInstances, validRouterInstances } = validInstances; + const isValidRouter = + validFastApiInstances.has(routerName) || + validRouterInstances.has(routerName); + + if (!isValidRouter) return; + + // Create entrypoint + const routePath = extractLiteralString(deco.arguments?.[0]); + const params = extractRouteParams(routePath); + + const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL); + entryPoint.filePath = relativeFile; + entryPoint.functionName = funcName; + entryPoint.attribute = "HTTP"; + + entryPoints.push(entryPoint); + + if (entryPointAndSourceAtSameTime) { + const paramSources = findSourceOfFuncParam( + relativeFile, + funcName, + obj, + undefined, + ); + + if (paramSources) { + const allScopeSources = paramSources.map((s: any) => ({ + ...s, + scopeFile: "all", + })); + entryPointSources.push(...allScopeSources); } - // Validate router/app - const { validFastApiInstances, validRouterInstances } = validInstances; - const isValidRouter = - validFastApiInstances.has(routerName) || - validRouterInstances.has(routerName); - - if (!isValidRouter) return; - - // Create entrypoint - const routePath = extractLiteralString(deco.arguments?.[0]); - const params = extractRouteParams(routePath); - - const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL); - entryPoint.filePath = relativeFile; - entryPoint.functionName = funcName; - entryPoint.attribute = "HTTP"; - - entryPoints.push(entryPoint); - - if (entryPointAndSourceAtSameTime) { - const paramSources = findSourceOfFuncParam( - relativeFile, - funcName, - obj, - undefined, - ); - - if (paramSources) { - const allScopeSources = paramSources.map((s: any) => ({ - ...s, - scopeFile: "all", - })); - entryPointSources.push(...allScopeSources); - } - - if (params.length && Array.isArray(obj.parameters)) { - for (const p of obj.parameters) { - const pn = p.id?.name; - if (pn && params.includes(pn)) { - entryPointSources.push({ - introPoint: 4, - kind: "PYTHON_INPUT", - path: pn, - scopeFile: "all", - scopeFunc: funcName, - locStart: p.loc?.start?.line, - locEnd: p.loc?.end?.line, - locColumnStart: p.loc?.start?.column, - locColumnEnd: p.loc?.end?.column, - }); - } - } + if (params.length && Array.isArray(obj.parameters)) { + for (const p of obj.parameters) { + const pn = p.id?.name; + if (pn && params.includes(pn)) { + entryPointSources.push({ + introPoint: 4, + kind: "PYTHON_INPUT", + path: pn, + scopeFile: "all", + scopeFunc: funcName, + locStart: p.loc?.start?.line, + locEnd: p.loc?.end?.line, + locColumnStart: p.loc?.start?.column, + locColumnEnd: p.loc?.end?.column, + }); } + } } + } } /** @@ -303,77 +304,77 @@ function processDecorator( * @returns */ function findFastApiEntryPointAndSource( - filenameAstObj: FilenameAstMap, - dir: string, + filenameAstObj: FilenameAstMap, + dir: string, ): EntryPointResult { - const entryPoints: EntryPoint[] = []; - const entryPointSources: any[] = []; - - for (const filename in filenameAstObj) { - if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) - continue; - const fileObj = filenameAstObj[filename]; - if (!fileObj?.body) continue; - - // Calculate relative path - const { body } = fileObj; - const relativeFile = filename.startsWith(dir) - ? extractRelativePath(filename, dir) - : filename; - - if (!relativeFile) continue; - - const importMap = analyzeImports(body); - - const validImports = new Set([ - "fastapi", - "fastapi.FastAPI", - "fastapi.APIRouter", - "fastapi.applications", - "fastapi.routing", - ]); - let hasFastApiImport = false; - for (const val of importMap.values()) { - if (validImports.has(val)) { - hasFastApiImport = true; - break; - } - } - if (!hasFastApiImport) continue; - - const validInstances = collectValidInstances(body, importMap); - - for (const obj of body) { - if (!obj || typeof obj !== "object") continue; - - if ( - obj.type === "FunctionDefinition" && - obj._meta?.decorators && - obj.id?.name - ) { - const funcName = obj.id.name; - const { decorators } = obj._meta; - - for (const deco of decorators) { - processDecorator( - deco, - funcName, - obj, - relativeFile, - filename, - validInstances, - entryPoints, - entryPointSources, - ); - } - } + const entryPoints: EntryPoint[] = []; + const entryPointSources: any[] = []; + + for (const filename in filenameAstObj) { + if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) + continue; + const fileObj = filenameAstObj[filename]; + if (!fileObj?.body) continue; + + // Calculate relative path + const { body } = fileObj; + const relativeFile = filename.startsWith(dir) + ? extractRelativePath(filename, dir) + : filename; + + if (!relativeFile) continue; + + const importMap = analyzeImports(body); + + const validImports = new Set([ + "fastapi", + "fastapi.FastAPI", + "fastapi.APIRouter", + "fastapi.applications", + "fastapi.routing", + ]); + let hasFastApiImport = false; + for (const val of importMap.values()) { + if (validImports.has(val)) { + hasFastApiImport = true; + break; + } + } + if (!hasFastApiImport) continue; + + const validInstances = collectValidInstances(body, importMap); + + for (const obj of body) { + if (!obj || typeof obj !== "object") continue; + + if ( + obj.type === "FunctionDefinition" && + obj._meta?.decorators && + obj.id?.name + ) { + const funcName = obj.id.name; + const { decorators } = obj._meta; + + for (const deco of decorators) { + processDecorator( + deco, + funcName, + obj, + relativeFile, + filename, + validInstances, + entryPoints, + entryPointSources, + ); } + } } + } - return { - fastApiEntryPointArray: entryPoints, - fastApiEntryPointSourceArray: entryPointSources, - }; + return { + fastApiEntryPointArray: entryPoints, + fastApiEntryPointSourceArray: entryPointSources, + }; } export = { findFastApiEntryPointAndSource }; From 9d11215486b9b572b7a94180262f5053e2ed3d8e Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 19:46:16 -0800 Subject: [PATCH 04/35] fix: resolve FastAPI route parsing issues --- .../fastapi-entrypoint.ts | 417 +++++++----------- 1 file changed, 156 insertions(+), 261 deletions(-) diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts index 30416a85..de23f3d2 100644 --- a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -1,371 +1,266 @@ -import { extractRelativePath } from "../../../../../util/file-util"; -import * as Constant from "../../../../../util/constant"; -import { EntryPoint } from "../../../common/entrypoint"; +import { extractRelativePath } from '../../../../../util/file-util' +import * as Constant from '../../../../../util/constant' +import type { EntryPoint } from '../../../common/entrypoint' -const config = require("../../../../../config"); -const { entryPointAndSourceAtSameTime } = config; +// eslint-disable-next-line @typescript-eslint/no-var-requires +const config = require('../../../../../config') +// eslint-disable-next-line @typescript-eslint/no-var-requires +const PythonEntrypointSource = require('../../common/entrypoint-collector/python-entrypoint-source') +// eslint-disable-next-line @typescript-eslint/no-var-requires +const EntryPointClass = require('../../../common/entrypoint') -const PythonEntrypointSource = require("../../common/entrypoint-collector/python-entrypoint-source"); -const { findSourceOfFuncParam } = PythonEntrypointSource; - -const EntryPointClass = require("../../../common/entrypoint"); +const { entryPointAndSourceAtSameTime } = config +const { findSourceOfFuncParam } = PythonEntrypointSource interface ASTObject { - body?: any[]; - - [key: string]: any; + body?: any[] + [key: string]: any } interface FilenameAstMap { - [filename: string]: ASTObject; + [filename: string]: ASTObject } interface ValidInstances { - validFastApiInstances: Set; - validRouterInstances: Set; + validFastApiInstances: Set + validRouterInstances: Set } interface EntryPointResult { - fastApiEntryPointArray: EntryPoint[]; - fastApiEntryPointSourceArray: any[]; + fastApiEntryPointArray: EntryPoint[] + fastApiEntryPointSourceArray: any[] } -const ROUTE_DECORATORS = new Set([ - "get", - "post", - "put", - "delete", - "patch", - "options", - "head", - "route", -]); +const ROUTE_DECORATORS = new Set(['get', 'post', 'put', 'delete', 'patch', 'options', 'head', 'route']) /** - * - * @param node - * @returns + * Extracts literal string value. + * @param node AST node + * @returns {string | null} String value or null */ function extractLiteralString(node: any): string | null { - if (!node) return null; - if (node.type === "Literal" && typeof node.value === "string") { - return node.value; + if (!node) return null + if (node.type === 'Literal' && typeof node.value === 'string') { + return node.value } - return null; + return null } /** - * - * @param route - * @returns + * Extracts variable name and init expression. + * @param obj AST node + * @returns {{ varName?: string; init?: any } | null} Variable info or null */ -function extractRouteParams(route: string | null): string[] { - if (!route) return []; - const regex = /\{(.*?)\}/g; - const params: string[] = []; - let match: RegExpExecArray | null; - while ((match = regex.exec(route)) !== null) { - const name = match[1].split(":").pop(); - if (name) params.push(name); +function extractVarNameAndInit(obj: any): { varName?: string; init?: any } | null { + if (obj.type === 'AssignmentExpression' && obj.operator === '=' && obj.left?.type === 'Identifier') { + return { varName: obj.left.name, init: obj.right } } - return params; + return null } /** - * - * @param obj - * @returns - */ -function extractVarNameAndInit( - obj: any, -): { varName?: string; init?: any } | null { - try { - if (obj.type === "AssignmentExpression" && obj.operator === "=") { - if (obj.left?.type === "Identifier") { - return { varName: obj.left.name, init: obj.right }; - } - } - } catch (error) {} - return null; -} - -/** - * - * @param body - * @returns + * Analyzes imports to build name map. + * @param body AST body + * @returns {Map} Import name map */ +// eslint-disable-next-line complexity, sonarjs/cognitive-complexity function analyzeImports(body: any[]): Map { - const map = new Map(); - if (!Array.isArray(body)) return map; + const map = new Map() + if (!Array.isArray(body)) return map for (const obj of body) { - if (!obj || typeof obj !== "object") continue; + if (!obj || typeof obj !== 'object') continue - if ( - obj.type === "VariableDeclaration" && - obj.init?.type === "ImportExpression" - ) { - const importExpr = obj.init; - const localName = obj.id?.name; - if (!localName) continue; + if (obj.type === 'VariableDeclaration' && obj.init?.type === 'ImportExpression') { + const importExpr = obj.init + const localName = obj.id?.name + if (!localName) continue - const fromValue = extractLiteralString(importExpr.from); - const importedName = importExpr.imported?.name; // Identifier + const fromValue = extractLiteralString(importExpr.from) + const importedName = importExpr.imported?.name // Identifier if (fromValue) { // from ... import ... - if (fromValue === "fastapi" || fromValue.startsWith("fastapi.")) { - if (importedName) { - // Map 'FastAPI' or 'APIRouter' to 'fastapi.FastAPI' / 'fastapi.APIRouter' - // (case: fastapi.applications) - map.set(localName, `fastapi.${importedName}`); - } - } - } else if ( - importedName === "fastapi" || - importedName === "fastapi.applications" || - importedName === "fastapi.routing" || - importedName?.startsWith("fastapi.") - ) { - // import fastapi or import fastapi.applications - if ( - importedName === localName || - importedName.startsWith(`${localName}.`) - ) { - map.set(localName, localName); - } else { - map.set(localName, importedName); + if ((fromValue === 'fastapi' || fromValue.startsWith('fastapi.')) && importedName) { + // Use full path: fastapi.responses.ORJSONResponse instead of fastapi.ORJSONResponse + const canonicalName = fromValue === 'fastapi' ? `fastapi.${importedName}` : `${fromValue}.${importedName}` + map.set(localName, canonicalName) } + } else if (importedName === 'fastapi') { + // import fastapi + map.set(localName, 'fastapi') } } } - return map; + return map } /** - * - * @param node - * @param importMap - * @returns + * Resolves canonical name from node. + * @param node AST node + * @param importMap Import map + * @returns {string | null} Canonical name or null */ -function resolveCanonicalName( - node: any, - importMap: Map, -): string | null { - if (!node) return null; - if (node.type === "Identifier") { - return importMap.get(node.name) || null; +function resolveCanonicalName(node: any, importMap: Map): string | null { + if (!node) return null + if (node.type === 'Identifier') { + return importMap.get(node.name) || null } - if (node.type === "MemberAccess") { - const objectCanonical = resolveCanonicalName(node.object, importMap); - const propertyName = node.property?.name; + if (node.type === 'MemberAccess') { + const objectCanonical = resolveCanonicalName(node.object, importMap) + const propertyName = node.property?.name if (objectCanonical && propertyName) { - return `${objectCanonical}.${propertyName}`; + return `${objectCanonical}.${propertyName}` } } - return null; + return null } /** - * - * @param body - * @param importMap - * @returns + * Collects valid FastAPI instances. + * @param body AST body + * @param importMap Import map + * @returns {ValidInstances} Valid instances */ -function collectValidInstances( - body: any[], - importMap: Map, -): ValidInstances { - const validFastApiInstances = new Set(); - const validRouterInstances = new Set(); +// eslint-disable-next-line sonarjs/cognitive-complexity +function collectValidInstances(body: any[], importMap: Map): ValidInstances { + const validFastApiInstances = new Set() + const validRouterInstances = new Set() for (const obj of body) { - if (!obj || typeof obj !== "object") continue; + if (!obj || typeof obj !== 'object') continue // Only process AssignmentExpression - if (obj.type === "AssignmentExpression" && obj.operator === "=") { - const varInfo = extractVarNameAndInit(obj); - if (!varInfo?.varName || !varInfo.init) continue; - - if (varInfo.init.type === "CallExpression") { - const canonical = resolveCanonicalName(varInfo.init.callee, importMap); - if ( - canonical === "fastapi.FastAPI" || - canonical === "fastapi.applications.FastAPI" - ) { - validFastApiInstances.add(varInfo.varName); - } else if ( - canonical === "fastapi.APIRouter" || - canonical === "fastapi.routing.APIRouter" - ) { - validRouterInstances.add(varInfo.varName); + if (obj.type === 'AssignmentExpression' && obj.operator === '=') { + const varInfo = extractVarNameAndInit(obj) + if (!varInfo?.varName || !varInfo.init) continue + + if (varInfo.init.type === 'CallExpression') { + const canonical = resolveCanonicalName(varInfo.init.callee, importMap) + if (canonical && canonical.startsWith('fastapi')) { + if (canonical.endsWith('.FastAPI')) { + validFastApiInstances.add(varInfo.varName) + } else if (canonical.endsWith('.APIRouter')) { + validRouterInstances.add(varInfo.varName) + } } } } } - return { validFastApiInstances, validRouterInstances }; + return { validFastApiInstances, validRouterInstances } } /** - * - * @param deco - * @param funcName - * @param obj - * @param relativeFile - * @param filename - * @param validInstances - * @param entryPoints - * @param entryPointSources + * Processes decorator for entry points. + * @param deco Decorator node + * @param funcName Function name + * @param obj Function node + * @param relativeFile Relative file path + * @param filename Absolute file path + * @param validInstances Valid instances + * @param entryPoints Entry points array + * @param entryPointSources Sources array */ +// eslint-disable-next-line complexity, sonarjs/cognitive-complexity function processDecorator( deco: any, funcName: string, obj: any, relativeFile: string, - filename: string, validInstances: ValidInstances, entryPoints: EntryPoint[], - entryPointSources: any[], + entryPointSources: any[] ): void { - if (!deco || deco.type !== "CallExpression") return; - const { callee } = deco; + if (!deco || deco.type !== 'CallExpression') return + const { callee } = deco - if (!callee || callee.type !== "MemberAccess") return; + if (!callee || callee.type !== 'MemberAccess') return - const methodName = callee.property?.name; - if (!methodName || !ROUTE_DECORATORS.has(methodName)) return; + const methodName = callee.property?.name + if (!methodName || !ROUTE_DECORATORS.has(methodName)) return // Get router or app name - let routerName = ""; - if (callee.object?.type === "Identifier") { - routerName = callee.object.name; + let routerName = '' + if (callee.object?.type === 'Identifier') { + routerName = callee.object.name } // Validate router/app - const { validFastApiInstances, validRouterInstances } = validInstances; - const isValidRouter = - validFastApiInstances.has(routerName) || - validRouterInstances.has(routerName); + const { validFastApiInstances, validRouterInstances } = validInstances + const isValidRouter = validFastApiInstances.has(routerName) || validRouterInstances.has(routerName) - if (!isValidRouter) return; + if (!isValidRouter) return // Create entrypoint - const routePath = extractLiteralString(deco.arguments?.[0]); - const params = extractRouteParams(routePath); + const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL) + entryPoint.filePath = relativeFile + entryPoint.functionName = funcName + entryPoint.attribute = 'HTTP' - const entryPoint = new EntryPointClass(Constant.ENGIN_START_FUNCALL); - entryPoint.filePath = relativeFile; - entryPoint.functionName = funcName; - entryPoint.attribute = "HTTP"; - - entryPoints.push(entryPoint); + entryPoints.push(entryPoint) if (entryPointAndSourceAtSameTime) { - const paramSources = findSourceOfFuncParam( - relativeFile, - funcName, - obj, - undefined, - ); + const paramSources = findSourceOfFuncParam(relativeFile, funcName, obj, undefined) if (paramSources) { const allScopeSources = paramSources.map((s: any) => ({ ...s, - scopeFile: "all", - })); - entryPointSources.push(...allScopeSources); - } - - if (params.length && Array.isArray(obj.parameters)) { - for (const p of obj.parameters) { - const pn = p.id?.name; - if (pn && params.includes(pn)) { - entryPointSources.push({ - introPoint: 4, - kind: "PYTHON_INPUT", - path: pn, - scopeFile: "all", - scopeFunc: funcName, - locStart: p.loc?.start?.line, - locEnd: p.loc?.end?.line, - locColumnStart: p.loc?.start?.column, - locColumnEnd: p.loc?.end?.column, - }); - } - } + scopeFile: 'all', + })) + entryPointSources.push(...allScopeSources) } } } /** - * - * @param filenameAstObj - * @param dir - * @returns + * Finds FastAPI entry points and sources. + * @param filenameAstObj Filename to AST map + * @param dir Root directory + * @returns {EntryPointResult} Entry points and sources */ -function findFastApiEntryPointAndSource( - filenameAstObj: FilenameAstMap, - dir: string, -): EntryPointResult { - const entryPoints: EntryPoint[] = []; - const entryPointSources: any[] = []; +// eslint-disable-next-line complexity, sonarjs/cognitive-complexity +function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: string): EntryPointResult { + const entryPoints: EntryPoint[] = [] + const entryPointSources: any[] = [] for (const filename in filenameAstObj) { - if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) - continue; - const fileObj = filenameAstObj[filename]; - if (!fileObj?.body) continue; + if (!Object.prototype.hasOwnProperty.call(filenameAstObj, filename)) continue + const fileObj = filenameAstObj[filename] + if (!fileObj?.body) continue // Calculate relative path - const { body } = fileObj; - const relativeFile = filename.startsWith(dir) - ? extractRelativePath(filename, dir) - : filename; - - if (!relativeFile) continue; - - const importMap = analyzeImports(body); - - const validImports = new Set([ - "fastapi", - "fastapi.FastAPI", - "fastapi.APIRouter", - "fastapi.applications", - "fastapi.routing", - ]); - let hasFastApiImport = false; + const { body } = fileObj + const relativeFile = filename.startsWith(dir) ? extractRelativePath(filename, dir) : filename + + if (!relativeFile) continue + + const importMap = analyzeImports(body) + + // Only scan if core components (FastAPI or APIRouter) are imported + // Only scan if core components (FastAPI or APIRouter) are imported + let hasCoreImport = false for (const val of importMap.values()) { - if (validImports.has(val)) { - hasFastApiImport = true; - break; + if ( + val === 'fastapi' || + (val.startsWith('fastapi') && (val.endsWith('.FastAPI') || val.endsWith('.APIRouter'))) + ) { + hasCoreImport = true + break } } - if (!hasFastApiImport) continue; + if (!hasCoreImport) continue - const validInstances = collectValidInstances(body, importMap); + const validInstances = collectValidInstances(body, importMap) for (const obj of body) { - if (!obj || typeof obj !== "object") continue; + if (!obj || typeof obj !== 'object') continue - if ( - obj.type === "FunctionDefinition" && - obj._meta?.decorators && - obj.id?.name - ) { - const funcName = obj.id.name; - const { decorators } = obj._meta; + if (obj.type === 'FunctionDefinition' && obj._meta?.decorators && obj.id?.name) { + const funcName = obj.id.name + const { decorators } = obj._meta for (const deco of decorators) { - processDecorator( - deco, - funcName, - obj, - relativeFile, - filename, - validInstances, - entryPoints, - entryPointSources, - ); + processDecorator(deco, funcName, obj, relativeFile, validInstances, entryPoints, entryPointSources) } } } @@ -374,7 +269,7 @@ function findFastApiEntryPointAndSource( return { fastApiEntryPointArray: entryPoints, fastApiEntryPointSourceArray: entryPointSources, - }; + } } -export = { findFastApiEntryPointAndSource }; +export = { findFastApiEntryPointAndSource } From 30fdbdc637e64c3e5510cae533b4ce9b4c48ab50 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 26 Nov 2025 23:23:44 -0800 Subject: [PATCH 05/35] fix:FastAPI entrypoint --- .../entrypoint-collector/fastapi-entrypoint.ts | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts index de23f3d2..748c88fd 100644 --- a/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts +++ b/src/engine/analyzer/python/fastapi/entrypoint-collector/fastapi-entrypoint.ts @@ -167,6 +167,7 @@ function processDecorator( funcName: string, obj: any, relativeFile: string, + filename: string, validInstances: ValidInstances, entryPoints: EntryPoint[], entryPointSources: any[] @@ -200,14 +201,10 @@ function processDecorator( entryPoints.push(entryPoint) if (entryPointAndSourceAtSameTime) { - const paramSources = findSourceOfFuncParam(relativeFile, funcName, obj, undefined) + const paramSources = findSourceOfFuncParam(filename, funcName, obj, undefined) if (paramSources) { - const allScopeSources = paramSources.map((s: any) => ({ - ...s, - scopeFile: 'all', - })) - entryPointSources.push(...allScopeSources) + entryPointSources.push(...paramSources) } } } @@ -260,7 +257,7 @@ function findFastApiEntryPointAndSource(filenameAstObj: FilenameAstMap, dir: str const { decorators } = obj._meta for (const deco of decorators) { - processDecorator(deco, funcName, obj, relativeFile, validInstances, entryPoints, entryPointSources) + processDecorator(deco, funcName, obj, relativeFile, filename, validInstances, entryPoints, entryPointSources) } } } From e61be5fe03d07e226c5241bd314e5ab6ff77be5a Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 8 Dec 2025 00:46:38 -0800 Subject: [PATCH 06/35] feat: add Tornado checker --- resource/checker/checker-config.json | 5 + resource/checker/checker-pack-config.json | 2 + .../taint/python/tornado-taint-checker.ts | 768 ++++++++++++++++++ src/checker/taint/python/tornado-util.ts | 222 +++++ 4 files changed, 997 insertions(+) create mode 100644 src/checker/taint/python/tornado-taint-checker.ts create mode 100644 src/checker/taint/python/tornado-util.ts diff --git a/resource/checker/checker-config.json b/resource/checker/checker-config.json index 5e28f533..fad64657 100644 --- a/resource/checker/checker-config.json +++ b/resource/checker/checker-config.json @@ -135,6 +135,11 @@ "checkerPath": "checker/taint/python/django-taint-checker.ts", "description": "python Django框架 entrypoint采集以及框架source添加" }, + { + "checkerId": "taint_flow_python_tornado_input", + "checkerPath": "checker/taint/python/tornado-taint-checker.ts", + "description": "python Tornado框架 entrypoint采集以及框架source添加" + }, { "checkerId": "taint_flow_test", "checkerPath": "checker/taint/test-taint-checker.ts", diff --git a/resource/checker/checker-pack-config.json b/resource/checker/checker-pack-config.json index 2380e197..580fdd35 100644 --- a/resource/checker/checker-pack-config.json +++ b/resource/checker/checker-pack-config.json @@ -86,6 +86,7 @@ "checkerIds": [ "taint_flow_python_input", "taint_flow_python_django_input", + "taint_flow_python_tornado_input", "callgraph", "sanitizer" ], @@ -96,6 +97,7 @@ "checkerIds": [ "taint_flow_python_input_inner", "taint_flow_python_django_input", + "taint_flow_python_tornado_input", "callgraph", "sanitizer" ], diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts new file mode 100644 index 00000000..30d55ad7 --- /dev/null +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -0,0 +1,768 @@ +import type { FileCache, RoutePair } from './tornado-util' + +const PythonTaintAbstractChecker = require('./python-taint-abstract-checker') +const { extractRelativePath } = require('../../../util/file-util') +const AstUtil = require('../../../util/ast-util') +const Config = require('../../../config') +const completeEntryPoint = require('../common-kit/entry-points-util') +const logger = require('../../../util/logger')(__filename) +const { + isTornadoCall, + parseRoutePair, + resolveImportPath, + extractImportEntries, + extractParamsFromAst, + tornadoSourceAPIs, + passthroughFuncs, +} = require('./tornado-util') + +/** + * Tornado Taint Checker Base Class + */ +class TornadoTaintChecker extends PythonTaintAbstractChecker { + private fileCache = new Map() + + /** + * Helper function to mark a value as tainted + * @param value + */ + private markAsTainted(value: any): void { + if (!value) return + if (!value._tags) { + value._tags = new Set() + } + value._tags.add('PYTHON_INPUT') + value.hasTagRec = true + } + + /** + * + * @param resultManager + */ + constructor(resultManager: any) { + super(resultManager, 'taint_flow_python_tornado_input') + } + + /** + * trigger at start of analyze + * Register sourceScope values as sources + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtStartOfAnalyze( + analyzer: any, + scope: any, + node: any, + state: any, + info: any, + ): void { + // 重新加载规则配置(因为可能在构造函数时还没有设置 ruleConfigFile) + const BasicRuleHandler = require('../../common/rules-basic-handler') + // 尝试从命令行参数获取 ruleConfigFile + let { ruleConfigFile } = Config + if (!ruleConfigFile || ruleConfigFile === '') { + const args = process.argv + const ruleConfigIndex = args.indexOf('--ruleConfigFile') + if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { + ruleConfigFile = args[ruleConfigIndex + 1] + const path = require('path') + ruleConfigFile = path.isAbsolute(ruleConfigFile) + ? ruleConfigFile + : path.resolve(process.cwd(), ruleConfigFile) + } + } + try { + let ruleConfigContent: any[] = [] + if (ruleConfigFile && ruleConfigFile !== '') { + const FileUtil = require('../../../util/file-util') + ruleConfigContent = FileUtil.loadJSONfile(ruleConfigFile) + } else { + ruleConfigContent = BasicRuleHandler.getRules() + } + if (Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { + for (const ruleConfig of ruleConfigContent) { + if ( + ruleConfig.checkerIds && + ((Array.isArray(ruleConfig.checkerIds) && + ruleConfig.checkerIds.length > 0 && + ruleConfig.checkerIds.includes(this.getCheckerId())) || + ruleConfig.checkerIds === this.getCheckerId()) + ) { + const { mergeAToB } = require('../../../util/common-util') + mergeAToB(ruleConfig, this.checkerRuleConfigContent) + } + } + } + } catch (e: any) { + logger.warn(`Error reloading rule config: ${e?.message || e}`) + } + // 注册 sourceScope 中的 source + this.addSourceTagForSourceScope('PYTHON_INPUT', this.sourceScope.value) + // 注册规则配置中的 source + this.addSourceTagForcheckerRuleConfigContent( + 'PYTHON_INPUT', + this.checkerRuleConfigContent, + ) + } + + /** + * Build a light-weight file cache for quick lookup. + * @param analyzer + * @param scope + * @param node + * @param _state + * @param _info + */ + triggerAtCompileUnit( + analyzer: any, + scope: any, + node: any, + _state: any, + _info: any, + ): boolean | undefined { + const fileName = node.loc?.sourcefile + if (!fileName) return + + const cache: FileCache = { + vars: new Map(), + classes: new Map(), + importedSymbols: new Map(), + } + + AstUtil.visit(node, { + AssignmentExpression: (n: any) => { + if (n.left?.type === 'Identifier' && n.left.name) { + cache.vars.set(n.left.name, { value: n.right, file: fileName }) + } + return true + }, + VariableDeclaration: (n: any) => { + const localName = n.id?.name + if (!localName) return true + if (n.init?.type === 'ImportExpression') { + const modulePath = n.init.from?.value || n.init.from?.name + if (!modulePath) return true + const resolved = resolveImportPath(modulePath, fileName) + if (!resolved) return true + const entries = extractImportEntries(n) + for (const entry of entries) { + if (!entry.local) continue + cache.importedSymbols.set(entry.local, { + file: resolved, + originalName: entry.imported, + }) + } + return true + } + if (n.init) { + cache.vars.set(localName, { value: n.init, file: fileName }) + } + return true + }, + ClassDefinition: (n: any) => { + const name = n.name?.name || n.id?.name + if (name) { + cache.classes.set(name, { value: n, file: fileName }) + } + return true + }, + }) + + this.fileCache.set(fileName, cache) + } + + /** + * On function calls, detect tornado Application/add_handlers and collect routes. + * @param analyzer + * @param scope + * @param node + * @param state + * @param _info + */ + triggerAtFuncCallSyntax( + analyzer: any, + scope: any, + node: any, + state: any, + _info: any, + ): boolean | undefined { + const fileName = node.loc?.sourcefile + if (!fileName) return + + // Application(...) -> first arg is routes + if (isTornadoCall(node, 'Application')) { + const routeList = node.arguments?.[0] + if (routeList) { + this.collectTornadoEntrypointAndSource( + analyzer, + scope, + state, + routeList, + fileName, + ) + } + } + + // add_handlers(host, routes) -> second arg is routes + if (isTornadoCall(node, 'add_handlers')) { + const routeList = node.arguments?.[1] + if (routeList) { + this.collectTornadoEntrypointAndSource( + analyzer, + scope, + state, + routeList, + fileName, + ) + } + } + } + + /** + * Override triggerAtIdentifier to mark path parameters as sources + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtIdentifier( + analyzer: any, + scope: any, + node: any, + state: any, + info: any, + ): void { + // 先调用基类方法 + super.triggerAtIdentifier(analyzer, scope, node, state, info) + // 如果基类方法没有标记(因为 preprocessReady=false),直接标记 + const { res } = info + if (res && this.sourceScope.value && this.sourceScope.value.length > 0) { + for (const val of this.sourceScope.value) { + if ( + val.path === node.name || + res._sid === val.path || + res._qid === val.path + ) { + // 检查作用域匹配 + const nodeStart = node.loc?.start?.line + const nodeEnd = node.loc?.end?.line + const valStart = val.locStart + const valEnd = val.locEnd + let shouldMark = false + if ( + valStart === 'all' && + valEnd === 'all' && + val.scopeFile === 'all' && + val.scopeFunc === 'all' + ) { + shouldMark = true + } else if ( + valStart === 'all' && + valEnd === 'all' && + val.scopeFile !== 'all' && + val.scopeFunc === 'all' && + typeof node.loc?.sourcefile === 'string' && + node.loc.sourcefile.includes(val.scopeFile) + ) { + shouldMark = true + } else if ( + node.loc?.sourcefile && + val.scopeFile && + node.loc.sourcefile.includes(val.scopeFile) && + typeof nodeStart === 'number' && + typeof valStart === 'number' && + typeof nodeEnd === 'number' && + typeof valEnd === 'number' && + nodeStart >= valStart && + nodeEnd <= valEnd + ) { + shouldMark = true + } + if (shouldMark && (!res._tags || !res._tags.has('PYTHON_INPUT'))) { + if (!res._tags) { + res._tags = new Set() + } + res._tags.add('PYTHON_INPUT') + res.hasTagRec = true + } + } + } + } + } + + /** + * Override checkByNameMatch to support partial matching (e.g., os.system matches syslib_from.os.system) + * @param node + * @param fclos + * @param argvalues + */ + checkByNameMatch(node: any, fclos: any, argvalues: any) { + const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink + if (!rules || rules.length === 0) { + return + } + const callFull = this.getObj(fclos) + if (!callFull) { + super.checkByNameMatch(node, fclos, argvalues) + return + } + // 检查是否有匹配的规则(支持部分匹配) + const matchedRule = rules.find((rule: any) => { + if (typeof rule.fsig !== 'string') return false + return ( + rule.fsig === callFull || + callFull.endsWith(`.${rule.fsig}`) || + callFull.endsWith(rule.fsig) + ) + }) + // 如果有匹配的规则,调用基类方法处理 + if (matchedRule) { + super.checkByNameMatch(node, fclos, argvalues) + } + } + + /** + * Handle API calls like self.get_argument() + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtFunctionCallAfter( + analyzer: any, + scope: any, + node: any, + state: any, + info: any, + ): void { + // 先调用基类方法处理规则配置中的 source + super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) + + const { fclos, ret } = info + if (!fclos || !ret) { + return + } + + // 从 node.callee 获取方法名(对于 MemberAccess 调用,如 self.get_argument) + let funcName: string | null = null + if (node.callee?.type === 'MemberAccess') { + funcName = node.callee.property?.name + } else if (node.callee?.type === 'Identifier') { + funcName = node.callee.name + } + + // 检查是否是 tornado source API 调用(如 get_argument) + if (funcName && tornadoSourceAPIs.has(funcName)) { + this.markAsTainted(ret) + } + + // 处理 passthrough 函数(如 decode, strip 等) + if (funcName && passthroughFuncs.has(funcName)) { + // 检查是否是 self.request.body.decode 等 source + // 对于 self.request.body.decode('utf-8'),AST 结构: + // node.callee.object.type = 'MemberAccess' (body) + // node.callee.object.object.type = 'MemberAccess' (request) + // node.callee.object.object.object.name = 'self' + if (node.callee?.type === 'MemberAccess' && node.callee.object) { + const bodyNode = node.callee.object + if ( + bodyNode.type === 'MemberAccess' && + bodyNode.property?.name === 'body' && + bodyNode.object?.type === 'MemberAccess' && + bodyNode.object.property?.name === 'request' && + bodyNode.object.object?.name === 'self' + ) { + // 直接标记返回值为 source(因为 self.request.body 是 source) + this.markAsTainted(ret) + return // 已经标记,不需要再检查 receiver + } + } + // 检查 receiver 是否被污染 + const receiver = fclos?.object || fclos?._this + if ( + receiver && + (receiver.taint || + receiver.hasTagRec || + receiver._tags?.has('PYTHON_INPUT')) + ) { + this.markAsTainted(ret) + } + } + } + + /** + * Handle Member Access Sources like self.request.body + * [Fixed]: Now checks AST node structure instead of symbolic result + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtMemberAccess( + analyzer: any, + scope: any, + node: any, + state: any, + info: any, + ): void { + const { res } = info + + if (node.type === 'MemberAccess' && node.object?.type === 'MemberAccess') { + const propName = node.property?.name + const subPropName = node.object?.property?.name + const baseObjName = node.object?.object?.name + + if ( + baseObjName === 'self' && + subPropName === 'request' && + ['body', 'query', 'headers', 'cookies'].includes(propName) + ) { + this.markAsTainted(res) + } + } + } + + /** + * Resolve symbol cross-file + * @param name + * @param currentFile + */ + private resolveSymbol(name: string, currentFile: string): any | null { + if (!name || !currentFile) return null + const cache = this.fileCache.get(currentFile) + if (!cache) return null + const { vars, classes, importedSymbols } = cache + if (vars.has(name)) { + const entry = vars.get(name) + if (entry?.value) { + entry.value.loc = entry.value.loc || {} + entry.value.loc.sourcefile = entry.file + return entry.value + } + } + if (classes.has(name)) { + const entry = classes.get(name) + if (entry?.value) { + entry.value.loc = entry.value.loc || {} + entry.value.loc.sourcefile = entry.file + return entry.value + } + } + + const importInfo = importedSymbols.get(name) + if (!importInfo) return null + const targetCache = this.fileCache.get(importInfo.file) + if (!targetCache) return null + const targetName = importInfo.originalName || name + if (targetCache.vars.has(targetName)) { + const entry = targetCache.vars.get(targetName) + if (entry?.value) { + entry.value.loc = entry.value.loc || {} + entry.value.loc.sourcefile = entry.file + return entry.value + } + } + if (targetCache.classes.has(targetName)) { + const entry = targetCache.classes.get(targetName) + if (entry?.value) { + entry.value.loc = entry.value.loc || {} + entry.value.loc.sourcefile = entry.file + return entry.value + } + } + return null + } + + /** + * Flatten route lists (handles BinaryExpression +) + * @param node + * @param currentFile + */ + private normalizeRoutes(node: any, currentFile: string): RoutePair[] { + if (!node) return [] + + if (node.type === 'ListExpression' || node.type === 'ArrayExpression') { + const elements = node.elements || [] + return elements.flatMap((element: any) => + this.normalizeRoutes(element, currentFile), + ) + } + + if (node.type === 'BinaryExpression') { + return [ + ...this.normalizeRoutes(node.left, currentFile), + ...this.normalizeRoutes(node.right, currentFile), + ] + } + + if (node.type === 'ObjectExpression') { + const values = + node.properties?.map((prop: any) => prop.value).filter(Boolean) || [] + return values.flatMap((value: any) => + this.normalizeRoutes(value, node.loc?.sourcefile || currentFile), + ) + } + + if (node.type === 'Identifier') { + const target = this.resolveSymbol(node.name, currentFile) + if (!target) return [] + const targetFile = target.loc?.sourcefile || currentFile + return this.normalizeRoutes(target, targetFile) + } + + const pair = parseRoutePair(node) + return pair ? [{ ...pair, file: currentFile }] : [] + } + + /** + * Analyze routes and emit entrypoints & sources + * @param analyzer + * @param scope + * @param state + * @param routeList + * @param currentFile + */ + private collectTornadoEntrypointAndSource( + analyzer: any, + scope: any, + state: any, + routeList: any, + currentFile: string, + ) { + const processed = new Set() + const normalizedRoutes = this.normalizeRoutes(routeList, currentFile) + for (const pair of normalizedRoutes) { + if (!pair.path || !pair.handlerName) { + continue + } + const dedupKey = `${pair.file || currentFile}::${pair.handlerName}::${pair.path}` + if (processed.has(dedupKey)) { + continue + } + processed.add(dedupKey) + const classAst = this.resolveSymbol( + pair.handlerName, + pair.file || currentFile, + ) + if (!classAst || classAst.type !== 'ClassDefinition') { + continue + } + const classFile = classAst.loc?.sourcefile || pair.file || currentFile + // 使用 analyzer.processInstruction 来处理类对象,确保有正确的结构 + let handlerSymVal: any + try { + handlerSymVal = analyzer.processInstruction(scope, classAst, state) + if (!handlerSymVal || handlerSymVal.vtype !== 'class') { + handlerSymVal = this.buildClassSymbol(classAst) + if (!handlerSymVal.field) { + handlerSymVal.field = {} + } + } + } catch (e) { + handlerSymVal = this.buildClassSymbol(classAst) + if (!handlerSymVal.field) { + handlerSymVal.field = {} + } + } + // 确保 handlerSymVal 有 field 结构 + if ( + handlerSymVal && + handlerSymVal.vtype === 'class' && + !handlerSymVal.field + ) { + handlerSymVal.field = {} + } + this.emitHandlerEntrypoints( + analyzer, + handlerSymVal, + pair.path, + classAst, + scope, + state, + ) + } + } + + /** + * Register EntryPoints and Path Param Sources + * [Fixed]: Removed Config check to forcefully register parameters as sources + * @param analyzer + * @param handlerSymVal + * @param urlPattern + * @param classAst + * @param scope + * @param state + */ + private emitHandlerEntrypoints( + analyzer: any, + handlerSymVal: any, + urlPattern: string, + classAst: any, + scope?: any, + state?: any, + ) { + if (!handlerSymVal || handlerSymVal.vtype !== 'class') { + return + } + const httpMethods = new Set([ + 'get', + 'post', + 'put', + 'delete', + 'patch', + 'head', + 'options', + ]) + const entrypoints = Object.entries(handlerSymVal.value) + .filter( + ([key, value]: [string, any]) => + httpMethods.has(key) && value.vtype === 'fclos', + ) + .map(([, value]: [string, any]) => value) + + for (const ep of entrypoints as any[]) { + // ignore init files + if (ep.fdef?.loc?.sourcefile?.endsWith('__init__.py')) { + continue + } + + // 尝试使用 analyzer.processInstruction 获取正确的 fclos 对象 + let finalEp = ep + if (scope && state && ep.fdef) { + try { + const processedFclos = analyzer.processInstruction( + scope, + ep.fdef, + state, + ) + if (processedFclos && processedFclos.vtype === 'fclos') { + processedFclos.parent = handlerSymVal + processedFclos.params = ep.params || extractParamsFromAst(ep.fdef) + if (!processedFclos.value) { + processedFclos.value = {} + } + finalEp = processedFclos + } + } catch (e) { + // fallback to original ep + } + } + // 确保 ep 有 value 属性 + if (!finalEp.value) { + finalEp.value = {} + } + + // 确保 finalEp.parent 正确设置,并且 handlerSymVal 有 field 结构 + if (handlerSymVal && handlerSymVal.vtype === 'class') { + if (!handlerSymVal.field) { + handlerSymVal.field = {} + } + finalEp.parent = handlerSymVal + } + + try { + // 确保 finalEp 有 completeEntryPoint 需要的属性 + if (!finalEp.ast && finalEp.fdef) { + finalEp.ast = finalEp.fdef + } + if (!finalEp.functionName) { + finalEp.functionName = + finalEp.fdef?.name?.name || + finalEp.fdef?.id?.name || + finalEp.name || + '' + } + // 确保 finalEp 有 filePath + if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { + const FileUtil = require('../../../util/file-util') + const { sourcefile } = finalEp.fdef.loc + if (Config.maindir && typeof Config.maindir === 'string') { + finalEp.filePath = FileUtil.extractRelativePath( + sourcefile, + Config.maindir, + ) + } else { + finalEp.filePath = sourcefile + } + } + const entryPoint = completeEntryPoint(finalEp) + // 确保 entryPoint.entryPointSymVal.parent 有 field 结构 + if ( + entryPoint.entryPointSymVal?.parent && + entryPoint.entryPointSymVal.parent.vtype === 'class' && + !entryPoint.entryPointSymVal.parent.field + ) { + entryPoint.entryPointSymVal.parent.field = {} + } + analyzer.entryPoints.push(entryPoint) + } catch (e: any) { + logger.warn(`Error in completeEntryPoint: ${e?.message || e}`) + continue + } + // 注册参数为 source + const funcName = + finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' + const sourceFile = + finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' + let scopeFile: string | null = null + if (sourceFile) { + if (Config.maindir && typeof Config.maindir === 'string') { + scopeFile = extractRelativePath(sourceFile, Config.maindir) + } else { + scopeFile = sourceFile + } + } + + const paramMetas = + (Array.isArray((finalEp as any).params) && + (finalEp as any).params.length + ? (finalEp as any).params + : extractParamsFromAst(finalEp.fdef)) || [] + if (paramMetas.length > 0) { + for (const meta of paramMetas) { + if (meta.name === 'self') continue + + this.sourceScope.value.push({ + path: meta.name, + kind: 'PYTHON_INPUT', + scopeFile, + scopeFunc: funcName || 'all', + locStart: meta.locStart, + locEnd: meta.locEnd, + }) + } + } + } + } + + /** + * + * @param classNode + */ + private buildClassSymbol(classNode: any): any { + const value: any = {} + const members = classNode.body || [] + members.forEach((member: any) => { + if (member.type !== 'FunctionDefinition') return + const memberName = + member.name?.name || member.name?.id?.name || member.id?.name + if (memberName) { + value[memberName] = { + vtype: 'fclos', + fdef: member, + ast: member, + params: extractParamsFromAst(member), + } + } + }) + return { vtype: 'class', value } + } +} + +export = TornadoTaintChecker diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts new file mode 100644 index 00000000..2ee70cc7 --- /dev/null +++ b/src/checker/taint/python/tornado-util.ts @@ -0,0 +1,222 @@ +const path = require('path') +const AstUtil = require('../../../util/ast-util') + +export interface ImportSymbol { + file: string + originalName?: string +} + +export interface RoutePair { + path: string + handlerName: string + file?: string +} + +export interface FileCache { + vars: Map + classes: Map + importedSymbols: Map +} + +export interface ParamMeta { + name: string + locStart: number | 'all' + locEnd: number | 'all' +} + +export const tornadoSourceAPIs = new Set([ + 'get_argument', + 'get_query_argument', + 'get_body_argument', + 'get_query_arguments', + 'get_body_arguments', + 'get_cookie', + 'get_secure_cookie', +]) + +export const passthroughFuncs = new Set([ + 'decode', + 'strip', + 'replace', + 'lower', + 'upper', + 'split', +]) + +/** + * + * @param node + */ +export function isRequestAttributeAccess(node: any): boolean { + if (node?.type !== 'MemberAccess') return false + const propName = node.property?.name + const inner = node.object + if (inner?.type !== 'MemberAccess') return false + const baseName = inner.object?.name + const requestName = inner.property?.name + return ( + baseName === 'self' && + requestName === 'request' && + ['body', 'query', 'headers', 'cookies'].includes(propName) + ) +} + +/** + * + * @param expr + */ +export function isRequestAttributeExpression(expr: any): boolean { + if (!expr) return false + if (expr.type === 'MemberAccess') return isRequestAttributeAccess(expr) + if (expr.type === 'CallExpression' && expr.callee?.type === 'MemberAccess') { + return isRequestAttributeAccess(expr.callee.object) + } + return false +} + +/** + * + * @param node + * @param targetName + */ +export function isTornadoCall(node: any, targetName: string): boolean { + if (!node || node.type !== 'CallExpression' || !node.callee) return false + const { callee } = node + if (callee.type === 'MemberAccess' && callee.property?.name === targetName) { + return true + } + if (callee.type === 'Identifier' && callee.name === targetName) { + return true + } + return false +} + +/** + * + * @param route + */ +export function parseRoutePair(route: any): RoutePair | null { + if (!route) return null + + const extractLiteral = (expr: any): string | null => { + if (!expr) return null + if (expr.type === 'StringLiteral' || expr.type === 'Literal') { + return typeof expr.value === 'string' ? expr.value : null + } + return null + } + + let pathExpr: any + let handlerNode: any + + if (route.type === 'TupleExpression' && Array.isArray(route.elements)) { + const [first, second] = route.elements + pathExpr = first + handlerNode = second + } else if (route.type === 'CallExpression' && route.callee) { + const { callee } = route + const isUrlHelper = + (callee.type === 'Identifier' && callee.name === 'url') || + (callee.type === 'MemberAccess' && + AstUtil.prettyPrint(callee).includes('url')) + if (isUrlHelper && Array.isArray(route.arguments)) { + const [first, second] = route.arguments + pathExpr = first + handlerNode = second + } + } + if (!pathExpr || !handlerNode || handlerNode.type !== 'Identifier') { + return null + } + const pathValue = extractLiteral(pathExpr) + if (!pathValue) return null + + return { path: pathValue, handlerName: handlerNode.name } +} + +/** + * + * @param modulePath + * @param currentFile + */ +export function resolveImportPath( + modulePath: string, + currentFile: string, +): string | null { + if (!modulePath) return null + const currentDir = path.dirname(currentFile) + const leadingDots = modulePath.match(/^\.+/)?.[0] ?? '' + let baseDir = currentDir + if (leadingDots.length > 0) { + baseDir = path.resolve(currentDir, '../'.repeat(leadingDots.length - 1)) + } + const remainder = modulePath.slice(leadingDots.length) + const normalized = remainder ? remainder.split('.').join(path.sep) : '' + const resolved = normalized ? path.resolve(baseDir, normalized) : baseDir + return `${resolved}.py` +} + +/** + * + * @param stmt + */ +export function extractImportEntries( + stmt: any, +): Array<{ local: string; imported?: string }> { + const res: Array<{ local: string; imported?: string }> = [] + const { init } = stmt + if (!init) return res + + if (Array.isArray(init?.imports) && init.imports.length > 0) { + for (const spec of init.imports) { + const local = + spec.local?.name || spec.local?.value || spec.name || spec.value + const imported = + spec.imported?.name || spec.imported?.value || spec.name || spec.value + if (local) res.push({ local, imported }) + } + return res + } + + if (stmt.id?.name) { + const importedName = + init?.imported?.name || + init?.imported?.value || + init?.name?.name || + init?.name?.value + res.push({ local: stmt.id.name, imported: importedName }) + } + return res +} + +/** + * + * @param funcNode + */ +export function extractParamsFromAst(funcNode: any): ParamMeta[] { + if (!funcNode) return [] + const rawParams = Array.isArray(funcNode?.parameters?.parameters) + ? funcNode.parameters.parameters + : Array.isArray(funcNode?.parameters) + ? funcNode.parameters + : [] + const fallbackLine = + typeof funcNode?.loc?.start?.line === 'number' + ? funcNode.loc.start.line + : 'all' + const result: ParamMeta[] = [] + for (const param of rawParams) { + const name = param?.id?.name || param?.name + if (!name) continue + const locStart = + typeof param?.loc?.start?.line === 'number' + ? param.loc.start.line + : fallbackLine + const locEnd = + typeof param?.loc?.end?.line === 'number' + ? param.loc.end.line + : fallbackLine + result.push({ name, locStart, locEnd }) + } + return result +} From f32f9fbe0da122f46534b8e57afc769c11ab4f90 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 8 Dec 2025 01:11:50 -0800 Subject: [PATCH 07/35] feat: Tornado checker --- .../taint/python/tornado-taint-checker.ts | 183 +++--------------- src/checker/taint/python/tornado-util.ts | 54 ++---- 2 files changed, 43 insertions(+), 194 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 30d55ad7..fa905eba 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -52,13 +52,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param info */ - triggerAtStartOfAnalyze( - analyzer: any, - scope: any, - node: any, - state: any, - info: any, - ): void { + triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void { // 重新加载规则配置(因为可能在构造函数时还没有设置 ruleConfigFile) const BasicRuleHandler = require('../../common/rules-basic-handler') // 尝试从命令行参数获取 ruleConfigFile @@ -69,9 +63,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { ruleConfigFile = args[ruleConfigIndex + 1] const path = require('path') - ruleConfigFile = path.isAbsolute(ruleConfigFile) - ? ruleConfigFile - : path.resolve(process.cwd(), ruleConfigFile) + ruleConfigFile = path.isAbsolute(ruleConfigFile) ? ruleConfigFile : path.resolve(process.cwd(), ruleConfigFile) } } try { @@ -102,10 +94,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 注册 sourceScope 中的 source this.addSourceTagForSourceScope('PYTHON_INPUT', this.sourceScope.value) // 注册规则配置中的 source - this.addSourceTagForcheckerRuleConfigContent( - 'PYTHON_INPUT', - this.checkerRuleConfigContent, - ) + this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent) } /** @@ -116,13 +105,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param _state * @param _info */ - triggerAtCompileUnit( - analyzer: any, - scope: any, - node: any, - _state: any, - _info: any, - ): boolean | undefined { + triggerAtCompileUnit(analyzer: any, scope: any, node: any, _state: any, _info: any): boolean | undefined { const fileName = node.loc?.sourcefile if (!fileName) return @@ -182,13 +165,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param _info */ - triggerAtFuncCallSyntax( - analyzer: any, - scope: any, - node: any, - state: any, - _info: any, - ): boolean | undefined { + triggerAtFuncCallSyntax(analyzer: any, scope: any, node: any, state: any, _info: any): boolean | undefined { const fileName = node.loc?.sourcefile if (!fileName) return @@ -196,13 +173,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (isTornadoCall(node, 'Application')) { const routeList = node.arguments?.[0] if (routeList) { - this.collectTornadoEntrypointAndSource( - analyzer, - scope, - state, - routeList, - fileName, - ) + this.collectTornadoEntrypointAndSource(analyzer, scope, state, routeList, fileName) } } @@ -210,13 +181,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (isTornadoCall(node, 'add_handlers')) { const routeList = node.arguments?.[1] if (routeList) { - this.collectTornadoEntrypointAndSource( - analyzer, - scope, - state, - routeList, - fileName, - ) + this.collectTornadoEntrypointAndSource(analyzer, scope, state, routeList, fileName) } } } @@ -229,36 +194,21 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param info */ - triggerAtIdentifier( - analyzer: any, - scope: any, - node: any, - state: any, - info: any, - ): void { + triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any): void { // 先调用基类方法 super.triggerAtIdentifier(analyzer, scope, node, state, info) // 如果基类方法没有标记(因为 preprocessReady=false),直接标记 const { res } = info if (res && this.sourceScope.value && this.sourceScope.value.length > 0) { for (const val of this.sourceScope.value) { - if ( - val.path === node.name || - res._sid === val.path || - res._qid === val.path - ) { + if (val.path === node.name || res._sid === val.path || res._qid === val.path) { // 检查作用域匹配 const nodeStart = node.loc?.start?.line const nodeEnd = node.loc?.end?.line const valStart = val.locStart const valEnd = val.locEnd let shouldMark = false - if ( - valStart === 'all' && - valEnd === 'all' && - val.scopeFile === 'all' && - val.scopeFunc === 'all' - ) { + if (valStart === 'all' && valEnd === 'all' && val.scopeFile === 'all' && val.scopeFunc === 'all') { shouldMark = true } else if ( valStart === 'all' && @@ -313,11 +263,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 检查是否有匹配的规则(支持部分匹配) const matchedRule = rules.find((rule: any) => { if (typeof rule.fsig !== 'string') return false - return ( - rule.fsig === callFull || - callFull.endsWith(`.${rule.fsig}`) || - callFull.endsWith(rule.fsig) - ) + return rule.fsig === callFull || callFull.endsWith(`.${rule.fsig}`) || callFull.endsWith(rule.fsig) }) // 如果有匹配的规则,调用基类方法处理 if (matchedRule) { @@ -333,13 +279,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param info */ - triggerAtFunctionCallAfter( - analyzer: any, - scope: any, - node: any, - state: any, - info: any, - ): void { + triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void { // 先调用基类方法处理规则配置中的 source super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) @@ -384,12 +324,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } // 检查 receiver 是否被污染 const receiver = fclos?.object || fclos?._this - if ( - receiver && - (receiver.taint || - receiver.hasTagRec || - receiver._tags?.has('PYTHON_INPUT')) - ) { + if (receiver && (receiver.taint || receiver.hasTagRec || receiver._tags?.has('PYTHON_INPUT'))) { this.markAsTainted(ret) } } @@ -404,13 +339,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param info */ - triggerAtMemberAccess( - analyzer: any, - scope: any, - node: any, - state: any, - info: any, - ): void { + triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void { const { res } = info if (node.type === 'MemberAccess' && node.object?.type === 'MemberAccess') { @@ -489,24 +418,16 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (node.type === 'ListExpression' || node.type === 'ArrayExpression') { const elements = node.elements || [] - return elements.flatMap((element: any) => - this.normalizeRoutes(element, currentFile), - ) + return elements.flatMap((element: any) => this.normalizeRoutes(element, currentFile)) } if (node.type === 'BinaryExpression') { - return [ - ...this.normalizeRoutes(node.left, currentFile), - ...this.normalizeRoutes(node.right, currentFile), - ] + return [...this.normalizeRoutes(node.left, currentFile), ...this.normalizeRoutes(node.right, currentFile)] } if (node.type === 'ObjectExpression') { - const values = - node.properties?.map((prop: any) => prop.value).filter(Boolean) || [] - return values.flatMap((value: any) => - this.normalizeRoutes(value, node.loc?.sourcefile || currentFile), - ) + const values = node.properties?.map((prop: any) => prop.value).filter(Boolean) || [] + return values.flatMap((value: any) => this.normalizeRoutes(value, node.loc?.sourcefile || currentFile)) } if (node.type === 'Identifier') { @@ -533,7 +454,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { scope: any, state: any, routeList: any, - currentFile: string, + currentFile: string ) { const processed = new Set() const normalizedRoutes = this.normalizeRoutes(routeList, currentFile) @@ -546,10 +467,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { continue } processed.add(dedupKey) - const classAst = this.resolveSymbol( - pair.handlerName, - pair.file || currentFile, - ) + const classAst = this.resolveSymbol(pair.handlerName, pair.file || currentFile) if (!classAst || classAst.type !== 'ClassDefinition') { continue } @@ -571,21 +489,10 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } } // 确保 handlerSymVal 有 field 结构 - if ( - handlerSymVal && - handlerSymVal.vtype === 'class' && - !handlerSymVal.field - ) { + if (handlerSymVal && handlerSymVal.vtype === 'class' && !handlerSymVal.field) { handlerSymVal.field = {} } - this.emitHandlerEntrypoints( - analyzer, - handlerSymVal, - pair.path, - classAst, - scope, - state, - ) + this.emitHandlerEntrypoints(analyzer, handlerSymVal, pair.path, classAst, scope, state) } } @@ -605,25 +512,14 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { urlPattern: string, classAst: any, scope?: any, - state?: any, + state?: any ) { if (!handlerSymVal || handlerSymVal.vtype !== 'class') { return } - const httpMethods = new Set([ - 'get', - 'post', - 'put', - 'delete', - 'patch', - 'head', - 'options', - ]) + const httpMethods = new Set(['get', 'post', 'put', 'delete', 'patch', 'head', 'options']) const entrypoints = Object.entries(handlerSymVal.value) - .filter( - ([key, value]: [string, any]) => - httpMethods.has(key) && value.vtype === 'fclos', - ) + .filter(([key, value]: [string, any]) => httpMethods.has(key) && value.vtype === 'fclos') .map(([, value]: [string, any]) => value) for (const ep of entrypoints as any[]) { @@ -636,11 +532,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { let finalEp = ep if (scope && state && ep.fdef) { try { - const processedFclos = analyzer.processInstruction( - scope, - ep.fdef, - state, - ) + const processedFclos = analyzer.processInstruction(scope, ep.fdef, state) if (processedFclos && processedFclos.vtype === 'fclos') { processedFclos.parent = handlerSymVal processedFclos.params = ep.params || extractParamsFromAst(ep.fdef) @@ -672,21 +564,14 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { finalEp.ast = finalEp.fdef } if (!finalEp.functionName) { - finalEp.functionName = - finalEp.fdef?.name?.name || - finalEp.fdef?.id?.name || - finalEp.name || - '' + finalEp.functionName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' } // 确保 finalEp 有 filePath if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { const FileUtil = require('../../../util/file-util') const { sourcefile } = finalEp.fdef.loc if (Config.maindir && typeof Config.maindir === 'string') { - finalEp.filePath = FileUtil.extractRelativePath( - sourcefile, - Config.maindir, - ) + finalEp.filePath = FileUtil.extractRelativePath(sourcefile, Config.maindir) } else { finalEp.filePath = sourcefile } @@ -706,10 +591,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { continue } // 注册参数为 source - const funcName = - finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' - const sourceFile = - finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' + const funcName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' + const sourceFile = finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' let scopeFile: string | null = null if (sourceFile) { if (Config.maindir && typeof Config.maindir === 'string') { @@ -720,8 +603,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } const paramMetas = - (Array.isArray((finalEp as any).params) && - (finalEp as any).params.length + (Array.isArray((finalEp as any).params) && (finalEp as any).params.length ? (finalEp as any).params : extractParamsFromAst(finalEp.fdef)) || [] if (paramMetas.length > 0) { @@ -750,8 +632,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const members = classNode.body || [] members.forEach((member: any) => { if (member.type !== 'FunctionDefinition') return - const memberName = - member.name?.name || member.name?.id?.name || member.id?.name + const memberName = member.name?.name || member.name?.id?.name || member.id?.name if (memberName) { value[memberName] = { vtype: 'fclos', diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index 2ee70cc7..d23b434b 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -34,14 +34,7 @@ export const tornadoSourceAPIs = new Set([ 'get_secure_cookie', ]) -export const passthroughFuncs = new Set([ - 'decode', - 'strip', - 'replace', - 'lower', - 'upper', - 'split', -]) +export const passthroughFuncs = new Set(['decode', 'strip', 'replace', 'lower', 'upper', 'split']) /** * @@ -54,11 +47,7 @@ export function isRequestAttributeAccess(node: any): boolean { if (inner?.type !== 'MemberAccess') return false const baseName = inner.object?.name const requestName = inner.property?.name - return ( - baseName === 'self' && - requestName === 'request' && - ['body', 'query', 'headers', 'cookies'].includes(propName) - ) + return baseName === 'self' && requestName === 'request' && ['body', 'query', 'headers', 'cookies'].includes(propName) } /** @@ -117,8 +106,7 @@ export function parseRoutePair(route: any): RoutePair | null { const { callee } = route const isUrlHelper = (callee.type === 'Identifier' && callee.name === 'url') || - (callee.type === 'MemberAccess' && - AstUtil.prettyPrint(callee).includes('url')) + (callee.type === 'MemberAccess' && AstUtil.prettyPrint(callee).includes('url')) if (isUrlHelper && Array.isArray(route.arguments)) { const [first, second] = route.arguments pathExpr = first @@ -139,10 +127,7 @@ export function parseRoutePair(route: any): RoutePair | null { * @param modulePath * @param currentFile */ -export function resolveImportPath( - modulePath: string, - currentFile: string, -): string | null { +export function resolveImportPath(modulePath: string, currentFile: string): string | null { if (!modulePath) return null const currentDir = path.dirname(currentFile) const leadingDots = modulePath.match(/^\.+/)?.[0] ?? '' @@ -160,30 +145,22 @@ export function resolveImportPath( * * @param stmt */ -export function extractImportEntries( - stmt: any, -): Array<{ local: string; imported?: string }> { +export function extractImportEntries(stmt: any): Array<{ local: string; imported?: string }> { const res: Array<{ local: string; imported?: string }> = [] const { init } = stmt if (!init) return res if (Array.isArray(init?.imports) && init.imports.length > 0) { for (const spec of init.imports) { - const local = - spec.local?.name || spec.local?.value || spec.name || spec.value - const imported = - spec.imported?.name || spec.imported?.value || spec.name || spec.value + const local = spec.local?.name || spec.local?.value || spec.name || spec.value + const imported = spec.imported?.name || spec.imported?.value || spec.name || spec.value if (local) res.push({ local, imported }) } return res } if (stmt.id?.name) { - const importedName = - init?.imported?.name || - init?.imported?.value || - init?.name?.name || - init?.name?.value + const importedName = init?.imported?.name || init?.imported?.value || init?.name?.name || init?.name?.value res.push({ local: stmt.id.name, imported: importedName }) } return res @@ -200,22 +177,13 @@ export function extractParamsFromAst(funcNode: any): ParamMeta[] { : Array.isArray(funcNode?.parameters) ? funcNode.parameters : [] - const fallbackLine = - typeof funcNode?.loc?.start?.line === 'number' - ? funcNode.loc.start.line - : 'all' + const fallbackLine = typeof funcNode?.loc?.start?.line === 'number' ? funcNode.loc.start.line : 'all' const result: ParamMeta[] = [] for (const param of rawParams) { const name = param?.id?.name || param?.name if (!name) continue - const locStart = - typeof param?.loc?.start?.line === 'number' - ? param.loc.start.line - : fallbackLine - const locEnd = - typeof param?.loc?.end?.line === 'number' - ? param.loc.end.line - : fallbackLine + const locStart = typeof param?.loc?.start?.line === 'number' ? param.loc.start.line : fallbackLine + const locEnd = typeof param?.loc?.end?.line === 'number' ? param.loc.end.line : fallbackLine result.push({ name, locStart, locEnd }) } return result From 20678809c5581214a25ccf3301a4e6bb15884f06 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Fri, 19 Dec 2025 20:07:31 -0800 Subject: [PATCH 08/35] Fix: update Python analyzer and Tornado taint checker rules --- .../rule_config_python.json | 7 +- src/checker/common/checker.ts | 5 +- src/checker/common/rules-basic-handler.ts | 6 +- src/checker/taint/common-kit/source-util.ts | 7 +- .../python/python-taint-abstract-checker.ts | 29 + .../taint/python/tornado-taint-checker.ts | 758 ++++++++++++------ src/checker/taint/python/tornado-util.ts | 56 +- src/engine/analyzer/common/analyzer.ts | 6 + .../analyzer/python/common/python-analyzer.ts | 54 +- 9 files changed, 679 insertions(+), 249 deletions(-) diff --git a/resource/example-rule-config/rule_config_python.json b/resource/example-rule-config/rule_config_python.json index 93de362d..1b866057 100644 --- a/resource/example-rule-config/rule_config_python.json +++ b/resource/example-rule-config/rule_config_python.json @@ -1,6 +1,11 @@ [ { - "checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner", "taint_flow_python_django_input"], + "checkerIds": [ + "taint_flow_python_input", + "taint_flow_python_input_inner", + "taint_flow_python_django_input", + "taint_flow_python_tornado_input" + ], "sources": { "FuncCallReturnValueTaintSource": [ { diff --git a/src/checker/common/checker.ts b/src/checker/common/checker.ts index 2cf693fc..db557105 100644 --- a/src/checker/common/checker.ts +++ b/src/checker/common/checker.ts @@ -40,7 +40,10 @@ class CheckerBase { */ loadRuleConfig(checker: any): void { const checkerId = checker.getCheckerId() - const ruleConfigContent = BasicRuleHandler.getRules() + // 路径从 checker/common 回到项目根的 config + const Config = require('../../config') + // 传入 Config.ruleConfigFile,如果为空则让 getRules 从 Config 读取 + const ruleConfigContent = BasicRuleHandler.getRules(Config.ruleConfigFile) if (Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { for (const ruleConfig of ruleConfigContent) { if ( diff --git a/src/checker/common/rules-basic-handler.ts b/src/checker/common/rules-basic-handler.ts index 86d3d69c..865dc894 100644 --- a/src/checker/common/rules-basic-handler.ts +++ b/src/checker/common/rules-basic-handler.ts @@ -18,8 +18,10 @@ let preprocessReady: boolean = false * * @param ruleConfigPath */ -function getRules(ruleConfigPath: string): any[] { - if (!rules) { +function getRules(ruleConfigPath?: string): any[] { + // 如果传入了 ruleConfigPath,或者 config.ruleConfigFile 已设置但 rules 未加载,则重新加载 + const currentRuleConfigFile = ruleConfigPath || config.ruleConfigFile + if (!rules || (currentRuleConfigFile && !rules)) { try { if (ruleConfigPath) { rules = FileUtil.loadJSONfile(ruleConfigPath) diff --git a/src/checker/taint/common-kit/source-util.ts b/src/checker/taint/common-kit/source-util.ts index 1eb2fa15..9c9778c5 100644 --- a/src/checker/taint/common-kit/source-util.ts +++ b/src/checker/taint/common-kit/source-util.ts @@ -171,7 +171,12 @@ function introduceTaintAtIdentifier(node: any, res: any, sourceScopeVal: any): a markTaintSource(res, { path: node, kind: val.kind }) } } - } else if (node.loc.sourcefile.includes(val.scopeFile) && nodeStart >= valStart && nodeEnd <= valEnd) { + } else if ( + node.loc.sourcefile && + node.loc.sourcefile.includes(val.scopeFile) && + nodeStart >= valStart && + nodeEnd <= valEnd + ) { markTaintSource(res, { path: node, kind: val.kind }) } } diff --git a/src/checker/taint/python/python-taint-abstract-checker.ts b/src/checker/taint/python/python-taint-abstract-checker.ts index f4ac5fa1..30e00646 100644 --- a/src/checker/taint/python/python-taint-abstract-checker.ts +++ b/src/checker/taint/python/python-taint-abstract-checker.ts @@ -21,7 +21,36 @@ class PythonTaintAbstractChecker extends TaintChecker { * @param info */ triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any) { + // Try normal matching first IntroduceTaint.introduceTaintAtIdentifier(node, info.res, this.sourceScope.value) + + // If preprocess is not ready, still mark parameters that are in sourceScope + const BasicRuleHandler = require('../../common/rules-basic-handler') + if (!BasicRuleHandler.getPreprocessReady() && this.sourceScope.value && this.sourceScope.value.length > 0) { + for (const source of this.sourceScope.value) { + // Check if kind matches (could be string or array) + const kindMatches = + source.kind === 'PYTHON_INPUT' || (Array.isArray(source.kind) && source.kind.includes('PYTHON_INPUT')) + + if (source.path === node.name && kindMatches) { + // For path parameters, we use 'all' for all scope conditions, so always match + const shouldMatch = + (source.scopeFile === 'all' || !source.scopeFile) && + (source.scopeFunc === 'all' || !source.scopeFunc) && + (source.locStart === 'all' || !source.locStart) && + (source.locEnd === 'all' || !source.locEnd) + + if (shouldMatch && (!info.res._tags || info.res._tags.size === 0)) { + if (!info.res._tags) { + info.res._tags = new Set() + } + info.res._tags.add('PYTHON_INPUT') + info.res.hasTagRec = true + break + } + } + } + } } /** diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index fa905eba..08a57606 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -1,11 +1,14 @@ -import type { FileCache, RoutePair } from './tornado-util' - +const path = require('path') const PythonTaintAbstractChecker = require('./python-taint-abstract-checker') -const { extractRelativePath } = require('../../../util/file-util') +const FileUtil = require('../../../util/file-util') + +const { extractRelativePath } = FileUtil const AstUtil = require('../../../util/ast-util') const Config = require('../../../config') const completeEntryPoint = require('../common-kit/entry-points-util') const logger = require('../../../util/logger')(__filename) +const BasicRuleHandler = require('../../common/rules-basic-handler') +const { mergeAToB } = require('../../../util/common-util') const { isTornadoCall, parseRoutePair, @@ -14,14 +17,33 @@ const { extractParamsFromAst, tornadoSourceAPIs, passthroughFuncs, + isRequestAttributeExpression, + isRequestAttributeAccess, } = require('./tornado-util') +// Type definitions (moved from import to avoid module resolution issues) +interface FileCache { + vars: Map + classes: Map + importedSymbols: Map +} + +interface RoutePair { + path: string + handlerName: string + file?: string +} + /** * Tornado Taint Checker Base Class */ class TornadoTaintChecker extends PythonTaintAbstractChecker { private fileCache = new Map() + private cachedRuleConfigFile: string | null = null + + private cachedRuleConfigContent: any[] | null = null + /** * Helper function to mark a value as tainted * @param value @@ -41,6 +63,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { */ constructor(resultManager: any) { super(resultManager, 'taint_flow_python_tornado_input') + // 基类构造函数会调用 loadRuleConfig,但此时 Config.ruleConfigFile 可能还没有被设置 + // 所以我们在这里不加载规则配置,而是在 triggerAtStartOfAnalyze 中加载 } /** @@ -53,50 +77,81 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void { - // 重新加载规则配置(因为可能在构造函数时还没有设置 ruleConfigFile) - const BasicRuleHandler = require('../../common/rules-basic-handler') - // 尝试从命令行参数获取 ruleConfigFile - let { ruleConfigFile } = Config - if (!ruleConfigFile || ruleConfigFile === '') { - const args = process.argv - const ruleConfigIndex = args.indexOf('--ruleConfigFile') - if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { - ruleConfigFile = args[ruleConfigIndex + 1] - const path = require('path') - ruleConfigFile = path.isAbsolute(ruleConfigFile) ? ruleConfigFile : path.resolve(process.cwd(), ruleConfigFile) + const ruleConfigFile: string | null = null + let ruleConfigContent: any[] | null = null + + const currentRuleConfigFile = Config.ruleConfigFile || this.getRuleConfigFileFromArgs() + + if (currentRuleConfigFile && currentRuleConfigFile !== '') { + try { + ruleConfigContent = FileUtil.loadJSONfile(currentRuleConfigFile) + this.cachedRuleConfigFile = currentRuleConfigFile + this.cachedRuleConfigContent = ruleConfigContent + } catch (e: any) { + ruleConfigContent = [] + } + } else if (this.cachedRuleConfigContent !== null) { + // 使用缓存的配置内容 + ruleConfigContent = this.cachedRuleConfigContent + } else { + // 尝试从 BasicRuleHandler 获取(可能已经在构造函数中加载) + try { + ruleConfigContent = BasicRuleHandler.getRules(Config.ruleConfigFile) + if (ruleConfigContent && ruleConfigContent.length > 0) { + this.cachedRuleConfigContent = ruleConfigContent + } + } catch (e: any) { + ruleConfigContent = [] } } - try { - let ruleConfigContent: any[] = [] - if (ruleConfigFile && ruleConfigFile !== '') { - const FileUtil = require('../../../util/file-util') - ruleConfigContent = FileUtil.loadJSONfile(ruleConfigFile) - } else { - ruleConfigContent = BasicRuleHandler.getRules() - } - if (Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { - for (const ruleConfig of ruleConfigContent) { - if ( - ruleConfig.checkerIds && - ((Array.isArray(ruleConfig.checkerIds) && - ruleConfig.checkerIds.length > 0 && - ruleConfig.checkerIds.includes(this.getCheckerId())) || - ruleConfig.checkerIds === this.getCheckerId()) - ) { - const { mergeAToB } = require('../../../util/common-util') - mergeAToB(ruleConfig, this.checkerRuleConfigContent) + + // 应用规则配置 + const checkerId = this.getCheckerId() + + if (ruleConfigContent && Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { + for (const ruleConfig of ruleConfigContent) { + const checkerIds = Array.isArray(ruleConfig.checkerIds) + ? ruleConfig.checkerIds + : ruleConfig.checkerIds + ? [ruleConfig.checkerIds] + : [] + const matches = checkerIds.length > 0 && checkerIds.includes(checkerId) + + if (matches) { + mergeAToB(ruleConfig, this.checkerRuleConfigContent) + + // 强制确保sinks被正确设置 + if (ruleConfig.sinks?.FuncCallTaintSink) { + this.checkerRuleConfigContent.sinks = this.checkerRuleConfigContent.sinks || {} + this.checkerRuleConfigContent.sinks.FuncCallTaintSink = ruleConfig.sinks.FuncCallTaintSink } } } - } catch (e: any) { - logger.warn(`Error reloading rule config: ${e?.message || e}`) } + // 注册 sourceScope 中的 source this.addSourceTagForSourceScope('PYTHON_INPUT', this.sourceScope.value) // 注册规则配置中的 source this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent) } + /** + * Get ruleConfigFile from command line arguments (cached) + * @returns The resolved ruleConfigFile path or empty string + */ + private getRuleConfigFileFromArgs(): string { + let { ruleConfigFile } = Config + if (!ruleConfigFile || ruleConfigFile === '') { + const args = process.argv + const ruleConfigIndex = args.indexOf('--ruleConfigFile') + if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { + ruleConfigFile = args[ruleConfigIndex + 1] + ruleConfigFile = path.isAbsolute(ruleConfigFile) ? ruleConfigFile : path.resolve(process.cwd(), ruleConfigFile) + } + } + return ruleConfigFile || '' + } + /** * Build a light-weight file cache for quick lookup. * @param analyzer @@ -128,7 +183,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (n.init?.type === 'ImportExpression') { const modulePath = n.init.from?.value || n.init.from?.name if (!modulePath) return true - const resolved = resolveImportPath(modulePath, fileName) + const resolved = resolveImportPath(modulePath, fileName, Config.maindir) if (!resolved) return true const entries = extractImportEntries(n) for (const entry of entries) { @@ -158,88 +213,135 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } /** - * On function calls, detect tornado Application/add_handlers and collect routes. + * On function call before execution, use argvalues to get resolved symbol values + * This replaces the old AST-based triggerAtFuncCallSyntax approach. + * Using symbol interpretation allows us to: + * 1. Get resolved symbol values for arguments (especially strings) via argvalues + * 2. Handle cases where route lists are obtained through function calls + * 3. Process route objects regardless of how they are obtained (variable, function call, etc.) * @param analyzer * @param scope * @param node * @param state - * @param _info + * @param info */ - triggerAtFuncCallSyntax(analyzer: any, scope: any, node: any, state: any, _info: any): boolean | undefined { + triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void { + // 先调用基类方法 + super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info) + + const { fclos, argvalues } = info + if (!fclos || !argvalues) return + const fileName = node.loc?.sourcefile if (!fileName) return - // Application(...) -> first arg is routes - if (isTornadoCall(node, 'Application')) { - const routeList = node.arguments?.[0] - if (routeList) { - this.collectTornadoEntrypointAndSource(analyzer, scope, state, routeList, fileName) - } + // 检查是否是 Application 或 add_handlers 调用 + let routeListArgValue: any = null + const isApp = isTornadoCall(node, 'Application') + const isAddHandlers = isTornadoCall(node, 'add_handlers') + + if (isApp) { + // Application(...) -> first arg is routes + ;[routeListArgValue] = argvalues + } else if (isAddHandlers) { + // add_handlers(host, routes) -> second arg is routes + ;[, routeListArgValue] = argvalues } - // add_handlers(host, routes) -> second arg is routes - if (isTornadoCall(node, 'add_handlers')) { - const routeList = node.arguments?.[1] - if (routeList) { - this.collectTornadoEntrypointAndSource(analyzer, scope, state, routeList, fileName) - } + if (routeListArgValue) { + this.collectTornadoEntrypointAndSourceFromArgValue(analyzer, scope, state, routeListArgValue, fileName) } } /** - * Override triggerAtIdentifier to mark path parameters as sources + * Collect entrypoints and sources from resolved symbol values (from argvalues) * @param analyzer * @param scope - * @param node * @param state - * @param info + * @param routeListSymVal - Resolved symbol value for route list + * @param currentFile */ - triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any): void { - // 先调用基类方法 - super.triggerAtIdentifier(analyzer, scope, node, state, info) - // 如果基类方法没有标记(因为 preprocessReady=false),直接标记 - const { res } = info - if (res && this.sourceScope.value && this.sourceScope.value.length > 0) { - for (const val of this.sourceScope.value) { - if (val.path === node.name || res._sid === val.path || res._qid === val.path) { - // 检查作用域匹配 - const nodeStart = node.loc?.start?.line - const nodeEnd = node.loc?.end?.line - const valStart = val.locStart - const valEnd = val.locEnd - let shouldMark = false - if (valStart === 'all' && valEnd === 'all' && val.scopeFile === 'all' && val.scopeFunc === 'all') { - shouldMark = true - } else if ( - valStart === 'all' && - valEnd === 'all' && - val.scopeFile !== 'all' && - val.scopeFunc === 'all' && - typeof node.loc?.sourcefile === 'string' && - node.loc.sourcefile.includes(val.scopeFile) - ) { - shouldMark = true - } else if ( - node.loc?.sourcefile && - val.scopeFile && - node.loc.sourcefile.includes(val.scopeFile) && - typeof nodeStart === 'number' && - typeof valStart === 'number' && - typeof nodeEnd === 'number' && - typeof valEnd === 'number' && - nodeStart >= valStart && - nodeEnd <= valEnd - ) { - shouldMark = true - } - if (shouldMark && (!res._tags || !res._tags.has('PYTHON_INPUT'))) { - if (!res._tags) { - res._tags = new Set() + private collectTornadoEntrypointAndSourceFromArgValue( + analyzer: any, + scope: any, + state: any, + routeListSymVal: any, + currentFile: string + ): void { + if (!routeListSymVal) return + + const processed = new Set() + const routePairs = this.extractRoutesFromSymbolValue(routeListSymVal, currentFile, analyzer, scope, state) + + for (let i = 0; i < routePairs.length; i++) { + const pair = routePairs[i] + if (!pair.path || !pair.handlerName) { + continue + } + const dedupKey = `${pair.file || currentFile}::${pair.handlerName}::${pair.path}` + if (processed.has(dedupKey)) { + continue + } + processed.add(dedupKey) + + let handlerSymVal: any = null + let classAst: any = null + + // Helper function to process class AST and get handler symbol value + const processHandlerClass = (ast: any) => { + classAst = ast + try { + handlerSymVal = analyzer.processInstruction(scope, classAst, state) + if (!handlerSymVal || handlerSymVal.vtype !== 'class') { + handlerSymVal = this.buildClassSymbol(classAst) + if (!handlerSymVal.field) { + handlerSymVal.field = {} } - res._tags.add('PYTHON_INPUT') - res.hasTagRec = true + } + } catch (e) { + handlerSymVal = this.buildClassSymbol(classAst) + if (!handlerSymVal.field) { + handlerSymVal.field = {} + } + } + } + + // First, try to use handler symbol value directly from the route pair + if (pair.handlerSymVal) { + const handlerSym = pair.handlerSymVal + // If it's already a class symbol value, use it directly + if (handlerSym.vtype === 'class') { + handlerSymVal = handlerSym + classAst = handlerSym.ast || handlerSym.fdef + } else if (handlerSym.ast && handlerSym.ast.type === 'ClassDefinition') { + // If we have the AST, process it to get the class symbol value + processHandlerClass(handlerSym.ast) + } else { + // Try to resolve from identifier + const { handlerName } = pair + const handlerFile = pair.file || currentFile + const handlerClassAst = this.resolveSymbol(handlerName, handlerFile) + if (handlerClassAst && handlerClassAst.type === 'ClassDefinition') { + processHandlerClass(handlerClassAst) } } + } else { + // Fallback: resolve handler class from name + const { handlerName } = pair + const handlerFile = pair.file || currentFile + const handlerClassAst = this.resolveSymbol(handlerName, handlerFile) + if (handlerClassAst && handlerClassAst.type === 'ClassDefinition') { + processHandlerClass(handlerClassAst) + } + } + + // Ensure handlerSymVal has field structure + if (handlerSymVal && handlerSymVal.vtype === 'class' && !handlerSymVal.field) { + handlerSymVal.field = {} + } + + if (handlerSymVal && classAst) { + this.emitHandlerEntrypoints(analyzer, handlerSymVal, pair.path, classAst, scope, state) } } } @@ -251,19 +353,70 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param argvalues */ checkByNameMatch(node: any, fclos: any, argvalues: any) { + // 如果sinks配置为空,尝试从规则配置文件加载(延迟加载) + if (!this.checkerRuleConfigContent.sinks?.FuncCallTaintSink) { + const Config = require('../../../config') + const FileUtil = require('../../../util/file-util') + const path = require('path') + const { mergeAToB } = require('../../../util/common-util') + + let currentRuleConfigFile = Config.ruleConfigFile + if (!currentRuleConfigFile || currentRuleConfigFile === '') { + const args = process.argv + const ruleConfigIndex = args.indexOf('--ruleConfigFile') + if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { + currentRuleConfigFile = args[ruleConfigIndex + 1] + currentRuleConfigFile = path.isAbsolute(currentRuleConfigFile) + ? currentRuleConfigFile + : path.resolve(process.cwd(), currentRuleConfigFile) + } + } + + if (currentRuleConfigFile && currentRuleConfigFile !== '') { + try { + const ruleConfigContent = FileUtil.loadJSONfile(currentRuleConfigFile) + if (ruleConfigContent && Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { + const checkerId = this.getCheckerId() + for (const ruleConfig of ruleConfigContent) { + const checkerIds = Array.isArray(ruleConfig.checkerIds) + ? ruleConfig.checkerIds + : ruleConfig.checkerIds + ? [ruleConfig.checkerIds] + : [] + if (checkerIds.includes(checkerId)) { + mergeAToB(ruleConfig, this.checkerRuleConfigContent) + if (ruleConfig.sinks?.FuncCallTaintSink) { + this.checkerRuleConfigContent.sinks = this.checkerRuleConfigContent.sinks || {} + this.checkerRuleConfigContent.sinks.FuncCallTaintSink = ruleConfig.sinks.FuncCallTaintSink + } + } + } + } + } catch (e) { + // 忽略错误 + } + } + } + const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink + const callFull = this.getObj(fclos) + + // 如果还是没有rules,直接调用基类方法(基类可能会从其他地方获取规则) if (!rules || rules.length === 0) { + super.checkByNameMatch(node, fclos, argvalues) return } - const callFull = this.getObj(fclos) + if (!callFull) { super.checkByNameMatch(node, fclos, argvalues) return } // 检查是否有匹配的规则(支持部分匹配) + // 只匹配精确匹配或带点分隔符的部分匹配(如 os.system 匹配 syslib_from.os.system) + // 移除 callFull.endsWith(rule.fsig) 以避免误报(如 "system" 匹配 "test_system") const matchedRule = rules.find((rule: any) => { if (typeof rule.fsig !== 'string') return false - return rule.fsig === callFull || callFull.endsWith(`.${rule.fsig}`) || callFull.endsWith(rule.fsig) + return rule.fsig === callFull || callFull.endsWith(`.${rule.fsig}`) }) // 如果有匹配的规则,调用基类方法处理 if (matchedRule) { @@ -303,24 +456,16 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 处理 passthrough 函数(如 decode, strip 等) if (funcName && passthroughFuncs.has(funcName)) { - // 检查是否是 self.request.body.decode 等 source - // 对于 self.request.body.decode('utf-8'),AST 结构: - // node.callee.object.type = 'MemberAccess' (body) - // node.callee.object.object.type = 'MemberAccess' (request) - // node.callee.object.object.object.name = 'self' - if (node.callee?.type === 'MemberAccess' && node.callee.object) { - const bodyNode = node.callee.object - if ( - bodyNode.type === 'MemberAccess' && - bodyNode.property?.name === 'body' && - bodyNode.object?.type === 'MemberAccess' && - bodyNode.object.property?.name === 'request' && - bodyNode.object.object?.name === 'self' - ) { - // 直接标记返回值为 source(因为 self.request.body 是 source) - this.markAsTainted(ret) - return // 已经标记,不需要再检查 receiver - } + // 使用 isRequestAttributeExpression 统一检测 request 属性访问(如 self.request.body.decode) + // 这避免了重复的 AST 模式匹配逻辑,保持与 tornado-util.ts 的一致性 + if ( + node.callee?.type === 'MemberAccess' && + node.callee.object && + isRequestAttributeExpression(node.callee.object) + ) { + // 直接标记返回值为 source(因为 self.request.body/query/headers/cookies 等是 source) + this.markAsTainted(ret) + return // 已经标记,不需要再检查 receiver } // 检查 receiver 是否被污染 const receiver = fclos?.object || fclos?._this @@ -330,9 +475,53 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } } + /** + * Trigger before entrypoint execution + * Mark path parameters as tainted sources + * @param analyzer + * @param scope + * @param node + * @param state + * @param info + */ + triggerAtSymbolInterpretOfEntryPointBefore(analyzer: any, scope: any, node: any, state: any, info: any): void { + const entryPointConfig = require('../../../engine/analyzer/common/current-entrypoint') + const entryPoint = entryPointConfig.getCurrentEntryPoint() + if (!entryPoint || !entryPoint.entryPointSymVal) return + + // Check if this entrypoint has path parameters that should be marked as tainted + const params = entryPoint.entryPointSymVal?.ast?.parameters + if (!params) return + + // Get parameter names from sourceScope + const paramNames = new Set() + for (const source of this.sourceScope.value) { + if (source.path && source.kind === 'PYTHON_INPUT') { + paramNames.add(source.path) + } + } + + // Mark matching parameters as tainted by processing them and marking the result + for (const key in params) { + const param = params[key] + const paramName = param?.id?.name || param?.name + if (paramName && paramNames.has(paramName) && paramName !== 'self') { + try { + // Process the parameter to get its symbol value + const paramSymVal = analyzer.processInstruction(entryPoint.entryPointSymVal, param.id || param, state) + if (paramSymVal) { + this.markAsTainted(paramSymVal) + } + } catch (e) { + // Ignore errors + } + } + } + } + /** * Handle Member Access Sources like self.request.body - * [Fixed]: Now checks AST node structure instead of symbolic result + * Reuses isRequestAttributeAccess from tornado-util.ts to maintain consistency * @param analyzer * @param scope * @param node @@ -342,18 +531,9 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void { const { res } = info - if (node.type === 'MemberAccess' && node.object?.type === 'MemberAccess') { - const propName = node.property?.name - const subPropName = node.object?.property?.name - const baseObjName = node.object?.object?.name - - if ( - baseObjName === 'self' && - subPropName === 'request' && - ['body', 'query', 'headers', 'cookies'].includes(propName) - ) { - this.markAsTainted(res) - } + // 重用 isRequestAttributeAccess 工具函数,避免重复逻辑并保持行为一致 + if (isRequestAttributeAccess(node)) { + this.markAsTainted(res) } } @@ -409,91 +589,210 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } /** - * Flatten route lists (handles BinaryExpression +) - * @param node - * @param currentFile + * Extract route pairs from resolved symbol values (from argvalues) + * @param routeListSymVal - Symbol value representing route list + * @param currentFile - Current file path + * @param analyzer + * @param scope + * @param state + * @returns Array of route pairs with handler symbol values */ - private normalizeRoutes(node: any, currentFile: string): RoutePair[] { - if (!node) return [] + private extractRoutesFromSymbolValue( + routeListSymVal: any, + currentFile: string, + analyzer?: any, + scope?: any, + state?: any + ): Array { + if (!routeListSymVal) return [] + + // Handle list/tuple symbol values + if (routeListSymVal.vtype === 'list' || routeListSymVal.vtype === 'tuple' || routeListSymVal.vtype === 'array') { + const elements = routeListSymVal.value || [] + return elements.flatMap((element: any) => + this.extractRoutesFromSymbolValue(element, currentFile, analyzer, scope, state) + ) + } + + // Handle object type that might be a list (e.g., when symbol interpretation returns object for list literals) + // Check if it has numeric keys (0, 1, 2, ...) which indicates it's an array-like object + if (routeListSymVal.vtype === 'object' && routeListSymVal.value) { + const keys = Object.keys(routeListSymVal.value).filter((k) => /^\d+$/.test(k)) + if (keys.length > 0) { + // It's an array-like object, extract elements by numeric keys + const elements = keys.map((k) => routeListSymVal.value[k]) + return elements.flatMap((element: any) => + this.extractRoutesFromSymbolValue(element, currentFile, analyzer, scope, state) + ) + } + } + + // Handle union types + if (routeListSymVal.vtype === 'union' && Array.isArray(routeListSymVal.value)) { + // Union type might represent a tuple (path, handler) + // Check if it has exactly 2 elements and try to extract as tuple + if (routeListSymVal.value.length === 2) { + const [pathSymVal, handlerSymVal] = routeListSymVal.value + const pathValue = this.extractStringFromSymbolValue(pathSymVal) + const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) + if (pathValue && handlerName) { + const file = + handlerSymVal?.ast?.loc?.sourcefile || + handlerSymVal?.fdef?.loc?.sourcefile || + handlerSymVal?.loc?.sourcefile || + currentFile + return [{ path: pathValue, handlerName, file, handlerSymVal }] + } + } + // Otherwise, recursively process each element + return routeListSymVal.value.flatMap((val: any) => + this.extractRoutesFromSymbolValue(val, currentFile, analyzer, scope, state) + ) + } - if (node.type === 'ListExpression' || node.type === 'ArrayExpression') { - const elements = node.elements || [] - return elements.flatMap((element: any) => this.normalizeRoutes(element, currentFile)) + // Handle tuple/route pair: (path, handler) + // Check if it's a tuple with 2 elements + if ( + routeListSymVal.vtype === 'tuple' && + Array.isArray(routeListSymVal.value) && + routeListSymVal.value.length >= 2 + ) { + const [pathSymVal, handlerSymVal] = routeListSymVal.value + const pathValue = this.extractStringFromSymbolValue(pathSymVal) + const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) + if (pathValue && handlerName) { + const file = + handlerSymVal?.ast?.loc?.sourcefile || + handlerSymVal?.fdef?.loc?.sourcefile || + handlerSymVal?.loc?.sourcefile || + currentFile + return [{ path: pathValue, handlerName, file, handlerSymVal }] + } } - if (node.type === 'BinaryExpression') { - return [...this.normalizeRoutes(node.left, currentFile), ...this.normalizeRoutes(node.right, currentFile)] + // Handle object type that represents a tuple (e.g., when tuple is represented as object with 0, 1 keys) + if ( + routeListSymVal.vtype === 'object' && + routeListSymVal.value && + routeListSymVal.value['0'] && + routeListSymVal.value['1'] + ) { + const pathSymVal = routeListSymVal.value['0'] + const handlerSymVal = routeListSymVal.value['1'] + const pathValue = this.extractStringFromSymbolValue(pathSymVal) + const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) + if (pathValue && handlerName) { + const file = + handlerSymVal?.ast?.loc?.sourcefile || + handlerSymVal?.fdef?.loc?.sourcefile || + handlerSymVal?.loc?.sourcefile || + currentFile + return [{ path: pathValue, handlerName, file, handlerSymVal }] + } } - if (node.type === 'ObjectExpression') { - const values = node.properties?.map((prop: any) => prop.value).filter(Boolean) || [] - return values.flatMap((value: any) => this.normalizeRoutes(value, node.loc?.sourcefile || currentFile)) + // Handle list concatenation via BinaryExpression (e.g., app_routes + [...]) + const astNode = routeListSymVal.ast + if (astNode && astNode.type === 'BinaryExpression' && astNode.operator === '+') { + try { + const pairs: Array = [] + const leftVal = analyzer?.processInstruction ? analyzer.processInstruction(scope, astNode.left, state) : null + if (leftVal) { + pairs.push(...this.extractRoutesFromSymbolValue(leftVal, currentFile, analyzer, scope, state)) + } + const rightVal = analyzer?.processInstruction ? analyzer.processInstruction(scope, astNode.right, state) : null + if (rightVal) { + pairs.push(...this.extractRoutesFromSymbolValue(rightVal, currentFile, analyzer, scope, state)) + } + if (pairs.length > 0) { + return pairs + } + } catch (e) { + // ignore and fallback to AST parse below + } } - if (node.type === 'Identifier') { - const target = this.resolveSymbol(node.name, currentFile) - if (!target) return [] - const targetFile = target.loc?.sourcefile || currentFile - return this.normalizeRoutes(target, targetFile) + // Fallback: try to parse from AST if available + if (routeListSymVal.ast) { + const pair = parseRoutePair(routeListSymVal.ast) + if (pair) { + const file = routeListSymVal.ast?.loc?.sourcefile || routeListSymVal.loc?.sourcefile || currentFile + return [{ ...pair, file }] + } } - const pair = parseRoutePair(node) - return pair ? [{ ...pair, file: currentFile }] : [] + return [] } /** - * Analyze routes and emit entrypoints & sources - * @param analyzer - * @param scope - * @param state - * @param routeList - * @param currentFile + * Extract string value from symbol value + * @param symVal - Symbol value + * @returns String value or null */ - private collectTornadoEntrypointAndSource( - analyzer: any, - scope: any, - state: any, - routeList: any, - currentFile: string - ) { - const processed = new Set() - const normalizedRoutes = this.normalizeRoutes(routeList, currentFile) - for (const pair of normalizedRoutes) { - if (!pair.path || !pair.handlerName) { - continue + private extractStringFromSymbolValue(symVal: any): string | null { + if (!symVal) return null + + // Direct string value + if (symVal.vtype === 'string' || symVal.vtype === 'literal') { + return typeof symVal.value === 'string' ? symVal.value : null + } + + // From AST + if (symVal.ast && (symVal.ast.type === 'StringLiteral' || symVal.ast.type === 'Literal')) { + return typeof symVal.ast.value === 'string' ? symVal.ast.value : null + } + + return null + } + + /** + * Extract handler name/class from symbol value + * @param handlerSymVal - Handler symbol value + * @returns Handler name or null + */ + private extractHandlerNameFromSymbolValue(handlerSymVal: any): string | null { + if (!handlerSymVal) return null + + // If it's a class symbol value + if (handlerSymVal.vtype === 'class') { + // Try to get class name from AST + if (handlerSymVal.ast?.id?.name) { + return handlerSymVal.ast.id.name } - const dedupKey = `${pair.file || currentFile}::${pair.handlerName}::${pair.path}` - if (processed.has(dedupKey)) { - continue + if (handlerSymVal.ast?.name?.name) { + return handlerSymVal.ast.name.name } - processed.add(dedupKey) - const classAst = this.resolveSymbol(pair.handlerName, pair.file || currentFile) - if (!classAst || classAst.type !== 'ClassDefinition') { - continue + // Try from _sid or _qid + if (handlerSymVal._sid) { + return handlerSymVal._sid } - const classFile = classAst.loc?.sourcefile || pair.file || currentFile - // 使用 analyzer.processInstruction 来处理类对象,确保有正确的结构 - let handlerSymVal: any - try { - handlerSymVal = analyzer.processInstruction(scope, classAst, state) - if (!handlerSymVal || handlerSymVal.vtype !== 'class') { - handlerSymVal = this.buildClassSymbol(classAst) - if (!handlerSymVal.field) { - handlerSymVal.field = {} - } - } - } catch (e) { - handlerSymVal = this.buildClassSymbol(classAst) - if (!handlerSymVal.field) { - handlerSymVal.field = {} - } + if (handlerSymVal._qid) { + const parts = handlerSymVal._qid.split('.') + return parts[parts.length - 1] } - // 确保 handlerSymVal 有 field 结构 - if (handlerSymVal && handlerSymVal.vtype === 'class' && !handlerSymVal.field) { - handlerSymVal.field = {} + } + + // If it's an identifier symbol value + if (handlerSymVal.vtype === 'identifier' || handlerSymVal.vtype === 'var') { + if (handlerSymVal._sid) { + return handlerSymVal._sid + } + if (handlerSymVal.ast?.name) { + return handlerSymVal.ast.name + } + } + + // From AST + if (handlerSymVal.ast) { + if (handlerSymVal.ast.type === 'Identifier') { + return handlerSymVal.ast.name + } + if (handlerSymVal.ast.type === 'ClassDefinition') { + return handlerSymVal.ast.id?.name || handlerSymVal.ast.name?.name || null } - this.emitHandlerEntrypoints(analyzer, handlerSymVal, pair.path, classAst, scope, state) } + + return null } /** @@ -568,7 +867,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } // 确保 finalEp 有 filePath if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { - const FileUtil = require('../../../util/file-util') const { sourcefile } = finalEp.fdef.loc if (Config.maindir && typeof Config.maindir === 'string') { finalEp.filePath = FileUtil.extractRelativePath(sourcefile, Config.maindir) @@ -576,6 +874,10 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { finalEp.filePath = sourcefile } } + // 确保 finalEp 有 ast,completeEntryPoint 可能需要它 + if (!finalEp.ast && finalEp.fdef) { + finalEp.ast = finalEp.fdef + } const entryPoint = completeEntryPoint(finalEp) // 确保 entryPoint.entryPointSymVal.parent 有 field 结构 if ( @@ -586,39 +888,43 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { entryPoint.entryPointSymVal.parent.field = {} } analyzer.entryPoints.push(entryPoint) - } catch (e: any) { - logger.warn(`Error in completeEntryPoint: ${e?.message || e}`) - continue - } - // 注册参数为 source - const funcName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' - const sourceFile = finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' - let scopeFile: string | null = null - if (sourceFile) { - if (Config.maindir && typeof Config.maindir === 'string') { - scopeFile = extractRelativePath(sourceFile, Config.maindir) - } else { - scopeFile = sourceFile + + // 注册参数为 source + const funcName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' + const sourceFile = finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' + let scopeFile: string | null = null + if (sourceFile) { + if (Config.maindir && typeof Config.maindir === 'string') { + scopeFile = extractRelativePath(sourceFile, Config.maindir) + } else { + scopeFile = sourceFile + } } - } - const paramMetas = - (Array.isArray((finalEp as any).params) && (finalEp as any).params.length - ? (finalEp as any).params - : extractParamsFromAst(finalEp.fdef)) || [] - if (paramMetas.length > 0) { - for (const meta of paramMetas) { - if (meta.name === 'self') continue - - this.sourceScope.value.push({ - path: meta.name, - kind: 'PYTHON_INPUT', - scopeFile, - scopeFunc: funcName || 'all', - locStart: meta.locStart, - locEnd: meta.locEnd, - }) + const paramMetas = + (Array.isArray((finalEp as any).params) && (finalEp as any).params.length + ? (finalEp as any).params + : extractParamsFromAst(finalEp.fdef)) || [] + if (paramMetas.length > 0) { + for (const meta of paramMetas) { + if (meta.name === 'self') continue + // 对于路径参数,使用 'all' 以匹配所有文件和位置,因为参数可能在函数定义的不同位置 + const sourceEntry = { + path: meta.name, + kind: 'PYTHON_INPUT', + scopeFile: 'all', // 使用 'all' 以匹配所有文件 + scopeFunc: 'all', // 使用 'all' 以匹配所有函数,因为 handler 方法可能在嵌套作用域中 + locStart: 'all', // 使用 'all' 以匹配所有行号 + locEnd: 'all', // 使用 'all' 以匹配所有行号 + } + this.sourceScope.value.push(sourceEntry) + // 立即注册 source,因为 triggerAtStartOfAnalyze 可能在 entrypoints 收集之前被调用 + this.addSourceTagForSourceScope('PYTHON_INPUT', [sourceEntry]) + } } + } catch (e: any) { + logger.warn(`Error in completeEntryPoint: ${e?.message || e}`) + continue } } } diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index d23b434b..383d28da 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -64,7 +64,7 @@ export function isRequestAttributeExpression(expr: any): boolean { } /** - * + * 用来判断是否是Tornado的请求函数,例如 * @param node * @param targetName */ @@ -104,9 +104,26 @@ export function parseRoutePair(route: any): RoutePair | null { handlerNode = second } else if (route.type === 'CallExpression' && route.callee) { const { callee } = route - const isUrlHelper = - (callee.type === 'Identifier' && callee.name === 'url') || - (callee.type === 'MemberAccess' && AstUtil.prettyPrint(callee).includes('url')) + + /** + * Check if callee is a URL helper function using AST node matching + * Supports: + * - url(...) - simple identifier + * - something.url(...) - member access + * - tornado.web.url(...) - nested member access chain + * This avoids unreliable string-based matching via prettyPrint + */ + const isIdentifierUrlHelper = callee.type === 'Identifier' && callee.name === 'url' + + const isMemberAccessUrlHelper = + callee.type === 'MemberAccess' && + // Check if the final property/member is 'url' + // Supports both 'property' and 'member' fields for different AST representations + ((callee.property && callee.property.type === 'Identifier' && callee.property.name === 'url') || + (callee.member && callee.member.type === 'Identifier' && callee.member.name === 'url')) + + const isUrlHelper = isIdentifierUrlHelper || isMemberAccessUrlHelper + if (isUrlHelper && Array.isArray(route.arguments)) { const [first, second] = route.arguments pathExpr = first @@ -123,21 +140,42 @@ export function parseRoutePair(route: any): RoutePair | null { } /** - * - * @param modulePath - * @param currentFile + * Resolve Python import path to file path + * @param modulePath - The import path (e.g., "handlers.user_handler" or ".handlers.user_handler") + * @param currentFile - The current file path + * @param mainDir - Optional project root directory for absolute imports + * @returns Resolved file path or null */ -export function resolveImportPath(modulePath: string, currentFile: string): string | null { +export function resolveImportPath(modulePath: string, currentFile: string, mainDir?: string): string | null { if (!modulePath) return null + const currentDir = path.dirname(currentFile) const leadingDots = modulePath.match(/^\.+/)?.[0] ?? '' - let baseDir = currentDir + let baseDir: string + if (leadingDots.length > 0) { + // Relative import: resolve from current file's directory baseDir = path.resolve(currentDir, '../'.repeat(leadingDots.length - 1)) + } else if (mainDir) { + // Absolute import: resolve from project root (mainDir) + baseDir = mainDir + } else { + // Fallback for absolute imports when mainDir is not provided. + // This is the original behavior and is likely incorrect. + baseDir = currentDir } + const remainder = modulePath.slice(leadingDots.length) const normalized = remainder ? remainder.split('.').join(path.sep) : '' const resolved = normalized ? path.resolve(baseDir, normalized) : baseDir + + // Check if it's a package (directory with __init__.py) + const fs = require('fs') + if (fs.existsSync(resolved) && fs.statSync(resolved).isDirectory()) { + return path.join(resolved, '__init__.py') + } + + // Regular module file return `${resolved}.py` } diff --git a/src/engine/analyzer/common/analyzer.ts b/src/engine/analyzer/common/analyzer.ts index 924607dd..ce5eb983 100644 --- a/src/engine/analyzer/common/analyzer.ts +++ b/src/engine/analyzer/common/analyzer.ts @@ -312,11 +312,17 @@ class Analyzer extends MemSpace { } return this.recordCheckerFindings() } catch (e) { + console.error(`[DEBUG] Error in analyzeProjectAsync: ${e}`) + if (e && typeof e === 'object' && 'stack' in e) { + console.error(`[DEBUG] Error stack: ${(e as any).stack}`) + } handleException( e, 'Error occurred in analyzer analyzeProjectAsync', 'Error occurred in analyzer analyzeProjectAsync' ) + // Still return findings even if there was an error + return this.recordCheckerFindings() } } diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts index 427ad6df..aa99fada 100644 --- a/src/engine/analyzer/python/common/python-analyzer.ts +++ b/src/engine/analyzer/python/common/python-analyzer.ts @@ -105,19 +105,53 @@ class PythonAnalyzer extends (Analyzer as any) { return true } const hasAnalysised: any[] = [] - for (const entryPoint of entryPoints) { + for (let i = 0; i < entryPoints.length; i++) { + const entryPoint = entryPoints[i] if (entryPoint.type === constValue.ENGIN_START_FUNCALL) { - if ( - hasAnalysised.includes( - `${entryPoint.filePath}.${entryPoint.functionName}/${entryPoint?.entryPointSymVal?._qid}#${entryPoint.entryPointSymVal.ast.parameters}.${entryPoint.attribute}` - ) - ) { + // Serialize parameters properly to avoid [object Object] issue + // Use a custom serializer to handle circular references + const params = entryPoint.entryPointSymVal?.ast?.parameters + let paramsStr = '' + if (params) { + try { + // Try to serialize only the essential parts to avoid circular references + if (Array.isArray(params)) { + paramsStr = JSON.stringify( + params.map((p: any) => ({ + id: p?.id?.name || p?.id, + name: p?.name, + })) + ) + } else if (typeof params === 'object') { + // Extract only non-circular fields + const keys = Object.keys(params) + const simpleParams: any = {} + for (const key of keys) { + const val = params[key] + if (val && typeof val === 'object' && val.id) { + simpleParams[key] = { id: val.id?.name || val.id } + } else if (typeof val !== 'object' || val === null) { + simpleParams[key] = val + } + } + paramsStr = JSON.stringify(simpleParams) + } else { + paramsStr = String(params) + } + } catch (e) { + // Fallback: use a simple string representation + paramsStr = params.toString ? params.toString() : String(params) + } + } + // Include parent class name in key to distinguish handlers with same method name + const parentName = entryPoint?.entryPointSymVal?.parent?.id || entryPoint?.entryPointSymVal?.parent?.name || '' + const qid = entryPoint?.entryPointSymVal?._qid || '' + const entryKey = `${entryPoint.filePath}.${entryPoint.functionName}/${parentName}/${qid}#${paramsStr}.${entryPoint.attribute}` + if (hasAnalysised.includes(entryKey)) { continue } - hasAnalysised.push( - `${entryPoint.filePath}.${entryPoint.functionName}/${entryPoint?.entryPointSymVal?._qid}#${entryPoint.entryPointSymVal.ast.parameters}.${entryPoint.attribute}` - ) + hasAnalysised.push(entryKey) entryPointConfig.setCurrentEntryPoint(entryPoint) logger.info( 'EntryPoint [%s.%s] is executing', @@ -176,11 +210,13 @@ class PythonAnalyzer extends (Analyzer as any) { entryPoint.entryPointSymVal?.parent ) } catch (e) { + console.error(`[DEBUG] Error executing entrypoint [${i}]: ${e}`) handleException( e, `[${entryPoint.entryPointSymVal?.ast?.id?.name} symbolInterpret failed. Exception message saved in error log file`, `[${entryPoint.entryPointSymVal?.ast?.id?.name} symbolInterpret failed. Exception message saved in error log file` ) + // Continue to next entrypoint instead of breaking } this.checkerManager.checkAtSymbolInterpretOfEntryPointAfter(this, null, null, null, null) } else if (entryPoint.type === constValue.ENGIN_START_FILE_BEGIN) { From b08beadf08557de08b4a7fb8c139e60114bbc51c Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 22 Dec 2025 02:55:36 -0800 Subject: [PATCH 09/35] Fix: update tornado framework --- src/checker/common/rules-basic-handler.ts | 6 ++---- src/checker/taint/python/tornado-taint-checker.ts | 2 ++ src/checker/taint/python/tornado-util.ts | 2 ++ src/engine/analyzer/common/analyzer.ts | 4 ---- src/engine/analyzer/python/common/python-analyzer.ts | 1 - 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/checker/common/rules-basic-handler.ts b/src/checker/common/rules-basic-handler.ts index 865dc894..86d3d69c 100644 --- a/src/checker/common/rules-basic-handler.ts +++ b/src/checker/common/rules-basic-handler.ts @@ -18,10 +18,8 @@ let preprocessReady: boolean = false * * @param ruleConfigPath */ -function getRules(ruleConfigPath?: string): any[] { - // 如果传入了 ruleConfigPath,或者 config.ruleConfigFile 已设置但 rules 未加载,则重新加载 - const currentRuleConfigFile = ruleConfigPath || config.ruleConfigFile - if (!rules || (currentRuleConfigFile && !rules)) { +function getRules(ruleConfigPath: string): any[] { + if (!rules) { try { if (ruleConfigPath) { rules = FileUtil.loadJSONfile(ruleConfigPath) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 08a57606..34548bd0 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -161,6 +161,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param _info */ triggerAtCompileUnit(analyzer: any, scope: any, node: any, _state: any, _info: any): boolean | undefined { + if (Config.entryPointMode === 'ONLY_CUSTOM') return const fileName = node.loc?.sourcefile if (!fileName) return @@ -232,6 +233,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const { fclos, argvalues } = info if (!fclos || !argvalues) return + if (Config.entryPointMode === 'ONLY_CUSTOM') return const fileName = node.loc?.sourcefile if (!fileName) return diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index 383d28da..cf219aee 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -32,6 +32,8 @@ export const tornadoSourceAPIs = new Set([ 'get_body_arguments', 'get_cookie', 'get_secure_cookie', + 'get_arguments', + 'get_json_body', ]) export const passthroughFuncs = new Set(['decode', 'strip', 'replace', 'lower', 'upper', 'split']) diff --git a/src/engine/analyzer/common/analyzer.ts b/src/engine/analyzer/common/analyzer.ts index ce5eb983..1c923196 100644 --- a/src/engine/analyzer/common/analyzer.ts +++ b/src/engine/analyzer/common/analyzer.ts @@ -312,10 +312,6 @@ class Analyzer extends MemSpace { } return this.recordCheckerFindings() } catch (e) { - console.error(`[DEBUG] Error in analyzeProjectAsync: ${e}`) - if (e && typeof e === 'object' && 'stack' in e) { - console.error(`[DEBUG] Error stack: ${(e as any).stack}`) - } handleException( e, 'Error occurred in analyzer analyzeProjectAsync', diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts index aa99fada..ede8c2bd 100644 --- a/src/engine/analyzer/python/common/python-analyzer.ts +++ b/src/engine/analyzer/python/common/python-analyzer.ts @@ -210,7 +210,6 @@ class PythonAnalyzer extends (Analyzer as any) { entryPoint.entryPointSymVal?.parent ) } catch (e) { - console.error(`[DEBUG] Error executing entrypoint [${i}]: ${e}`) handleException( e, `[${entryPoint.entryPointSymVal?.ast?.id?.name} symbolInterpret failed. Exception message saved in error log file`, From d23f87b4064ad8242b401833d02503301203fdda Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 22 Dec 2025 03:07:33 -0800 Subject: [PATCH 10/35] Fix: update tornado framework --- src/checker/taint/python/tornado-taint-checker.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 34548bd0..d13cda6a 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -531,6 +531,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void { + if (Config.entryPointMode === 'ONLY_CUSTOM') return const { res } = info // 重用 isRequestAttributeAccess 工具函数,避免重复逻辑并保持行为一致 From 9f58641bd9e217f5acb6c08084d5cd654c54b084 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 22 Dec 2025 03:14:09 -0800 Subject: [PATCH 11/35] Fix: update tornado --- src/checker/taint/common-kit/source-util.ts | 7 +------ src/engine/analyzer/common/analyzer.ts | 2 -- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/checker/taint/common-kit/source-util.ts b/src/checker/taint/common-kit/source-util.ts index 9c9778c5..1eb2fa15 100644 --- a/src/checker/taint/common-kit/source-util.ts +++ b/src/checker/taint/common-kit/source-util.ts @@ -171,12 +171,7 @@ function introduceTaintAtIdentifier(node: any, res: any, sourceScopeVal: any): a markTaintSource(res, { path: node, kind: val.kind }) } } - } else if ( - node.loc.sourcefile && - node.loc.sourcefile.includes(val.scopeFile) && - nodeStart >= valStart && - nodeEnd <= valEnd - ) { + } else if (node.loc.sourcefile.includes(val.scopeFile) && nodeStart >= valStart && nodeEnd <= valEnd) { markTaintSource(res, { path: node, kind: val.kind }) } } diff --git a/src/engine/analyzer/common/analyzer.ts b/src/engine/analyzer/common/analyzer.ts index 1c923196..924607dd 100644 --- a/src/engine/analyzer/common/analyzer.ts +++ b/src/engine/analyzer/common/analyzer.ts @@ -317,8 +317,6 @@ class Analyzer extends MemSpace { 'Error occurred in analyzer analyzeProjectAsync', 'Error occurred in analyzer analyzeProjectAsync' ) - // Still return findings even if there was an error - return this.recordCheckerFindings() } } From fbd597844d0cbbfc316f73232144b5b222670169 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 22 Dec 2025 03:19:07 -0800 Subject: [PATCH 12/35] Fix: update tornado --- src/checker/common/checker.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/checker/common/checker.ts b/src/checker/common/checker.ts index db557105..2cf693fc 100644 --- a/src/checker/common/checker.ts +++ b/src/checker/common/checker.ts @@ -40,10 +40,7 @@ class CheckerBase { */ loadRuleConfig(checker: any): void { const checkerId = checker.getCheckerId() - // 路径从 checker/common 回到项目根的 config - const Config = require('../../config') - // 传入 Config.ruleConfigFile,如果为空则让 getRules 从 Config 读取 - const ruleConfigContent = BasicRuleHandler.getRules(Config.ruleConfigFile) + const ruleConfigContent = BasicRuleHandler.getRules() if (Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { for (const ruleConfig of ruleConfigContent) { if ( From 6c0fcd48e64774de6abde0b884186dd3b5a17b50 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Fri, 26 Dec 2025 22:44:37 -0800 Subject: [PATCH 13/35] update tornado --- .../python/python-taint-abstract-checker.ts | 31 +------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/src/checker/taint/python/python-taint-abstract-checker.ts b/src/checker/taint/python/python-taint-abstract-checker.ts index 30e00646..907d9e2a 100644 --- a/src/checker/taint/python/python-taint-abstract-checker.ts +++ b/src/checker/taint/python/python-taint-abstract-checker.ts @@ -21,36 +21,7 @@ class PythonTaintAbstractChecker extends TaintChecker { * @param info */ triggerAtIdentifier(analyzer: any, scope: any, node: any, state: any, info: any) { - // Try normal matching first IntroduceTaint.introduceTaintAtIdentifier(node, info.res, this.sourceScope.value) - - // If preprocess is not ready, still mark parameters that are in sourceScope - const BasicRuleHandler = require('../../common/rules-basic-handler') - if (!BasicRuleHandler.getPreprocessReady() && this.sourceScope.value && this.sourceScope.value.length > 0) { - for (const source of this.sourceScope.value) { - // Check if kind matches (could be string or array) - const kindMatches = - source.kind === 'PYTHON_INPUT' || (Array.isArray(source.kind) && source.kind.includes('PYTHON_INPUT')) - - if (source.path === node.name && kindMatches) { - // For path parameters, we use 'all' for all scope conditions, so always match - const shouldMatch = - (source.scopeFile === 'all' || !source.scopeFile) && - (source.scopeFunc === 'all' || !source.scopeFunc) && - (source.locStart === 'all' || !source.locStart) && - (source.locEnd === 'all' || !source.locEnd) - - if (shouldMatch && (!info.res._tags || info.res._tags.size === 0)) { - if (!info.res._tags) { - info.res._tags = new Set() - } - info.res._tags.add('PYTHON_INPUT') - info.res.hasTagRec = true - break - } - } - } - } } /** @@ -216,4 +187,4 @@ class PythonTaintAbstractChecker extends TaintChecker { } } -module.exports = PythonTaintAbstractChecker +module.exports = PythonTaintAbstractChecker \ No newline at end of file From 4aad909ddef99299fced20f2afe55fab28143aea Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Sat, 27 Dec 2025 23:47:05 -0800 Subject: [PATCH 14/35] Fix: update tornado --- .../python/python-taint-abstract-checker.ts | 1 + .../taint/python/tornado-taint-checker.ts | 383 ++++++++++++------ src/checker/taint/python/tornado-util.ts | 54 ++- .../analyzer/python/common/python-analyzer.ts | 15 + 4 files changed, 338 insertions(+), 115 deletions(-) diff --git a/src/checker/taint/python/python-taint-abstract-checker.ts b/src/checker/taint/python/python-taint-abstract-checker.ts index 907d9e2a..612837bf 100644 --- a/src/checker/taint/python/python-taint-abstract-checker.ts +++ b/src/checker/taint/python/python-taint-abstract-checker.ts @@ -70,6 +70,7 @@ class PythonTaintAbstractChecker extends TaintChecker { let rule = matchSinkAtFuncCall(node, fclos, rules) rule = rule.length > 0 ? rule[0] : null + if (rule) { this.findArgsAndAddNewFinding(node, argvalues, fclos, rule) } diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index d13cda6a..c2f71aa8 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -19,7 +19,9 @@ const { passthroughFuncs, isRequestAttributeExpression, isRequestAttributeAccess, + extractTornadoParams, } = require('./tornado-util') +const { markTaintSource } = require('../common-kit/source-util') // Type definitions (moved from import to avoid module resolution issues) interface FileCache { @@ -47,14 +49,11 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { /** * Helper function to mark a value as tainted * @param value + * @param node Optional node for trace */ - private markAsTainted(value: any): void { + private markAsTainted(value: any, node?: any): void { if (!value) return - if (!value._tags) { - value._tags = new Set() - } - value._tags.add('PYTHON_INPUT') - value.hasTagRec = true + markTaintSource(value, { path: node || value.ast || {}, kind: 'PYTHON_INPUT' }) } /** @@ -77,10 +76,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void { - const ruleConfigFile: string | null = null - let ruleConfigContent: any[] | null = null - - const currentRuleConfigFile = Config.ruleConfigFile || this.getRuleConfigFileFromArgs() + const currentRuleConfigFile = Config.ruleConfigFile + let ruleConfigContent: any[] = [] if (currentRuleConfigFile && currentRuleConfigFile !== '') { try { @@ -119,12 +116,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (matches) { mergeAToB(ruleConfig, this.checkerRuleConfigContent) - - // 强制确保sinks被正确设置 - if (ruleConfig.sinks?.FuncCallTaintSink) { - this.checkerRuleConfigContent.sinks = this.checkerRuleConfigContent.sinks || {} - this.checkerRuleConfigContent.sinks.FuncCallTaintSink = ruleConfig.sinks.FuncCallTaintSink - } } } } @@ -135,22 +126,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent) } - /** - * Get ruleConfigFile from command line arguments (cached) - * @returns The resolved ruleConfigFile path or empty string - */ - private getRuleConfigFileFromArgs(): string { - let { ruleConfigFile } = Config - if (!ruleConfigFile || ruleConfigFile === '') { - const args = process.argv - const ruleConfigIndex = args.indexOf('--ruleConfigFile') - if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { - ruleConfigFile = args[ruleConfigIndex + 1] - ruleConfigFile = path.isAbsolute(ruleConfigFile) ? ruleConfigFile : path.resolve(process.cwd(), ruleConfigFile) - } - } - return ruleConfigFile || '' - } /** * Build a light-weight file cache for quick lookup. @@ -160,7 +135,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param _state * @param _info */ - triggerAtCompileUnit(analyzer: any, scope: any, node: any, _state: any, _info: any): boolean | undefined { + triggerAtCompileUnit(analyzer: any, scope: any, node: any, state: any, _info: any): boolean | undefined { if (Config.entryPointMode === 'ONLY_CUSTOM') return const fileName = node.loc?.sourcefile if (!fileName) return @@ -171,10 +146,15 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { importedSymbols: new Map(), } + // First pass: collect all variables, classes, and assignments + const allAssignments: Map = new Map() + const applicationCalls: any[] = [] + AstUtil.visit(node, { AssignmentExpression: (n: any) => { if (n.left?.type === 'Identifier' && n.left.name) { cache.vars.set(n.left.name, { value: n.right, file: fileName }) + allAssignments.set(n.left.name, n.right) } return true }, @@ -198,6 +178,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } if (n.init) { cache.vars.set(localName, { value: n.init, file: fileName }) + allAssignments.set(localName, n.init) } return true }, @@ -208,11 +189,185 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } return true }, + // Collect Tornado Application calls + CallExpression: (n: any) => { + if (this.isTornadoApplicationCallAst(n)) { + applicationCalls.push(n) + } + return true + }, }) this.fileCache.set(fileName, cache) + + // Second pass: process Application calls with fully populated variable map + const routesByHandler: Map = new Map() + + for (const callNode of applicationCalls) { + // Extract handlers argument + const handlersArg = this.extractHandlersArgFromCallAst(callNode) + if (!handlersArg) continue + + // Parse routes from handlers list, using allAssignments for variable resolution + const routes = this.parseRoutesFromAstWithAssignments(handlersArg, fileName, allAssignments) + + for (const route of routes) { + if (!routesByHandler.has(route.handlerName)) { + routesByHandler.set(route.handlerName, []) + } + const classAst = cache.classes.get(route.handlerName)?.value + if (classAst) { + routesByHandler.get(route.handlerName)!.push({ + classAst, + urlPattern: route.path, + }) + } + } + } + + // Register entrypoints from detected routes + for (const [handlerName, routeInfos] of routesByHandler) { + for (const routeInfo of routeInfos) { + const handlerSymVal = this.buildClassSymbol(routeInfo.classAst) + this.emitHandlerEntrypoints(analyzer, handlerSymVal, routeInfo.urlPattern, routeInfo.classAst, scope, state) + } + } } + /** + * Parse routes from handlers list AST with assignment map for variable resolution + * @param handlersAst - AST node for handlers + * @param fileName - Current file name + * @param assignments - Map of variable name to value AST + */ + private parseRoutesFromAstWithAssignments( + handlersAst: any, + fileName: string, + assignments: Map + ): RoutePair[] { + const routes: RoutePair[] = [] + if (!handlersAst) return routes + + // Handle identifier reference to a variable + if (handlersAst.type === 'Identifier') { + const valueAst = assignments.get(handlersAst.name) + if (valueAst) { + return this.parseRoutesFromAstWithAssignments(valueAst, fileName, assignments) + } + return routes + } + + // Handle ObjectExpression (Python list parsed as object with numeric keys) + if (handlersAst.type === 'ObjectExpression') { + const properties = handlersAst.properties || [] + for (const prop of properties) { + const valueNode = prop.value + if (valueNode?.type === 'TupleExpression') { + const pair = this.parseRouteTuple(valueNode) + if (pair) { + routes.push({ ...pair, file: fileName }) + } + } + } + return routes + } + + // Handle list/array expression + if (handlersAst.type === 'ArrayExpression' || handlersAst.type === 'ListExpression') { + const elements = handlersAst.elements || [] + for (const element of elements) { + const pair = parseRoutePair(element) + if (pair) { + routes.push({ ...pair, file: fileName }) + } + } + } + + return routes + } + + /** + * Parse a route tuple AST into a RoutePair + * @param tupleAst - TupleExpression AST node + */ + private parseRouteTuple(tupleAst: any): RoutePair | null { + if (!tupleAst || tupleAst.type !== 'TupleExpression') return null + const elements = tupleAst.elements || [] + if (elements.length < 2) return null + + const pathNode = elements[0] + const handlerNode = elements[1] + + const pathValue = pathNode?.type === 'Literal' ? pathNode.value : null + const handlerName = handlerNode?.type === 'Identifier' ? handlerNode.name : null + + if (typeof pathValue === 'string' && handlerName) { + return { path: pathValue, handlerName } + } + return null + } + + /** + * Check if a CallExpression AST node is a Tornado Application call + * Supports patterns: + * - tornado.web.Application.__init__(self, handlers, ...) + * - Application.__init__(self, handlers, ...) + * - tornado.web.Application(handlers, ...) + * - Application(handlers, ...) + * @param node - CallExpression AST node + */ + private isTornadoApplicationCallAst(node: any): boolean { + if (!node || node.type !== 'CallExpression' || !node.callee) return false + const { callee } = node + // logger.info(`Checking CallExpression for Tornado Application: ${AstUtil.prettyPrint(callee)}`) + + // Pattern 1: Direct Application call - Application(...) + if (callee.type === 'Identifier' && callee.name === 'Application') { + return true + } + + // Pattern 2: MemberAccess ending with Application - tornado.web.Application(...) + if (callee.type === 'MemberAccess' && callee.property?.name === 'Application') { + return true + } + + // Pattern 3: __init__ call on Application - tornado.web.Application.__init__(...) + if (callee.type === 'MemberAccess' && callee.property?.name === '__init__') { + let current = callee.object + while (current) { + if (current.type === 'Identifier' && current.name === 'Application') { + return true + } + if (current.type === 'MemberAccess' && current.property?.name === 'Application') { + return true + } + current = current.type === 'MemberAccess' ? current.object : null + } + } + + return false + } + + /** + * Extract handlers argument from Tornado Application call AST + * @param node - CallExpression AST node + */ + private extractHandlersArgFromCallAst(node: any): any { + if (!node.arguments || node.arguments.length === 0) return null + const { callee } = node + + // Check if this is an __init__ call (first arg is self) + const isInitCall = callee?.type === 'MemberAccess' && callee?.property?.name === '__init__' + + if (isInitCall && node.arguments.length >= 2) { + // __init__(self, handlers, ...) -> handlers is at index 1 + return node.arguments[1] + } + // Application(handlers, ...) -> handlers is at index 0 + return node.arguments[0] + } + + /** * On function call before execution, use argvalues to get resolved symbol values * This replaces the old AST-based triggerAtFuncCallSyntax approach. @@ -243,8 +398,17 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const isAddHandlers = isTornadoCall(node, 'add_handlers') if (isApp) { - // Application(...) -> first arg is routes - ;[routeListArgValue] = argvalues + // Check if this is an __init__ call pattern: Application.__init__(self, handlers, ...) + // In this case, handlers is the second argument (index 1) + const callee = node.callee + const isInitCall = callee?.type === 'MemberAccess' && callee?.property?.name === '__init__' + if (isInitCall) { + // __init__(self, handlers, ...) -> handlers is at index 1 + routeListArgValue = argvalues[1] + } else { + // Application(handlers, ...) -> handlers is at index 0 + ;[routeListArgValue] = argvalues + } } else if (isAddHandlers) { // add_handlers(host, routes) -> second arg is routes ;[, routeListArgValue] = argvalues @@ -349,80 +513,49 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } /** - * Override checkByNameMatch to support partial matching (e.g., os.system matches syslib_from.os.system) - * @param node - * @param fclos - * @param argvalues + * Proactive Sink Matching + * Overrides base class to add flexible matching for common Python sinks (DB, Shell) + * that might be missed due to incomplete type resolution. */ - checkByNameMatch(node: any, fclos: any, argvalues: any) { - // 如果sinks配置为空,尝试从规则配置文件加载(延迟加载) - if (!this.checkerRuleConfigContent.sinks?.FuncCallTaintSink) { - const Config = require('../../../config') - const FileUtil = require('../../../util/file-util') - const path = require('path') - const { mergeAToB } = require('../../../util/common-util') - - let currentRuleConfigFile = Config.ruleConfigFile - if (!currentRuleConfigFile || currentRuleConfigFile === '') { - const args = process.argv - const ruleConfigIndex = args.indexOf('--ruleConfigFile') - if (ruleConfigIndex >= 0 && ruleConfigIndex < args.length - 1) { - currentRuleConfigFile = args[ruleConfigIndex + 1] - currentRuleConfigFile = path.isAbsolute(currentRuleConfigFile) - ? currentRuleConfigFile - : path.resolve(process.cwd(), currentRuleConfigFile) - } - } - - if (currentRuleConfigFile && currentRuleConfigFile !== '') { - try { - const ruleConfigContent = FileUtil.loadJSONfile(currentRuleConfigFile) - if (ruleConfigContent && Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { - const checkerId = this.getCheckerId() - for (const ruleConfig of ruleConfigContent) { - const checkerIds = Array.isArray(ruleConfig.checkerIds) - ? ruleConfig.checkerIds - : ruleConfig.checkerIds - ? [ruleConfig.checkerIds] - : [] - if (checkerIds.includes(checkerId)) { - mergeAToB(ruleConfig, this.checkerRuleConfigContent) - if (ruleConfig.sinks?.FuncCallTaintSink) { - this.checkerRuleConfigContent.sinks = this.checkerRuleConfigContent.sinks || {} - this.checkerRuleConfigContent.sinks.FuncCallTaintSink = ruleConfig.sinks.FuncCallTaintSink - } - } - } - } - } catch (e) { - // 忽略错误 - } - } + checkByNameMatch(node: any, fclos: any, argvalues: any): void { + // 1. Try standard matching first + super.checkByNameMatch(node, fclos, argvalues) + + // 2. Proactive matching for critical sinks if no finding was generated yet + // We look for common method names regardless of the receiver's inferred type + const funcName = node.callee?.property?.name || node.callee?.name + if (!funcName) return + + const proactiveSinks: Record = { + execute: 'PythonSqlInjection', + popen: 'PythonCommandInjection', + system: 'PythonCommandInjection', } - const rules = this.checkerRuleConfigContent.sinks?.FuncCallTaintSink - const callFull = this.getObj(fclos) - - // 如果还是没有rules,直接调用基类方法(基类可能会从其他地方获取规则) - if (!rules || rules.length === 0) { - super.checkByNameMatch(node, fclos, argvalues) - return - } - - if (!callFull) { - super.checkByNameMatch(node, fclos, argvalues) - return - } - // 检查是否有匹配的规则(支持部分匹配) - // 只匹配精确匹配或带点分隔符的部分匹配(如 os.system 匹配 syslib_from.os.system) - // 移除 callFull.endsWith(rule.fsig) 以避免误报(如 "system" 匹配 "test_system") - const matchedRule = rules.find((rule: any) => { - if (typeof rule.fsig !== 'string') return false - return rule.fsig === callFull || callFull.endsWith(`.${rule.fsig}`) - }) - // 如果有匹配的规则,调用基类方法处理 - if (matchedRule) { - super.checkByNameMatch(node, fclos, argvalues) + if (proactiveSinks[funcName]) { + // Check if any argument is tainted + const taintedArg = argvalues.find((arg: any) => arg && (arg.taint || arg.hasTagRec || arg._tags?.has('PYTHON_INPUT'))) + if (taintedArg) { + // Construct a manual finding if not already found + const attribute = proactiveSinks[funcName] + const ruleName = `${funcName} (Proactive Match)\nSINK Attribute: ${attribute}` + + const taintFlowFinding = this.buildTaintFinding( + this.getCheckerId(), + this.desc, + node, + taintedArg, + fclos, + 'PYTHON_INPUT', + ruleName, + [] // No specific sanitizers for proactive match + ) + + const TaintOutputStrategy = require('../../common/output/taint-output-strategy') + if (TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) { + this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategy.outputStrategyId) + } + } } } @@ -437,7 +570,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void { // 先调用基类方法处理规则配置中的 source super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) - + if (Config.entryPointMode === 'ONLY_CUSTOM') return const { fclos, ret } = info if (!fclos || !ret) { return @@ -453,7 +586,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 检查是否是 tornado source API 调用(如 get_argument) if (funcName && tornadoSourceAPIs.has(funcName)) { - this.markAsTainted(ret) + this.markAsTainted(ret, node) } // 处理 passthrough 函数(如 decode, strip 等) @@ -466,13 +599,13 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { isRequestAttributeExpression(node.callee.object) ) { // 直接标记返回值为 source(因为 self.request.body/query/headers/cookies 等是 source) - this.markAsTainted(ret) + this.markAsTainted(ret, node) return // 已经标记,不需要再检查 receiver } // 检查 receiver 是否被污染 const receiver = fclos?.object || fclos?._this if (receiver && (receiver.taint || receiver.hasTagRec || receiver._tags?.has('PYTHON_INPUT'))) { - this.markAsTainted(ret) + this.markAsTainted(ret, node) } } } @@ -512,7 +645,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // Process the parameter to get its symbol value const paramSymVal = analyzer.processInstruction(entryPoint.entryPointSymVal, param.id || param, state) if (paramSymVal) { - this.markAsTainted(paramSymVal) + this.markAsTainted(paramSymVal, param.id || param) } } catch (e) { // Ignore errors @@ -536,7 +669,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 重用 isRequestAttributeAccess 工具函数,避免重复逻辑并保持行为一致 if (isRequestAttributeAccess(node)) { - this.markAsTainted(res) + this.markAsTainted(res, node) } } @@ -866,7 +999,9 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { finalEp.ast = finalEp.fdef } if (!finalEp.functionName) { - finalEp.functionName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' + const rawFuncName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' + const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) + finalEp.functionName = handlerName ? `${handlerName}.${rawFuncName}` : rawFuncName } // 确保 finalEp 有 filePath if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { @@ -882,6 +1017,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { finalEp.ast = finalEp.fdef } const entryPoint = completeEntryPoint(finalEp) + entryPoint.urlPattern = urlPattern + entryPoint.handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) // 确保 entryPoint.entryPointSymVal.parent 有 field 结构 if ( entryPoint.entryPointSymVal?.parent && @@ -904,13 +1041,30 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } } + const params = extractTornadoParams(urlPattern) const paramMetas = (Array.isArray((finalEp as any).params) && (finalEp as any).params.length ? (finalEp as any).params : extractParamsFromAst(finalEp.fdef)) || [] if (paramMetas.length > 0) { + let positionalIdx = 0 for (const meta of paramMetas) { if (meta.name === 'self') continue + + let isSource = false + if (params.named.length > 0) { + if (params.named.includes(meta.name)) { + isSource = true + } + } else if (params.positionalCount > 0) { + if (positionalIdx < params.positionalCount) { + isSource = true + } + } + positionalIdx++ + + if (!isSource) continue + // 对于路径参数,使用 'all' 以匹配所有文件和位置,因为参数可能在函数定义的不同位置 const sourceEntry = { path: meta.name, @@ -939,6 +1093,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { private buildClassSymbol(classNode: any): any { const value: any = {} const members = classNode.body || [] + const className = classNode.name?.name || classNode.id?.name || 'UnknownClass' members.forEach((member: any) => { if (member.type !== 'FunctionDefinition') return const memberName = member.name?.name || member.name?.id?.name || member.id?.name @@ -951,7 +1106,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } } }) - return { vtype: 'class', value } + return { vtype: 'class', value, ast: classNode } } } diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index cf219aee..fe34fcd1 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -49,7 +49,11 @@ export function isRequestAttributeAccess(node: any): boolean { if (inner?.type !== 'MemberAccess') return false const baseName = inner.object?.name const requestName = inner.property?.name - return baseName === 'self' && requestName === 'request' && ['body', 'query', 'headers', 'cookies'].includes(propName) + return ( + baseName === 'self' && + requestName === 'request' && + ['body', 'query', 'headers', 'cookies', 'files', 'uri', 'path', 'arguments', 'query_arguments', 'body_arguments'].includes(propName) + ) } /** @@ -79,6 +83,22 @@ export function isTornadoCall(node: any, targetName: string): boolean { if (callee.type === 'Identifier' && callee.name === targetName) { return true } + // Handle pattern: tornado.web.Application.__init__(self, handlers, ...) + // In this case, we need to check if 'Application' is in the member access chain + // and the final property is '__init__' + if (callee.type === 'MemberAccess' && callee.property?.name === '__init__') { + // Check if any part of the member access chain matches the targetName + let current = callee.object + while (current) { + if (current.type === 'Identifier' && current.name === targetName) { + return true + } + if (current.type === 'MemberAccess' && current.property?.name === targetName) { + return true + } + current = current.type === 'MemberAccess' ? current.object : null + } + } return false } @@ -228,3 +248,35 @@ export function extractParamsFromAst(funcNode: any): ParamMeta[] { } return result } + +/** + * Extract named parameter names or positional count from Tornado URL patterns (regex) + * Supports pattern like (?P...) or (...) + * @param pattern - Tornado URL regex pattern + */ +export function extractTornadoParams(pattern: string): { named: string[]; positionalCount: number } { + if (!pattern) return { named: [], positionalCount: 0 } + + const namedGroups: string[] = [] + const namedRegex = /\(\?P<(\w+)>/g + let match: RegExpExecArray | null + while ((match = namedRegex.exec(pattern)) !== null) { + namedGroups.push(match[1]) + } + + if (namedGroups.length > 0) { + return { named: namedGroups, positionalCount: 0 } + } + + // Count positional groups. + // Remove escaped parens first. + const cleaned = pattern.replace(/\\\(|\\\)/g, '') + let positionalCount = 0 + // Matches '(' NOT followed by '?' (which covers (?:, (?P<, (?=, (?!, etc.) + const positionalRegex = /\((?!\?)/g + while (positionalRegex.exec(cleaned) !== null) { + positionalCount++ + } + + return { named: [], positionalCount } +} diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts index ede8c2bd..8bb8f48a 100644 --- a/src/engine/analyzer/python/common/python-analyzer.ts +++ b/src/engine/analyzer/python/common/python-analyzer.ts @@ -273,6 +273,21 @@ class PythonAnalyzer extends (Analyzer as any) { const has_tag = (new_left && new_left.hasTagRec) || (new_right && new_right.hasTagRec) if (has_tag) { new_node.hasTagRec = has_tag + // Propagate tags and trace + new_node._tags = new Set() + if (new_left?._tags) { + for (const t of new_left._tags) new_node._tags.add(t) + } + if (new_right?._tags) { + for (const t of new_right._tags) new_node._tags.add(t) + } + + // Merge traces if possible, or just take one if not + if (new_left?.trace || new_right?.trace) { + new_node.trace = [] + if (new_left?.trace) new_node.trace.push(...new_left.trace) + if (new_right?.trace) new_node.trace.push(...new_right.trace) + } } if (this.checkerManager && (this.checkerManager as any).checkAtBinaryOperation) From 92b1953b574cf7cd1a1b88a95f2e5407ec3baa30 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Sun, 4 Jan 2026 23:39:30 -0800 Subject: [PATCH 15/35] Fix: update tornado checker --- .../python/python-taint-abstract-checker.ts | 3 +- .../taint/python/tornado-taint-checker.ts | 20 ++++--- src/checker/taint/python/tornado-util.ts | 15 ++++- .../analyzer/python/common/python-analyzer.ts | 55 ++++--------------- 4 files changed, 36 insertions(+), 57 deletions(-) diff --git a/src/checker/taint/python/python-taint-abstract-checker.ts b/src/checker/taint/python/python-taint-abstract-checker.ts index 612837bf..f4ac5fa1 100644 --- a/src/checker/taint/python/python-taint-abstract-checker.ts +++ b/src/checker/taint/python/python-taint-abstract-checker.ts @@ -70,7 +70,6 @@ class PythonTaintAbstractChecker extends TaintChecker { let rule = matchSinkAtFuncCall(node, fclos, rules) rule = rule.length > 0 ? rule[0] : null - if (rule) { this.findArgsAndAddNewFinding(node, argvalues, fclos, rule) } @@ -188,4 +187,4 @@ class PythonTaintAbstractChecker extends TaintChecker { } } -module.exports = PythonTaintAbstractChecker \ No newline at end of file +module.exports = PythonTaintAbstractChecker diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index c2f71aa8..0f02d340 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -126,13 +126,13 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent) } - /** * Build a light-weight file cache for quick lookup. * @param analyzer * @param scope * @param node * @param _state + * @param state * @param _info */ triggerAtCompileUnit(analyzer: any, scope: any, node: any, state: any, _info: any): boolean | undefined { @@ -210,7 +210,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // Parse routes from handlers list, using allAssignments for variable resolution const routes = this.parseRoutesFromAstWithAssignments(handlersArg, fileName, allAssignments) - + for (const route of routes) { if (!routesByHandler.has(route.handlerName)) { routesByHandler.set(route.handlerName, []) @@ -358,7 +358,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // Check if this is an __init__ call (first arg is self) const isInitCall = callee?.type === 'MemberAccess' && callee?.property?.name === '__init__' - + if (isInitCall && node.arguments.length >= 2) { // __init__(self, handlers, ...) -> handlers is at index 1 return node.arguments[1] @@ -367,7 +367,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { return node.arguments[0] } - /** * On function call before execution, use argvalues to get resolved symbol values * This replaces the old AST-based triggerAtFuncCallSyntax approach. @@ -400,7 +399,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (isApp) { // Check if this is an __init__ call pattern: Application.__init__(self, handlers, ...) // In this case, handlers is the second argument (index 1) - const callee = node.callee + const { callee } = node const isInitCall = callee?.type === 'MemberAccess' && callee?.property?.name === '__init__' if (isInitCall) { // __init__(self, handlers, ...) -> handlers is at index 1 @@ -516,6 +515,9 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * Proactive Sink Matching * Overrides base class to add flexible matching for common Python sinks (DB, Shell) * that might be missed due to incomplete type resolution. + * @param node + * @param fclos + * @param argvalues */ checkByNameMatch(node: any, fclos: any, argvalues: any): void { // 1. Try standard matching first @@ -534,12 +536,14 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (proactiveSinks[funcName]) { // Check if any argument is tainted - const taintedArg = argvalues.find((arg: any) => arg && (arg.taint || arg.hasTagRec || arg._tags?.has('PYTHON_INPUT'))) + const taintedArg = argvalues.find( + (arg: any) => arg && (arg.taint || arg.hasTagRec || arg._tags?.has('PYTHON_INPUT')) + ) if (taintedArg) { // Construct a manual finding if not already found const attribute = proactiveSinks[funcName] const ruleName = `${funcName} (Proactive Match)\nSINK Attribute: ${attribute}` - + const taintFlowFinding = this.buildTaintFinding( this.getCheckerId(), this.desc, @@ -550,7 +554,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { ruleName, [] // No specific sanitizers for proactive match ) - + const TaintOutputStrategy = require('../../common/output/taint-output-strategy') if (TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) { this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategy.outputStrategyId) diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index fe34fcd1..e0e800e0 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -52,7 +52,18 @@ export function isRequestAttributeAccess(node: any): boolean { return ( baseName === 'self' && requestName === 'request' && - ['body', 'query', 'headers', 'cookies', 'files', 'uri', 'path', 'arguments', 'query_arguments', 'body_arguments'].includes(propName) + [ + 'body', + 'query', + 'headers', + 'cookies', + 'files', + 'uri', + 'path', + 'arguments', + 'query_arguments', + 'body_arguments', + ].includes(propName) ) } @@ -256,7 +267,7 @@ export function extractParamsFromAst(funcNode: any): ParamMeta[] { */ export function extractTornadoParams(pattern: string): { named: string[]; positionalCount: number } { if (!pattern) return { named: [], positionalCount: 0 } - + const namedGroups: string[] = [] const namedRegex = /\(\?P<(\w+)>/g let match: RegExpExecArray | null diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts index 8bb8f48a..bc0af018 100644 --- a/src/engine/analyzer/python/common/python-analyzer.ts +++ b/src/engine/analyzer/python/common/python-analyzer.ts @@ -105,53 +105,19 @@ class PythonAnalyzer extends (Analyzer as any) { return true } const hasAnalysised: any[] = [] - for (let i = 0; i < entryPoints.length; i++) { - const entryPoint = entryPoints[i] + for (const entryPoint of entryPoints) { if (entryPoint.type === constValue.ENGIN_START_FUNCALL) { - // Serialize parameters properly to avoid [object Object] issue - // Use a custom serializer to handle circular references - const params = entryPoint.entryPointSymVal?.ast?.parameters - let paramsStr = '' - if (params) { - try { - // Try to serialize only the essential parts to avoid circular references - if (Array.isArray(params)) { - paramsStr = JSON.stringify( - params.map((p: any) => ({ - id: p?.id?.name || p?.id, - name: p?.name, - })) - ) - } else if (typeof params === 'object') { - // Extract only non-circular fields - const keys = Object.keys(params) - const simpleParams: any = {} - for (const key of keys) { - const val = params[key] - if (val && typeof val === 'object' && val.id) { - simpleParams[key] = { id: val.id?.name || val.id } - } else if (typeof val !== 'object' || val === null) { - simpleParams[key] = val - } - } - paramsStr = JSON.stringify(simpleParams) - } else { - paramsStr = String(params) - } - } catch (e) { - // Fallback: use a simple string representation - paramsStr = params.toString ? params.toString() : String(params) - } - } - // Include parent class name in key to distinguish handlers with same method name - const parentName = entryPoint?.entryPointSymVal?.parent?.id || entryPoint?.entryPointSymVal?.parent?.name || '' - const qid = entryPoint?.entryPointSymVal?._qid || '' - const entryKey = `${entryPoint.filePath}.${entryPoint.functionName}/${parentName}/${qid}#${paramsStr}.${entryPoint.attribute}` - if (hasAnalysised.includes(entryKey)) { + if ( + hasAnalysised.includes( + `${entryPoint.filePath}.${entryPoint.functionName}/${entryPoint?.entryPointSymVal?._qid}#${entryPoint.entryPointSymVal.ast.parameters}.${entryPoint.attribute}` + ) + ) { continue } - hasAnalysised.push(entryKey) + hasAnalysised.push( + `${entryPoint.filePath}.${entryPoint.functionName}/${entryPoint?.entryPointSymVal?._qid}#${entryPoint.entryPointSymVal.ast.parameters}.${entryPoint.attribute}` + ) entryPointConfig.setCurrentEntryPoint(entryPoint) logger.info( 'EntryPoint [%s.%s] is executing', @@ -215,7 +181,6 @@ class PythonAnalyzer extends (Analyzer as any) { `[${entryPoint.entryPointSymVal?.ast?.id?.name} symbolInterpret failed. Exception message saved in error log file`, `[${entryPoint.entryPointSymVal?.ast?.id?.name} symbolInterpret failed. Exception message saved in error log file` ) - // Continue to next entrypoint instead of breaking } this.checkerManager.checkAtSymbolInterpretOfEntryPointAfter(this, null, null, null, null) } else if (entryPoint.type === constValue.ENGIN_START_FILE_BEGIN) { @@ -281,7 +246,7 @@ class PythonAnalyzer extends (Analyzer as any) { if (new_right?._tags) { for (const t of new_right._tags) new_node._tags.add(t) } - + // Merge traces if possible, or just take one if not if (new_left?.trace || new_right?.trace) { new_node.trace = [] From dc7583bd18f8083cf4210f918ea71263d6eb1309 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Fri, 9 Jan 2026 02:59:41 -0800 Subject: [PATCH 16/35] Fix: update tornado-framework --- .../taint/python/tornado-taint-checker.ts | 382 +----------------- src/checker/taint/python/tornado-util.ts | 161 +------- 2 files changed, 34 insertions(+), 509 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 0f02d340..6eeadaa7 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -1,8 +1,6 @@ const path = require('path') const PythonTaintAbstractChecker = require('./python-taint-abstract-checker') const FileUtil = require('../../../util/file-util') - -const { extractRelativePath } = FileUtil const AstUtil = require('../../../util/ast-util') const Config = require('../../../config') const completeEntryPoint = require('../common-kit/entry-points-util') @@ -10,11 +8,8 @@ const logger = require('../../../util/logger')(__filename) const BasicRuleHandler = require('../../common/rules-basic-handler') const { mergeAToB } = require('../../../util/common-util') const { - isTornadoCall, - parseRoutePair, - resolveImportPath, - extractImportEntries, extractParamsFromAst, + isTornadoCall, tornadoSourceAPIs, passthroughFuncs, isRequestAttributeExpression, @@ -23,25 +18,17 @@ const { } = require('./tornado-util') const { markTaintSource } = require('../common-kit/source-util') -// Type definitions (moved from import to avoid module resolution issues) -interface FileCache { - vars: Map - classes: Map - importedSymbols: Map -} - interface RoutePair { path: string handlerName: string file?: string + handlerSymVal?: any } /** * Tornado Taint Checker Base Class */ class TornadoTaintChecker extends PythonTaintAbstractChecker { - private fileCache = new Map() - private cachedRuleConfigFile: string | null = null private cachedRuleConfigContent: any[] | null = null @@ -51,10 +38,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param value * @param node Optional node for trace */ - private markAsTainted(value: any, node?: any): void { - if (!value) return - markTaintSource(value, { path: node || value.ast || {}, kind: 'PYTHON_INPUT' }) - } /** * @@ -126,247 +109,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent) } - /** - * Build a light-weight file cache for quick lookup. - * @param analyzer - * @param scope - * @param node - * @param _state - * @param state - * @param _info - */ - triggerAtCompileUnit(analyzer: any, scope: any, node: any, state: any, _info: any): boolean | undefined { - if (Config.entryPointMode === 'ONLY_CUSTOM') return - const fileName = node.loc?.sourcefile - if (!fileName) return - - const cache: FileCache = { - vars: new Map(), - classes: new Map(), - importedSymbols: new Map(), - } - - // First pass: collect all variables, classes, and assignments - const allAssignments: Map = new Map() - const applicationCalls: any[] = [] - - AstUtil.visit(node, { - AssignmentExpression: (n: any) => { - if (n.left?.type === 'Identifier' && n.left.name) { - cache.vars.set(n.left.name, { value: n.right, file: fileName }) - allAssignments.set(n.left.name, n.right) - } - return true - }, - VariableDeclaration: (n: any) => { - const localName = n.id?.name - if (!localName) return true - if (n.init?.type === 'ImportExpression') { - const modulePath = n.init.from?.value || n.init.from?.name - if (!modulePath) return true - const resolved = resolveImportPath(modulePath, fileName, Config.maindir) - if (!resolved) return true - const entries = extractImportEntries(n) - for (const entry of entries) { - if (!entry.local) continue - cache.importedSymbols.set(entry.local, { - file: resolved, - originalName: entry.imported, - }) - } - return true - } - if (n.init) { - cache.vars.set(localName, { value: n.init, file: fileName }) - allAssignments.set(localName, n.init) - } - return true - }, - ClassDefinition: (n: any) => { - const name = n.name?.name || n.id?.name - if (name) { - cache.classes.set(name, { value: n, file: fileName }) - } - return true - }, - // Collect Tornado Application calls - CallExpression: (n: any) => { - if (this.isTornadoApplicationCallAst(n)) { - applicationCalls.push(n) - } - return true - }, - }) - - this.fileCache.set(fileName, cache) - - // Second pass: process Application calls with fully populated variable map - const routesByHandler: Map = new Map() - - for (const callNode of applicationCalls) { - // Extract handlers argument - const handlersArg = this.extractHandlersArgFromCallAst(callNode) - if (!handlersArg) continue - - // Parse routes from handlers list, using allAssignments for variable resolution - const routes = this.parseRoutesFromAstWithAssignments(handlersArg, fileName, allAssignments) - - for (const route of routes) { - if (!routesByHandler.has(route.handlerName)) { - routesByHandler.set(route.handlerName, []) - } - const classAst = cache.classes.get(route.handlerName)?.value - if (classAst) { - routesByHandler.get(route.handlerName)!.push({ - classAst, - urlPattern: route.path, - }) - } - } - } - - // Register entrypoints from detected routes - for (const [handlerName, routeInfos] of routesByHandler) { - for (const routeInfo of routeInfos) { - const handlerSymVal = this.buildClassSymbol(routeInfo.classAst) - this.emitHandlerEntrypoints(analyzer, handlerSymVal, routeInfo.urlPattern, routeInfo.classAst, scope, state) - } - } - } - - /** - * Parse routes from handlers list AST with assignment map for variable resolution - * @param handlersAst - AST node for handlers - * @param fileName - Current file name - * @param assignments - Map of variable name to value AST - */ - private parseRoutesFromAstWithAssignments( - handlersAst: any, - fileName: string, - assignments: Map - ): RoutePair[] { - const routes: RoutePair[] = [] - if (!handlersAst) return routes - - // Handle identifier reference to a variable - if (handlersAst.type === 'Identifier') { - const valueAst = assignments.get(handlersAst.name) - if (valueAst) { - return this.parseRoutesFromAstWithAssignments(valueAst, fileName, assignments) - } - return routes - } - - // Handle ObjectExpression (Python list parsed as object with numeric keys) - if (handlersAst.type === 'ObjectExpression') { - const properties = handlersAst.properties || [] - for (const prop of properties) { - const valueNode = prop.value - if (valueNode?.type === 'TupleExpression') { - const pair = this.parseRouteTuple(valueNode) - if (pair) { - routes.push({ ...pair, file: fileName }) - } - } - } - return routes - } - - // Handle list/array expression - if (handlersAst.type === 'ArrayExpression' || handlersAst.type === 'ListExpression') { - const elements = handlersAst.elements || [] - for (const element of elements) { - const pair = parseRoutePair(element) - if (pair) { - routes.push({ ...pair, file: fileName }) - } - } - } - - return routes - } - - /** - * Parse a route tuple AST into a RoutePair - * @param tupleAst - TupleExpression AST node - */ - private parseRouteTuple(tupleAst: any): RoutePair | null { - if (!tupleAst || tupleAst.type !== 'TupleExpression') return null - const elements = tupleAst.elements || [] - if (elements.length < 2) return null - - const pathNode = elements[0] - const handlerNode = elements[1] - - const pathValue = pathNode?.type === 'Literal' ? pathNode.value : null - const handlerName = handlerNode?.type === 'Identifier' ? handlerNode.name : null - - if (typeof pathValue === 'string' && handlerName) { - return { path: pathValue, handlerName } - } - return null - } - - /** - * Check if a CallExpression AST node is a Tornado Application call - * Supports patterns: - * - tornado.web.Application.__init__(self, handlers, ...) - * - Application.__init__(self, handlers, ...) - * - tornado.web.Application(handlers, ...) - * - Application(handlers, ...) - * @param node - CallExpression AST node - */ - private isTornadoApplicationCallAst(node: any): boolean { - if (!node || node.type !== 'CallExpression' || !node.callee) return false - const { callee } = node - // logger.info(`Checking CallExpression for Tornado Application: ${AstUtil.prettyPrint(callee)}`) - - // Pattern 1: Direct Application call - Application(...) - if (callee.type === 'Identifier' && callee.name === 'Application') { - return true - } - - // Pattern 2: MemberAccess ending with Application - tornado.web.Application(...) - if (callee.type === 'MemberAccess' && callee.property?.name === 'Application') { - return true - } - - // Pattern 3: __init__ call on Application - tornado.web.Application.__init__(...) - if (callee.type === 'MemberAccess' && callee.property?.name === '__init__') { - let current = callee.object - while (current) { - if (current.type === 'Identifier' && current.name === 'Application') { - return true - } - if (current.type === 'MemberAccess' && current.property?.name === 'Application') { - return true - } - current = current.type === 'MemberAccess' ? current.object : null - } - } - - return false - } - - /** - * Extract handlers argument from Tornado Application call AST - * @param node - CallExpression AST node - */ - private extractHandlersArgFromCallAst(node: any): any { - if (!node.arguments || node.arguments.length === 0) return null - const { callee } = node - - // Check if this is an __init__ call (first arg is self) - const isInitCall = callee?.type === 'MemberAccess' && callee?.property?.name === '__init__' - - if (isInitCall && node.arguments.length >= 2) { - // __init__(self, handlers, ...) -> handlers is at index 1 - return node.arguments[1] - } - // Application(handlers, ...) -> handlers is at index 0 - return node.arguments[0] - } - /** * On function call before execution, use argvalues to get resolved symbol values * This replaces the old AST-based triggerAtFuncCallSyntax approach. @@ -400,7 +142,9 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // Check if this is an __init__ call pattern: Application.__init__(self, handlers, ...) // In this case, handlers is the second argument (index 1) const { callee } = node - const isInitCall = callee?.type === 'MemberAccess' && callee?.property?.name === '__init__' + const isInitCall = + callee?.type === 'MemberAccess' && + (callee?.property?.name === '__init__' || callee?.property?.name === '_CTOR_') if (isInitCall) { // __init__(self, handlers, ...) -> handlers is at index 1 routeListArgValue = argvalues[1] @@ -481,22 +225,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } else if (handlerSym.ast && handlerSym.ast.type === 'ClassDefinition') { // If we have the AST, process it to get the class symbol value processHandlerClass(handlerSym.ast) - } else { - // Try to resolve from identifier - const { handlerName } = pair - const handlerFile = pair.file || currentFile - const handlerClassAst = this.resolveSymbol(handlerName, handlerFile) - if (handlerClassAst && handlerClassAst.type === 'ClassDefinition') { - processHandlerClass(handlerClassAst) - } - } - } else { - // Fallback: resolve handler class from name - const { handlerName } = pair - const handlerFile = pair.file || currentFile - const handlerClassAst = this.resolveSymbol(handlerName, handlerFile) - if (handlerClassAst && handlerClassAst.type === 'ClassDefinition') { - processHandlerClass(handlerClassAst) } } @@ -590,7 +318,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 检查是否是 tornado source API 调用(如 get_argument) if (funcName && tornadoSourceAPIs.has(funcName)) { - this.markAsTainted(ret, node) + // this.markAsTainted(ret, node) + markTaintSource(ret, { path: node || ret.ast || {}, kind: 'PYTHON_INPUT' }) } // 处理 passthrough 函数(如 decode, strip 等) @@ -603,13 +332,15 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { isRequestAttributeExpression(node.callee.object) ) { // 直接标记返回值为 source(因为 self.request.body/query/headers/cookies 等是 source) - this.markAsTainted(ret, node) + // this.markAsTainted(ret, node) + markTaintSource(ret, { path: node || ret.ast || {}, kind: 'PYTHON_INPUT' }) return // 已经标记,不需要再检查 receiver } // 检查 receiver 是否被污染 const receiver = fclos?.object || fclos?._this if (receiver && (receiver.taint || receiver.hasTagRec || receiver._tags?.has('PYTHON_INPUT'))) { - this.markAsTainted(ret, node) + // this.markAsTainted(ret, node) + markTaintSource(ret, { path: node || ret.ast || {}, kind: 'PYTHON_INPUT' }) } } } @@ -649,7 +380,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // Process the parameter to get its symbol value const paramSymVal = analyzer.processInstruction(entryPoint.entryPointSymVal, param.id || param, state) if (paramSymVal) { - this.markAsTainted(paramSymVal, param.id || param) + // this.markAsTainted(paramSymVal, param.id || param) + markTaintSource(paramSymVal, { path: param.id || param || paramSymVal.ast || {}, kind: 'PYTHON_INPUT' }) } } catch (e) { // Ignore errors @@ -673,61 +405,11 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 重用 isRequestAttributeAccess 工具函数,避免重复逻辑并保持行为一致 if (isRequestAttributeAccess(node)) { - this.markAsTainted(res, node) + // this.markAsTainted(res, node) + markTaintSource(res, { path: node || res.ast || {}, kind: 'PYTHON_INPUT' }) } } - /** - * Resolve symbol cross-file - * @param name - * @param currentFile - */ - private resolveSymbol(name: string, currentFile: string): any | null { - if (!name || !currentFile) return null - const cache = this.fileCache.get(currentFile) - if (!cache) return null - const { vars, classes, importedSymbols } = cache - if (vars.has(name)) { - const entry = vars.get(name) - if (entry?.value) { - entry.value.loc = entry.value.loc || {} - entry.value.loc.sourcefile = entry.file - return entry.value - } - } - if (classes.has(name)) { - const entry = classes.get(name) - if (entry?.value) { - entry.value.loc = entry.value.loc || {} - entry.value.loc.sourcefile = entry.file - return entry.value - } - } - - const importInfo = importedSymbols.get(name) - if (!importInfo) return null - const targetCache = this.fileCache.get(importInfo.file) - if (!targetCache) return null - const targetName = importInfo.originalName || name - if (targetCache.vars.has(targetName)) { - const entry = targetCache.vars.get(targetName) - if (entry?.value) { - entry.value.loc = entry.value.loc || {} - entry.value.loc.sourcefile = entry.file - return entry.value - } - } - if (targetCache.classes.has(targetName)) { - const entry = targetCache.classes.get(targetName) - if (entry?.value) { - entry.value.loc = entry.value.loc || {} - entry.value.loc.sourcefile = entry.file - return entry.value - } - } - return null - } - /** * Extract route pairs from resolved symbol values (from argvalues) * @param routeListSymVal - Symbol value representing route list @@ -774,7 +456,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (routeListSymVal.value.length === 2) { const [pathSymVal, handlerSymVal] = routeListSymVal.value const pathValue = this.extractStringFromSymbolValue(pathSymVal) - const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) + const handlerName = 'Handler' // Placeholder name if (pathValue && handlerName) { const file = handlerSymVal?.ast?.loc?.sourcefile || @@ -852,15 +534,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } } - // Fallback: try to parse from AST if available - if (routeListSymVal.ast) { - const pair = parseRoutePair(routeListSymVal.ast) - if (pair) { - const file = routeListSymVal.ast?.loc?.sourcefile || routeListSymVal.loc?.sourcefile || currentFile - return [{ ...pair, file }] - } - } - return [] } @@ -1007,14 +680,12 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) finalEp.functionName = handlerName ? `${handlerName}.${rawFuncName}` : rawFuncName } - // 确保 finalEp 有 filePath if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { const { sourcefile } = finalEp.fdef.loc - if (Config.maindir && typeof Config.maindir === 'string') { - finalEp.filePath = FileUtil.extractRelativePath(sourcefile, Config.maindir) - } else { - finalEp.filePath = sourcefile - } + finalEp.filePath = + Config.maindir && typeof Config.maindir === 'string' + ? FileUtil.extractRelativePath(sourcefile, Config.maindir) + : sourcefile } // 确保 finalEp 有 ast,completeEntryPoint 可能需要它 if (!finalEp.ast && finalEp.fdef) { @@ -1023,7 +694,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const entryPoint = completeEntryPoint(finalEp) entryPoint.urlPattern = urlPattern entryPoint.handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) - // 确保 entryPoint.entryPointSymVal.parent 有 field 结构 if ( entryPoint.entryPointSymVal?.parent && entryPoint.entryPointSymVal.parent.vtype === 'class' && @@ -1033,18 +703,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } analyzer.entryPoints.push(entryPoint) - // 注册参数为 source - const funcName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' - const sourceFile = finalEp.fdef?.loc?.sourcefile || classAst?.loc?.sourcefile || '' - let scopeFile: string | null = null - if (sourceFile) { - if (Config.maindir && typeof Config.maindir === 'string') { - scopeFile = extractRelativePath(sourceFile, Config.maindir) - } else { - scopeFile = sourceFile - } - } - const params = extractTornadoParams(urlPattern) const paramMetas = (Array.isArray((finalEp as any).params) && (finalEp as any).params.length diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index e0e800e0..ec553a3e 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -1,23 +1,6 @@ const path = require('path') const AstUtil = require('../../../util/ast-util') -export interface ImportSymbol { - file: string - originalName?: string -} - -export interface RoutePair { - path: string - handlerName: string - file?: string -} - -export interface FileCache { - vars: Map - classes: Map - importedSymbols: Map -} - export interface ParamMeta { name: string locStart: number | 'all' @@ -39,7 +22,7 @@ export const tornadoSourceAPIs = new Set([ export const passthroughFuncs = new Set(['decode', 'strip', 'replace', 'lower', 'upper', 'split']) /** - * + * Detect if node is an access to a Tornado request attribute (e.g., self.request.body) * @param node */ export function isRequestAttributeAccess(node: any): boolean { @@ -68,7 +51,7 @@ export function isRequestAttributeAccess(node: any): boolean { } /** - * + * Detect if expression involves a Tornado request attribute * @param expr */ export function isRequestAttributeExpression(expr: any): boolean { @@ -81,7 +64,7 @@ export function isRequestAttributeExpression(expr: any): boolean { } /** - * 用来判断是否是Tornado的请求函数,例如 + * Check if node is a Tornado Application or handler call * @param node * @param targetName */ @@ -97,7 +80,8 @@ export function isTornadoCall(node: any, targetName: string): boolean { // Handle pattern: tornado.web.Application.__init__(self, handlers, ...) // In this case, we need to check if 'Application' is in the member access chain // and the final property is '__init__' - if (callee.type === 'MemberAccess' && callee.property?.name === '__init__') { + const propName = callee.property?.name + if (callee.type === 'MemberAccess' && (propName === '__init__' || propName === '_CTOR_')) { // Check if any part of the member access chain matches the targetName let current = callee.object while (current) { @@ -107,138 +91,21 @@ export function isTornadoCall(node: any, targetName: string): boolean { if (current.type === 'MemberAccess' && current.property?.name === targetName) { return true } - current = current.type === 'MemberAccess' ? current.object : null + // Follow through super() or other calls + if (current.type === 'CallExpression') { + current = current.callee + } else if (current.type === 'MemberAccess') { + current = current.object + } else { + current = null + } } } return false } /** - * - * @param route - */ -export function parseRoutePair(route: any): RoutePair | null { - if (!route) return null - - const extractLiteral = (expr: any): string | null => { - if (!expr) return null - if (expr.type === 'StringLiteral' || expr.type === 'Literal') { - return typeof expr.value === 'string' ? expr.value : null - } - return null - } - - let pathExpr: any - let handlerNode: any - - if (route.type === 'TupleExpression' && Array.isArray(route.elements)) { - const [first, second] = route.elements - pathExpr = first - handlerNode = second - } else if (route.type === 'CallExpression' && route.callee) { - const { callee } = route - - /** - * Check if callee is a URL helper function using AST node matching - * Supports: - * - url(...) - simple identifier - * - something.url(...) - member access - * - tornado.web.url(...) - nested member access chain - * This avoids unreliable string-based matching via prettyPrint - */ - const isIdentifierUrlHelper = callee.type === 'Identifier' && callee.name === 'url' - - const isMemberAccessUrlHelper = - callee.type === 'MemberAccess' && - // Check if the final property/member is 'url' - // Supports both 'property' and 'member' fields for different AST representations - ((callee.property && callee.property.type === 'Identifier' && callee.property.name === 'url') || - (callee.member && callee.member.type === 'Identifier' && callee.member.name === 'url')) - - const isUrlHelper = isIdentifierUrlHelper || isMemberAccessUrlHelper - - if (isUrlHelper && Array.isArray(route.arguments)) { - const [first, second] = route.arguments - pathExpr = first - handlerNode = second - } - } - if (!pathExpr || !handlerNode || handlerNode.type !== 'Identifier') { - return null - } - const pathValue = extractLiteral(pathExpr) - if (!pathValue) return null - - return { path: pathValue, handlerName: handlerNode.name } -} - -/** - * Resolve Python import path to file path - * @param modulePath - The import path (e.g., "handlers.user_handler" or ".handlers.user_handler") - * @param currentFile - The current file path - * @param mainDir - Optional project root directory for absolute imports - * @returns Resolved file path or null - */ -export function resolveImportPath(modulePath: string, currentFile: string, mainDir?: string): string | null { - if (!modulePath) return null - - const currentDir = path.dirname(currentFile) - const leadingDots = modulePath.match(/^\.+/)?.[0] ?? '' - let baseDir: string - - if (leadingDots.length > 0) { - // Relative import: resolve from current file's directory - baseDir = path.resolve(currentDir, '../'.repeat(leadingDots.length - 1)) - } else if (mainDir) { - // Absolute import: resolve from project root (mainDir) - baseDir = mainDir - } else { - // Fallback for absolute imports when mainDir is not provided. - // This is the original behavior and is likely incorrect. - baseDir = currentDir - } - - const remainder = modulePath.slice(leadingDots.length) - const normalized = remainder ? remainder.split('.').join(path.sep) : '' - const resolved = normalized ? path.resolve(baseDir, normalized) : baseDir - - // Check if it's a package (directory with __init__.py) - const fs = require('fs') - if (fs.existsSync(resolved) && fs.statSync(resolved).isDirectory()) { - return path.join(resolved, '__init__.py') - } - - // Regular module file - return `${resolved}.py` -} - -/** - * - * @param stmt - */ -export function extractImportEntries(stmt: any): Array<{ local: string; imported?: string }> { - const res: Array<{ local: string; imported?: string }> = [] - const { init } = stmt - if (!init) return res - - if (Array.isArray(init?.imports) && init.imports.length > 0) { - for (const spec of init.imports) { - const local = spec.local?.name || spec.local?.value || spec.name || spec.value - const imported = spec.imported?.name || spec.imported?.value || spec.name || spec.value - if (local) res.push({ local, imported }) - } - return res - } - - if (stmt.id?.name) { - const importedName = init?.imported?.name || init?.imported?.value || init?.name?.name || init?.name?.value - res.push({ local: stmt.id.name, imported: importedName }) - } - return res -} - -/** - * + * Extract parameters from function AST * @param funcNode */ export function extractParamsFromAst(funcNode: any): ParamMeta[] { From b96962a5b3e31d984c949766bb7fa831983892cc Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Fri, 9 Jan 2026 03:23:29 -0800 Subject: [PATCH 17/35] Fix: update tornado-framework --- .../taint/python/tornado-taint-checker.ts | 313 +++--------------- 1 file changed, 52 insertions(+), 261 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 6eeadaa7..1e544679 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -29,24 +29,12 @@ interface RoutePair { * Tornado Taint Checker Base Class */ class TornadoTaintChecker extends PythonTaintAbstractChecker { - private cachedRuleConfigFile: string | null = null - - private cachedRuleConfigContent: any[] | null = null - - /** - * Helper function to mark a value as tainted - * @param value - * @param node Optional node for trace - */ - /** - * + * constructor * @param resultManager */ constructor(resultManager: any) { super(resultManager, 'taint_flow_python_tornado_input') - // 基类构造函数会调用 loadRuleConfig,但此时 Config.ruleConfigFile 可能还没有被设置 - // 所以我们在这里不加载规则配置,而是在 triggerAtStartOfAnalyze 中加载 } /** @@ -59,50 +47,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void { - const currentRuleConfigFile = Config.ruleConfigFile - let ruleConfigContent: any[] = [] - - if (currentRuleConfigFile && currentRuleConfigFile !== '') { - try { - ruleConfigContent = FileUtil.loadJSONfile(currentRuleConfigFile) - this.cachedRuleConfigFile = currentRuleConfigFile - this.cachedRuleConfigContent = ruleConfigContent - } catch (e: any) { - ruleConfigContent = [] - } - } else if (this.cachedRuleConfigContent !== null) { - // 使用缓存的配置内容 - ruleConfigContent = this.cachedRuleConfigContent - } else { - // 尝试从 BasicRuleHandler 获取(可能已经在构造函数中加载) - try { - ruleConfigContent = BasicRuleHandler.getRules(Config.ruleConfigFile) - if (ruleConfigContent && ruleConfigContent.length > 0) { - this.cachedRuleConfigContent = ruleConfigContent - } - } catch (e: any) { - ruleConfigContent = [] - } - } - - // 应用规则配置 - const checkerId = this.getCheckerId() - - if (ruleConfigContent && Array.isArray(ruleConfigContent) && ruleConfigContent.length > 0) { - for (const ruleConfig of ruleConfigContent) { - const checkerIds = Array.isArray(ruleConfig.checkerIds) - ? ruleConfig.checkerIds - : ruleConfig.checkerIds - ? [ruleConfig.checkerIds] - : [] - const matches = checkerIds.length > 0 && checkerIds.includes(checkerId) - - if (matches) { - mergeAToB(ruleConfig, this.checkerRuleConfigContent) - } - } - } - // 注册 sourceScope 中的 source this.addSourceTagForSourceScope('PYTHON_INPUT', this.sourceScope.value) // 注册规则配置中的 source @@ -247,49 +191,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param fclos * @param argvalues */ - checkByNameMatch(node: any, fclos: any, argvalues: any): void { - // 1. Try standard matching first - super.checkByNameMatch(node, fclos, argvalues) - - // 2. Proactive matching for critical sinks if no finding was generated yet - // We look for common method names regardless of the receiver's inferred type - const funcName = node.callee?.property?.name || node.callee?.name - if (!funcName) return - - const proactiveSinks: Record = { - execute: 'PythonSqlInjection', - popen: 'PythonCommandInjection', - system: 'PythonCommandInjection', - } - - if (proactiveSinks[funcName]) { - // Check if any argument is tainted - const taintedArg = argvalues.find( - (arg: any) => arg && (arg.taint || arg.hasTagRec || arg._tags?.has('PYTHON_INPUT')) - ) - if (taintedArg) { - // Construct a manual finding if not already found - const attribute = proactiveSinks[funcName] - const ruleName = `${funcName} (Proactive Match)\nSINK Attribute: ${attribute}` - - const taintFlowFinding = this.buildTaintFinding( - this.getCheckerId(), - this.desc, - node, - taintedArg, - fclos, - 'PYTHON_INPUT', - ruleName, - [] // No specific sanitizers for proactive match - ) - - const TaintOutputStrategy = require('../../common/output/taint-output-strategy') - if (TaintOutputStrategy.isNewFinding(this.resultManager, taintFlowFinding)) { - this.resultManager.newFinding(taintFlowFinding, TaintOutputStrategy.outputStrategyId) - } - } - } - } /** * Handle API calls like self.get_argument() @@ -300,46 +201,25 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void { - // 先调用基类方法处理规则配置中的 source super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) if (Config.entryPointMode === 'ONLY_CUSTOM') return const { fclos, ret } = info - if (!fclos || !ret) { - return - } + if (!fclos || !ret) return - // 从 node.callee 获取方法名(对于 MemberAccess 调用,如 self.get_argument) - let funcName: string | null = null - if (node.callee?.type === 'MemberAccess') { - funcName = node.callee.property?.name - } else if (node.callee?.type === 'Identifier') { - funcName = node.callee.name - } + const funcName = node.callee?.property?.name || node.callee?.name + if (!funcName) return - // 检查是否是 tornado source API 调用(如 get_argument) - if (funcName && tornadoSourceAPIs.has(funcName)) { - // this.markAsTainted(ret, node) + // Mark Tornado source APIs and passthrough functions + if (tornadoSourceAPIs.has(funcName)) { markTaintSource(ret, { path: node || ret.ast || {}, kind: 'PYTHON_INPUT' }) - } - - // 处理 passthrough 函数(如 decode, strip 等) - if (funcName && passthroughFuncs.has(funcName)) { - // 使用 isRequestAttributeExpression 统一检测 request 属性访问(如 self.request.body.decode) - // 这避免了重复的 AST 模式匹配逻辑,保持与 tornado-util.ts 的一致性 - if ( - node.callee?.type === 'MemberAccess' && - node.callee.object && - isRequestAttributeExpression(node.callee.object) - ) { - // 直接标记返回值为 source(因为 self.request.body/query/headers/cookies 等是 source) - // this.markAsTainted(ret, node) - markTaintSource(ret, { path: node || ret.ast || {}, kind: 'PYTHON_INPUT' }) - return // 已经标记,不需要再检查 receiver - } - // 检查 receiver 是否被污染 + } else if (passthroughFuncs.has(funcName)) { + // Check for request attribute access like self.request.body.decode() + const isReqAttr = node.callee?.type === 'MemberAccess' && isRequestAttributeExpression(node.callee.object) const receiver = fclos?.object || fclos?._this - if (receiver && (receiver.taint || receiver.hasTagRec || receiver._tags?.has('PYTHON_INPUT'))) { - // this.markAsTainted(ret, node) + const isTaintedReceiver = + receiver && (receiver.taint || receiver.hasTagRec || receiver._tags?.has('PYTHON_INPUT')) + + if (isReqAttr || isTaintedReceiver) { markTaintSource(ret, { path: node || ret.ast || {}, kind: 'PYTHON_INPUT' }) } } @@ -354,41 +234,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param state * @param info */ - triggerAtSymbolInterpretOfEntryPointBefore(analyzer: any, scope: any, node: any, state: any, info: any): void { - const entryPointConfig = require('../../../engine/analyzer/common/current-entrypoint') - const entryPoint = entryPointConfig.getCurrentEntryPoint() - if (!entryPoint || !entryPoint.entryPointSymVal) return - - // Check if this entrypoint has path parameters that should be marked as tainted - const params = entryPoint.entryPointSymVal?.ast?.parameters - if (!params) return - - // Get parameter names from sourceScope - const paramNames = new Set() - for (const source of this.sourceScope.value) { - if (source.path && source.kind === 'PYTHON_INPUT') { - paramNames.add(source.path) - } - } - - // Mark matching parameters as tainted by processing them and marking the result - for (const key in params) { - const param = params[key] - const paramName = param?.id?.name || param?.name - if (paramName && paramNames.has(paramName) && paramName !== 'self') { - try { - // Process the parameter to get its symbol value - const paramSymVal = analyzer.processInstruction(entryPoint.entryPointSymVal, param.id || param, state) - if (paramSymVal) { - // this.markAsTainted(paramSymVal, param.id || param) - markTaintSource(paramSymVal, { path: param.id || param || paramSymVal.ast || {}, kind: 'PYTHON_INPUT' }) - } - } catch (e) { - // Ignore errors - } - } - } - } /** * Handle Member Access Sources like self.request.body @@ -401,12 +246,8 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { */ triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void { if (Config.entryPointMode === 'ONLY_CUSTOM') return - const { res } = info - - // 重用 isRequestAttributeAccess 工具函数,避免重复逻辑并保持行为一致 if (isRequestAttributeAccess(node)) { - // this.markAsTainted(res, node) - markTaintSource(res, { path: node || res.ast || {}, kind: 'PYTHON_INPUT' }) + markTaintSource(info.res, { path: node || info.res.ast || {}, kind: 'PYTHON_INPUT' }) } } @@ -626,124 +467,74 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { scope?: any, state?: any ) { - if (!handlerSymVal || handlerSymVal.vtype !== 'class') { - return - } + if (!handlerSymVal || handlerSymVal.vtype !== 'class') return + const httpMethods = new Set(['get', 'post', 'put', 'delete', 'patch', 'head', 'options']) - const entrypoints = Object.entries(handlerSymVal.value) - .filter(([key, value]: [string, any]) => httpMethods.has(key) && value.vtype === 'fclos') - .map(([, value]: [string, any]) => value) + const handlers = Object.entries(handlerSymVal.value).filter( + ([key, value]: [string, any]) => httpMethods.has(key) && value.vtype === 'fclos' + ) - for (const ep of entrypoints as any[]) { - // ignore init files - if (ep.fdef?.loc?.sourcefile?.endsWith('__init__.py')) { - continue - } + for (const [method, fclos] of handlers as any[]) { + if (fclos.fdef?.loc?.sourcefile?.endsWith('__init__.py')) continue - // 尝试使用 analyzer.processInstruction 获取正确的 fclos 对象 - let finalEp = ep - if (scope && state && ep.fdef) { + let finalEp = fclos + if (scope && state && fclos.fdef) { try { - const processedFclos = analyzer.processInstruction(scope, ep.fdef, state) - if (processedFclos && processedFclos.vtype === 'fclos') { - processedFclos.parent = handlerSymVal - processedFclos.params = ep.params || extractParamsFromAst(ep.fdef) - if (!processedFclos.value) { - processedFclos.value = {} - } - finalEp = processedFclos + const processed = analyzer.processInstruction(scope, fclos.fdef, state) + if (processed?.vtype === 'fclos') { + processed.parent = handlerSymVal + processed.params = fclos.params || extractParamsFromAst(fclos.fdef) + finalEp = processed } } catch (e) { - // fallback to original ep + /* fallback */ } } - // 确保 ep 有 value 属性 - if (!finalEp.value) { - finalEp.value = {} - } - // 确保 finalEp.parent 正确设置,并且 handlerSymVal 有 field 结构 - if (handlerSymVal && handlerSymVal.vtype === 'class') { - if (!handlerSymVal.field) { - handlerSymVal.field = {} - } - finalEp.parent = handlerSymVal - } + if (!finalEp.value) finalEp.value = {} + finalEp.parent = handlerSymVal + if (handlerSymVal.vtype === 'class' && !handlerSymVal.field) handlerSymVal.field = {} try { - // 确保 finalEp 有 completeEntryPoint 需要的属性 - if (!finalEp.ast && finalEp.fdef) { - finalEp.ast = finalEp.fdef - } + if (!finalEp.ast) finalEp.ast = finalEp.fdef if (!finalEp.functionName) { - const rawFuncName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' + const rawName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) - finalEp.functionName = handlerName ? `${handlerName}.${rawFuncName}` : rawFuncName + finalEp.functionName = handlerName ? `${handlerName}.${rawName}` : rawName } if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { - const { sourcefile } = finalEp.fdef.loc - finalEp.filePath = - Config.maindir && typeof Config.maindir === 'string' - ? FileUtil.extractRelativePath(sourcefile, Config.maindir) - : sourcefile - } - // 确保 finalEp 有 ast,completeEntryPoint 可能需要它 - if (!finalEp.ast && finalEp.fdef) { - finalEp.ast = finalEp.fdef + finalEp.filePath = Config.maindir + ? FileUtil.extractRelativePath(finalEp.fdef.loc.sourcefile, Config.maindir) + : finalEp.fdef.loc.sourcefile } + const entryPoint = completeEntryPoint(finalEp) entryPoint.urlPattern = urlPattern entryPoint.handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) - if ( - entryPoint.entryPointSymVal?.parent && - entryPoint.entryPointSymVal.parent.vtype === 'class' && - !entryPoint.entryPointSymVal.parent.field - ) { - entryPoint.entryPointSymVal.parent.field = {} - } analyzer.entryPoints.push(entryPoint) + // Register path parameters as sources const params = extractTornadoParams(urlPattern) - const paramMetas = - (Array.isArray((finalEp as any).params) && (finalEp as any).params.length - ? (finalEp as any).params - : extractParamsFromAst(finalEp.fdef)) || [] - if (paramMetas.length > 0) { - let positionalIdx = 0 - for (const meta of paramMetas) { - if (meta.name === 'self') continue - - let isSource = false - if (params.named.length > 0) { - if (params.named.includes(meta.name)) { - isSource = true - } - } else if (params.positionalCount > 0) { - if (positionalIdx < params.positionalCount) { - isSource = true - } - } - positionalIdx++ - - if (!isSource) continue - - // 对于路径参数,使用 'all' 以匹配所有文件和位置,因为参数可能在函数定义的不同位置 + const paramMetas = finalEp.params || extractParamsFromAst(finalEp.fdef) || [] + paramMetas.forEach((meta: any, idx: number) => { + if (meta.name === 'self') return + const isSource = + params.named.includes(meta.name) || (params.named.length === 0 && idx <= params.positionalCount) + if (isSource) { const sourceEntry = { path: meta.name, kind: 'PYTHON_INPUT', - scopeFile: 'all', // 使用 'all' 以匹配所有文件 - scopeFunc: 'all', // 使用 'all' 以匹配所有函数,因为 handler 方法可能在嵌套作用域中 - locStart: 'all', // 使用 'all' 以匹配所有行号 - locEnd: 'all', // 使用 'all' 以匹配所有行号 + scopeFile: 'all', + scopeFunc: 'all', + locStart: 'all', + locEnd: 'all', } this.sourceScope.value.push(sourceEntry) - // 立即注册 source,因为 triggerAtStartOfAnalyze 可能在 entrypoints 收集之前被调用 this.addSourceTagForSourceScope('PYTHON_INPUT', [sourceEntry]) } - } + }) } catch (e: any) { - logger.warn(`Error in completeEntryPoint: ${e?.message || e}`) - continue + logger.warn(`Error in entrypoint collection: ${e?.message || e}`) } } } From 3cceb629432d539d3aa149a545ffcf88571d0bc9 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 12 Jan 2026 04:48:31 -0800 Subject: [PATCH 18/35] Fix: update-tornado --- .../taint/python/tornado-taint-checker.ts | 611 +++++------------- src/checker/taint/python/tornado-util.ts | 123 +--- 2 files changed, 175 insertions(+), 559 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 1e544679..7635ec22 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -1,14 +1,8 @@ -const path = require('path') const PythonTaintAbstractChecker = require('./python-taint-abstract-checker') -const FileUtil = require('../../../util/file-util') -const AstUtil = require('../../../util/ast-util') const Config = require('../../../config') const completeEntryPoint = require('../common-kit/entry-points-util') -const logger = require('../../../util/logger')(__filename) -const BasicRuleHandler = require('../../common/rules-basic-handler') -const { mergeAToB } = require('../../../util/common-util') +const { markTaintSource } = require('../common-kit/source-util') const { - extractParamsFromAst, isTornadoCall, tornadoSourceAPIs, passthroughFuncs, @@ -16,21 +10,13 @@ const { isRequestAttributeAccess, extractTornadoParams, } = require('./tornado-util') -const { markTaintSource } = require('../common-kit/source-util') - -interface RoutePair { - path: string - handlerName: string - file?: string - handlerSymVal?: any -} /** - * Tornado Taint Checker Base Class + * Tornado Taint Checker - Simplified */ class TornadoTaintChecker extends PythonTaintAbstractChecker { /** - * constructor + * * @param resultManager */ constructor(resultManager: any) { @@ -38,8 +24,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } /** - * trigger at start of analyze - * Register sourceScope values as sources + * * @param analyzer * @param scope * @param node @@ -47,19 +32,12 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void { - // 注册 sourceScope 中的 source this.addSourceTagForSourceScope('PYTHON_INPUT', this.sourceScope.value) - // 注册规则配置中的 source this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent) } /** - * On function call before execution, use argvalues to get resolved symbol values - * This replaces the old AST-based triggerAtFuncCallSyntax approach. - * Using symbol interpretation allows us to: - * 1. Get resolved symbol values for arguments (especially strings) via argvalues - * 2. Handle cases where route lists are obtained through function calls - * 3. Process route objects regardless of how they are obtained (variable, function call, etc.) + * * @param analyzer * @param scope * @param node @@ -67,500 +45,213 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void { - // 先调用基类方法 super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info) - const { fclos, argvalues } = info - if (!fclos || !argvalues) return - - if (Config.entryPointMode === 'ONLY_CUSTOM') return - const fileName = node.loc?.sourcefile - if (!fileName) return + if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !argvalues) return - // 检查是否是 Application 或 add_handlers 调用 - let routeListArgValue: any = null - const isApp = isTornadoCall(node, 'Application') - const isAddHandlers = isTornadoCall(node, 'add_handlers') + let routes = null + if (isTornadoCall(node, 'Application')) routes = argvalues[0] + else if (isTornadoCall(node, 'add_handlers')) routes = argvalues[1] - if (isApp) { - // Check if this is an __init__ call pattern: Application.__init__(self, handlers, ...) - // In this case, handlers is the second argument (index 1) - const { callee } = node - const isInitCall = - callee?.type === 'MemberAccess' && - (callee?.property?.name === '__init__' || callee?.property?.name === '_CTOR_') - if (isInitCall) { - // __init__(self, handlers, ...) -> handlers is at index 1 - routeListArgValue = argvalues[1] - } else { - // Application(handlers, ...) -> handlers is at index 0 - ;[routeListArgValue] = argvalues - } - } else if (isAddHandlers) { - // add_handlers(host, routes) -> second arg is routes - ;[, routeListArgValue] = argvalues - } - - if (routeListArgValue) { - this.collectTornadoEntrypointAndSourceFromArgValue(analyzer, scope, state, routeListArgValue, fileName) - } + if (routes) this.processRoutes(analyzer, scope, state, routes) } /** - * Collect entrypoints and sources from resolved symbol values (from argvalues) + * * @param analyzer * @param scope * @param state - * @param routeListSymVal - Resolved symbol value for route list - * @param currentFile + * @param val */ - private collectTornadoEntrypointAndSourceFromArgValue( - analyzer: any, - scope: any, - state: any, - routeListSymVal: any, - currentFile: string - ): void { - if (!routeListSymVal) return + private processRoutes(analyzer: any, scope: any, state: any, val: any) { + if (!val) return + + // 1. Handle Union - Try to see if the union itself is a route (flattened tuple) + if (val.vtype === 'union' && Array.isArray(val.value)) { + const pathVal = val.value.find((v: any) => typeof (v.value || v.ast?.value) === 'string') + const hVal = val.value.find((v: any) => v.vtype === 'class' || v.ast?.type === 'ClassDefinition') + + if (pathVal && hVal) { + const path = pathVal.value || pathVal.ast?.value + this.finishRoute(analyzer, scope, state, hVal, path) + return + } - const processed = new Set() - const routePairs = this.extractRoutesFromSymbolValue(routeListSymVal, currentFile, analyzer, scope, state) + val.value.forEach((v: any) => this.processRoutes(analyzer, scope, state, v)) + return + } - for (let i = 0; i < routePairs.length; i++) { - const pair = routePairs[i] - if (!pair.path || !pair.handlerName) { - continue - } - const dedupKey = `${pair.file || currentFile}::${pair.handlerName}::${pair.path}` - if (processed.has(dedupKey)) { - continue - } - processed.add(dedupKey) + // 2. Try to extract from Object/Tuple/URLSpec + let path: string | undefined + let h: any + + if ((val.vtype === 'object' || val.vtype === 'tuple') && val.value) { + const pVal = val.value['0'] || val.value.regex || val.value._pattern + h = val.value['1'] || val.value.handler_class || val.value._handler_class + path = pVal?.value || pVal?.ast?.value + } else if (Array.isArray(val.value)) { + const pVal = val.value[0] + h = val.value[1] + path = pVal?.value || pVal?.ast?.value + } - let handlerSymVal: any = null - let classAst: any = null + if (typeof path === 'string' && h) { + this.finishRoute(analyzer, scope, state, h, path) + return + } - // Helper function to process class AST and get handler symbol value - const processHandlerClass = (ast: any) => { - classAst = ast - try { - handlerSymVal = analyzer.processInstruction(scope, classAst, state) - if (!handlerSymVal || handlerSymVal.vtype !== 'class') { - handlerSymVal = this.buildClassSymbol(classAst) - if (!handlerSymVal.field) { - handlerSymVal.field = {} - } - } - } catch (e) { - handlerSymVal = this.buildClassSymbol(classAst) - if (!handlerSymVal.field) { - handlerSymVal.field = {} + // 3. Handle Symbol or Call (like tornado.web.url) + if (val.ast?.type === 'CallExpression') { + const { callee } = val.ast + const name = callee.property?.name || callee.name + if (name === 'url' || name === 'URLSpec') { + const args = val.ast.arguments + if (args && args.length >= 2) { + const p = args[0].value + const hNode = args[1] + if (typeof p === 'string') { + const resolvedH = analyzer.processInstruction(scope, hNode, state) + this.finishRoute(analyzer, scope, state, resolvedH || { ast: hNode }, p) + return } } } + } - // First, try to use handler symbol value directly from the route pair - if (pair.handlerSymVal) { - const handlerSym = pair.handlerSymVal - // If it's already a class symbol value, use it directly - if (handlerSym.vtype === 'class') { - handlerSymVal = handlerSym - classAst = handlerSym.ast || handlerSym.fdef - } else if (handlerSym.ast && handlerSym.ast.type === 'ClassDefinition') { - // If we have the AST, process it to get the class symbol value - processHandlerClass(handlerSym.ast) - } - } - - // Ensure handlerSymVal has field structure - if (handlerSymVal && handlerSymVal.vtype === 'class' && !handlerSymVal.field) { - handlerSymVal.field = {} - } + // 4. Fallback: Collections + if (['list', 'tuple', 'object'].includes(val.vtype) || (val.vtype === 'object' && val.value)) { + const items = Array.isArray(val.value) ? val.value : Object.values(val.value || {}) + const isLikelyCollection = + Array.isArray(val.value) || + (val.vtype === 'object' && Object.keys(val.value || {}).some((k) => /^\d+$/.test(k))) - if (handlerSymVal && classAst) { - this.emitHandlerEntrypoints(analyzer, handlerSymVal, pair.path, classAst, scope, state) + if (isLikelyCollection && items.length > 0) { + items.forEach((item: any) => this.processRoutes(analyzer, scope, state, item)) } } } /** - * Proactive Sink Matching - * Overrides base class to add flexible matching for common Python sinks (DB, Shell) - * that might be missed due to incomplete type resolution. - * @param node - * @param fclos - * @param argvalues - */ - - /** - * Handle API calls like self.get_argument() + * * @param analyzer * @param scope - * @param node * @param state - * @param info + * @param h + * @param path */ - triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void { - super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) - if (Config.entryPointMode === 'ONLY_CUSTOM') return - const { fclos, ret } = info - if (!fclos || !ret) return + private finishRoute(analyzer: any, scope: any, state: any, h: any, path: string) { + if (!h) return - const funcName = node.callee?.property?.name || node.callee?.name - if (!funcName) return - - // Mark Tornado source APIs and passthrough functions - if (tornadoSourceAPIs.has(funcName)) { - markTaintSource(ret, { path: node || ret.ast || {}, kind: 'PYTHON_INPUT' }) - } else if (passthroughFuncs.has(funcName)) { - // Check for request attribute access like self.request.body.decode() - const isReqAttr = node.callee?.type === 'MemberAccess' && isRequestAttributeExpression(node.callee.object) - const receiver = fclos?.object || fclos?._this - const isTaintedReceiver = - receiver && (receiver.taint || receiver.hasTagRec || receiver._tags?.has('PYTHON_INPUT')) - - if (isReqAttr || isTaintedReceiver) { - markTaintSource(ret, { path: node || ret.ast || {}, kind: 'PYTHON_INPUT' }) + if (h.vtype === 'union' && Array.isArray(h.value)) h = h.value[0] + if (h.vtype !== 'class' && h.ast?.type === 'ClassDefinition') { + try { + h = analyzer.processInstruction(scope, h.ast, state) || this.buildClassSymbol(h.ast) + } catch (e) { + h = this.buildClassSymbol(h.ast) } } + + if (h?.vtype === 'class') { + this.registerEntryPoints(analyzer, h, path) + } } /** - * Trigger before entrypoint execution - * Mark path parameters as tainted sources + * * @param analyzer - * @param scope - * @param node - * @param state - * @param info + * @param cls + * @param path */ + private registerEntryPoints(analyzer: any, cls: any, path: string) { + const methods = ['get', 'post', 'put', 'delete', 'patch'] + const classValue = cls.value || {} + + Object.entries(classValue).forEach(([name, fclos]: [string, any]) => { + if (methods.includes(name) && fclos.vtype === 'fclos') { + const ep = completeEntryPoint(fclos) + ep.urlPattern = path + ep.handlerName = cls.ast?.id?.name || cls._sid || 'Unknown' + analyzer.entryPoints.push(ep) + + const info = extractTornadoParams(path) + let paramIdx = 0 + const actualParams = (fclos.params || fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] + actualParams.forEach((p: any) => { + const pName = p.name || p.id?.name + if (pName === 'self') return + paramIdx++ + if (info.named.includes(pName) || (info.named.length === 0 && paramIdx <= info.positionalCount)) { + this.sourceScope.value.push({ + path: pName, + kind: 'PYTHON_INPUT', + scopeFile: 'all', + scopeFunc: 'all', + locStart: 'all', + locEnd: 'all', + }) + } + }) + } + }) + } /** - * Handle Member Access Sources like self.request.body - * Reuses isRequestAttributeAccess from tornado-util.ts to maintain consistency - * @param analyzer - * @param scope + * * @param node - * @param state - * @param info */ - triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void { - if (Config.entryPointMode === 'ONLY_CUSTOM') return - if (isRequestAttributeAccess(node)) { - markTaintSource(info.res, { path: node || info.res.ast || {}, kind: 'PYTHON_INPUT' }) - } + private buildClassSymbol(node: any) { + const value: any = {} + node.body?.forEach((m: any) => { + if (m.type === 'FunctionDefinition') { + const name = m.name?.name || m.id?.name + if (name) { + value[name] = { + vtype: 'fclos', + fdef: m, + ast: m, + params: (m.parameters?.parameters || m.parameters || []).map((p: any) => ({ name: p.id?.name || p.name })), + } + } + } + }) + return { vtype: 'class', value, ast: node } } /** - * Extract route pairs from resolved symbol values (from argvalues) - * @param routeListSymVal - Symbol value representing route list - * @param currentFile - Current file path + * * @param analyzer * @param scope + * @param node * @param state - * @returns Array of route pairs with handler symbol values + * @param info */ - private extractRoutesFromSymbolValue( - routeListSymVal: any, - currentFile: string, - analyzer?: any, - scope?: any, - state?: any - ): Array { - if (!routeListSymVal) return [] - - // Handle list/tuple symbol values - if (routeListSymVal.vtype === 'list' || routeListSymVal.vtype === 'tuple' || routeListSymVal.vtype === 'array') { - const elements = routeListSymVal.value || [] - return elements.flatMap((element: any) => - this.extractRoutesFromSymbolValue(element, currentFile, analyzer, scope, state) - ) - } - - // Handle object type that might be a list (e.g., when symbol interpretation returns object for list literals) - // Check if it has numeric keys (0, 1, 2, ...) which indicates it's an array-like object - if (routeListSymVal.vtype === 'object' && routeListSymVal.value) { - const keys = Object.keys(routeListSymVal.value).filter((k) => /^\d+$/.test(k)) - if (keys.length > 0) { - // It's an array-like object, extract elements by numeric keys - const elements = keys.map((k) => routeListSymVal.value[k]) - return elements.flatMap((element: any) => - this.extractRoutesFromSymbolValue(element, currentFile, analyzer, scope, state) - ) - } - } - - // Handle union types - if (routeListSymVal.vtype === 'union' && Array.isArray(routeListSymVal.value)) { - // Union type might represent a tuple (path, handler) - // Check if it has exactly 2 elements and try to extract as tuple - if (routeListSymVal.value.length === 2) { - const [pathSymVal, handlerSymVal] = routeListSymVal.value - const pathValue = this.extractStringFromSymbolValue(pathSymVal) - const handlerName = 'Handler' // Placeholder name - if (pathValue && handlerName) { - const file = - handlerSymVal?.ast?.loc?.sourcefile || - handlerSymVal?.fdef?.loc?.sourcefile || - handlerSymVal?.loc?.sourcefile || - currentFile - return [{ path: pathValue, handlerName, file, handlerSymVal }] - } - } - // Otherwise, recursively process each element - return routeListSymVal.value.flatMap((val: any) => - this.extractRoutesFromSymbolValue(val, currentFile, analyzer, scope, state) - ) - } - - // Handle tuple/route pair: (path, handler) - // Check if it's a tuple with 2 elements - if ( - routeListSymVal.vtype === 'tuple' && - Array.isArray(routeListSymVal.value) && - routeListSymVal.value.length >= 2 - ) { - const [pathSymVal, handlerSymVal] = routeListSymVal.value - const pathValue = this.extractStringFromSymbolValue(pathSymVal) - const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) - if (pathValue && handlerName) { - const file = - handlerSymVal?.ast?.loc?.sourcefile || - handlerSymVal?.fdef?.loc?.sourcefile || - handlerSymVal?.loc?.sourcefile || - currentFile - return [{ path: pathValue, handlerName, file, handlerSymVal }] - } - } + triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void { + super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) + const { fclos, ret } = info + if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return - // Handle object type that represents a tuple (e.g., when tuple is represented as object with 0, 1 keys) + const name = node.callee?.property?.name || node.callee?.name if ( - routeListSymVal.vtype === 'object' && - routeListSymVal.value && - routeListSymVal.value['0'] && - routeListSymVal.value['1'] + tornadoSourceAPIs.has(name) || + (passthroughFuncs.has(name) && isRequestAttributeExpression(node.callee?.object)) ) { - const pathSymVal = routeListSymVal.value['0'] - const handlerSymVal = routeListSymVal.value['1'] - const pathValue = this.extractStringFromSymbolValue(pathSymVal) - const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) - if (pathValue && handlerName) { - const file = - handlerSymVal?.ast?.loc?.sourcefile || - handlerSymVal?.fdef?.loc?.sourcefile || - handlerSymVal?.loc?.sourcefile || - currentFile - return [{ path: pathValue, handlerName, file, handlerSymVal }] - } - } - - // Handle list concatenation via BinaryExpression (e.g., app_routes + [...]) - const astNode = routeListSymVal.ast - if (astNode && astNode.type === 'BinaryExpression' && astNode.operator === '+') { - try { - const pairs: Array = [] - const leftVal = analyzer?.processInstruction ? analyzer.processInstruction(scope, astNode.left, state) : null - if (leftVal) { - pairs.push(...this.extractRoutesFromSymbolValue(leftVal, currentFile, analyzer, scope, state)) - } - const rightVal = analyzer?.processInstruction ? analyzer.processInstruction(scope, astNode.right, state) : null - if (rightVal) { - pairs.push(...this.extractRoutesFromSymbolValue(rightVal, currentFile, analyzer, scope, state)) - } - if (pairs.length > 0) { - return pairs - } - } catch (e) { - // ignore and fallback to AST parse below - } - } - - return [] - } - - /** - * Extract string value from symbol value - * @param symVal - Symbol value - * @returns String value or null - */ - private extractStringFromSymbolValue(symVal: any): string | null { - if (!symVal) return null - - // Direct string value - if (symVal.vtype === 'string' || symVal.vtype === 'literal') { - return typeof symVal.value === 'string' ? symVal.value : null - } - - // From AST - if (symVal.ast && (symVal.ast.type === 'StringLiteral' || symVal.ast.type === 'Literal')) { - return typeof symVal.ast.value === 'string' ? symVal.ast.value : null - } - - return null - } - - /** - * Extract handler name/class from symbol value - * @param handlerSymVal - Handler symbol value - * @returns Handler name or null - */ - private extractHandlerNameFromSymbolValue(handlerSymVal: any): string | null { - if (!handlerSymVal) return null - - // If it's a class symbol value - if (handlerSymVal.vtype === 'class') { - // Try to get class name from AST - if (handlerSymVal.ast?.id?.name) { - return handlerSymVal.ast.id.name - } - if (handlerSymVal.ast?.name?.name) { - return handlerSymVal.ast.name.name - } - // Try from _sid or _qid - if (handlerSymVal._sid) { - return handlerSymVal._sid - } - if (handlerSymVal._qid) { - const parts = handlerSymVal._qid.split('.') - return parts[parts.length - 1] - } - } - - // If it's an identifier symbol value - if (handlerSymVal.vtype === 'identifier' || handlerSymVal.vtype === 'var') { - if (handlerSymVal._sid) { - return handlerSymVal._sid - } - if (handlerSymVal.ast?.name) { - return handlerSymVal.ast.name - } - } - - // From AST - if (handlerSymVal.ast) { - if (handlerSymVal.ast.type === 'Identifier') { - return handlerSymVal.ast.name - } - if (handlerSymVal.ast.type === 'ClassDefinition') { - return handlerSymVal.ast.id?.name || handlerSymVal.ast.name?.name || null - } + markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' }) } - - return null } /** - * Register EntryPoints and Path Param Sources - * [Fixed]: Removed Config check to forcefully register parameters as sources + * * @param analyzer - * @param handlerSymVal - * @param urlPattern - * @param classAst * @param scope + * @param node * @param state + * @param info */ - private emitHandlerEntrypoints( - analyzer: any, - handlerSymVal: any, - urlPattern: string, - classAst: any, - scope?: any, - state?: any - ) { - if (!handlerSymVal || handlerSymVal.vtype !== 'class') return - - const httpMethods = new Set(['get', 'post', 'put', 'delete', 'patch', 'head', 'options']) - const handlers = Object.entries(handlerSymVal.value).filter( - ([key, value]: [string, any]) => httpMethods.has(key) && value.vtype === 'fclos' - ) - - for (const [method, fclos] of handlers as any[]) { - if (fclos.fdef?.loc?.sourcefile?.endsWith('__init__.py')) continue - - let finalEp = fclos - if (scope && state && fclos.fdef) { - try { - const processed = analyzer.processInstruction(scope, fclos.fdef, state) - if (processed?.vtype === 'fclos') { - processed.parent = handlerSymVal - processed.params = fclos.params || extractParamsFromAst(fclos.fdef) - finalEp = processed - } - } catch (e) { - /* fallback */ - } - } - - if (!finalEp.value) finalEp.value = {} - finalEp.parent = handlerSymVal - if (handlerSymVal.vtype === 'class' && !handlerSymVal.field) handlerSymVal.field = {} - - try { - if (!finalEp.ast) finalEp.ast = finalEp.fdef - if (!finalEp.functionName) { - const rawName = finalEp.fdef?.name?.name || finalEp.fdef?.id?.name || finalEp.name || '' - const handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) - finalEp.functionName = handlerName ? `${handlerName}.${rawName}` : rawName - } - if (!finalEp.filePath && finalEp.fdef?.loc?.sourcefile) { - finalEp.filePath = Config.maindir - ? FileUtil.extractRelativePath(finalEp.fdef.loc.sourcefile, Config.maindir) - : finalEp.fdef.loc.sourcefile - } - - const entryPoint = completeEntryPoint(finalEp) - entryPoint.urlPattern = urlPattern - entryPoint.handlerName = this.extractHandlerNameFromSymbolValue(handlerSymVal) - analyzer.entryPoints.push(entryPoint) - - // Register path parameters as sources - const params = extractTornadoParams(urlPattern) - const paramMetas = finalEp.params || extractParamsFromAst(finalEp.fdef) || [] - paramMetas.forEach((meta: any, idx: number) => { - if (meta.name === 'self') return - const isSource = - params.named.includes(meta.name) || (params.named.length === 0 && idx <= params.positionalCount) - if (isSource) { - const sourceEntry = { - path: meta.name, - kind: 'PYTHON_INPUT', - scopeFile: 'all', - scopeFunc: 'all', - locStart: 'all', - locEnd: 'all', - } - this.sourceScope.value.push(sourceEntry) - this.addSourceTagForSourceScope('PYTHON_INPUT', [sourceEntry]) - } - }) - } catch (e: any) { - logger.warn(`Error in entrypoint collection: ${e?.message || e}`) - } + triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void { + if (Config.entryPointMode !== 'ONLY_CUSTOM' && isRequestAttributeAccess(node)) { + markTaintSource(info.res, { path: node, kind: 'PYTHON_INPUT' }) } } - - /** - * - * @param classNode - */ - private buildClassSymbol(classNode: any): any { - const value: any = {} - const members = classNode.body || [] - const className = classNode.name?.name || classNode.id?.name || 'UnknownClass' - members.forEach((member: any) => { - if (member.type !== 'FunctionDefinition') return - const memberName = member.name?.name || member.name?.id?.name || member.id?.name - if (memberName) { - value[memberName] = { - vtype: 'fclos', - fdef: member, - ast: member, - params: extractParamsFromAst(member), - } - } - }) - return { vtype: 'class', value, ast: classNode } - } } export = TornadoTaintChecker diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index ec553a3e..869058d2 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -1,13 +1,13 @@ -const path = require('path') -const AstUtil = require('../../../util/ast-util') - export interface ParamMeta { name: string locStart: number | 'all' locEnd: number | 'all' } -export const tornadoSourceAPIs = new Set([ +/** + * Tornado Source APIs + */ +export const tornadoSourceAPIs = new Set([ 'get_argument', 'get_query_argument', 'get_body_argument', @@ -19,39 +19,25 @@ export const tornadoSourceAPIs = new Set([ 'get_json_body', ]) -export const passthroughFuncs = new Set(['decode', 'strip', 'replace', 'lower', 'upper', 'split']) +export const passthroughFuncs = new Set(['decode', 'strip', 'replace', 'lower', 'upper', 'split']) /** - * Detect if node is an access to a Tornado request attribute (e.g., self.request.body) + * Detect if node is an access to a Tornado request attribute * @param node */ export function isRequestAttributeAccess(node: any): boolean { if (node?.type !== 'MemberAccess') return false - const propName = node.property?.name const inner = node.object - if (inner?.type !== 'MemberAccess') return false - const baseName = inner.object?.name - const requestName = inner.property?.name return ( - baseName === 'self' && - requestName === 'request' && - [ - 'body', - 'query', - 'headers', - 'cookies', - 'files', - 'uri', - 'path', - 'arguments', - 'query_arguments', - 'body_arguments', - ].includes(propName) + inner?.type === 'MemberAccess' && + inner.object?.name === 'self' && + inner.property?.name === 'request' && + ['body', 'query', 'headers', 'cookies', 'files', 'uri', 'path', 'arguments'].includes(node.property?.name) ) } /** - * Detect if expression involves a Tornado request attribute + * * @param expr */ export function isRequestAttributeExpression(expr: any): boolean { @@ -64,97 +50,36 @@ export function isRequestAttributeExpression(expr: any): boolean { } /** - * Check if node is a Tornado Application or handler call + * Check if node is a Tornado Application call * @param node * @param targetName */ export function isTornadoCall(node: any, targetName: string): boolean { - if (!node || node.type !== 'CallExpression' || !node.callee) return false + if (!node || node.type !== 'CallExpression') return false const { callee } = node - if (callee.type === 'MemberAccess' && callee.property?.name === targetName) { - return true - } - if (callee.type === 'Identifier' && callee.name === targetName) { - return true - } - // Handle pattern: tornado.web.Application.__init__(self, handlers, ...) - // In this case, we need to check if 'Application' is in the member access chain - // and the final property is '__init__' - const propName = callee.property?.name - if (callee.type === 'MemberAccess' && (propName === '__init__' || propName === '_CTOR_')) { - // Check if any part of the member access chain matches the targetName + if (callee.name === targetName || callee.property?.name === targetName) return true + + // Handle __init__ pattern + if (['__init__', '_CTOR_'].includes(callee.property?.name)) { let current = callee.object while (current) { - if (current.type === 'Identifier' && current.name === targetName) { - return true - } - if (current.type === 'MemberAccess' && current.property?.name === targetName) { - return true - } - // Follow through super() or other calls - if (current.type === 'CallExpression') { - current = current.callee - } else if (current.type === 'MemberAccess') { - current = current.object - } else { - current = null - } + if (current.name === targetName || current.property?.name === targetName) return true + current = current.object || current.callee } } return false } /** - * Extract parameters from function AST - * @param funcNode - */ -export function extractParamsFromAst(funcNode: any): ParamMeta[] { - if (!funcNode) return [] - const rawParams = Array.isArray(funcNode?.parameters?.parameters) - ? funcNode.parameters.parameters - : Array.isArray(funcNode?.parameters) - ? funcNode.parameters - : [] - const fallbackLine = typeof funcNode?.loc?.start?.line === 'number' ? funcNode.loc.start.line : 'all' - const result: ParamMeta[] = [] - for (const param of rawParams) { - const name = param?.id?.name || param?.name - if (!name) continue - const locStart = typeof param?.loc?.start?.line === 'number' ? param.loc.start.line : fallbackLine - const locEnd = typeof param?.loc?.end?.line === 'number' ? param.loc.end.line : fallbackLine - result.push({ name, locStart, locEnd }) - } - return result -} - -/** - * Extract named parameter names or positional count from Tornado URL patterns (regex) - * Supports pattern like (?P...) or (...) - * @param pattern - Tornado URL regex pattern + * Extract parameter info from URL regex patterns + * @param pattern */ export function extractTornadoParams(pattern: string): { named: string[]; positionalCount: number } { if (!pattern) return { named: [], positionalCount: 0 } + const named = Array.from(pattern.matchAll(/\(\?P<(\w+)>/g)).map((m) => m[1]) + if (named.length > 0) return { named, positionalCount: 0 } - const namedGroups: string[] = [] - const namedRegex = /\(\?P<(\w+)>/g - let match: RegExpExecArray | null - while ((match = namedRegex.exec(pattern)) !== null) { - namedGroups.push(match[1]) - } - - if (namedGroups.length > 0) { - return { named: namedGroups, positionalCount: 0 } - } - - // Count positional groups. - // Remove escaped parens first. const cleaned = pattern.replace(/\\\(|\\\)/g, '') - let positionalCount = 0 - // Matches '(' NOT followed by '?' (which covers (?:, (?P<, (?=, (?!, etc.) - const positionalRegex = /\((?!\?)/g - while (positionalRegex.exec(cleaned) !== null) { - positionalCount++ - } - + const positionalCount = (cleaned.match(/\((?!\?)/g) || []).length return { named: [], positionalCount } } From c8ff9dc2733d379887d612f41ab04c1cf8dad0e5 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 12 Jan 2026 06:09:32 -0800 Subject: [PATCH 19/35] Fix:update-tornado --- .../taint/python/tornado-taint-checker.ts | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 7635ec22..aae4f0fb 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -81,13 +81,13 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { return } - // 2. Try to extract from Object/Tuple/URLSpec + // 2. Try to extract from Object/URLSpec/List-like let path: string | undefined let h: any - if ((val.vtype === 'object' || val.vtype === 'tuple') && val.value) { - const pVal = val.value['0'] || val.value.regex || val.value._pattern - h = val.value['1'] || val.value.handler_class || val.value._handler_class + if (val.vtype === 'object' && val.value) { + const pVal = val.value['0'] || val.value.regex + h = val.value['1'] || val.value.handler_class path = pVal?.value || pVal?.ast?.value } else if (Array.isArray(val.value)) { const pVal = val.value[0] @@ -118,12 +118,11 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } } - // 4. Fallback: Collections - if (['list', 'tuple', 'object'].includes(val.vtype) || (val.vtype === 'object' && val.value)) { + // 4. Fallback: Collections (Recurse into lists/tuples) + if (val.vtype === 'object' || val.vtype === 'union') { const items = Array.isArray(val.value) ? val.value : Object.values(val.value || {}) const isLikelyCollection = - Array.isArray(val.value) || - (val.vtype === 'object' && Object.keys(val.value || {}).some((k) => /^\d+$/.test(k))) + Array.isArray(val.value) || Object.keys(val.value || {}).some((k) => /^\d+$/.test(k)) if (isLikelyCollection && items.length > 0) { items.forEach((item: any) => this.processRoutes(analyzer, scope, state, item)) @@ -151,7 +150,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } } - if (h?.vtype === 'class') { + if (path && h) { this.registerEntryPoints(analyzer, h, path) } } @@ -159,6 +158,9 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { /** * * @param analyzer + * @param scope + * @param state + * @param path * @param cls * @param path */ @@ -170,12 +172,12 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (methods.includes(name) && fclos.vtype === 'fclos') { const ep = completeEntryPoint(fclos) ep.urlPattern = path - ep.handlerName = cls.ast?.id?.name || cls._sid || 'Unknown' + ep.handlerName = cls.ast?.id?.name || cls.sid || 'Unknown' analyzer.entryPoints.push(ep) const info = extractTornadoParams(path) let paramIdx = 0 - const actualParams = (fclos.params || fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] + const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] actualParams.forEach((p: any) => { const pName = p.name || p.id?.name if (pName === 'self') return @@ -209,7 +211,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { vtype: 'fclos', fdef: m, ast: m, - params: (m.parameters?.parameters || m.parameters || []).map((p: any) => ({ name: p.id?.name || p.name })), + params: (m.parameters || []).map((p: any) => ({ name: p.id?.name || p.name })), } } } From 39669e6c94abaa33a0f3f56e79ee3cab02c51579 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 12 Jan 2026 18:13:59 -0800 Subject: [PATCH 20/35] Fix: update-tornado-framework --- src/engine/analyzer/python/common/python-analyzer.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts index bc0af018..40e3f740 100644 --- a/src/engine/analyzer/python/common/python-analyzer.ts +++ b/src/engine/analyzer/python/common/python-analyzer.ts @@ -576,7 +576,11 @@ class PythonAnalyzer extends (Analyzer as any) { resolved_prop.name = '_CTOR_' } if (!resolved_prop) return defscope - return this.getMemberValue(defscope, resolved_prop, state) + const res = this.getMemberValue(defscope, resolved_prop, state) + if (this.checkerManager && (this.checkerManager as any).checkAtMemberAccess) { + this.checkerManager.checkAtMemberAccess(this, defscope, node, state, { res }) + } + return res } /** From 8d19c17e57c0bcdec75e865c1dcaaadf0f12e952 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 12 Jan 2026 18:30:36 -0800 Subject: [PATCH 21/35] Fix: update tornado --- .../taint/python/tornado-taint-checker.ts | 20 +++--------------- src/checker/taint/python/tornado-util.ts | 21 ------------------- 2 files changed, 3 insertions(+), 38 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index aae4f0fb..89749862 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -2,14 +2,7 @@ const PythonTaintAbstractChecker = require('./python-taint-abstract-checker') const Config = require('../../../config') const completeEntryPoint = require('../common-kit/entry-points-util') const { markTaintSource } = require('../common-kit/source-util') -const { - isTornadoCall, - tornadoSourceAPIs, - passthroughFuncs, - isRequestAttributeExpression, - isRequestAttributeAccess, - extractTornadoParams, -} = require('./tornado-util') +const { isTornadoCall, tornadoSourceAPIs, isRequestAttributeAccess, extractTornadoParams } = require('./tornado-util') /** * Tornado Taint Checker - Simplified @@ -121,8 +114,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 4. Fallback: Collections (Recurse into lists/tuples) if (val.vtype === 'object' || val.vtype === 'union') { const items = Array.isArray(val.value) ? val.value : Object.values(val.value || {}) - const isLikelyCollection = - Array.isArray(val.value) || Object.keys(val.value || {}).some((k) => /^\d+$/.test(k)) + const isLikelyCollection = Array.isArray(val.value) || Object.keys(val.value || {}).some((k) => /^\d+$/.test(k)) if (isLikelyCollection && items.length > 0) { items.forEach((item: any) => this.processRoutes(analyzer, scope, state, item)) @@ -158,9 +150,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { /** * * @param analyzer - * @param scope - * @param state - * @param path * @param cls * @param path */ @@ -233,10 +222,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return const name = node.callee?.property?.name || node.callee?.name - if ( - tornadoSourceAPIs.has(name) || - (passthroughFuncs.has(name) && isRequestAttributeExpression(node.callee?.object)) - ) { + if (tornadoSourceAPIs.has(name)) { markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' }) } } diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index 869058d2..a050f484 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -1,9 +1,3 @@ -export interface ParamMeta { - name: string - locStart: number | 'all' - locEnd: number | 'all' -} - /** * Tornado Source APIs */ @@ -19,8 +13,6 @@ export const tornadoSourceAPIs = new Set([ 'get_json_body', ]) -export const passthroughFuncs = new Set(['decode', 'strip', 'replace', 'lower', 'upper', 'split']) - /** * Detect if node is an access to a Tornado request attribute * @param node @@ -36,19 +28,6 @@ export function isRequestAttributeAccess(node: any): boolean { ) } -/** - * - * @param expr - */ -export function isRequestAttributeExpression(expr: any): boolean { - if (!expr) return false - if (expr.type === 'MemberAccess') return isRequestAttributeAccess(expr) - if (expr.type === 'CallExpression' && expr.callee?.type === 'MemberAccess') { - return isRequestAttributeAccess(expr.callee.object) - } - return false -} - /** * Check if node is a Tornado Application call * @param node From 61c8b5f690036f88c735edb7446e3236b7a44397 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 12 Jan 2026 19:21:53 -0800 Subject: [PATCH 22/35] Fix: update tornado-framework --- src/checker/taint/python/tornado-taint-checker.ts | 15 +++++++++++---- .../analyzer/python/common/python-analyzer.ts | 15 --------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 89749862..a0aff32a 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -43,10 +43,17 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !argvalues) return let routes = null - if (isTornadoCall(node, 'Application')) routes = argvalues[0] - else if (isTornadoCall(node, 'add_handlers')) routes = argvalues[1] - - if (routes) this.processRoutes(analyzer, scope, state, routes) + const isApp = isTornadoCall(node, 'Application') + const isAdd = isTornadoCall(node, 'add_handlers') + if (isApp) { + const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name) + routes = isInit ? argvalues[1] : argvalues[0] + } else if (isAdd) { + routes = argvalues[1] + } + if (routes) { + this.processRoutes(analyzer, scope, state, routes) + } } /** diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts index 40e3f740..47833e07 100644 --- a/src/engine/analyzer/python/common/python-analyzer.ts +++ b/src/engine/analyzer/python/common/python-analyzer.ts @@ -238,21 +238,6 @@ class PythonAnalyzer extends (Analyzer as any) { const has_tag = (new_left && new_left.hasTagRec) || (new_right && new_right.hasTagRec) if (has_tag) { new_node.hasTagRec = has_tag - // Propagate tags and trace - new_node._tags = new Set() - if (new_left?._tags) { - for (const t of new_left._tags) new_node._tags.add(t) - } - if (new_right?._tags) { - for (const t of new_right._tags) new_node._tags.add(t) - } - - // Merge traces if possible, or just take one if not - if (new_left?.trace || new_right?.trace) { - new_node.trace = [] - if (new_left?.trace) new_node.trace.push(...new_left.trace) - if (new_right?.trace) new_node.trace.push(...new_right.trace) - } } if (this.checkerManager && (this.checkerManager as any).checkAtBinaryOperation) From f84b50fb47b0278ef8734ef678ffb5fdb66d55f6 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Tue, 13 Jan 2026 01:24:27 -0800 Subject: [PATCH 23/35] Fix:update tornado --- .../taint/python/tornado-taint-checker.ts | 72 ++++++++++++++++--- src/checker/taint/python/tornado-util.ts | 26 +++++-- 2 files changed, 83 insertions(+), 15 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index a0aff32a..49d6d09b 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -69,7 +69,9 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 1. Handle Union - Try to see if the union itself is a route (flattened tuple) if (val.vtype === 'union' && Array.isArray(val.value)) { const pathVal = val.value.find((v: any) => typeof (v.value || v.ast?.value) === 'string') - const hVal = val.value.find((v: any) => v.vtype === 'class' || v.ast?.type === 'ClassDefinition') + const hVal = val.value.find( + (v: any) => v.vtype === 'class' || v.ast?.type === 'ClassDefinition' || v.vtype === 'object' + ) if (pathVal && hVal) { const path = pathVal.value || pathVal.ast?.value @@ -81,33 +83,73 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { return } - // 2. Try to extract from Object/URLSpec/List-like + // 2. Try to extract from Object/URLSpec/List-like/RuleRouter/Rule let path: string | undefined let h: any if (val.vtype === 'object' && val.value) { - const pVal = val.value['0'] || val.value.regex - h = val.value['1'] || val.value.handler_class + // Handle RuleRouter and Rule specifically if they are objects + const isRouteObject = isTornadoCall(val.ast, 'RuleRouter') || isTornadoCall(val.ast, 'Rule') + if (isRouteObject) { + const rules = val.value['0'] || val.value.rules || val.value.target || val.value.handler + if (rules) { + this.processRoutes(analyzer, scope, state, rules) + return + } + } + + const pVal = val.value['0'] || val.value.regex || val.value.matcher + h = val.value['1'] || val.value.handler_class || val.value.target || val.value.handler path = pVal?.value || pVal?.ast?.value + + // If matcher is PathMatches(r"...") + if ( + !path && + pVal?.ast?.type === 'CallExpression' && + (pVal.ast.callee.name === 'PathMatches' || pVal.ast.callee.property?.name === 'PathMatches') + ) { + path = pVal.ast.arguments?.[0]?.value + } } else if (Array.isArray(val.value)) { const pVal = val.value[0] h = val.value[1] path = pVal?.value || pVal?.ast?.value } - if (typeof path === 'string' && h) { - this.finishRoute(analyzer, scope, state, h, path) - return + if (h) { + // If h is an instance (object), we might need to look for its handlers recursively + if (h.vtype === 'object' && h.value) { + // If it's another Application or Router instance + // We can try to see if it has internal handlers or rules + const innerRoutes = h.value.handlers || h.value.rules + if (innerRoutes) { + this.processRoutes(analyzer, scope, state, innerRoutes) + // Note: We don't return here because finishRoute might still be needed for direct handlers + } + } + + if (typeof path === 'string') { + this.finishRoute(analyzer, scope, state, h, path) + return + } } - // 3. Handle Symbol or Call (like tornado.web.url) + // 3. Handle Symbol or Call (like tornado.web.url, RuleRouter, Rule) if (val.ast?.type === 'CallExpression') { const { callee } = val.ast const name = callee.property?.name || callee.name - if (name === 'url' || name === 'URLSpec') { + if (name === 'url' || name === 'URLSpec' || name === 'Rule') { const args = val.ast.arguments if (args && args.length >= 2) { - const p = args[0].value + let p = args[0].value + // Handle PathMatches(r"...") + if (typeof p !== 'string' && args[0].type === 'CallExpression') { + const innerCallee = args[0].callee.property?.name || args[0].callee.name + if (innerCallee === 'PathMatches') { + p = args[0].arguments?.[0]?.value + } + } + const hNode = args[1] if (typeof p === 'string') { const resolvedH = analyzer.processInstruction(scope, hNode, state) @@ -115,6 +157,14 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { return } } + } else if (name === 'RuleRouter') { + const args = val.ast.arguments + if (args && args.length >= 1) { + const routesNode = args[0] + const resolvedRoutes = analyzer.processInstruction(scope, routesNode, state) + this.processRoutes(analyzer, scope, state, resolvedRoutes || { ast: routesNode }) + return + } } } @@ -175,7 +225,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { let paramIdx = 0 const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] actualParams.forEach((p: any) => { - const pName = p.name || p.id?.name + const pName = p.id?.name || p.name if (pName === 'self') return paramIdx++ if (info.named.includes(pName) || (info.named.length === 0 && paramIdx <= info.positionalCount)) { diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index a050f484..6c34683a 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -10,7 +10,6 @@ export const tornadoSourceAPIs = new Set([ 'get_cookie', 'get_secure_cookie', 'get_arguments', - 'get_json_body', ]) /** @@ -22,9 +21,23 @@ export function isRequestAttributeAccess(node: any): boolean { const inner = node.object return ( inner?.type === 'MemberAccess' && + inner.object?.type === 'Identifier' && inner.object?.name === 'self' && inner.property?.name === 'request' && - ['body', 'query', 'headers', 'cookies', 'files', 'uri', 'path', 'arguments'].includes(node.property?.name) + [ + 'body', + 'query', + 'headers', + 'cookies', + 'files', + 'uri', + 'path', + 'arguments', + 'remote_ip', + 'host', + 'query_arguments', + 'body_arguments', + ].includes(node.property?.name) ) } @@ -36,13 +49,18 @@ export function isRequestAttributeAccess(node: any): boolean { export function isTornadoCall(node: any, targetName: string): boolean { if (!node || node.type !== 'CallExpression') return false const { callee } = node - if (callee.name === targetName || callee.property?.name === targetName) return true + const names = [targetName] + if (targetName === 'Application') { + names.push('RuleRouter', 'Rule') + } + + if (names.includes(callee.name) || names.includes(callee.property?.name)) return true // Handle __init__ pattern if (['__init__', '_CTOR_'].includes(callee.property?.name)) { let current = callee.object while (current) { - if (current.name === targetName || current.property?.name === targetName) return true + if (names.includes(current.name) || names.includes(current.property?.name)) return true current = current.object || current.callee } } From 15da4c156c9e2728a846cfbc9adef247a760ad02 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 19 Jan 2026 00:30:21 -0800 Subject: [PATCH 24/35] Fix: update tornado framework --- .../taint/python/tornado-taint-checker.ts | 54 +++++++------------ src/checker/taint/python/tornado-util.ts | 5 -- 2 files changed, 19 insertions(+), 40 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 49d6d09b..b6343c0f 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -66,19 +66,19 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { private processRoutes(analyzer: any, scope: any, state: any, val: any) { if (!val) return - // 1. Handle Union - Try to see if the union itself is a route (flattened tuple) + // 1. Handle Union - Process all elements in the union if (val.vtype === 'union' && Array.isArray(val.value)) { + // Try to see if this union represents a single route (flattened tuple/list) const pathVal = val.value.find((v: any) => typeof (v.value || v.ast?.value) === 'string') const hVal = val.value.find( (v: any) => v.vtype === 'class' || v.ast?.type === 'ClassDefinition' || v.vtype === 'object' ) - if (pathVal && hVal) { const path = pathVal.value || pathVal.ast?.value this.finishRoute(analyzer, scope, state, hVal, path) return } - + // Otherwise recurse into each element val.value.forEach((v: any) => this.processRoutes(analyzer, scope, state, v)) return } @@ -86,10 +86,9 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { // 2. Try to extract from Object/URLSpec/List-like/RuleRouter/Rule let path: string | undefined let h: any - if (val.vtype === 'object' && val.value) { - // Handle RuleRouter and Rule specifically if they are objects const isRouteObject = isTornadoCall(val.ast, 'RuleRouter') || isTornadoCall(val.ast, 'Rule') + // Handle RuleRouter and Rule specifically if (isRouteObject) { const rules = val.value['0'] || val.value.rules || val.value.target || val.value.handler if (rules) { @@ -101,7 +100,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const pVal = val.value['0'] || val.value.regex || val.value.matcher h = val.value['1'] || val.value.handler_class || val.value.target || val.value.handler path = pVal?.value || pVal?.ast?.value - // If matcher is PathMatches(r"...") if ( !path && @@ -111,30 +109,32 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { path = pVal.ast.arguments?.[0]?.value } } else if (Array.isArray(val.value)) { - const pVal = val.value[0] + path = val.value[0]?.value || val.value[0]?.ast?.value h = val.value[1] - path = pVal?.value || pVal?.ast?.value } - if (h) { // If h is an instance (object), we might need to look for its handlers recursively if (h.vtype === 'object' && h.value) { - // If it's another Application or Router instance - // We can try to see if it has internal handlers or rules const innerRoutes = h.value.handlers || h.value.rules if (innerRoutes) { this.processRoutes(analyzer, scope, state, innerRoutes) - // Note: We don't return here because finishRoute might still be needed for direct handlers } } - if (typeof path === 'string') { this.finishRoute(analyzer, scope, state, h, path) return } } - - // 3. Handle Symbol or Call (like tornado.web.url, RuleRouter, Rule) + // Handle nested collections in objects (like lists of routes) + if (val.vtype === 'object' && val.value) { + const items = Array.isArray(val.value) ? val.value : Object.values(val.value) + const isLikelyCollection = Array.isArray(val.value) || Object.keys(val.value).some((k) => /^\d+$/.test(k)) + if (isLikelyCollection) { + items.forEach((item: any) => this.processRoutes(analyzer, scope, state, item)) + return + } + } + // 3. Handle Direct Call (like tornado.web.url, RuleRouter, Rule) if (val.ast?.type === 'CallExpression') { const { callee } = val.ast const name = callee.property?.name || callee.name @@ -142,41 +142,26 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const args = val.ast.arguments if (args && args.length >= 2) { let p = args[0].value - // Handle PathMatches(r"...") if (typeof p !== 'string' && args[0].type === 'CallExpression') { const innerCallee = args[0].callee.property?.name || args[0].callee.name if (innerCallee === 'PathMatches') { p = args[0].arguments?.[0]?.value } } - - const hNode = args[1] if (typeof p === 'string') { + const hNode = args[1] const resolvedH = analyzer.processInstruction(scope, hNode, state) this.finishRoute(analyzer, scope, state, resolvedH || { ast: hNode }, p) - return } } } else if (name === 'RuleRouter') { const args = val.ast.arguments if (args && args.length >= 1) { - const routesNode = args[0] - const resolvedRoutes = analyzer.processInstruction(scope, routesNode, state) - this.processRoutes(analyzer, scope, state, resolvedRoutes || { ast: routesNode }) - return + const resolvedRoutes = analyzer.processInstruction(scope, args[0], state) + this.processRoutes(analyzer, scope, state, resolvedRoutes || { ast: args[0] }) } } } - - // 4. Fallback: Collections (Recurse into lists/tuples) - if (val.vtype === 'object' || val.vtype === 'union') { - const items = Array.isArray(val.value) ? val.value : Object.values(val.value || {}) - const isLikelyCollection = Array.isArray(val.value) || Object.keys(val.value || {}).some((k) => /^\d+$/.test(k)) - - if (isLikelyCollection && items.length > 0) { - items.forEach((item: any) => this.processRoutes(analyzer, scope, state, item)) - } - } } /** @@ -251,13 +236,12 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const value: any = {} node.body?.forEach((m: any) => { if (m.type === 'FunctionDefinition') { - const name = m.name?.name || m.id?.name + const name = m.id?.name || m.name?.name if (name) { value[name] = { vtype: 'fclos', fdef: m, ast: m, - params: (m.parameters || []).map((p: any) => ({ name: p.id?.name || p.name })), } } } diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index 6c34683a..d6b0526c 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -50,12 +50,7 @@ export function isTornadoCall(node: any, targetName: string): boolean { if (!node || node.type !== 'CallExpression') return false const { callee } = node const names = [targetName] - if (targetName === 'Application') { - names.push('RuleRouter', 'Rule') - } - if (names.includes(callee.name) || names.includes(callee.property?.name)) return true - // Handle __init__ pattern if (['__init__', '_CTOR_'].includes(callee.property?.name)) { let current = callee.object From 2682e6a36764ef30a81b14c55cf174405c140804 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Mon, 19 Jan 2026 02:42:59 -0800 Subject: [PATCH 25/35] Fix: update tornado-framework --- .../taint/python/tornado-taint-checker.ts | 30 +++++++++---------- src/checker/taint/python/tornado-util.ts | 9 +++--- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index b6343c0f..e3b12f90 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -41,13 +41,13 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info) const { fclos, argvalues } = info if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !argvalues) return - let routes = null const isApp = isTornadoCall(node, 'Application') const isAdd = isTornadoCall(node, 'add_handlers') - if (isApp) { + const isRouter = isTornadoCall(node, 'RuleRouter') + if (isApp || isRouter) { const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name) - routes = isInit ? argvalues[1] : argvalues[0] + routes = (isApp || isRouter) && isInit ? argvalues[1] : argvalues[0] } else if (isAdd) { routes = argvalues[1] } @@ -87,16 +87,16 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { let path: string | undefined let h: any if (val.vtype === 'object' && val.value) { - const isRouteObject = isTornadoCall(val.ast, 'RuleRouter') || isTornadoCall(val.ast, 'Rule') - // Handle RuleRouter and Rule specifically - if (isRouteObject) { - const rules = val.value['0'] || val.value.rules || val.value.target || val.value.handler + const isRouter = isTornadoCall(val.ast, 'RuleRouter') + const isRule = isTornadoCall(val.ast, 'Rule') || isTornadoCall(val.ast, 'URLSpec') + + if (isRouter) { + const rules = val.value['0'] || val.value.rules if (rules) { this.processRoutes(analyzer, scope, state, rules) return } } - const pVal = val.value['0'] || val.value.regex || val.value.matcher h = val.value['1'] || val.value.handler_class || val.value.target || val.value.handler path = pVal?.value || pVal?.ast?.value @@ -125,16 +125,19 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { return } } - // Handle nested collections in objects (like lists of routes) - if (val.vtype === 'object' && val.value) { - const items = Array.isArray(val.value) ? val.value : Object.values(val.value) + + // 3. Handle nested collections (like lists of routes) + const items = + val.vtype === 'object' && val.value ? (Array.isArray(val.value) ? val.value : Object.values(val.value)) : null + if (items) { const isLikelyCollection = Array.isArray(val.value) || Object.keys(val.value).some((k) => /^\d+$/.test(k)) if (isLikelyCollection) { items.forEach((item: any) => this.processRoutes(analyzer, scope, state, item)) return } } - // 3. Handle Direct Call (like tornado.web.url, RuleRouter, Rule) + + // 4. Handle Direct Call (like tornado.web.url, RuleRouter, Rule) if (val.ast?.type === 'CallExpression') { const { callee } = val.ast const name = callee.property?.name || callee.name @@ -183,7 +186,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { h = this.buildClassSymbol(h.ast) } } - if (path && h) { this.registerEntryPoints(analyzer, h, path) } @@ -205,7 +207,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { ep.urlPattern = path ep.handlerName = cls.ast?.id?.name || cls.sid || 'Unknown' analyzer.entryPoints.push(ep) - const info = extractTornadoParams(path) let paramIdx = 0 const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] @@ -261,7 +262,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) const { fclos, ret } = info if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return - const name = node.callee?.property?.name || node.callee?.name if (tornadoSourceAPIs.has(name)) { markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' }) diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index d6b0526c..e82e46cf 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -49,13 +49,13 @@ export function isRequestAttributeAccess(node: any): boolean { export function isTornadoCall(node: any, targetName: string): boolean { if (!node || node.type !== 'CallExpression') return false const { callee } = node - const names = [targetName] - if (names.includes(callee.name) || names.includes(callee.property?.name)) return true + if (callee.name === targetName || callee.property?.name === targetName) return true // Handle __init__ pattern - if (['__init__', '_CTOR_'].includes(callee.property?.name)) { + const funcName = callee.property?.name || callee.name + if (['__init__', '_CTOR_'].includes(funcName)) { let current = callee.object while (current) { - if (names.includes(current.name) || names.includes(current.property?.name)) return true + if (current.name === targetName || current.property?.name === targetName) return true current = current.object || current.callee } } @@ -70,7 +70,6 @@ export function extractTornadoParams(pattern: string): { named: string[]; positi if (!pattern) return { named: [], positionalCount: 0 } const named = Array.from(pattern.matchAll(/\(\?P<(\w+)>/g)).map((m) => m[1]) if (named.length > 0) return { named, positionalCount: 0 } - const cleaned = pattern.replace(/\\\(|\\\)/g, '') const positionalCount = (cleaned.match(/\((?!\?)/g) || []).length return { named: [], positionalCount } From fee55b1fe99d80ff3924268d81bcca63b8531e93 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Tue, 20 Jan 2026 05:04:54 -0800 Subject: [PATCH 26/35] Fix: update tornado framework --- .../taint/python/tornado-taint-checker.ts | 52 +++++++------------ src/checker/taint/python/tornado-util.ts | 3 +- 2 files changed, 21 insertions(+), 34 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index e3b12f90..e242f672 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -46,8 +46,9 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const isAdd = isTornadoCall(node, 'add_handlers') const isRouter = isTornadoCall(node, 'RuleRouter') if (isApp || isRouter) { - const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name) - routes = (isApp || isRouter) && isInit ? argvalues[1] : argvalues[0] + const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name || node.callee?.name) + routes = (isApp || isRouter) && (isInit || !node.callee?.property) ? argvalues[1] : argvalues[0] + if (!routes && argvalues[0]) routes = argvalues[0] } else if (isAdd) { routes = argvalues[1] } @@ -83,29 +84,20 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { return } - // 2. Try to extract from Object/URLSpec/List-like/RuleRouter/Rule + // 2. Try to extract from Object/URLSpec/List-like/Rule let path: string | undefined let h: any - if (val.vtype === 'object' && val.value) { - const isRouter = isTornadoCall(val.ast, 'RuleRouter') - const isRule = isTornadoCall(val.ast, 'Rule') || isTornadoCall(val.ast, 'URLSpec') - - if (isRouter) { - const rules = val.value['0'] || val.value.rules - if (rules) { - this.processRoutes(analyzer, scope, state, rules) - return - } - } + if ((val.vtype === 'object' || val.vtype === 'symbol') && val.value) { + const isRule = + isTornadoCall(val.ast, 'Rule') || + isTornadoCall(val.ast, 'URLSpec') || + val.sid?.includes('Rule') || + val.sid?.includes('URLSpec') const pVal = val.value['0'] || val.value.regex || val.value.matcher h = val.value['1'] || val.value.handler_class || val.value.target || val.value.handler path = pVal?.value || pVal?.ast?.value // If matcher is PathMatches(r"...") - if ( - !path && - pVal?.ast?.type === 'CallExpression' && - (pVal.ast.callee.name === 'PathMatches' || pVal.ast.callee.property?.name === 'PathMatches') - ) { + if (!path && isTornadoCall(pVal?.ast, 'PathMatches')) { path = pVal.ast.arguments?.[0]?.value } } else if (Array.isArray(val.value)) { @@ -130,24 +122,25 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const items = val.vtype === 'object' && val.value ? (Array.isArray(val.value) ? val.value : Object.values(val.value)) : null if (items) { - const isLikelyCollection = Array.isArray(val.value) || Object.keys(val.value).some((k) => /^\d+$/.test(k)) + const isLikelyCollection = + Array.isArray(val.value) || + (val.value && typeof val.value === 'object' && Object.keys(val.value).some((k) => /^\d+$/.test(k))) if (isLikelyCollection) { items.forEach((item: any) => this.processRoutes(analyzer, scope, state, item)) return } } - // 4. Handle Direct Call (like tornado.web.url, RuleRouter, Rule) + // 4. Handle Direct Call (like tornado.web.url, URLSpec, Rule) if (val.ast?.type === 'CallExpression') { - const { callee } = val.ast - const name = callee.property?.name || callee.name - if (name === 'url' || name === 'URLSpec' || name === 'Rule') { + const isUrl = isTornadoCall(val.ast, 'url') + const isRule = isTornadoCall(val.ast, 'Rule') || isTornadoCall(val.ast, 'URLSpec') + if (isUrl || isRule) { const args = val.ast.arguments if (args && args.length >= 2) { let p = args[0].value if (typeof p !== 'string' && args[0].type === 'CallExpression') { - const innerCallee = args[0].callee.property?.name || args[0].callee.name - if (innerCallee === 'PathMatches') { + if (isTornadoCall(args[0], 'PathMatches')) { p = args[0].arguments?.[0]?.value } } @@ -157,12 +150,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { this.finishRoute(analyzer, scope, state, resolvedH || { ast: hNode }, p) } } - } else if (name === 'RuleRouter') { - const args = val.ast.arguments - if (args && args.length >= 1) { - const resolvedRoutes = analyzer.processInstruction(scope, args[0], state) - this.processRoutes(analyzer, scope, state, resolvedRoutes || { ast: args[0] }) - } } } } @@ -177,7 +164,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { */ private finishRoute(analyzer: any, scope: any, state: any, h: any, path: string) { if (!h) return - if (h.vtype === 'union' && Array.isArray(h.value)) h = h.value[0] if (h.vtype !== 'class' && h.ast?.type === 'ClassDefinition') { try { diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index e82e46cf..32504dcf 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -50,8 +50,8 @@ export function isTornadoCall(node: any, targetName: string): boolean { if (!node || node.type !== 'CallExpression') return false const { callee } = node if (callee.name === targetName || callee.property?.name === targetName) return true - // Handle __init__ pattern const funcName = callee.property?.name || callee.name + if (funcName === targetName) return true if (['__init__', '_CTOR_'].includes(funcName)) { let current = callee.object while (current) { @@ -59,6 +59,7 @@ export function isTornadoCall(node: any, targetName: string): boolean { current = current.object || current.callee } } + return false } From a1721c2f8abaa94227272ba6a42fb367194f2f4d Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 21 Jan 2026 18:26:43 -0800 Subject: [PATCH 27/35] Fix: update tornado framework --- src/checker/taint/python/tornado-taint-checker.ts | 15 +++++++++------ src/checker/taint/python/tornado-util.ts | 2 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index e242f672..b3409af9 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -93,12 +93,15 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { isTornadoCall(val.ast, 'URLSpec') || val.sid?.includes('Rule') || val.sid?.includes('URLSpec') - const pVal = val.value['0'] || val.value.regex || val.value.matcher - h = val.value['1'] || val.value.handler_class || val.value.target || val.value.handler - path = pVal?.value || pVal?.ast?.value - // If matcher is PathMatches(r"...") - if (!path && isTornadoCall(pVal?.ast, 'PathMatches')) { - path = pVal.ast.arguments?.[0]?.value + + if (isRule) { + const pVal = val.value['0'] || val.value.regex || val.value.matcher + h = val.value['1'] || val.value.handler_class || val.value.target || val.value.handler + path = pVal?.value || pVal?.ast?.value + // If matcher is PathMatches(r"...") + if (!path && isTornadoCall(pVal?.ast, 'PathMatches')) { + path = pVal.ast.arguments?.[0]?.value + } } } else if (Array.isArray(val.value)) { path = val.value[0]?.value || val.value[0]?.ast?.value diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index 32504dcf..35d2dd3b 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -10,6 +10,8 @@ export const tornadoSourceAPIs = new Set([ 'get_cookie', 'get_secure_cookie', 'get_arguments', + 'get_json_body', + , ]) /** From f4428dabf4a48573b6690dfb6c455192544440f0 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 21 Jan 2026 18:41:02 -0800 Subject: [PATCH 28/35] Fix: update tornado framework --- src/checker/taint/python/tornado-taint-checker.ts | 2 +- src/checker/taint/python/tornado-util.ts | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index b3409af9..ab207e7c 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -47,7 +47,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const isRouter = isTornadoCall(node, 'RuleRouter') if (isApp || isRouter) { const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name || node.callee?.name) - routes = (isApp || isRouter) && (isInit || !node.callee?.property) ? argvalues[1] : argvalues[0] + routes = (isApp || isRouter) && isInit ? argvalues[1] : argvalues[0] if (!routes && argvalues[0]) routes = argvalues[0] } else if (isAdd) { routes = argvalues[1] diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index 35d2dd3b..c8bb4f7f 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -11,7 +11,6 @@ export const tornadoSourceAPIs = new Set([ 'get_secure_cookie', 'get_arguments', 'get_json_body', - , ]) /** @@ -53,7 +52,6 @@ export function isTornadoCall(node: any, targetName: string): boolean { const { callee } = node if (callee.name === targetName || callee.property?.name === targetName) return true const funcName = callee.property?.name || callee.name - if (funcName === targetName) return true if (['__init__', '_CTOR_'].includes(funcName)) { let current = callee.object while (current) { From 0d63bafd672b983380a40e01c2a4f57dd11c38df Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 21 Jan 2026 19:03:22 -0800 Subject: [PATCH 29/35] Fix: update tornado --- src/checker/taint/python/tornado-taint-checker.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index ab207e7c..db33943b 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -47,8 +47,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const isRouter = isTornadoCall(node, 'RuleRouter') if (isApp || isRouter) { const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name || node.callee?.name) - routes = (isApp || isRouter) && isInit ? argvalues[1] : argvalues[0] - if (!routes && argvalues[0]) routes = argvalues[0] + routes = (isInit && argvalues[1]) || argvalues[0] } else if (isAdd) { routes = argvalues[1] } From e20920a1707cf70e209d16308b584f07d767ed94 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 28 Jan 2026 02:29:37 -0800 Subject: [PATCH 30/35] Fix: update tornado-framework --- .../taint/python/tornado-taint-checker.ts | 247 +++++++++--------- src/checker/taint/python/tornado-util.ts | 11 +- 2 files changed, 135 insertions(+), 123 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index db33943b..16a7493d 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -41,117 +41,78 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info) const { fclos, argvalues } = info if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !argvalues) return - let routes = null const isApp = isTornadoCall(node, 'Application') - const isAdd = isTornadoCall(node, 'add_handlers') const isRouter = isTornadoCall(node, 'RuleRouter') - if (isApp || isRouter) { - const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name || node.callee?.name) - routes = (isInit && argvalues[1]) || argvalues[0] - } else if (isAdd) { - routes = argvalues[1] - } - if (routes) { - this.processRoutes(analyzer, scope, state, routes) + const isAdd = isTornadoCall(node, 'add_handlers') + if (isApp || isRouter || isAdd) { + let routes: any = null + if (isApp || isRouter) { + const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name || node.callee?.name) + routes = (isInit && argvalues[1]) || argvalues[0] + } else { + routes = argvalues[1] // isAdd case + } + if (routes) { + this.registerRoutesFromValue(analyzer, scope, state, routes) + } } } /** - * + * Register routes from a collection value (List/Dict/Union/Single Symbol) * @param analyzer * @param scope * @param state * @param val + * @param prefix */ - private processRoutes(analyzer: any, scope: any, state: any, val: any) { + private registerRoutesFromValue(analyzer: any, scope: any, state: any, val: any, prefix = '') { if (!val) return - - // 1. Handle Union - Process all elements in the union - if (val.vtype === 'union' && Array.isArray(val.value)) { - // Try to see if this union represents a single route (flattened tuple/list) - const pathVal = val.value.find((v: any) => typeof (v.value || v.ast?.value) === 'string') - const hVal = val.value.find( - (v: any) => v.vtype === 'class' || v.ast?.type === 'ClassDefinition' || v.vtype === 'object' - ) - if (pathVal && hVal) { - const path = pathVal.value || pathVal.ast?.value - this.finishRoute(analyzer, scope, state, hVal, path) + // 1. Handle recording optimization (tornadoRoute) + if (val.tornadoRoute) { + const { path, handler } = val.tornadoRoute + if (path && handler) { + this.finishRoute(analyzer, scope, state, handler, prefix + path) return } - // Otherwise recurse into each element - val.value.forEach((v: any) => this.processRoutes(analyzer, scope, state, v)) - return - } - - // 2. Try to extract from Object/URLSpec/List-like/Rule - let path: string | undefined - let h: any - if ((val.vtype === 'object' || val.vtype === 'symbol') && val.value) { - const isRule = - isTornadoCall(val.ast, 'Rule') || - isTornadoCall(val.ast, 'URLSpec') || - val.sid?.includes('Rule') || - val.sid?.includes('URLSpec') - - if (isRule) { - const pVal = val.value['0'] || val.value.regex || val.value.matcher - h = val.value['1'] || val.value.handler_class || val.value.target || val.value.handler - path = pVal?.value || pVal?.ast?.value - // If matcher is PathMatches(r"...") - if (!path && isTornadoCall(pVal?.ast, 'PathMatches')) { - path = pVal.ast.arguments?.[0]?.value - } - } - } else if (Array.isArray(val.value)) { - path = val.value[0]?.value || val.value[0]?.ast?.value - h = val.value[1] } - if (h) { - // If h is an instance (object), we might need to look for its handlers recursively - if (h.vtype === 'object' && h.value) { - const innerRoutes = h.value.handlers || h.value.rules - if (innerRoutes) { - this.processRoutes(analyzer, scope, state, innerRoutes) + // 2. Handle Union + if (val.vtype === 'union' && Array.isArray(val.value)) { + // Small optimization: if this union contains exactly a string and something else, it might be a flattened tuple + const pathVal = val.value.find( + (v: any) => v.tornadoPath || typeof v.value === 'string' || typeof v.ast?.value === 'string' + ) + const hVal = val.value.find((v: any) => v.vtype === 'class' || v.vtype === 'symbol' || v.vtype === 'object') + if (pathVal && hVal) { + const path = pathVal.tornadoPath || pathVal.value || pathVal.ast?.value + if (typeof path === 'string') { + this.finishRoute(analyzer, scope, state, hVal, prefix + path) + return } } - if (typeof path === 'string') { - this.finishRoute(analyzer, scope, state, h, path) - return - } + val.value.forEach((v: any) => this.registerRoutesFromValue(analyzer, scope, state, v, prefix)) + return } - - // 3. Handle nested collections (like lists of routes) - const items = - val.vtype === 'object' && val.value ? (Array.isArray(val.value) ? val.value : Object.values(val.value)) : null - if (items) { - const isLikelyCollection = - Array.isArray(val.value) || - (val.value && typeof val.value === 'object' && Object.keys(val.value).some((k) => /^\d+$/.test(k))) - if (isLikelyCollection) { - items.forEach((item: any) => this.processRoutes(analyzer, scope, state, item)) + // 3. Handle Collections (List/Object with numeric keys) + const isObject = val.vtype === 'object' && val.value + if (isObject) { + const isCollection = Array.isArray(val.value) || Object.keys(val.value).some((k) => /^\d+$/.test(k)) + if (isCollection) { + const items = Array.isArray(val.value) ? val.value : Object.values(val.value) + items.forEach((item: any) => this.registerRoutesFromValue(analyzer, scope, state, item, prefix)) return } } - - // 4. Handle Direct Call (like tornado.web.url, URLSpec, Rule) - if (val.ast?.type === 'CallExpression') { - const isUrl = isTornadoCall(val.ast, 'url') - const isRule = isTornadoCall(val.ast, 'Rule') || isTornadoCall(val.ast, 'URLSpec') - if (isUrl || isRule) { - const args = val.ast.arguments - if (args && args.length >= 2) { - let p = args[0].value - if (typeof p !== 'string' && args[0].type === 'CallExpression') { - if (isTornadoCall(args[0], 'PathMatches')) { - p = args[0].arguments?.[0]?.value - } - } - if (typeof p === 'string') { - const hNode = args[1] - const resolvedH = analyzer.processInstruction(scope, hNode, state) - this.finishRoute(analyzer, scope, state, resolvedH || { ast: hNode }, p) - } - } + // 4. Fallback for raw tuple (path, handler) + const isTuple = + (Array.isArray(val.value) && val.value.length >= 2) || + (val.vtype === 'object' && val.value && val.value['0'] && val.value['1']) + if (isTuple) { + const pathArg = val.value['0'] || (Array.isArray(val.value) ? val.value[0] : null) + const handler = val.value['1'] || (Array.isArray(val.value) ? val.value[1] : null) + const path = pathArg?.tornadoPath || pathArg?.value || pathArg?.ast?.value + if (typeof path === 'string' && handler) { + this.finishRoute(analyzer, scope, state, handler, prefix + path) } } } @@ -167,15 +128,26 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { private finishRoute(analyzer: any, scope: any, state: any, h: any, path: string) { if (!h) return if (h.vtype === 'union' && Array.isArray(h.value)) h = h.value[0] - if (h.vtype !== 'class' && h.ast?.type === 'ClassDefinition') { + // 1. Check for recorded nested routes (Application/Router instances) + const innerRoutes = h.tornadoRoutes || h.value?.tornadoRoutes || h.field?.tornadoRoutes + if (innerRoutes) { + this.registerRoutesFromValue(analyzer, scope, state, innerRoutes, path) + return + } + // 2. Handle Class Definition (Handler classes) + let cls = h + if (cls.vtype !== 'class' && cls.ast?.type === 'ClassDefinition') { try { - h = analyzer.processInstruction(scope, h.ast, state) || this.buildClassSymbol(h.ast) + cls = analyzer.processInstruction(scope, cls.ast, state) || this.buildClassSymbol(cls.ast) } catch (e) { - h = this.buildClassSymbol(h.ast) + cls = this.buildClassSymbol(cls.ast) } + } else if (cls.vtype === 'symbol' && cls.cdef) { + // If it's an instance symbol, get its class definition + cls = cls.cdef } - if (path && h) { - this.registerEntryPoints(analyzer, h, path) + if (path && cls && (cls.vtype === 'class' || cls.vtype === 'symbol')) { + this.registerEntryPoints(analyzer, cls, path) } } @@ -187,32 +159,34 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { */ private registerEntryPoints(analyzer: any, cls: any, path: string) { const methods = ['get', 'post', 'put', 'delete', 'patch'] - const classValue = cls.value || {} - + // Look for methods in cls.value, cls.field, or cls.value.field (Python specificity) + const classValue = cls.value?.field || cls.field || cls.value || {} Object.entries(classValue).forEach(([name, fclos]: [string, any]) => { - if (methods.includes(name) && fclos.vtype === 'fclos') { + if (methods.includes(name)) { const ep = completeEntryPoint(fclos) - ep.urlPattern = path - ep.handlerName = cls.ast?.id?.name || cls.sid || 'Unknown' - analyzer.entryPoints.push(ep) - const info = extractTornadoParams(path) - let paramIdx = 0 - const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] - actualParams.forEach((p: any) => { - const pName = p.id?.name || p.name - if (pName === 'self') return - paramIdx++ - if (info.named.includes(pName) || (info.named.length === 0 && paramIdx <= info.positionalCount)) { - this.sourceScope.value.push({ - path: pName, - kind: 'PYTHON_INPUT', - scopeFile: 'all', - scopeFunc: 'all', - locStart: 'all', - locEnd: 'all', - }) - } - }) + if (ep) { + ep.urlPattern = path + ep.handlerName = cls.ast?.id?.name || cls.sid || 'Unknown' + analyzer.entryPoints.push(ep) + const info = extractTornadoParams(path) + let paramIdx = 0 + const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] + actualParams.forEach((p: any) => { + const pName = p.id?.name || p.name + if (pName === 'self') return + paramIdx++ + if (info.named.includes(pName) || (info.named.length === 0 && paramIdx <= info.positionalCount)) { + this.sourceScope.value.push({ + path: pName, + kind: 'PYTHON_INPUT', + scopeFile: 'all', + scopeFunc: 'all', + locStart: 'all', + locEnd: 'all', + }) + } + }) + } } }) } @@ -248,9 +222,42 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { */ triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void { super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) - const { fclos, ret } = info + const { fclos, ret, argvalues } = info if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return const name = node.callee?.property?.name || node.callee?.name + // 1. Record route info for Rule, URLSpec, url (Recording phase) + const isRuleCall = isTornadoCall(node, 'Rule') || isTornadoCall(node, 'URLSpec') || name === 'url' + if (isRuleCall && argvalues && argvalues.length >= 2) { + const pArg = argvalues[0] + const path = pArg?.tornadoPath || pArg?.value + const handler = argvalues[1] + ret.tornadoRoute = { path, handler } + } + // 2. Record path for PathMatches + if (isTornadoCall(node, 'PathMatches') && argvalues && argvalues.length >= 1) { + const path = argvalues[0]?.value + if (typeof path === 'string') { + ret.tornadoPath = path + } + } + // 3. Record internal routes for Application/RuleRouter instances + const isInit = ['__init__', '_CTOR_'].includes(name) + if (isInit && argvalues && argvalues.length >= 2) { + const self = argvalues[0] + const routes = argvalues[1] + // Heuristic: if routes looks like a list/tuple of routes + const isRouteList = + routes && (routes.vtype === 'object' || routes.vtype === 'symbol' || Array.isArray(routes.value)) + if (isRouteList && self) { + self.tornadoRoutes = routes + } + } + const isApp = isTornadoCall(node, 'Application') + const isRouter = isTornadoCall(node, 'RuleRouter') + if (!isInit && (isApp || isRouter)) { + // Direct class call returns instance + ret.tornadoRoutes = argvalues[0] + } if (tornadoSourceAPIs.has(name)) { markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' }) } diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index c8bb4f7f..281d2c58 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -50,16 +50,21 @@ export function isRequestAttributeAccess(node: any): boolean { export function isTornadoCall(node: any, targetName: string): boolean { if (!node || node.type !== 'CallExpression') return false const { callee } = node - if (callee.name === targetName || callee.property?.name === targetName) return true const funcName = callee.property?.name || callee.name + const objectName = callee.object?.name || callee.object?.property?.name + + if (funcName === targetName || objectName === targetName) { + return true + } + if (['__init__', '_CTOR_'].includes(funcName)) { let current = callee.object while (current) { - if (current.name === targetName || current.property?.name === targetName) return true + const currentName = current.name || current.property?.name + if (currentName === targetName) return true current = current.object || current.callee } } - return false } From 8244780e8cbeb180c24f6ac968495cc43547eb64 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 28 Jan 2026 02:51:57 -0800 Subject: [PATCH 31/35] Fix: update tornado-framework --- .../taint/python/tornado-taint-checker.ts | 25 ++++++++----------- src/checker/taint/python/tornado-util.ts | 2 -- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 16a7493d..49748b33 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -93,7 +93,17 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { val.value.forEach((v: any) => this.registerRoutesFromValue(analyzer, scope, state, v, prefix)) return } - // 3. Handle Collections (List/Object with numeric keys) + // 3. Handle raw tuple (path, handler) + if (val.value && typeof val.value === 'object') { + const pathArg = val.value['0'] + const handler = val.value['1'] + const path = pathArg?.tornadoPath || pathArg?.value || pathArg?.ast?.value + if (typeof path === 'string' && handler) { + this.finishRoute(analyzer, scope, state, handler, prefix + path) + return + } + } + // 4. Handle Collections (List/Object with numeric keys) const isObject = val.vtype === 'object' && val.value if (isObject) { const isCollection = Array.isArray(val.value) || Object.keys(val.value).some((k) => /^\d+$/.test(k)) @@ -103,18 +113,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { return } } - // 4. Fallback for raw tuple (path, handler) - const isTuple = - (Array.isArray(val.value) && val.value.length >= 2) || - (val.vtype === 'object' && val.value && val.value['0'] && val.value['1']) - if (isTuple) { - const pathArg = val.value['0'] || (Array.isArray(val.value) ? val.value[0] : null) - const handler = val.value['1'] || (Array.isArray(val.value) ? val.value[1] : null) - const path = pathArg?.tornadoPath || pathArg?.value || pathArg?.ast?.value - if (typeof path === 'string' && handler) { - this.finishRoute(analyzer, scope, state, handler, prefix + path) - } - } } /** @@ -255,7 +253,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const isApp = isTornadoCall(node, 'Application') const isRouter = isTornadoCall(node, 'RuleRouter') if (!isInit && (isApp || isRouter)) { - // Direct class call returns instance ret.tornadoRoutes = argvalues[0] } if (tornadoSourceAPIs.has(name)) { diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts index 281d2c58..618a49a3 100644 --- a/src/checker/taint/python/tornado-util.ts +++ b/src/checker/taint/python/tornado-util.ts @@ -52,11 +52,9 @@ export function isTornadoCall(node: any, targetName: string): boolean { const { callee } = node const funcName = callee.property?.name || callee.name const objectName = callee.object?.name || callee.object?.property?.name - if (funcName === targetName || objectName === targetName) { return true } - if (['__init__', '_CTOR_'].includes(funcName)) { let current = callee.object while (current) { From 73ae7efbc90149fe1ea336658ec203e52b1627eb Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 28 Jan 2026 03:48:26 -0800 Subject: [PATCH 32/35] Fix: update tornado framework --- src/checker/taint/python/tornado-taint-checker.ts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 49748b33..a486bd4e 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -110,7 +110,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (isCollection) { const items = Array.isArray(val.value) ? val.value : Object.values(val.value) items.forEach((item: any) => this.registerRoutesFromValue(analyzer, scope, state, item, prefix)) - return } } } @@ -165,7 +164,15 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (ep) { ep.urlPattern = path ep.handlerName = cls.ast?.id?.name || cls.sid || 'Unknown' - analyzer.entryPoints.push(ep) + const isDuplicate = analyzer.entryPoints.some( + (existing: any) => + existing.urlPattern === ep.urlPattern && + existing.functionName === ep.functionName && + existing.filePath === ep.filePath + ) + if (!isDuplicate) { + analyzer.entryPoints.push(ep) + } const info = extractTornadoParams(path) let paramIdx = 0 const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] From 6f50c2a866930e662118fc5b16b4256faecd4889 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Tue, 3 Feb 2026 00:28:07 -0800 Subject: [PATCH 33/35] Fix: update tornado framework --- .../taint/python/tornado-taint-checker.ts | 221 +++++++++++++----- 1 file changed, 158 insertions(+), 63 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index a486bd4e..6f61230b 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -3,11 +3,25 @@ const Config = require('../../../config') const completeEntryPoint = require('../common-kit/entry-points-util') const { markTaintSource } = require('../common-kit/source-util') const { isTornadoCall, tornadoSourceAPIs, isRequestAttributeAccess, extractTornadoParams } = require('./tornado-util') +const { extractRelativePath } = require('../../../util/file-util') /** * Tornado Taint Checker - Simplified */ class TornadoTaintChecker extends PythonTaintAbstractChecker { + private instanceRoutes = new Map() + + private routeInfoMap = new Map() + + /** + * + * @param node + */ + private getNodeKey(node: any): string | null { + if (!node || !node.loc) return null + return `${node.loc.sourcefile}:${node.loc.start.line}:${node.loc.start.column}` + } + /** * * @param resultManager @@ -40,17 +54,19 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void { super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info) const { fclos, argvalues } = info - if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !argvalues) return + if (Config.entryPointMode === 'ONLY_CUSTOM' || !argvalues) return const isApp = isTornadoCall(node, 'Application') const isRouter = isTornadoCall(node, 'RuleRouter') const isAdd = isTornadoCall(node, 'add_handlers') + const funcName = node.callee?.property?.name || node.callee?.name if (isApp || isRouter || isAdd) { let routes: any = null if (isApp || isRouter) { - const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name || node.callee?.name) - routes = (isInit && argvalues[1]) || argvalues[0] - } else { - routes = argvalues[1] // isAdd case + const isInit = ['__init__', '_CTOR_'].includes(funcName) + const routesIdx = isInit ? 1 : 0 + routes = argvalues[routesIdx] + } else if (isAdd) { + routes = argvalues[1] } if (routes) { this.registerRoutesFromValue(analyzer, scope, state, routes) @@ -68,52 +84,99 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { */ private registerRoutesFromValue(analyzer: any, scope: any, state: any, val: any, prefix = '') { if (!val) return - // 1. Handle recording optimization (tornadoRoute) - if (val.tornadoRoute) { - const { path, handler } = val.tornadoRoute - if (path && handler) { - this.finishRoute(analyzer, scope, state, handler, prefix + path) - return + const storedRoute = this.routeInfoMap.get(val) + if (storedRoute) { + this.finishRoute(analyzer, scope, state, storedRoute.handler, prefix + storedRoute.path) + return + } + // 0. Handle Symbols mapping to values + if (val.vtype === 'symbol' && val.value && typeof val.value === 'object') { + // If it's a symbol, its 'value' is where the actual object (Tuple/List) resides + this.registerRoutesFromValue(analyzer, scope, state, val.value, prefix) + return + } + + const ast = val.ast || val.node + if (ast?.type === 'CallExpression') { + const name = ast.callee?.property?.name || ast.callee?.name + if (isTornadoCall(ast, 'Rule') || isTornadoCall(ast, 'URLSpec') || name === 'url') { + const args = val.ast.arguments + if (args && args.length >= 2) { + const pVal = analyzer.processInstruction(scope, args[0], state) + const path = this.getPathFromValue(analyzer, scope, state, pVal) + const hVal = analyzer.processInstruction(scope, args[1], state) + if (path !== null && hVal) { + this.finishRoute(analyzer, scope, state, hVal, prefix + path) + return + } + } } } + // 2. Handle Union if (val.vtype === 'union' && Array.isArray(val.value)) { - // Small optimization: if this union contains exactly a string and something else, it might be a flattened tuple - const pathVal = val.value.find( - (v: any) => v.tornadoPath || typeof v.value === 'string' || typeof v.ast?.value === 'string' - ) - const hVal = val.value.find((v: any) => v.vtype === 'class' || v.vtype === 'symbol' || v.vtype === 'object') - if (pathVal && hVal) { - const path = pathVal.tornadoPath || pathVal.value || pathVal.ast?.value - if (typeof path === 'string') { - this.finishRoute(analyzer, scope, state, hVal, prefix + path) - return + // Check if the union elements themselves form a route (path, handler) + // Sometimes tuples are resolved as unions of their elements in some analyzer versions + const pathArg = val.value['0'] || val.value[0] + const handler = val.value['1'] || val.value[1] + let handledAsRoute = false + if (pathArg && handler) { + const path = this.getPathFromValue(analyzer, scope, state, pathArg) + if (path !== null) { + this.finishRoute(analyzer, scope, state, handler, prefix + path) + handledAsRoute = true } } - val.value.forEach((v: any) => this.registerRoutesFromValue(analyzer, scope, state, v, prefix)) + if (!handledAsRoute) { + val.value.forEach((v: any) => this.registerRoutesFromValue(analyzer, scope, state, v, prefix)) + } return } - // 3. Handle raw tuple (path, handler) + // 3. Handle raw tuple (path, handler) or any object with path/handler at index 0/1 if (val.value && typeof val.value === 'object') { const pathArg = val.value['0'] const handler = val.value['1'] - const path = pathArg?.tornadoPath || pathArg?.value || pathArg?.ast?.value - if (typeof path === 'string' && handler) { - this.finishRoute(analyzer, scope, state, handler, prefix + path) - return + if (pathArg && handler) { + const path = this.getPathFromValue(analyzer, scope, state, pathArg) + if (path !== null) { + this.finishRoute(analyzer, scope, state, handler, prefix + path) + return + } } } // 4. Handle Collections (List/Object with numeric keys) - const isObject = val.vtype === 'object' && val.value - if (isObject) { - const isCollection = Array.isArray(val.value) || Object.keys(val.value).some((k) => /^\d+$/.test(k)) - if (isCollection) { - const items = Array.isArray(val.value) ? val.value : Object.values(val.value) + const isCollection = val.vtype === 'list' || (val.vtype === 'object' && val.value) + if (isCollection) { + const items = Array.isArray(val.value) ? val.value : typeof val.value === 'object' ? Object.values(val.value) : [] + if (items.length > 0) { items.forEach((item: any) => this.registerRoutesFromValue(analyzer, scope, state, item, prefix)) } } } + /** + * Extract path string from a symbol value, handling PathMatches + * @param analyzer + * @param scope + * @param state + * @param val + */ + private getPathFromValue(analyzer: any, scope: any, state: any, val: any): string | null { + if (!val) return null + if (typeof val.value === 'string') return val.value + if (typeof val.ast?.value === 'string') return val.ast.value + // Check for PathMatches(pattern) + const ast = val.ast || val.node + if (ast?.type === 'CallExpression' && isTornadoCall(ast, 'PathMatches')) { + const arg = ast.arguments?.[0] + if (arg) { + const argVal = analyzer.processInstruction(scope, arg, state) + return typeof argVal?.value === 'string' ? argVal.value : arg.value || null + } + } + return null + } + /** * * @param analyzer @@ -125,13 +188,20 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { private finishRoute(analyzer: any, scope: any, state: any, h: any, path: string) { if (!h) return if (h.vtype === 'union' && Array.isArray(h.value)) h = h.value[0] - // 1. Check for recorded nested routes (Application/Router instances) - const innerRoutes = h.tornadoRoutes || h.value?.tornadoRoutes || h.field?.tornadoRoutes + let innerRoutes: any = null + const hAst = h.ast || h.node + if (hAst) { + const key = this.getNodeKey(hAst) + if (key) innerRoutes = this.instanceRoutes.get(key) + } + if (!innerRoutes) { + innerRoutes = this.instanceRoutes.get(h) + } if (innerRoutes) { this.registerRoutesFromValue(analyzer, scope, state, innerRoutes, path) return } - // 2. Handle Class Definition (Handler classes) + // Handle Class Definition (Handler classes) let cls = h if (cls.vtype !== 'class' && cls.ast?.type === 'ClassDefinition') { try { @@ -173,6 +243,11 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (!isDuplicate) { analyzer.entryPoints.push(ep) } + const scopeFile = extractRelativePath( + fclos.fdef?.loc?.sourcefile || fclos.ast?.loc?.sourcefile, + Config.maindir + ) + const scopeFunc = fclos.fdef?.id?.name || fclos.ast?.id?.name const info = extractTornadoParams(path) let paramIdx = 0 const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] @@ -184,10 +259,10 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { this.sourceScope.value.push({ path: pName, kind: 'PYTHON_INPUT', - scopeFile: 'all', - scopeFunc: 'all', - locStart: 'all', - locEnd: 'all', + scopeFile: scopeFile || 'all', + scopeFunc: scopeFunc || 'all', + locStart: p.loc?.start?.line || 'all', + locEnd: p.loc?.end?.line || 'all', }) } }) @@ -228,42 +303,62 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void { super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) const { fclos, ret, argvalues } = info - if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return const name = node.callee?.property?.name || node.callee?.name - // 1. Record route info for Rule, URLSpec, url (Recording phase) - const isRuleCall = isTornadoCall(node, 'Rule') || isTornadoCall(node, 'URLSpec') || name === 'url' - if (isRuleCall && argvalues && argvalues.length >= 2) { - const pArg = argvalues[0] - const path = pArg?.tornadoPath || pArg?.value - const handler = argvalues[1] - ret.tornadoRoute = { path, handler } + const isInit = ['__init__', '_CTOR_'].includes(name) + const isApp = isTornadoCall(node, 'Application') + const isRouter = isTornadoCall(node, 'RuleRouter') + + if (Config.entryPointMode === 'ONLY_CUSTOM') return + if (!isApp && !isRouter && !isInit) { + if (!fclos || !ret) return } - // 2. Record path for PathMatches - if (isTornadoCall(node, 'PathMatches') && argvalues && argvalues.length >= 1) { - const path = argvalues[0]?.value - if (typeof path === 'string') { - ret.tornadoPath = path - } + // 1. Mark Taint Source for APIs + if (tornadoSourceAPIs.has(name)) { + markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' }) } - // 3. Record internal routes for Application/RuleRouter instances - const isInit = ['__init__', '_CTOR_'].includes(name) + // 2. Track routes for instances (nested routers/apps) if (isInit && argvalues && argvalues.length >= 2) { const self = argvalues[0] const routes = argvalues[1] - // Heuristic: if routes looks like a list/tuple of routes const isRouteList = - routes && (routes.vtype === 'object' || routes.vtype === 'symbol' || Array.isArray(routes.value)) + routes && + (routes.vtype === 'object' || + routes.vtype === 'symbol' || + routes.vtype === 'list' || + Array.isArray(routes.value)) + const selfAst = self?.ast || self?.node if (isRouteList && self) { - self.tornadoRoutes = routes + const instKey = this.getNodeKey(selfAst) + if (instKey) { + this.instanceRoutes.set(instKey, routes) + } + this.instanceRoutes.set(self, routes) + if (self.cdef && self.cdef.ast) { + const classKey = this.getNodeKey(self.cdef.ast) + if (classKey) { + this.instanceRoutes.set(classKey, routes) + } + } } } - const isApp = isTornadoCall(node, 'Application') - const isRouter = isTornadoCall(node, 'RuleRouter') if (!isInit && (isApp || isRouter)) { - ret.tornadoRoutes = argvalues[0] + const key = this.getNodeKey(node) + if (key) { + this.instanceRoutes.set(key, argvalues[0]) + } } - if (tornadoSourceAPIs.has(name)) { - markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' }) + + // 3. Record route info for Rule/URLSpec + if (isTornadoCall(node, 'Rule') || isTornadoCall(node, 'URLSpec')) { + const args = node.arguments + if (args && args.length >= 2) { + const pVal = analyzer.processInstruction(scope, args[0], state) + const path = this.getPathFromValue(analyzer, scope, state, pVal) + const hVal = analyzer.processInstruction(scope, args[1], state) + if (path !== null && hVal && ret) { + this.routeInfoMap.set(ret, { path, handler: hVal }) + } + } } } From d3ac17a48ce85b997031372d6325220fe4464b73 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Tue, 3 Feb 2026 00:40:16 -0800 Subject: [PATCH 34/35] Fix: update tornado framework --- src/checker/taint/python/tornado-taint-checker.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 6f61230b..43560c14 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -97,6 +97,7 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { } const ast = val.ast || val.node + // 1. Handle CallExpressions (Rule/URLSpec/url) if (ast?.type === 'CallExpression') { const name = ast.callee?.property?.name || ast.callee?.name if (isTornadoCall(ast, 'Rule') || isTornadoCall(ast, 'URLSpec') || name === 'url') { From 3ec065be66d87e285cefd9c2da594ba182110816 Mon Sep 17 00:00:00 2001 From: Ris-1kd <2351946411@qq.com> Date: Wed, 4 Feb 2026 01:32:42 -0800 Subject: [PATCH 35/35] Fix: update tornado-framework --- .../taint/python/tornado-taint-checker.ts | 235 ++++++------------ 1 file changed, 72 insertions(+), 163 deletions(-) diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts index 43560c14..c34c055d 100644 --- a/src/checker/taint/python/tornado-taint-checker.ts +++ b/src/checker/taint/python/tornado-taint-checker.ts @@ -5,23 +5,15 @@ const { markTaintSource } = require('../common-kit/source-util') const { isTornadoCall, tornadoSourceAPIs, isRequestAttributeAccess, extractTornadoParams } = require('./tornado-util') const { extractRelativePath } = require('../../../util/file-util') +// Metadata storage +const tornadoRoutesMap = new WeakMap() +const tornadoRouteMap = new WeakMap() +const tornadoPathMap = new WeakMap() + /** * Tornado Taint Checker - Simplified */ class TornadoTaintChecker extends PythonTaintAbstractChecker { - private instanceRoutes = new Map() - - private routeInfoMap = new Map() - - /** - * - * @param node - */ - private getNodeKey(node: any): string | null { - if (!node || !node.loc) return null - return `${node.loc.sourcefile}:${node.loc.start.line}:${node.loc.start.column}` - } - /** * * @param resultManager @@ -39,7 +31,6 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { * @param info */ triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void { - this.addSourceTagForSourceScope('PYTHON_INPUT', this.sourceScope.value) this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent) } @@ -54,19 +45,17 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void { super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info) const { fclos, argvalues } = info - if (Config.entryPointMode === 'ONLY_CUSTOM' || !argvalues) return + if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !argvalues) return const isApp = isTornadoCall(node, 'Application') const isRouter = isTornadoCall(node, 'RuleRouter') const isAdd = isTornadoCall(node, 'add_handlers') - const funcName = node.callee?.property?.name || node.callee?.name if (isApp || isRouter || isAdd) { let routes: any = null if (isApp || isRouter) { - const isInit = ['__init__', '_CTOR_'].includes(funcName) - const routesIdx = isInit ? 1 : 0 - routes = argvalues[routesIdx] - } else if (isAdd) { - routes = argvalues[1] + const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name || node.callee?.name) + routes = (isInit && argvalues[1]) || argvalues[0] + } else { + routes = argvalues[1] // isAdd case } if (routes) { this.registerRoutesFromValue(analyzer, scope, state, routes) @@ -84,100 +73,52 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { */ private registerRoutesFromValue(analyzer: any, scope: any, state: any, val: any, prefix = '') { if (!val) return - const storedRoute = this.routeInfoMap.get(val) - if (storedRoute) { - this.finishRoute(analyzer, scope, state, storedRoute.handler, prefix + storedRoute.path) - return - } - // 0. Handle Symbols mapping to values - if (val.vtype === 'symbol' && val.value && typeof val.value === 'object') { - // If it's a symbol, its 'value' is where the actual object (Tuple/List) resides - this.registerRoutesFromValue(analyzer, scope, state, val.value, prefix) - return - } - - const ast = val.ast || val.node - // 1. Handle CallExpressions (Rule/URLSpec/url) - if (ast?.type === 'CallExpression') { - const name = ast.callee?.property?.name || ast.callee?.name - if (isTornadoCall(ast, 'Rule') || isTornadoCall(ast, 'URLSpec') || name === 'url') { - const args = val.ast.arguments - if (args && args.length >= 2) { - const pVal = analyzer.processInstruction(scope, args[0], state) - const path = this.getPathFromValue(analyzer, scope, state, pVal) - const hVal = analyzer.processInstruction(scope, args[1], state) - if (path !== null && hVal) { - this.finishRoute(analyzer, scope, state, hVal, prefix + path) - return - } - } + // 1. Handle recording optimization (tornadoRoute) + if (tornadoRouteMap.has(val)) { + const { path, handler } = tornadoRouteMap.get(val)! + if (path && handler) { + this.finishRoute(analyzer, scope, state, handler, prefix + path) + return } } - // 2. Handle Union if (val.vtype === 'union' && Array.isArray(val.value)) { - // Check if the union elements themselves form a route (path, handler) - // Sometimes tuples are resolved as unions of their elements in some analyzer versions - const pathArg = val.value['0'] || val.value[0] - const handler = val.value['1'] || val.value[1] - let handledAsRoute = false - if (pathArg && handler) { - const path = this.getPathFromValue(analyzer, scope, state, pathArg) - if (path !== null) { - this.finishRoute(analyzer, scope, state, handler, prefix + path) - handledAsRoute = true + // Small optimization: if this union contains exactly a string and something else, it might be a flattened tuple + const pathVal = val.value.find( + (v: any) => tornadoPathMap.has(v) || typeof v.value === 'string' || typeof v.ast?.value === 'string' + ) + const hVal = val.value.find((v: any) => v.vtype === 'class' || v.vtype === 'symbol' || v.vtype === 'object') + if (pathVal && hVal) { + const path = tornadoPathMap.get(pathVal) || pathVal.value || pathVal.ast?.value + if (typeof path === 'string') { + this.finishRoute(analyzer, scope, state, hVal, prefix + path) + return } } - if (!handledAsRoute) { - val.value.forEach((v: any) => this.registerRoutesFromValue(analyzer, scope, state, v, prefix)) - } + val.value.forEach((v: any) => this.registerRoutesFromValue(analyzer, scope, state, v, prefix)) return } - // 3. Handle raw tuple (path, handler) or any object with path/handler at index 0/1 + // 3. Handle raw tuple (path, handler) if (val.value && typeof val.value === 'object') { const pathArg = val.value['0'] const handler = val.value['1'] - if (pathArg && handler) { - const path = this.getPathFromValue(analyzer, scope, state, pathArg) - if (path !== null) { - this.finishRoute(analyzer, scope, state, handler, prefix + path) - return - } + const path = (pathArg && tornadoPathMap.get(pathArg)) || pathArg?.value || pathArg?.ast?.value + if (typeof path === 'string' && handler) { + this.finishRoute(analyzer, scope, state, handler, prefix + path) + return } } // 4. Handle Collections (List/Object with numeric keys) - const isCollection = val.vtype === 'list' || (val.vtype === 'object' && val.value) - if (isCollection) { - const items = Array.isArray(val.value) ? val.value : typeof val.value === 'object' ? Object.values(val.value) : [] - if (items.length > 0) { + const isObject = val.vtype === 'object' && val.value + if (isObject) { + const isCollection = Array.isArray(val.value) || Object.keys(val.value).some((k) => /^\d+$/.test(k)) + if (isCollection) { + const items = Array.isArray(val.value) ? val.value : Object.values(val.value) items.forEach((item: any) => this.registerRoutesFromValue(analyzer, scope, state, item, prefix)) } } } - /** - * Extract path string from a symbol value, handling PathMatches - * @param analyzer - * @param scope - * @param state - * @param val - */ - private getPathFromValue(analyzer: any, scope: any, state: any, val: any): string | null { - if (!val) return null - if (typeof val.value === 'string') return val.value - if (typeof val.ast?.value === 'string') return val.ast.value - // Check for PathMatches(pattern) - const ast = val.ast || val.node - if (ast?.type === 'CallExpression' && isTornadoCall(ast, 'PathMatches')) { - const arg = ast.arguments?.[0] - if (arg) { - const argVal = analyzer.processInstruction(scope, arg, state) - return typeof argVal?.value === 'string' ? argVal.value : arg.value || null - } - } - return null - } - /** * * @param analyzer @@ -189,20 +130,13 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { private finishRoute(analyzer: any, scope: any, state: any, h: any, path: string) { if (!h) return if (h.vtype === 'union' && Array.isArray(h.value)) h = h.value[0] - let innerRoutes: any = null - const hAst = h.ast || h.node - if (hAst) { - const key = this.getNodeKey(hAst) - if (key) innerRoutes = this.instanceRoutes.get(key) - } - if (!innerRoutes) { - innerRoutes = this.instanceRoutes.get(h) - } + // 1. Check for recorded nested routes (Application/Router instances) + const innerRoutes = tornadoRoutesMap.get(h) || (h.value && tornadoRoutesMap.get(h.value)) if (innerRoutes) { this.registerRoutesFromValue(analyzer, scope, state, innerRoutes, path) return } - // Handle Class Definition (Handler classes) + // 2. Handle Class Definition (Handler classes) let cls = h if (cls.vtype !== 'class' && cls.ast?.type === 'ClassDefinition') { try { @@ -233,22 +167,16 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { if (methods.includes(name)) { const ep = completeEntryPoint(fclos) if (ep) { - ep.urlPattern = path - ep.handlerName = cls.ast?.id?.name || cls.sid || 'Unknown' + ep.funcReceiverType = cls.ast?.id?.name || cls.sid || 'Unknown' const isDuplicate = analyzer.entryPoints.some( (existing: any) => - existing.urlPattern === ep.urlPattern && existing.functionName === ep.functionName && - existing.filePath === ep.filePath + existing.filePath === ep.filePath && + existing.funcReceiverType === ep.funcReceiverType ) if (!isDuplicate) { analyzer.entryPoints.push(ep) } - const scopeFile = extractRelativePath( - fclos.fdef?.loc?.sourcefile || fclos.ast?.loc?.sourcefile, - Config.maindir - ) - const scopeFunc = fclos.fdef?.id?.name || fclos.ast?.id?.name const info = extractTornadoParams(path) let paramIdx = 0 const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[] @@ -256,14 +184,15 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { const pName = p.id?.name || p.name if (pName === 'self') return paramIdx++ + // Add source scope for parameters based on URL pattern if (info.named.includes(pName) || (info.named.length === 0 && paramIdx <= info.positionalCount)) { this.sourceScope.value.push({ path: pName, kind: 'PYTHON_INPUT', - scopeFile: scopeFile || 'all', - scopeFunc: scopeFunc || 'all', - locStart: p.loc?.start?.line || 'all', - locEnd: p.loc?.end?.line || 'all', + scopeFile: extractRelativePath(fclos?.ast?.loc?.sourcefile || ep.filePath, Config.maindir), + scopeFunc: ep.functionName, + locStart: p.loc?.start?.line, + locEnd: p.loc?.end?.line, }) } }) @@ -304,62 +233,42 @@ class TornadoTaintChecker extends PythonTaintAbstractChecker { triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void { super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info) const { fclos, ret, argvalues } = info + if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return const name = node.callee?.property?.name || node.callee?.name - const isInit = ['__init__', '_CTOR_'].includes(name) - const isApp = isTornadoCall(node, 'Application') - const isRouter = isTornadoCall(node, 'RuleRouter') - - if (Config.entryPointMode === 'ONLY_CUSTOM') return - if (!isApp && !isRouter && !isInit) { - if (!fclos || !ret) return + // 1. Record route info for Rule, URLSpec, url (Recording phase) + const isRuleCall = isTornadoCall(node, 'Rule') || isTornadoCall(node, 'URLSpec') || name === 'url' + if (isRuleCall && argvalues && argvalues.length >= 2) { + const pArg = argvalues[0] + const path = (pArg && tornadoPathMap.get(pArg)) || pArg?.value + const handler = argvalues[1] + tornadoRouteMap.set(ret, { path, handler }) } - // 1. Mark Taint Source for APIs - if (tornadoSourceAPIs.has(name)) { - markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' }) + // 2. Record path for PathMatches + if (isTornadoCall(node, 'PathMatches') && argvalues && argvalues.length >= 1) { + const path = argvalues[0]?.value + if (typeof path === 'string') { + tornadoPathMap.set(ret, path) + } } - // 2. Track routes for instances (nested routers/apps) + // 3. Record internal routes for Application/RuleRouter instances + const isInit = ['__init__', '_CTOR_'].includes(name) if (isInit && argvalues && argvalues.length >= 2) { const self = argvalues[0] const routes = argvalues[1] + // Heuristic: if routes looks like a list/tuple of routes const isRouteList = - routes && - (routes.vtype === 'object' || - routes.vtype === 'symbol' || - routes.vtype === 'list' || - Array.isArray(routes.value)) - const selfAst = self?.ast || self?.node + routes && (routes.vtype === 'object' || routes.vtype === 'symbol' || Array.isArray(routes.value)) if (isRouteList && self) { - const instKey = this.getNodeKey(selfAst) - if (instKey) { - this.instanceRoutes.set(instKey, routes) - } - this.instanceRoutes.set(self, routes) - if (self.cdef && self.cdef.ast) { - const classKey = this.getNodeKey(self.cdef.ast) - if (classKey) { - this.instanceRoutes.set(classKey, routes) - } - } + tornadoRoutesMap.set(self, routes) } } + const isApp = isTornadoCall(node, 'Application') + const isRouter = isTornadoCall(node, 'RuleRouter') if (!isInit && (isApp || isRouter)) { - const key = this.getNodeKey(node) - if (key) { - this.instanceRoutes.set(key, argvalues[0]) - } + tornadoRoutesMap.set(ret, argvalues[0]) } - - // 3. Record route info for Rule/URLSpec - if (isTornadoCall(node, 'Rule') || isTornadoCall(node, 'URLSpec')) { - const args = node.arguments - if (args && args.length >= 2) { - const pVal = analyzer.processInstruction(scope, args[0], state) - const path = this.getPathFromValue(analyzer, scope, state, pVal) - const hVal = analyzer.processInstruction(scope, args[1], state) - if (path !== null && hVal && ret) { - this.routeInfoMap.set(ret, { path, handler: hVal }) - } - } + if (tornadoSourceAPIs.has(name)) { + markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' }) } }