diff --git a/resource/checker/checker-config.json b/resource/checker/checker-config.json index 5e28f533..fad64657 100644 --- a/resource/checker/checker-config.json +++ b/resource/checker/checker-config.json @@ -135,6 +135,11 @@ "checkerPath": "checker/taint/python/django-taint-checker.ts", "description": "python Django框架 entrypoint采集以及框架source添加" }, + { + "checkerId": "taint_flow_python_tornado_input", + "checkerPath": "checker/taint/python/tornado-taint-checker.ts", + "description": "python Tornado框架 entrypoint采集以及框架source添加" + }, { "checkerId": "taint_flow_test", "checkerPath": "checker/taint/test-taint-checker.ts", diff --git a/resource/checker/checker-pack-config.json b/resource/checker/checker-pack-config.json index 2380e197..580fdd35 100644 --- a/resource/checker/checker-pack-config.json +++ b/resource/checker/checker-pack-config.json @@ -86,6 +86,7 @@ "checkerIds": [ "taint_flow_python_input", "taint_flow_python_django_input", + "taint_flow_python_tornado_input", "callgraph", "sanitizer" ], @@ -96,6 +97,7 @@ "checkerIds": [ "taint_flow_python_input_inner", "taint_flow_python_django_input", + "taint_flow_python_tornado_input", "callgraph", "sanitizer" ], diff --git a/resource/example-rule-config/rule_config_python.json b/resource/example-rule-config/rule_config_python.json index 93de362d..1b866057 100644 --- a/resource/example-rule-config/rule_config_python.json +++ b/resource/example-rule-config/rule_config_python.json @@ -1,6 +1,11 @@ [ { - "checkerIds": ["taint_flow_python_input", "taint_flow_python_input_inner", "taint_flow_python_django_input"], + "checkerIds": [ + "taint_flow_python_input", + "taint_flow_python_input_inner", + "taint_flow_python_django_input", + "taint_flow_python_tornado_input" + ], "sources": { "FuncCallReturnValueTaintSource": [ { diff --git a/src/checker/taint/python/tornado-taint-checker.ts b/src/checker/taint/python/tornado-taint-checker.ts new file mode 100644 index 00000000..c34c055d --- /dev/null +++ 
b/src/checker/taint/python/tornado-taint-checker.ts
@@ -0,0 +1,362 @@
+const PythonTaintAbstractChecker = require('./python-taint-abstract-checker')
+const Config = require('../../../config')
+const completeEntryPoint = require('../common-kit/entry-points-util')
+const { markTaintSource } = require('../common-kit/source-util')
+const { isTornadoCall, tornadoSourceAPIs, isRequestAttributeAccess, extractTornadoParams } = require('./tornado-util')
+const { extractRelativePath } = require('../../../util/file-util')
+
+// Metadata recorded when calls return and read back when routes are registered
+// Application/RuleRouter value -> the route collection it was created with
+const tornadoRoutesMap = new WeakMap()
+// Rule/URLSpec/url() result -> { path, handler }
+const tornadoRouteMap = new WeakMap()
+// PathMatches() result -> path pattern string
+const tornadoPathMap = new WeakMap()
+// Values currently being expanded by registerRoutesFromValue (guards against cyclic values)
+const routesInProgress = new WeakSet()
+
+// Handler methods registered as entry points: the lower-cased names of
+// tornado.web.RequestHandler.SUPPORTED_METHODS
+const handlerMethods = ['get', 'head', 'post', 'delete', 'patch', 'put', 'options']
+
+/**
+ * WeakMap and WeakSet throw a TypeError for primitive keys, so check before storing.
+ * @param v - candidate key
+ */
+function isWeakKey(v: any): boolean {
+  return (typeof v === 'object' && v !== null) || typeof v === 'function'
+}
+
+/**
+ * Taint checker for the Tornado web framework.
+ *
+ * Registers the HTTP verb methods of routed handler classes as analysis entry points
+ * and marks request data as `PYTHON_INPUT` taint sources: results of the Tornado source
+ * APIs, `self.request.<attr>` accesses and handler parameters bound to URL groups.
+ */
+class TornadoTaintChecker extends PythonTaintAbstractChecker {
+  /**
+   * @param resultManager - passed through to the base checker
+   */
+  constructor(resultManager: any) {
+    super(resultManager, 'taint_flow_python_tornado_input')
+  }
+
+  /**
+   * Calls `addSourceTagForcheckerRuleConfigContent` with the `PYTHON_INPUT` tag and this checker's rule configuration.
+   * @param analyzer - unused
+   * @param scope - unused
+   * @param node - unused
+   * @param state - unused
+   * @param info - unused
+   */
+  triggerAtStartOfAnalyze(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    this.addSourceTagForcheckerRuleConfigContent('PYTHON_INPUT', this.checkerRuleConfigContent)
+  }
+
+  /**
+   * Registers the routes passed to `Application(...)`, `RuleRouter(...)` and
+   * `add_handlers(...)` calls.
+   * @param analyzer - analyzer whose entry point list is extended
+   * @param scope - current scope
+   * @param node - call expression
+   * @param state - analysis state
+   * @param info - call information; `fclos` and `argvalues` are read
+   */
+  triggerAtFunctionCallBefore(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    super.triggerAtFunctionCallBefore(analyzer, scope, node, state, info)
+    const { fclos, argvalues } = info
+    if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !argvalues) return
+    const isApp = isTornadoCall(node, 'Application')
+    const isRouter = isTornadoCall(node, 'RuleRouter')
+    const isAdd = isTornadoCall(node, 'add_handlers')
+    if (!isApp && !isRouter && !isAdd) return
+    let routes: any = null
+    if (isApp || isRouter) {
+      // For constructor calls the second argument is tried first, then the first one
+      const isInit = ['__init__', '_CTOR_'].includes(node.callee?.property?.name || node.callee?.name)
+      routes = (isInit && argvalues[1]) || argvalues[0]
+    } else {
+      routes = argvalues[1] // isAdd case
+    }
+    if (routes) {
+      this.registerRoutesFromValue(analyzer, scope, state, routes)
+    }
+  }
+
+  /**
+   * Register routes from a collection value (List/Dict/Union/Single Symbol).
+   *
+   * A value that is already being expanded further up the call stack is skipped, so a
+   * cyclic value cannot recurse until the stack overflows.
+   * @param analyzer - analyzer whose entry point list is extended
+   * @param scope - current scope
+   * @param state - analysis state
+   * @param val - value describing one route or a collection of routes
+   * @param prefix - path of the enclosing routes, prepended to every path found in `val`
+   */
+  private registerRoutesFromValue(analyzer: any, scope: any, state: any, val: any, prefix = '') {
+    if (!val) return
+    const trackable = isWeakKey(val)
+    if (trackable) {
+      if (routesInProgress.has(val)) return
+      routesInProgress.add(val)
+    }
+    try {
+      this.expandRoutes(analyzer, scope, state, val, prefix)
+    } finally {
+      if (trackable) routesInProgress.delete(val)
+    }
+  }
+
+  /**
+   * Dispatch on the shape of `val` and register the routes it contains.
+   * @param analyzer - analyzer whose entry point list is extended
+   * @param scope - current scope
+   * @param state - analysis state
+   * @param val - non-empty route value
+   * @param prefix - path of the enclosing routes
+   */
+  private expandRoutes(analyzer: any, scope: any, state: any, val: any, prefix: string) {
+    // 1. Handle recording optimization (tornadoRoute)
+    if (tornadoRouteMap.has(val)) {
+      const { path, handler } = tornadoRouteMap.get(val)!
+      if (path && handler) {
+        this.finishRoute(analyzer, scope, state, handler, prefix + path)
+        return
+      }
+    }
+    // 2. Handle Union
+    if (val.vtype === 'union' && Array.isArray(val.value)) {
+      // Small optimization: if this union contains exactly a string and something else, it might be a flattened tuple
+      const pathVal = val.value.find(
+        (v: any) => tornadoPathMap.has(v) || typeof v?.value === 'string' || typeof v?.ast?.value === 'string'
+      )
+      // The handler has to be a different member than the one that supplied the path
+      const hVal = val.value.find(
+        (v: any) => v !== pathVal && (v?.vtype === 'class' || v?.vtype === 'symbol' || v?.vtype === 'object')
+      )
+      if (pathVal && hVal) {
+        const path = tornadoPathMap.get(pathVal) || pathVal.value || pathVal.ast?.value
+        if (typeof path === 'string') {
+          this.finishRoute(analyzer, scope, state, hVal, prefix + path)
+          return
+        }
+      }
+      val.value.forEach((v: any) => this.registerRoutesFromValue(analyzer, scope, state, v, prefix))
+      return
+    }
+    // 3. Handle raw tuple (path, handler)
+    if (val.value && typeof val.value === 'object') {
+      const pathArg = val.value['0']
+      const handler = val.value['1']
+      const path = (pathArg && tornadoPathMap.get(pathArg)) || pathArg?.value || pathArg?.ast?.value
+      if (typeof path === 'string' && handler) {
+        this.finishRoute(analyzer, scope, state, handler, prefix + path)
+        return
+      }
+    }
+    // 4. Handle Collections (List/Object with numeric keys)
+    const isObject = val.vtype === 'object' && val.value
+    if (isObject) {
+      const isCollection = Array.isArray(val.value) || Object.keys(val.value).some((k) => /^\d+$/.test(k))
+      if (isCollection) {
+        const items = Array.isArray(val.value) ? val.value : Object.values(val.value)
+        items.forEach((item: any) => this.registerRoutesFromValue(analyzer, scope, state, item, prefix))
+      }
+    }
+  }
+
+  /**
+   * Resolve the handler of a single route and register its entry points.
+   *
+   * A handler with recorded inner routes (a nested Application/RuleRouter) is expanded
+   * with `path` as prefix; otherwise the handler is resolved to a class.
+   * @param analyzer - analyzer whose entry point list is extended
+   * @param scope - current scope
+   * @param state - analysis state
+   * @param h - handler value of the route
+   * @param path - full path pattern of the route
+   */
+  private finishRoute(analyzer: any, scope: any, state: any, h: any, path: string) {
+    if (!h) return
+    if (h.vtype === 'union' && Array.isArray(h.value)) {
+      // Only the first member of a union handler is used; an empty union has none
+      h = h.value[0]
+      if (!h) return
+    }
+    // 1. Check for recorded nested routes (Application/Router instances)
+    const innerRoutes = tornadoRoutesMap.get(h) || (h.value && tornadoRoutesMap.get(h.value))
+    if (innerRoutes) {
+      this.registerRoutesFromValue(analyzer, scope, state, innerRoutes, path)
+      return
+    }
+    // 2. Handle Class Definition (Handler classes)
+    let cls = h
+    if (cls.vtype !== 'class' && cls.ast?.type === 'ClassDefinition') {
+      try {
+        cls = analyzer.processInstruction(scope, cls.ast, state) || this.buildClassSymbol(cls.ast)
+      } catch (e) {
+        // Best effort: fall back to a class symbol built directly from the AST
+        cls = this.buildClassSymbol(cls.ast)
+      }
+    } else if (cls.vtype === 'symbol' && cls.cdef) {
+      // If it's an instance symbol, get its class definition
+      cls = cls.cdef
+    }
+    if (path && cls && (cls.vtype === 'class' || cls.vtype === 'symbol')) {
+      this.registerEntryPoints(analyzer, cls, path)
+    }
+  }
+
+  /**
+   * Register the HTTP verb methods of a handler class as entry points and add a source
+   * scope entry for every parameter that receives a URL group of `path`.
+   * @param analyzer - analyzer whose entry point list is extended
+   * @param cls - handler class value
+   * @param path - full path pattern of the route
+   */
+  private registerEntryPoints(analyzer: any, cls: any, path: string) {
+    // Look for methods in cls.value, cls.field, or cls.value.field (Python specificity)
+    const classValue = cls.value?.field || cls.field || cls.value || {}
+    // The URL groups depend on the path only, so parse it once for all methods
+    const info = extractTornadoParams(path)
+    Object.entries(classValue).forEach(([name, fclos]: [string, any]) => {
+      if (!handlerMethods.includes(name)) return
+      const ep = completeEntryPoint(fclos)
+      if (!ep) return
+      ep.funcReceiverType = cls.ast?.id?.name || cls.sid || 'Unknown'
+      const isDuplicate = analyzer.entryPoints.some(
+        (existing: any) =>
+          existing.functionName === ep.functionName &&
+          existing.filePath === ep.filePath &&
+          existing.funcReceiverType === ep.funcReceiverType
+      )
+      if (!isDuplicate) {
+        analyzer.entryPoints.push(ep)
+      }
+      let paramIdx = 0
+      const actualParams = (fclos.fdef?.parameters || fclos.ast?.parameters || []) as any[]
+      actualParams.forEach((p: any) => {
+        const pName = p.id?.name || p.name
+        if (pName === 'self') return
+        paramIdx++
+        // Add source scope for parameters based on URL pattern
+        if (info.named.includes(pName) || (info.named.length === 0 && paramIdx <= info.positionalCount)) {
+          const source = {
+            path: pName,
+            kind: 'PYTHON_INPUT',
+            scopeFile: extractRelativePath(fclos?.ast?.loc?.sourcefile || ep.filePath, Config.maindir),
+            scopeFunc: ep.functionName,
+            locStart: p.loc?.start?.line,
+            locEnd: p.loc?.end?.line,
+          }
+          // A route may be registered more than once; keep the source scope free of duplicates
+          const isKnown = this.sourceScope.value.some(
+            (s: any) =>
+              s.path === source.path &&
+              s.scopeFile === source.scopeFile &&
+              s.scopeFunc === source.scopeFunc &&
+              s.locStart === source.locStart &&
+              s.locEnd === source.locEnd
+          )
+          if (!isKnown) this.sourceScope.value.push(source)
+        }
+      })
+    })
+  }
+
+  /**
+   * Build a minimal class symbol from a ClassDefinition AST node: every
+   * FunctionDefinition in the class body becomes an `fclos` entry keyed by its name.
+   * @param node - ClassDefinition AST node
+   */
+  private buildClassSymbol(node: any) {
+    const value: any = {}
+    node.body?.forEach((m: any) => {
+      if (m.type === 'FunctionDefinition') {
+        const name = m.id?.name || m.name?.name
+        if (name) {
+          value[name] = {
+            vtype: 'fclos',
+            fdef: m,
+            ast: m,
+          }
+        }
+      }
+    })
+    return { vtype: 'class', value, ast: node }
+  }
+
+  /**
+   * Records route metadata produced by the call and tags results of the Tornado source
+   * APIs as `PYTHON_INPUT`.
+   * @param analyzer - forwarded to the base checker
+   * @param scope - forwarded to the base checker
+   * @param node - call expression
+   * @param state - forwarded to the base checker
+   * @param info - call information; `fclos`, `ret` and `argvalues` are read
+   */
+  triggerAtFunctionCallAfter(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    super.triggerAtFunctionCallAfter(analyzer, scope, node, state, info)
+    const { fclos, ret, argvalues } = info
+    if (Config.entryPointMode === 'ONLY_CUSTOM' || !fclos || !ret) return
+    const name = node.callee?.property?.name || node.callee?.name
+    // argvalues may be missing; treat that like a call without arguments
+    const args: any[] = argvalues || []
+    const retIsKey = isWeakKey(ret)
+    // 1. Record route info for Rule, URLSpec, url (Recording phase)
+    const isRuleCall = isTornadoCall(node, 'Rule') || isTornadoCall(node, 'URLSpec') || name === 'url'
+    if (isRuleCall && retIsKey && args.length >= 2) {
+      const pArg = args[0]
+      const path = (pArg && tornadoPathMap.get(pArg)) || pArg?.value
+      const handler = args[1]
+      tornadoRouteMap.set(ret, { path, handler })
+    }
+    // 2. Record path for PathMatches
+    if (retIsKey && isTornadoCall(node, 'PathMatches') && args.length >= 1) {
+      const path = args[0]?.value
+      if (typeof path === 'string') {
+        tornadoPathMap.set(ret, path)
+      }
+    }
+    // 3. Record internal routes for Application/RuleRouter instances
+    const isInit = ['__init__', '_CTOR_'].includes(name)
+    if (isInit && args.length >= 2) {
+      const self = args[0]
+      const routes = args[1]
+      // Heuristic: if routes looks like a list/tuple of routes
+      const isRouteList =
+        routes && (routes.vtype === 'object' || routes.vtype === 'symbol' || Array.isArray(routes.value))
+      if (isRouteList && isWeakKey(self)) {
+        tornadoRoutesMap.set(self, routes)
+      }
+    }
+    const isApp = isTornadoCall(node, 'Application')
+    const isRouter = isTornadoCall(node, 'RuleRouter')
+    // Without a first argument there are no routes to remember for this instance
+    if (!isInit && (isApp || isRouter) && retIsKey && args[0]) {
+      tornadoRoutesMap.set(ret, args[0])
+    }
+    if (tornadoSourceAPIs.has(name)) {
+      markTaintSource(ret, { path: node, kind: 'PYTHON_INPUT' })
+    }
+  }
+
+  /**
+   * Tags the value of a `self.request.<attr>` access as a `PYTHON_INPUT` source.
+   * @param analyzer - unused
+   * @param scope - unused
+   * @param node - member access expression
+   * @param state - unused
+   * @param info - access information; `res` is the value the access resolved to
+   */
+  triggerAtMemberAccess(analyzer: any, scope: any, node: any, state: any, info: any): void {
+    if (Config.entryPointMode === 'ONLY_CUSTOM' || !isRequestAttributeAccess(node)) return
+    // Nothing to tag when the access did not resolve to a value
+    if (!info?.res) return
+    markTaintSource(info.res, { path: node, kind: 'PYTHON_INPUT' })
+  }
+}
+
+export = TornadoTaintChecker
diff --git a/src/checker/taint/python/tornado-util.ts b/src/checker/taint/python/tornado-util.ts
new file mode 100644
index 00000000..618a49a3
--- /dev/null
+++ b/src/checker/taint/python/tornado-util.ts
@@ -0,0 +1,133 @@
+/**
+ * Names of Tornado `RequestHandler` methods whose return value is treated as a taint source.
+ * Matched by bare method name against the callee of a call.
+ */
+export const tornadoSourceAPIs = new Set([
+  'get_argument',
+  'get_query_argument',
+  'get_body_argument',
+  'get_query_arguments',
+  'get_body_arguments',
+  'get_cookie',
+  'get_secure_cookie',
+  // Tornado 6.3 renamed get_secure_cookie to get_signed_cookie; the old name is kept as an alias
+  'get_signed_cookie',
+  'get_arguments',
+  'get_json_body',
+])
+
+/**
+ * Attributes of `self.request` whose value is treated as a taint source.
+ * Kept at module level so the lookup table is built once rather than on every member access.
+ */
+const requestSourceAttributes = new Set([
+  'body',
+  'query',
+  'headers',
+  'cookies',
+  'files',
+  'uri',
+  'path',
+  'arguments',
+  'remote_ip',
+  'host',
+  'query_arguments',
+  'body_arguments',
+])
+
+// Matches a Python named group opener, e.g. `(?P<user_id>`, at the start of the input
+const namedGroupOpener = /^\(\?P<(\w+)>/
+
+/**
+ * Detect whether `node` is an access of the form `self.request.<attr>` where `<attr>`
+ * is one of the request attributes treated as taint sources.
+ * @param node - AST node to inspect; may be null or undefined
+ * @returns true only for a two-level member access rooted at the identifier `self`
+ */
+export function isRequestAttributeAccess(node: any): boolean {
+  if (node?.type !== 'MemberAccess') return false
+  const inner = node.object
+  return (
+    inner?.type === 'MemberAccess' &&
+    inner.object?.type === 'Identifier' &&
+    inner.object?.name === 'self' &&
+    inner.property?.name === 'request' &&
+    requestSourceAttributes.has(node.property?.name)
+  )
+}
+
+/**
+ * Check whether a call expression refers to `targetName`.
+ *
+ * A call matches when the called function, or the object it is called on, is named
+ * `targetName`. For constructor calls (`__init__` / `_CTOR_`) the whole receiver chain
+ * is searched, so a name deeper in the chain matches as well.
+ * @param node - AST node to inspect; anything that is not a CallExpression yields false
+ * @param targetName - function, class or method name to look for
+ * @returns true when the call refers to `targetName`
+ */
+export function isTornadoCall(node: any, targetName: string): boolean {
+  if (!node || node.type !== 'CallExpression') return false
+  const { callee } = node
+  // A call node without a callee cannot name anything
+  if (!callee) return false
+  const funcName = callee.property?.name || callee.name
+  const objectName = callee.object?.name || callee.object?.property?.name
+  if (funcName === targetName || objectName === targetName) {
+    return true
+  }
+  if (['__init__', '_CTOR_'].includes(funcName)) {
+    let current = callee.object
+    while (current) {
+      const currentName = current.name || current.property?.name
+      if (currentName === targetName) return true
+      current = current.object || current.callee
+    }
+  }
+  return false
+}
+
+/**
+ * Extract capture-group information from a URL regex pattern.
+ *
+ * The pattern is scanned once, left to right. Escaped characters (`\(`, `\\`, ...) and
+ * the contents of character classes (`[(]`) are skipped, so only real group openers are
+ * counted. Groups that start with `(?` but are not `(?P<name>` (non-capturing groups,
+ * lookarounds, ...) are ignored.
+ * @param pattern - URL regex pattern of a route
+ * @returns the named groups in order of appearance; `positionalCount` is the number of
+ *   unnamed capturing groups and is reported as 0 whenever named groups are present
+ */
+export function extractTornadoParams(pattern: string): { named: string[]; positionalCount: number } {
+  const named: string[] = []
+  let positionalCount = 0
+  if (!pattern) return { named, positionalCount }
+  let inCharClass = false
+  for (let i = 0; i < pattern.length; i++) {
+    const ch = pattern[i]
+    if (ch === '\\') {
+      // Skip the escaped character so `\(` and `\[` are not mistaken for syntax
+      i++
+      continue
+    }
+    if (inCharClass) {
+      if (ch === ']') inCharClass = false
+      continue
+    }
+    if (ch === '[') {
+      inCharClass = true
+      // A leading `^` and a `]` placed first belong to the class and do not close it
+      if (pattern[i + 1] === '^') i++
+      if (pattern[i + 1] === ']') i++
+      continue
+    }
+    if (ch !== '(') continue
+    if (pattern[i + 1] !== '?') {
+      positionalCount++
+      continue
+    }
+    const match = namedGroupOpener.exec(pattern.slice(i))
+    if (match?.[1]) named.push(match[1])
+  }
+  return named.length > 0 ? { named, positionalCount: 0 } : { named, positionalCount }
+}
diff --git a/src/engine/analyzer/python/common/python-analyzer.ts b/src/engine/analyzer/python/common/python-analyzer.ts
index 427ad6df..47833e07 100644
--- a/src/engine/analyzer/python/common/python-analyzer.ts
+++ b/src/engine/analyzer/python/common/python-analyzer.ts
@@ -561,7 +561,11 @@ class PythonAnalyzer extends (Analyzer as any) {
       resolved_prop.name = '_CTOR_'
     }
     if (!resolved_prop) return defscope
-    return this.getMemberValue(defscope, resolved_prop, state)
+    const res = this.getMemberValue(defscope, resolved_prop, state)
+    if (this.checkerManager && (this.checkerManager as any).checkAtMemberAccess) {
+      this.checkerManager.checkAtMemberAccess(this, defscope, node, state, { res })
+    }
+    return res
   }
 
   /**