From 6793bc6c6b8f021830c3bd94ec36ecf7363ff0eb Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 14 Jul 2023 14:26:21 +0200 Subject: [PATCH 1/5] Java: Exclude qualifier argument for existing models Excludes candadites for `Argument[this]` where we already have a model that covers a different argument of the containing call. --- .../Telemetry/AutomodelApplicationModeCharacteristics.qll | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll index 51e786eebdcd..0a86c00c2eff 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll @@ -329,9 +329,10 @@ private class OtherArgumentToModeledMethodCharacteristic extends Characteristics override predicate appliesToEndpoint(Endpoint e) { not ApplicationCandidatesImpl::isSink(e, _) and - exists(DataFlow::Node otherSink | + exists(DataFlow::Node otherSink, Call c | ApplicationCandidatesImpl::isSink(otherSink, _) and - e.asExpr() = otherSink.asExpr().(Argument).getCall().getAnArgument() and + c = otherSink.asExpr().(Argument).getCall() and + e.asExpr() in [c.getQualifier(), c.getAnArgument()] and e != otherSink ) } From 6b425f1395a6b4347e495b63679ab4c21c3e4be3 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 14 Jul 2023 14:45:22 +0200 Subject: [PATCH 2/5] Java: Revert definition of `isNeutral` Reverts the change made in https://github.com/github/codeql/pull/13480/commits/daf274314331318ecffd16e1bf7de399fe06cadc With the change in the aforementioned commit, we were extracting candidates for endpoints that had a neutral _summary_ model. These are bad candidates, as they have already been triaged. --- .../src/Telemetry/AutomodelApplicationModeCharacteristics.qll | 2 +- java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll index 0a86c00c2eff..48398585d456 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll @@ -79,7 +79,7 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig predicate isNeutral(Endpoint e) { exists(string package, string type, string name, string signature | sinkSpec(e, package, type, name, signature, _, _) and - ExternalFlow::neutralModel(package, type, name, [signature, ""], "sink", _) + ExternalFlow::neutralModel(package, type, name, [signature, ""], _, _) ) } diff --git a/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll b/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll index bc5c3b59a91a..17ab633307a1 100644 --- a/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll @@ -60,7 +60,7 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig { predicate isNeutral(Endpoint e) { exists(string package, string type, string name, string signature | sinkSpec(e, package, type, name, signature, _, _) and - ExternalFlow::neutralModel(package, type, name, [signature, ""], "sink", _) + ExternalFlow::neutralModel(package, type, name, [signature, ""], _, _) ) } From 79da723878ea294be7362370f5e14b2f7557e153 Mon Sep 17 00:00:00 2001 From: Stephan Brandauer Date: Fri, 21 Jul 2023 10:43:07 +0200 Subject: [PATCH 3/5] Java: only assume that _manual_ MaD sinks have been fully modeled --- ...utomodelApplicationModeCharacteristics.qll | 19 ++++++++++--------- ...tomodelApplicationModeExtractCandidates.ql | 2 +- .../AutomodelFrameworkModeCharacteristics.qll | 8 ++++---- ...AutomodelFrameworkModeExtractCandidates.ql | 2 +- .../AutomodelSharedCharacteristics.qll | 10 ++++++---- 5 files changed, 22 insertions(+), 19 deletions(-) diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll index 48398585d456..7e8bc95741ae 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll @@ -67,13 +67,13 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2; - predicate isSink(Endpoint e, string kind) { + predicate isSink(Endpoint e, string kind, string provenance) { exists(string package, string type, string name, string signature, string ext, string input | sinkSpec(e, package, type, name, signature, ext, input) and - ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, _) + ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, provenance) ) or - isCustomSink(e, kind) + isCustomSink(e, kind) and provenance = "custom-sink" } predicate isNeutral(Endpoint e) { @@ -200,7 +200,7 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASin UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not ApplicationCandidatesImpl::isSink(e, _) and + not ApplicationCandidatesImpl::isSink(e, _, _) and ApplicationModeGetCallable::getCallable(e).getName().matches("is%") and ApplicationModeGetCallable::getCallable(e).getReturnType() instanceof BooleanType } @@ -218,7 +218,7 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Not UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not ApplicationCandidatesImpl::isSink(e, _) and + not ApplicationCandidatesImpl::isSink(e, _, _) and exists(Callable callable | callable = ApplicationModeGetCallable::getCallable(e) and callable.getName().toLowerCase() = ["exists", "notexists"] and @@ -313,7 +313,8 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter /** * A negative characteristic that indicates that an endpoint is a non-sink argument to a method whose sinks have already - * been modeled. + * been modeled _manually_. This is restricted to manual sinks only, because only during the manual process do we have + * the expectation that all sinks present in a method have been considered. * * WARNING: These endpoints should not be used as negative samples for training, because some sinks may have been missed * when the method was modeled. Specifically, as we start using ATM to merge in new declarations, we can be less sure @@ -324,13 +325,13 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter private class OtherArgumentToModeledMethodCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic { OtherArgumentToModeledMethodCharacteristic() { - this = "other argument to a method that has already been modeled" + this = "other argument to a method that has already been modeled manually" } override predicate appliesToEndpoint(Endpoint e) { - not ApplicationCandidatesImpl::isSink(e, _) and + not ApplicationCandidatesImpl::isSink(e, _, _) and exists(DataFlow::Node otherSink, Call c | - ApplicationCandidatesImpl::isSink(otherSink, _) and + ApplicationCandidatesImpl::isSink(otherSink, _, "manual") and c = otherSink.asExpr().(Argument).getCall() and e.asExpr() in [c.getQualifier(), c.getAnArgument()] and e != otherSink diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql b/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql index 4e40fadd7509..4940b4a741ff 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql +++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql @@ -64,7 +64,7 @@ where // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it. - not CharacteristicsImpl::isSink(endpoint, _) and + not CharacteristicsImpl::isSink(endpoint, _, _) and meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and // The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be // a non-sink, and we surface only endpoints that have at least one such sink type. diff --git a/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll b/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll index 17ab633307a1..3903ede7ebc5 100644 --- a/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll @@ -50,10 +50,10 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig { predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2; - predicate isSink(Endpoint e, string kind) { + predicate isSink(Endpoint e, string kind, string provenance) { exists(string package, string type, string name, string signature, string ext, string input | sinkSpec(e, package, type, name, signature, ext, input) and - ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, _) + ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, provenance) ) } @@ -154,7 +154,7 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASin UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not FrameworkCandidatesImpl::isSink(e, _) and + not FrameworkCandidatesImpl::isSink(e, _, _) and FrameworkModeGetCallable::getCallable(e).getName().matches("is%") and FrameworkModeGetCallable::getCallable(e).getReturnType() instanceof BooleanType } @@ -172,7 +172,7 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Not UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not FrameworkCandidatesImpl::isSink(e, _) and + not FrameworkCandidatesImpl::isSink(e, _, _) and exists(Callable callable | callable = FrameworkModeGetCallable::getCallable(e) and callable.getName().toLowerCase() = ["exists", "notexists"] and diff --git a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql index 4186d1c17b9b..e66af08707c8 100644 --- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql +++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql @@ -28,7 +28,7 @@ where // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it. - not CharacteristicsImpl::isSink(endpoint, _) and + not CharacteristicsImpl::isSink(endpoint, _, _) and meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, parameterName) and // The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be // a non-sink, and we surface only endpoints that have at least one such sink type. diff --git a/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll b/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll index b077f77deb94..b9d631e9deb8 100644 --- a/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll @@ -58,9 +58,9 @@ signature module CandidateSig { predicate isSanitizer(Endpoint e, EndpointType t); /** - * Holds if `e` is a sink with the label `kind`. + * Holds if `e` is a sink with the label `kind`, and provenance `provenance`. */ - predicate isSink(Endpoint e, string kind); + predicate isSink(Endpoint e, string kind, string provenance); /** * Holds if `e` is not a sink of any kind. @@ -87,7 +87,7 @@ signature module CandidateSig { * implementations of endpoint characteristics exported by this module. */ module SharedCharacteristics { - predicate isSink = Candidate::isSink/2; + predicate isSink = Candidate::isSink/3; predicate isNeutral = Candidate::isNeutral/1; @@ -282,7 +282,9 @@ module SharedCharacteristics { this = madKind + "-characteristic" } - override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isSink(e, madKind) } + override predicate appliesToEndpoint(Candidate::Endpoint e) { + Candidate::isSink(e, madKind, _) + } override Candidate::EndpointType getSinkType() { result = endpointType } } From 2f2f507a5da5b460c426810f076ef9ffe1dc8d6a Mon Sep 17 00:00:00 2001 From: Stephan Brandauer Date: Mon, 24 Jul 2023 13:55:53 +0200 Subject: [PATCH 4/5] Java: drive-by change: remove obsolete custom queries from application mode characteristics --- .../Telemetry/AutomodelApplicationModeCharacteristics.qll | 5 ----- 1 file changed, 5 deletions(-) diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll index 7e8bc95741ae..da281a7ff87a 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll @@ -12,7 +12,6 @@ private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummary private import semmle.code.java.security.ExternalAPIs as ExternalAPIs private import semmle.code.java.Expr as Expr private import semmle.code.java.security.QueryInjection -private import semmle.code.java.security.RequestForgery private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions private import AutomodelJavaUtil as AutomodelJavaUtil private import semmle.code.java.security.PathSanitizer as PathSanitizer @@ -136,10 +135,6 @@ private module ApplicationModeGetCallable implements AutomodelSharedGetCallable: * should be empty. */ private predicate isCustomSink(Endpoint e, string kind) { - e.asExpr() instanceof ArgumentToExec and kind = "command injection" - or - e instanceof RequestForgerySink and kind = "request forgery" - or e instanceof QueryInjectionSink and kind = "sql" } From 13027a1094ba9cf28a08eca514d7161ad8b6111d Mon Sep 17 00:00:00 2001 From: Stephan Brandauer Date: Mon, 24 Jul 2023 14:09:10 +0200 Subject: [PATCH 5/5] Java: review suggestions from @atorralba --- .../AutomodelApplicationModeCharacteristics.qll | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll index da281a7ff87a..57bc413438b4 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll @@ -25,7 +25,17 @@ newtype JavaRelatedLocationType = CallContext() * A class representing nodes that are arguments to calls. */ private class ArgumentNode extends DataFlow::Node { - ArgumentNode() { this.asExpr() = [any(Call c).getAnArgument(), any(Call c).getQualifier()] } + Call c; + + ArgumentNode() { + exists(Argument arg | this.asExpr() = arg and not arg.isVararg() and c = arg.getCall()) + or + this.(DataFlow::ImplicitVarargsArray).getCall() = c + or + this = DataFlow::getInstanceArgument(c) + } + + Call getCall() { result = c } } /** @@ -325,10 +335,9 @@ private class OtherArgumentToModeledMethodCharacteristic extends Characteristics override predicate appliesToEndpoint(Endpoint e) { not ApplicationCandidatesImpl::isSink(e, _, _) and - exists(DataFlow::Node otherSink, Call c | + exists(Endpoint otherSink | ApplicationCandidatesImpl::isSink(otherSink, _, "manual") and - c = otherSink.asExpr().(Argument).getCall() and - e.asExpr() in [c.getQualifier(), c.getAnArgument()] and + e.getCall() = otherSink.getCall() and e != otherSink ) }