diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll index 51e786eebdcd..57bc413438b4 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll @@ -12,7 +12,6 @@ private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummary private import semmle.code.java.security.ExternalAPIs as ExternalAPIs private import semmle.code.java.Expr as Expr private import semmle.code.java.security.QueryInjection -private import semmle.code.java.security.RequestForgery private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions private import AutomodelJavaUtil as AutomodelJavaUtil private import semmle.code.java.security.PathSanitizer as PathSanitizer @@ -26,7 +25,17 @@ newtype JavaRelatedLocationType = CallContext() * A class representing nodes that are arguments to calls. */ private class ArgumentNode extends DataFlow::Node { - ArgumentNode() { this.asExpr() = [any(Call c).getAnArgument(), any(Call c).getQualifier()] } + Call c; + + ArgumentNode() { + exists(Argument arg | this.asExpr() = arg and not arg.isVararg() and c = arg.getCall()) + or + this.(DataFlow::ImplicitVarargsArray).getCall() = c + or + this = DataFlow::getInstanceArgument(c) + } + + Call getCall() { result = c } } /** @@ -67,19 +76,19 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2; - predicate isSink(Endpoint e, string kind) { + predicate isSink(Endpoint e, string kind, string provenance) { exists(string package, string type, string name, string signature, string ext, string input | sinkSpec(e, package, type, name, signature, ext, input) and - ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, _) + ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, provenance) ) or - isCustomSink(e, kind) + isCustomSink(e, kind) and provenance = "custom-sink" } predicate isNeutral(Endpoint e) { exists(string package, string type, string name, string signature | sinkSpec(e, package, type, name, signature, _, _) and - ExternalFlow::neutralModel(package, type, name, [signature, ""], "sink", _) + ExternalFlow::neutralModel(package, type, name, [signature, ""], _, _) ) } @@ -136,10 +145,6 @@ private module ApplicationModeGetCallable implements AutomodelSharedGetCallable: * should be empty. */ private predicate isCustomSink(Endpoint e, string kind) { - e.asExpr() instanceof ArgumentToExec and kind = "command injection" - or - e instanceof RequestForgerySink and kind = "request forgery" - or e instanceof QueryInjectionSink and kind = "sql" } @@ -200,7 +205,7 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASin UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not ApplicationCandidatesImpl::isSink(e, _) and + not ApplicationCandidatesImpl::isSink(e, _, _) and ApplicationModeGetCallable::getCallable(e).getName().matches("is%") and ApplicationModeGetCallable::getCallable(e).getReturnType() instanceof BooleanType } @@ -218,7 +223,7 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Not UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not ApplicationCandidatesImpl::isSink(e, _) and + not ApplicationCandidatesImpl::isSink(e, _, _) and exists(Callable callable | callable = ApplicationModeGetCallable::getCallable(e) and callable.getName().toLowerCase() = ["exists", "notexists"] and @@ -313,7 +318,8 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter /** * A negative characteristic that indicates that an endpoint is a non-sink argument to a method whose sinks have already - * been modeled. + * been modeled _manually_. This is restricted to manual sinks only, because only during the manual process do we have + * the expectation that all sinks present in a method have been considered. * * WARNING: These endpoints should not be used as negative samples for training, because some sinks may have been missed * when the method was modeled. Specifically, as we start using ATM to merge in new declarations, we can be less sure @@ -324,14 +330,14 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter private class OtherArgumentToModeledMethodCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic { OtherArgumentToModeledMethodCharacteristic() { - this = "other argument to a method that has already been modeled" + this = "other argument to a method that has already been modeled manually" } override predicate appliesToEndpoint(Endpoint e) { - not ApplicationCandidatesImpl::isSink(e, _) and - exists(DataFlow::Node otherSink | - ApplicationCandidatesImpl::isSink(otherSink, _) and - e.asExpr() = otherSink.asExpr().(Argument).getCall().getAnArgument() and + not ApplicationCandidatesImpl::isSink(e, _, _) and + exists(Endpoint otherSink | + ApplicationCandidatesImpl::isSink(otherSink, _, "manual") and + e.getCall() = otherSink.getCall() and e != otherSink ) } diff --git a/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql b/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql index 4e40fadd7509..4940b4a741ff 100644 --- a/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql +++ b/java/ql/src/Telemetry/AutomodelApplicationModeExtractCandidates.ql @@ -64,7 +64,7 @@ where // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it. - not CharacteristicsImpl::isSink(endpoint, _) and + not CharacteristicsImpl::isSink(endpoint, _, _) and meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input) and // The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be // a non-sink, and we surface only endpoints that have at least one such sink type. diff --git a/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll b/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll index bc5c3b59a91a..3903ede7ebc5 100644 --- a/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelFrameworkModeCharacteristics.qll @@ -50,17 +50,17 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig { predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2; - predicate isSink(Endpoint e, string kind) { + predicate isSink(Endpoint e, string kind, string provenance) { exists(string package, string type, string name, string signature, string ext, string input | sinkSpec(e, package, type, name, signature, ext, input) and - ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, _) + ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, provenance) ) } predicate isNeutral(Endpoint e) { exists(string package, string type, string name, string signature | sinkSpec(e, package, type, name, signature, _, _) and - ExternalFlow::neutralModel(package, type, name, [signature, ""], "sink", _) + ExternalFlow::neutralModel(package, type, name, [signature, ""], _, _) ) } @@ -154,7 +154,7 @@ private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASin UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not FrameworkCandidatesImpl::isSink(e, _) and + not FrameworkCandidatesImpl::isSink(e, _, _) and FrameworkModeGetCallable::getCallable(e).getName().matches("is%") and FrameworkModeGetCallable::getCallable(e).getReturnType() instanceof BooleanType } @@ -172,7 +172,7 @@ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::Not UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" } override predicate appliesToEndpoint(Endpoint e) { - not FrameworkCandidatesImpl::isSink(e, _) and + not FrameworkCandidatesImpl::isSink(e, _, _) and exists(Callable callable | callable = FrameworkModeGetCallable::getCallable(e) and callable.getName().toLowerCase() = ["exists", "notexists"] and diff --git a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql index 4186d1c17b9b..e66af08707c8 100644 --- a/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql +++ b/java/ql/src/Telemetry/AutomodelFrameworkModeExtractCandidates.ql @@ -28,7 +28,7 @@ where // label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in // overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been // modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it. - not CharacteristicsImpl::isSink(endpoint, _) and + not CharacteristicsImpl::isSink(endpoint, _, _) and meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, parameterName) and // The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be // a non-sink, and we surface only endpoints that have at least one such sink type. diff --git a/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll b/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll index b077f77deb94..b9d631e9deb8 100644 --- a/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll +++ b/java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll @@ -58,9 +58,9 @@ signature module CandidateSig { predicate isSanitizer(Endpoint e, EndpointType t); /** - * Holds if `e` is a sink with the label `kind`. + * Holds if `e` is a sink with the label `kind`, and provenance `provenance`. */ - predicate isSink(Endpoint e, string kind); + predicate isSink(Endpoint e, string kind, string provenance); /** * Holds if `e` is not a sink of any kind. @@ -87,7 +87,7 @@ signature module CandidateSig { * implementations of endpoint characteristics exported by this module. */ module SharedCharacteristics { - predicate isSink = Candidate::isSink/2; + predicate isSink = Candidate::isSink/3; predicate isNeutral = Candidate::isNeutral/1; @@ -282,7 +282,9 @@ module SharedCharacteristics { this = madKind + "-characteristic" } - override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isSink(e, madKind) } + override predicate appliesToEndpoint(Candidate::Endpoint e) { + Candidate::isSink(e, madKind, _) + } override Candidate::EndpointType getSinkType() { result = endpointType } }