diff --git a/go/ql/lib/change-notes/2024-12-16-any-content-readers.md b/go/ql/lib/change-notes/2024-12-16-any-content-readers.md new file mode 100644 index 000000000000..aa244c1b97af --- /dev/null +++ b/go/ql/lib/change-notes/2024-12-16-any-content-readers.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- +* By implementing `ImplicitFieldReadNode` it is now possible to declare a dataflow node that reads any content (fields, array members, map keys and values). For example, this is appropriate for modelling a serialization method that flattens a potentially deep data structure into a string or byte array. +* The `Template.Execute[Template]` methods of the `text/template` package now correctly convey taint from any nested fields to their result. This may produce more results from any taint-tracking query when the `text/template` package is in use. diff --git a/go/ql/lib/ext/text.template.model.yml b/go/ql/lib/ext/text.template.model.yml index 669af3a8854f..3ff0321a43c2 100644 --- a/go/ql/lib/ext/text.template.model.yml +++ b/go/ql/lib/ext/text.template.model.yml @@ -7,5 +7,5 @@ extensions: - ["text/template", "", False, "HTMLEscapeString", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] - ["text/template", "", False, "JSEscape", "", "", "Argument[1]", "Argument[0]", "taint", "manual"] - ["text/template", "", False, "JSEscapeString", "", "", "Argument[0]", "ReturnValue", "taint", "manual"] - - ["text/template", "Template", True, "Execute", "", "", "Argument[1]", "Argument[0]", "taint", "manual"] - - ["text/template", "Template", True, "ExecuteTemplate", "", "", "Argument[2]", "Argument[0]", "taint", "manual"] +# - ["text/template", "Template", True, "Execute", "", "", "Argument[1]", "Argument[0]", "taint", "manual"] # Implemented in QL to provide an arbitrary content read from the input. +# - ["text/template", "Template", True, "ExecuteTemplate", "", "", "Argument[2]", "Argument[0]", "taint", "manual"] # Implemented in QL to provide an arbitrary content read from the input. diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll index 032196312483..2b36d42702ab 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll @@ -143,26 +143,28 @@ predicate jumpStep(Node n1, Node n2) { * Thus, `node2` references an object with a content `x` that contains the * value of `node1`. */ -predicate storeStep(Node node1, ContentSet c, Node node2) { - // a write `(*p).f = rhs` is modeled as two store steps: `rhs` is flows into field `f` of `(*p)`, - // which in turn flows into the pointer content of `p` - exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) | - node1 = rhs and - node2.(PostUpdateNode).getPreUpdateNode() = base and - c = any(DataFlow::FieldContent fc | fc.getField() = f) +predicate storeStep(Node node1, ContentSet cs, Node node2) { + exists(Content c | cs.asOneContent() = c | + // a write `(*p).f = rhs` is modeled as two store steps: `rhs` is flows into field `f` of `(*p)`, + // which in turn flows into the pointer content of `p` + exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) | + node1 = rhs and + node2.(PostUpdateNode).getPreUpdateNode() = base and + c = any(DataFlow::FieldContent fc | fc.getField() = f) + or + node1 = base and + node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and + c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType()) + ) or - node1 = base and - node2.(PostUpdateNode).getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and + node1 = node2.(AddressOperationNode).getOperand() and c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType()) + or + containerStoreStep(node1, node2, c) ) or - node1 = node2.(AddressOperationNode).getOperand() and - c = any(DataFlow::PointerContent pc | pc.getPointerType() = node2.getType()) - or - FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c, + FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), cs, node2.(FlowSummaryNode).getSummaryNode()) - or - containerStoreStep(node1, node2, c) } /** @@ -170,20 +172,26 @@ predicate storeStep(Node node1, ContentSet c, Node node2) { * Thus, `node1` references an object with a content `c` whose value ends up in * `node2`. */ -predicate readStep(Node node1, ContentSet c, Node node2) { - node1 = node2.(PointerDereferenceNode).getOperand() and - c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType()) - or - exists(FieldReadNode read | - node2 = read and - node1 = read.getBase() and - c = any(DataFlow::FieldContent fc | fc.getField() = read.getField()) +predicate readStep(Node node1, ContentSet cs, Node node2) { + exists(Content c | cs.asOneContent() = c | + node1 = node2.(PointerDereferenceNode).getOperand() and + c = any(DataFlow::PointerContent pc | pc.getPointerType() = node1.getType()) + or + exists(FieldReadNode read | + node2 = read and + node1 = read.getBase() and + c = any(DataFlow::FieldContent fc | fc.getField() = read.getField()) + ) + or + containerReadStep(node1, node2, c) ) or - FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c, + FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), cs, node2.(FlowSummaryNode).getSummaryNode()) or - containerReadStep(node1, node2, c) + any(ImplicitFieldReadNode ifrn).shouldImplicitlyReadAllFields(node1) and + cs.isUniversalContent() and + node1 = node2 } /** @@ -462,9 +470,11 @@ ContentApprox getContentApprox(Content c) { any() } /** * Holds if the the content `c` is a container. */ -predicate containerContent(ContentSet c) { - c instanceof ArrayContent or - c instanceof CollectionContent or - c instanceof MapKeyContent or - c instanceof MapValueContent +predicate containerContentSet(ContentSet cs) { + exists(Content c | cs.asOneContent() = c | + c instanceof ArrayContent or + c instanceof CollectionContent or + c instanceof MapKeyContent or + c instanceof MapValueContent + ) } diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll index 68ffe57f5f59..b6ca2c825ebb 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -7,6 +7,7 @@ private import semmle.go.dataflow.FunctionInputsAndOutputs private import semmle.go.dataflow.ExternalFlow private import DataFlowPrivate private import FlowSummaryImpl as FlowSummaryImpl +private import codeql.util.Unit import DataFlowNodes::Public /** @@ -50,6 +51,18 @@ abstract class FunctionModel extends Function { } } +/** + * A unit class for adding nodes that should implicitly read from all nested content. + * + * For example, this might be appropriate for the argument to a method that serializes a struct. + */ +class ImplicitFieldReadNode extends Unit { + /** + * Holds if the node `n` should implicitly read from all nested content in a taint-tracking context. + */ + abstract predicate shouldImplicitlyReadAllFields(DataFlow::Node n); +} + /** * Gets the `Node` corresponding to `insn`. */ @@ -169,6 +182,11 @@ class Content extends TContent { ) { filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0 } + + /** + * Gets the `ContentSet` contaning only this content. + */ + ContentSet asContentSet() { result.asOneContent() = this } } /** A reference through a field. */ @@ -236,21 +254,33 @@ class SyntheticFieldContent extends Content, TSyntheticFieldContent { override string toString() { result = s.toString() } } +private newtype TContentSet = + TOneContent(Content c) or + TAllContent() + /** * An entity that represents a set of `Content`s. * * The set may be interpreted differently depending on whether it is * stored into (`getAStoreContent`) or read from (`getAReadContent`). */ -class ContentSet instanceof Content { +class ContentSet instanceof TContentSet { /** Gets a content that may be stored into when storing into this set. */ - Content getAStoreContent() { result = this } + Content getAStoreContent() { this = TOneContent(result) } /** Gets a content that may be read from when reading from this set. */ - Content getAReadContent() { result = this } + Content getAReadContent() { + this = TOneContent(result) + or + this = TAllContent() and exists(result) + } /** Gets a textual representation of this content set. */ - string toString() { result = super.toString() } + string toString() { + exists(Content c | this = TOneContent(c) | result = c.toString()) + or + this = TAllContent() and result = "all content" + } /** * Holds if this element is at the specified location. @@ -262,8 +292,27 @@ class ContentSet instanceof Content { predicate hasLocationInfo( string filepath, int startline, int startcolumn, int endline, int endcolumn ) { - super.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + exists(Content c | this = TOneContent(c) | + c.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) + or + this = TAllContent() and + filepath = "" and + startline = 0 and + startcolumn = 0 and + endline = 0 and + endcolumn = 0 } + + /** + * If this is a singleton content set, returns the content. + */ + Content asOneContent() { this = TOneContent(result) } + + /** + * Holds if this is a universal content set. + */ + predicate isUniversalContent() { this = TAllContent() } } /** diff --git a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll index 40c68ceb900a..473902be2996 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/FlowSummaryImpl.qll @@ -55,26 +55,28 @@ module Input implements InputSig { } string encodeContent(ContentSet cs, string arg) { - exists(Field f, string package, string className, string fieldName | - f = cs.(FieldContent).getField() and - f.hasQualifiedName(package, className, fieldName) and - result = "Field" and - arg = package + "." + className + "." + fieldName - ) - or - exists(SyntheticField f | - f = cs.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f + exists(Content c | cs.asOneContent() = c | + exists(Field f, string package, string className, string fieldName | + f = c.(FieldContent).getField() and + f.hasQualifiedName(package, className, fieldName) and + result = "Field" and + arg = package + "." + className + "." + fieldName + ) + or + exists(SyntheticField f | + f = c.(SyntheticFieldContent).getField() and result = "SyntheticField" and arg = f + ) + or + c instanceof ArrayContent and result = "ArrayElement" and arg = "" + or + c instanceof CollectionContent and result = "Element" and arg = "" + or + c instanceof MapKeyContent and result = "MapKey" and arg = "" + or + c instanceof MapValueContent and result = "MapValue" and arg = "" + or + c instanceof PointerContent and result = "Dereference" and arg = "" ) - or - cs instanceof ArrayContent and result = "ArrayElement" and arg = "" - or - cs instanceof CollectionContent and result = "Element" and arg = "" - or - cs instanceof MapKeyContent and result = "MapKey" and arg = "" - or - cs instanceof MapValueContent and result = "MapValue" and arg = "" - or - cs instanceof PointerContent and result = "Dereference" and arg = "" } bindingset[token] @@ -513,7 +515,9 @@ module Private { SummaryComponent qualifier() { result = argument(-1) } /** Gets a summary component for field `f`. */ - SummaryComponent field(Field f) { result = content(any(FieldContent c | c.getField() = f)) } + SummaryComponent field(Field f) { + result = content(any(FieldContent c | c.getField() = f).asContentSet()) + } /** Gets a summary component that represents the return value of a call. */ SummaryComponent return() { result = SC::return(_) } diff --git a/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll b/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll index 2605dd326c3c..f5525f90e0db 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll @@ -36,11 +36,11 @@ predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) { FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(src, sink, _) or // Treat container flow as taint for the local taint flow relation - exists(DataFlow::Content c | DataFlowPrivate::containerContent(c) | - DataFlowPrivate::readStep(src, c, sink) or - DataFlowPrivate::storeStep(src, c, sink) or - FlowSummaryImpl::Private::Steps::summaryGetterStep(src, c, sink, _) or - FlowSummaryImpl::Private::Steps::summarySetterStep(src, c, sink, _) + exists(DataFlow::ContentSet cs | DataFlowPrivate::containerContentSet(cs) | + DataFlowPrivate::readStep(src, cs, sink) or + DataFlowPrivate::storeStep(src, cs, sink) or + FlowSummaryImpl::Private::Steps::summaryGetterStep(src, cs, sink, _) or + FlowSummaryImpl::Private::Steps::summarySetterStep(src, cs, sink, _) ) } @@ -57,10 +57,11 @@ private Type getElementType(Type containerType) { * of `c` at sinks and inputs to additional taint steps. */ bindingset[node] -predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet c) { - exists(Type containerType | +predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::ContentSet cs) { + exists(Type containerType, DataFlow::Content c | node instanceof DataFlow::ArgumentNode and - getElementType*(node.getType()) = containerType + getElementType*(node.getType()) = containerType and + cs.asOneContent() = c | containerType instanceof ArrayType and c instanceof DataFlow::ArrayContent @@ -158,7 +159,7 @@ predicate elementWriteStep(DataFlow::Node pred, DataFlow::Node succ) { any(DataFlow::Write w).writesElement(succ.(DataFlow::PostUpdateNode).getPreUpdateNode(), _, pred) or FlowSummaryImpl::Private::Steps::summaryStoreStep(pred.(DataFlowPrivate::FlowSummaryNode) - .getSummaryNode(), any(DataFlow::Content c | c instanceof DataFlow::ArrayContent), + .getSummaryNode(), any(DataFlow::ArrayContent ac).asContentSet(), succ.(DataFlowPrivate::FlowSummaryNode).getSummaryNode()) } diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll index deb366b4cdaa..dded5092bcc3 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/NetHttp.qll @@ -122,7 +122,7 @@ module NetHttp { | lastParamIndex = call.getCall().getCalleeType().getNumParameter() - 1 and varArgsSliceArgument = SummaryComponentStack::argument(lastParamIndex) and - arrayContentSC = SummaryComponent::content(arrayContent) and + arrayContentSC = SummaryComponent::content(arrayContent.asContentSet()) and stack = SummaryComponentStack::push(arrayContentSC, varArgsSliceArgument) ) else stack = SummaryComponentStack::argument(n) diff --git a/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll b/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll index 4ef4da058395..c5a67f388f42 100644 --- a/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll +++ b/go/ql/lib/semmle/go/frameworks/stdlib/TextTemplate.qll @@ -67,4 +67,45 @@ module TextTemplate { input = inp and output = outp } } + + private class ExecuteTemplateMethod extends Method { + int inputArg; + + ExecuteTemplateMethod() { + exists(string name | + this.hasQualifiedName("text/template", "Template", name) and + ( + name = "Execute" and inputArg = 1 + or + name = "ExecuteTemplate" and inputArg = 2 + ) + ) + } + + int getInputArgIdx() { result = inputArg } + } + + private class ExecuteTemplateFieldReader extends DataFlow::ImplicitFieldReadNode { + override predicate shouldImplicitlyReadAllFields(DataFlow::Node n) { + exists(ExecuteTemplateMethod m, DataFlow::MethodCallNode cn | + cn.getTarget() = m and + n = cn.getArgument(m.getInputArgIdx()) + ) + } + } + + private class ExecuteTemplateFunctionModels extends TaintTracking::FunctionModel, + ExecuteTemplateMethod + { + FunctionInput inp; + FunctionOutput outp; + + ExecuteTemplateFunctionModels() { + inp.isParameter(this.getInputArgIdx()) and outp.isParameter(0) + } + + override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { + input = inp and output = outp + } + } } diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.expected b/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql b/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql new file mode 100644 index 000000000000..94c1b8f30b54 --- /dev/null +++ b/go/ql/test/library-tests/semmle/go/frameworks/serialization/test.ql @@ -0,0 +1,9 @@ +import go +import utils.test.InlineFlowTest + +string getArgString(DataFlow::Node src, DataFlow::Node sink) { + exists(sink) and + result = src.(DataFlow::CallNode).getArgument(0).getExactValue() +} + +import TaintFlowTestArgString diff --git a/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go new file mode 100644 index 000000000000..8068a314dc55 --- /dev/null +++ b/go/ql/test/library-tests/semmle/go/frameworks/serialization/texttemplate.go @@ -0,0 +1,60 @@ +package xyz + +import ( + "bytes" + "text/template" +) + +type Inner1 struct { + Data string +} + +type Inner2 struct { + Data string +} + +type Inner3 struct { + Data string +} + +type HasInner3Slice struct { + Slice []Inner3 +} + +type Outer struct { + SliceField []Inner1 + PtrField *Inner2 + MapField map[string]Inner3 + DeepField HasInner3Slice +} + +func source(n int) string { return "dummy" } +func sink(arg any) {} + +func test() { + + source1 := source(1) + source2 := source(2) + source3 := source(3) + source4 := source(4) + + toSerialize := Outer{[]Inner1{{source1}}, &Inner2{source2}, map[string]Inner3{"key": {source3}}, + HasInner3Slice{[]Inner3{{source4}}}} + buff1 := new(bytes.Buffer) + buff2 := new(bytes.Buffer) + bytes1 := make([]byte, 10) + bytes2 := make([]byte, 10) + + tmpl, _ := template.New("test").Parse("Template text goes here (irrelevant for test)") + tmpl.ExecuteTemplate(buff1, "test", toSerialize) + buff1.Read(bytes1) + sink(bytes1) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 hasTaintFlow=4 + + // Read `buff2` via an `any`-typed variable, to ensure the static type of the argument to tmpl.Execute makes no difference to the result + var toSerializeAsAny any + toSerializeAsAny = toSerialize + tmpl.Execute(buff2, toSerializeAsAny) + buff2.Read(bytes2) + sink(bytes2) // $ hasTaintFlow=1 hasTaintFlow=2 hasTaintFlow=3 hasTaintFlow=4 + +} diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 1373345423f7..c6a16a0e15d3 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -2261,10 +2261,7 @@ module MakeImpl Lang> { returnAp = apNone() or // flow through a callable - exists(DataFlowCall call, ParamNodeEx p, Ap innerReturnAp | - revFlowThrough(call, returnCtx, p, state, _, returnAp, ap, innerReturnAp) and - flowThroughIntoCall(call, node, p, ap, innerReturnAp) - ) + revFlowThrough(_, returnCtx, state, returnAp, ap, node) or // flow out of a callable exists(ReturnPosition pos | @@ -2413,11 +2410,14 @@ module MakeImpl Lang> { pragma[nomagic] private predicate revFlowThrough( - DataFlowCall call, ReturnCtx returnCtx, ParamNodeEx p, FlowState state, - ReturnPosition pos, ApOption returnAp, Ap ap, Ap innerReturnAp + DataFlowCall call, ReturnCtx returnCtx, FlowState state, ApOption returnAp, Ap ap, + ArgNodeEx arg ) { - revFlowParamToReturn(p, state, pos, innerReturnAp, ap) and - revFlowIsReturned(call, returnCtx, returnAp, pos, innerReturnAp) + exists(ParamNodeEx p, ReturnPosition pos, Ap innerReturnAp | + flowThroughIntoCall(call, arg, p, ap, innerReturnAp) and + revFlowParamToReturn(p, state, pos, innerReturnAp, ap) and + revFlowIsReturned(call, returnCtx, returnAp, pos, innerReturnAp) + ) } /** @@ -2543,22 +2543,11 @@ module MakeImpl Lang> { ) } - pragma[nomagic] - private predicate revFlowThroughArg( - DataFlowCall call, ArgNodeEx arg, FlowState state, ReturnCtx returnCtx, ApOption returnAp, - Ap ap - ) { - exists(ParamNodeEx p, Ap innerReturnAp | - revFlowThrough(call, returnCtx, p, state, _, returnAp, ap, innerReturnAp) and - flowThroughIntoCall(call, arg, p, ap, innerReturnAp) - ) - } - pragma[nomagic] predicate callMayFlowThroughRev(DataFlowCall call) { exists(ArgNodeEx arg, FlowState state, ReturnCtx returnCtx, ApOption returnAp, Ap ap | revFlow(arg, state, returnCtx, returnAp, ap) and - revFlowThroughArg(call, arg, state, returnCtx, returnAp, ap) + revFlowThrough(call, returnCtx, state, returnAp, ap, arg) ) }