From a300eedf71126c2dd28c4b5f44447601912fb667 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 27 Nov 2024 23:49:46 +0100 Subject: [PATCH 1/4] ruby: first sketches --- .../src/queries/performance/CouldBebatched.ql | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 ruby/ql/src/queries/performance/CouldBebatched.ql diff --git a/ruby/ql/src/queries/performance/CouldBebatched.ql b/ruby/ql/src/queries/performance/CouldBebatched.ql new file mode 100644 index 000000000000..df1372d34671 --- /dev/null +++ b/ruby/ql/src/queries/performance/CouldBebatched.ql @@ -0,0 +1,49 @@ +/** + * @id dsl/could-be-batched + * @kind problem + * @problem.severity info + * @problem.type CodeSmell + * @problem.description Use `ActiveRecord::Relation#in_batches` to process records in batches. + * @problem.links https://api.rubyonrails.org/classes/ActiveRecord/Batches/ClassMethods.html#method-i-in_batches + */ + +import ruby +// private import codeql.ruby.CFG +import codeql.ruby.Concepts +import codeql.ruby.frameworks.ActiveRecord + +// collection.each { |item| expensiveCall(item) } +predicate expensiveCall(DataFlow::CallNode c) { c instanceof SqlExecution } + +string loopMethodName() { + // TODO: check these + result in ["each", "map", "foreach", "flat_map", "do"] +} + +class LoopingCall extends DataFlow::CallNode { + DataFlow::CallableNode loopBlock; + + LoopingCall() { + this.getMethodName() = loopMethodName() and loopBlock = this.getBlock().asCallable() + } + + DataFlow::CallableNode getLoopBlock() { result = loopBlock } +} + +predicate happensInLoop(DataFlow::CallNode e) { + any(LoopingCall loop).getLoopBlock().asCallableAstNode() = e.asExpr().getScope() +} + +// Active Record +/** A call to e.g. `user.update_attribute(name, "foo")` */ +private class LoadElementsCall extends ActiveRecordInstanceMethodCall { + LoadElementsCall() { this.getMethodName() in ["latest", "find_by", "where"] } +} + +// from LoopingCall loopCall +// select loopCall, loopCall.getLoopBlock() +from DataFlow::CallNode call +where + happensInLoop(call) and //and expensiveCall(call) + call instanceof LoadElementsCall +select call, "This call happens in a loop" From f6cb090e58ead16a77b9b4054472a70e6564ccd2 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 3 Dec 2024 17:31:21 +0100 Subject: [PATCH 2/4] ruby: more tweaking I opened up `ActiveRecordModelFinderCall` to be public --- .../codeql/ruby/frameworks/ActiveRecord.qll | 3 +- .../src/queries/performance/CouldBeAsync.ql | 95 +++++++++++++++++++ .../src/queries/performance/CouldBebatched.ql | 49 ---------- 3 files changed, 96 insertions(+), 51 deletions(-) create mode 100644 ruby/ql/src/queries/performance/CouldBeAsync.ql delete mode 100644 ruby/ql/src/queries/performance/CouldBebatched.ql diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index 7348bfc699bb..e8584f57adf3 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -352,8 +352,7 @@ private Expr getUltimateReceiver(MethodCall call) { } // A call to `find`, `where`, etc. that may return active record model object(s) -private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode -{ +class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode { private ActiveRecordModelClass cls; ActiveRecordModelFinderCall() { diff --git a/ruby/ql/src/queries/performance/CouldBeAsync.ql b/ruby/ql/src/queries/performance/CouldBeAsync.ql new file mode 100644 index 000000000000..5cdfe89455c2 --- /dev/null +++ b/ruby/ql/src/queries/performance/CouldBeAsync.ql @@ -0,0 +1,95 @@ +/** + * @id ruby/could-be-async + * @kind problem + * @problem.severity info + * @problem.description Use `ActiveRecord::Relation#load_async` to load records asynchronously. + */ + +import ruby +// private import codeql.ruby.CFG +import codeql.ruby.Concepts +import codeql.ruby.frameworks.ActiveRecord + +// collection.each { |item| expensiveCall(item) } +predicate expensiveCall(DataFlow::CallNode c) { c instanceof SqlExecution } + +string loopMethodName() { + // TODO: check these + result in [ + "each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?", + "collect", "collect!", "select", "select!", "reject", "reject!" + // , "collect", "select", "reject", "find_all", "find", + // "detect", "any?", "all?", "one?", "none?", "one", "none", "min", "max", "minmax", "min_by", + // "max_by", "minmax_by", "sort", "sort_by", "sort_by!", "sort" + ] +} + +class LoopingCall extends DataFlow::CallNode { + DataFlow::CallableNode loopBlock; + + LoopingCall() { + this.getMethodName() = loopMethodName() and loopBlock = this.getBlock().asCallable() + } + + DataFlow::CallableNode getLoopBlock() { result = loopBlock } +} + +predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) { + loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope() +} + +// predicate directLoop(@ruby_while) +predicate happensInOuterLoop(LoopingCall outerLoop, DataFlow::CallNode e) { + exists(LoopingCall innerLoop | + happensInLoop(outerLoop, innerLoop) and + happensInLoop(innerLoop, e) + ) +} + +predicate happensInInnermostLoop(LoopingCall loop, DataFlow::CallNode e) { + happensInLoop(loop, e) and + not happensInOuterLoop(loop, e) +} + +// Active Record +private class LoadElementsCall extends ActiveRecordInstanceMethodCall { + LoadElementsCall() { + this.getMethodName() in ["latest", "find_by", "where", "count", "find"] and + not any(PluckCall p).chaines() = this + } +} + +private class PluckCall extends ActiveRecordInstanceMethodCall { + PluckCall() { this.getMethodName() in ["pluck"] } + + ActiveRecordInstanceMethodCall chaines() { result = getChain(this.getInstance()) } +} + +private ActiveRecordInstanceMethodCall getChain(ActiveRecordInstanceMethodCall c) { + result = c + or + result = getChain(c.getInstance()) +} + +ActiveRecordInstanceMethodCall longChain(ActiveRecordInstanceMethodCall c) { + 2 < strictcount(ActiveRecordInstanceMethodCall ch | ch = getChain(c)) and + result = getChain(c) and + result != c +} + +ActiveRecordInstanceMethodCall pluckChain(PluckCall c) { result = longChain(c) } + +// from LoopingCall loopCall +// select loopCall, loopCall.getLoopBlock() +from LoopingCall loop, DataFlow::CallNode call, string message +where + happensInInnermostLoop(loop, call) and + ( + call instanceof ActiveRecordModelFinderCall and + not call.getMethodName() in ["new", "create"] and + message = "could be chained with load_async" + or + call instanceof PluckCall and + message = "could be async_pluck" + ) +select call, "This call happens inside $@, and " + message, loop, "this loop" diff --git a/ruby/ql/src/queries/performance/CouldBebatched.ql b/ruby/ql/src/queries/performance/CouldBebatched.ql deleted file mode 100644 index df1372d34671..000000000000 --- a/ruby/ql/src/queries/performance/CouldBebatched.ql +++ /dev/null @@ -1,49 +0,0 @@ -/** - * @id dsl/could-be-batched - * @kind problem - * @problem.severity info - * @problem.type CodeSmell - * @problem.description Use `ActiveRecord::Relation#in_batches` to process records in batches. - * @problem.links https://api.rubyonrails.org/classes/ActiveRecord/Batches/ClassMethods.html#method-i-in_batches - */ - -import ruby -// private import codeql.ruby.CFG -import codeql.ruby.Concepts -import codeql.ruby.frameworks.ActiveRecord - -// collection.each { |item| expensiveCall(item) } -predicate expensiveCall(DataFlow::CallNode c) { c instanceof SqlExecution } - -string loopMethodName() { - // TODO: check these - result in ["each", "map", "foreach", "flat_map", "do"] -} - -class LoopingCall extends DataFlow::CallNode { - DataFlow::CallableNode loopBlock; - - LoopingCall() { - this.getMethodName() = loopMethodName() and loopBlock = this.getBlock().asCallable() - } - - DataFlow::CallableNode getLoopBlock() { result = loopBlock } -} - -predicate happensInLoop(DataFlow::CallNode e) { - any(LoopingCall loop).getLoopBlock().asCallableAstNode() = e.asExpr().getScope() -} - -// Active Record -/** A call to e.g. `user.update_attribute(name, "foo")` */ -private class LoadElementsCall extends ActiveRecordInstanceMethodCall { - LoadElementsCall() { this.getMethodName() in ["latest", "find_by", "where"] } -} - -// from LoopingCall loopCall -// select loopCall, loopCall.getLoopBlock() -from DataFlow::CallNode call -where - happensInLoop(call) and //and expensiveCall(call) - call instanceof LoadElementsCall -select call, "This call happens in a loop" From 51ecf4888c9a5c7cc3dd0795b2bb5ba818a31ea6 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 17 Dec 2024 15:20:35 +0100 Subject: [PATCH 3/4] ruby: first draft qhelp plus some tweaks --- .../queries/performance/CouldBeAsync.qhelp | 24 +++++++++++ .../src/queries/performance/CouldBeAsync.ql | 41 +++++-------------- .../performance/examples/async_pluck.rb | 16 ++++++++ .../queries/performance/examples/preload.rb | 14 +++++++ .../performance/examples/straight_loop.rb | 8 ++++ 5 files changed, 72 insertions(+), 31 deletions(-) create mode 100644 ruby/ql/src/queries/performance/CouldBeAsync.qhelp create mode 100644 ruby/ql/src/queries/performance/examples/async_pluck.rb create mode 100644 ruby/ql/src/queries/performance/examples/preload.rb create mode 100644 ruby/ql/src/queries/performance/examples/straight_loop.rb diff --git a/ruby/ql/src/queries/performance/CouldBeAsync.qhelp b/ruby/ql/src/queries/performance/CouldBeAsync.qhelp new file mode 100644 index 000000000000..280ac776799c --- /dev/null +++ b/ruby/ql/src/queries/performance/CouldBeAsync.qhelp @@ -0,0 +1,24 @@ + + + + +

+When an AcriveRecord query is executed synchronously, the application is blocked until the query completes. This can lead to poor performance, especially when the query is slow or when many queries are executed in sequence. This query identifies ActiveRecord queries that could be executed asynchronously to improve performance. +Specifically, this query identifies ActiveRecord queries that are executed in a loop. If each query is independent of the others, the queries could be executed in parallel by using the built-in Ruby load_async method. +In those cases, where the query includes a pluck call, the query could be executed asynchronously by using the async_pluck method. +

+
+ +

If possible, split the loop into two. The first creates a map of promises resolving to the async query results. The second runs throuhg this map, waiting on each promise, and does whatever the original loop did with the result of the query.

+
+ +

The following (suboptimal) example code executes a series of ActiveRecord queries in a loop. The queries are independent of each other, so they could be executed in parallel to improve performance.

+ +

To be able to fetch the necessary information asynchronously, we first pull it out into its own (implicit) loop: + +

We can now use the async_pluck method to execute the queries in parallel.

+ +
+
\ No newline at end of file diff --git a/ruby/ql/src/queries/performance/CouldBeAsync.ql b/ruby/ql/src/queries/performance/CouldBeAsync.ql index 5cdfe89455c2..c86e0f7263ce 100644 --- a/ruby/ql/src/queries/performance/CouldBeAsync.ql +++ b/ruby/ql/src/queries/performance/CouldBeAsync.ql @@ -1,26 +1,21 @@ /** - * @id ruby/could-be-async + * @name Could be async + * @description Use `ActiveRecord::Relation#load_async` to load records asynchronously. * @kind problem * @problem.severity info - * @problem.description Use `ActiveRecord::Relation#load_async` to load records asynchronously. + * @precision high + * @id rb/could-be-async + * @tags performance */ import ruby -// private import codeql.ruby.CFG import codeql.ruby.Concepts import codeql.ruby.frameworks.ActiveRecord -// collection.each { |item| expensiveCall(item) } -predicate expensiveCall(DataFlow::CallNode c) { c instanceof SqlExecution } - string loopMethodName() { - // TODO: check these result in [ "each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?", "collect", "collect!", "select", "select!", "reject", "reject!" - // , "collect", "select", "reject", "find_all", "find", - // "detect", "any?", "all?", "one?", "none?", "one", "none", "min", "max", "minmax", "min_by", - // "max_by", "minmax_by", "sort", "sort_by", "sort_by!", "sort" ] } @@ -51,38 +46,22 @@ predicate happensInInnermostLoop(LoopingCall loop, DataFlow::CallNode e) { not happensInOuterLoop(loop, e) } -// Active Record -private class LoadElementsCall extends ActiveRecordInstanceMethodCall { - LoadElementsCall() { - this.getMethodName() in ["latest", "find_by", "where", "count", "find"] and - not any(PluckCall p).chaines() = this - } -} - private class PluckCall extends ActiveRecordInstanceMethodCall { PluckCall() { this.getMethodName() in ["pluck"] } - ActiveRecordInstanceMethodCall chaines() { result = getChain(this.getInstance()) } + ActiveRecordInstance chaines() { result = getChain(this) } } -private ActiveRecordInstanceMethodCall getChain(ActiveRecordInstanceMethodCall c) { - result = c +private ActiveRecordInstance getChain(ActiveRecordInstanceMethodCall c) { + result = c.getInstance() or result = getChain(c.getInstance()) } -ActiveRecordInstanceMethodCall longChain(ActiveRecordInstanceMethodCall c) { - 2 < strictcount(ActiveRecordInstanceMethodCall ch | ch = getChain(c)) and - result = getChain(c) and - result != c -} - -ActiveRecordInstanceMethodCall pluckChain(PluckCall c) { result = longChain(c) } - -// from LoopingCall loopCall -// select loopCall, loopCall.getLoopBlock() from LoopingCall loop, DataFlow::CallNode call, string message where + not call.getLocation().getFile().getAbsolutePath().matches("%test%") and + not call = any(PluckCall p).chaines() and happensInInnermostLoop(loop, call) and ( call instanceof ActiveRecordModelFinderCall and diff --git a/ruby/ql/src/queries/performance/examples/async_pluck.rb b/ruby/ql/src/queries/performance/examples/async_pluck.rb new file mode 100644 index 000000000000..daedac75db6b --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/async_pluck.rb @@ -0,0 +1,16 @@ +require 'async_pluck' + +# Preload User data in parallel +user_data = User.where(login: repo_names_by_owner.keys).async_pluck(:login, :id, :type).to_h do |login, id, type| + [login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }] +end + +repo_names_by_owner.each do |owner_slug, repo_names| + owner_info = user_data[owner_slug] + owner_id = owner_info[:id] + owner_type = owner_info[:type] + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg +end \ No newline at end of file diff --git a/ruby/ql/src/queries/performance/examples/preload.rb b/ruby/ql/src/queries/performance/examples/preload.rb new file mode 100644 index 000000000000..19c0af583f7f --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/preload.rb @@ -0,0 +1,14 @@ +# Preload User data +user_data = User.where(login: repo_names_by_owner.keys).pluck(:login, :id, :type).to_h do |login, id, type| + [login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }] + end + + repo_names_by_owner.each do |owner_slug, repo_names| + owner_info = user_data[owner_slug] + owner_id = owner_info[:id] + owner_type = owner_info[:type] + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg + end \ No newline at end of file diff --git a/ruby/ql/src/queries/performance/examples/straight_loop.rb b/ruby/ql/src/queries/performance/examples/straight_loop.rb new file mode 100644 index 000000000000..4c64d3bb84f2 --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/straight_loop.rb @@ -0,0 +1,8 @@ +repo_names_by_owner.map do |owner_slug, repo_names| + owner_id, owner_type = User.where(login: owner_slug).pluck(:id, :type).first + owner_type = owner_type == "User" ? "USER" : "ORGANIZATION" + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg + end \ No newline at end of file From 5d3a541ee3ddca601427fe1d25033757df85f16c Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 19 Dec 2024 13:16:23 +0100 Subject: [PATCH 4/4] Ruby: revome cases where the loop may be interrupted --- .../src/queries/performance/CouldBeAsync.ql | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/ruby/ql/src/queries/performance/CouldBeAsync.ql b/ruby/ql/src/queries/performance/CouldBeAsync.ql index c86e0f7263ce..b4e4ea686d96 100644 --- a/ruby/ql/src/queries/performance/CouldBeAsync.ql +++ b/ruby/ql/src/queries/performance/CouldBeAsync.ql @@ -9,8 +9,10 @@ */ import ruby +private import codeql.ruby.AST import codeql.ruby.Concepts import codeql.ruby.frameworks.ActiveRecord +private import codeql.ruby.TaintTracking string loopMethodName() { result in [ @@ -33,7 +35,6 @@ predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) { loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope() } -// predicate directLoop(@ruby_while) predicate happensInOuterLoop(LoopingCall outerLoop, DataFlow::CallNode e) { exists(LoopingCall innerLoop | happensInLoop(outerLoop, innerLoop) and @@ -58,10 +59,28 @@ private ActiveRecordInstance getChain(ActiveRecordInstanceMethodCall c) { result = getChain(c.getInstance()) } +// The ActiveRecord instance is used to potentially control the loop +predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) { + TaintTracking::localTaint(ar, guard) and + guard = guardForLoopControl(_, _) +} + +// A guard for controlling the loop +DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) { + result.asExpr().getAstNode() = cond.getCondition().getAChild*() and + ( + control.(MethodCall).getMethodName() = "raise" + or + control instanceof NextStmt + ) and + control = cond.getBranch(_).getAChild() +} + from LoopingCall loop, DataFlow::CallNode call, string message where not call.getLocation().getFile().getAbsolutePath().matches("%test%") and not call = any(PluckCall p).chaines() and + not usedInLoopControlGuard(call, _) and happensInInnermostLoop(loop, call) and ( call instanceof ActiveRecordModelFinderCall and