Skip to content

Commit 43926b9

Browse files
feat: add explain query features to aggregate query in Cloud Datastore (#30704)
1 parent 99773d3 commit 43926b9

10 files changed

Lines changed: 443 additions & 34 deletions

File tree

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright 2024 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# https://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
require "datastore_helper"
18+
19+
describe "Datastore Aggregate Query Explain", :datastore do
20+
let(:prefix) { "gcloud-explain-#{SecureRandom.hex 4}" }
21+
let(:kind) { "ExplainTask" }
22+
let(:tasks) do
23+
3.times.map do |i|
24+
dataset.entity "#{prefix}-#{kind}", "#{prefix}-task-#{i}" do |t|
25+
t["description"] = "explain task #{i}"
26+
t["done"] = false
27+
end
28+
end
29+
end
30+
31+
before do
32+
dataset.save(*tasks)
33+
# Ensure the entities are created
34+
try_with_backoff "getting tasks" do
35+
entities = dataset.find_all tasks.map(&:key)
36+
raise "not all tasks created" if entities.count != tasks.count
37+
end
38+
end
39+
40+
after do
41+
dataset.delete(*tasks)
42+
end
43+
44+
it "returns explain_metrics when analyze is true" do
45+
query = dataset.query "#{prefix}-#{kind}"
46+
aggregate_query = query.aggregate_query.add_count aggregate_alias: "total"
47+
results = dataset.run_aggregation aggregate_query, explain_options: { analyze: true }
48+
49+
_(results.get("total")).must_equal tasks.count
50+
51+
_(results.explain_metrics).must_be_kind_of Google::Cloud::Datastore::V1::ExplainMetrics
52+
_(results.explain_metrics.plan_summary).wont_be_nil
53+
_(results.explain_metrics.execution_stats).wont_be_nil
54+
_(results.explain_metrics.execution_stats.results_returned).must_equal 1
55+
end
56+
57+
it "does not return execution stats when analyze is false" do
58+
query = dataset.query "#{prefix}-#{kind}"
59+
aggregate_query = query.aggregate_query.add_count aggregate_alias: "total"
60+
results = dataset.run_aggregation aggregate_query, explain_options: { analyze: false }
61+
62+
_(results.get("total")).must_be :nil?
63+
64+
_(results.explain_metrics).must_be_kind_of Google::Cloud::Datastore::V1::ExplainMetrics
65+
_(results.explain_metrics.plan_summary).wont_be_nil
66+
_(results.explain_metrics.execution_stats).must_be :nil?
67+
end
68+
69+
it "runs aggregate query via dataset transaction" do
70+
dataset.transaction do |tx|
71+
query = tx.query "#{prefix}-#{kind}"
72+
aggregate_query = query.aggregate_query.add_count aggregate_alias: "total"
73+
results = tx.run_aggregation aggregate_query, explain_options: { analyze: true }
74+
75+
_(results.get("total")).must_equal tasks.count
76+
77+
_(results.explain_metrics).must_be_kind_of Google::Cloud::Datastore::V1::ExplainMetrics
78+
_(results.explain_metrics.plan_summary).wont_be_nil
79+
_(results.explain_metrics.execution_stats).wont_be_nil
80+
_(results.explain_metrics.execution_stats.results_returned).must_equal 1
81+
end
82+
end
83+
84+
it "runs aggregate query via dataset read-only transaction" do
85+
dataset.read_only_transaction do |tx|
86+
query = tx.query "#{prefix}-#{kind}"
87+
aggregate_query = query.aggregate_query.add_count aggregate_alias: "total"
88+
results = tx.run_aggregation aggregate_query, explain_options: { analyze: true }
89+
90+
_(results.get("total")).must_equal tasks.count
91+
92+
_(results.explain_metrics).must_be_kind_of Google::Cloud::Datastore::V1::ExplainMetrics
93+
_(results.explain_metrics.plan_summary).wont_be_nil
94+
_(results.explain_metrics.execution_stats).wont_be_nil
95+
_(results.explain_metrics.execution_stats.results_returned).must_equal 1
96+
end
97+
end
98+
end

google-cloud-datastore/acceptance/datastore/aggregate_query_test.rb

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,12 @@
134134
end
135135

136136
it "returns count on filter with and without read time" do
137+
sleep(0.5) # wait for `before` tx that saves all characters to settle
137138
read_time = Time.now
138-
sleep(0.5)
139+
sleep(0.5) # wait so that "arya" tx happens definitely after `read_time` (for read time aggregation query)
139140
arya["alive"] = false
140141
dataset.transaction { |tx| tx.save arya }
141-
sleep(0.5)
142+
sleep(0.5) # wait for the "arya" tx to settle (so that something is changed since `read_time`)
142143

143144
query = Google::Cloud::Datastore.new
144145
.query("Character")
@@ -354,10 +355,19 @@
354355
end
355356

356357
it "returns sum on filter with and without read time" do
358+
sleep(0.5) # wait for `before` tx that saves all characters to settle
357359
read_time = Time.now
358-
sleep(0.5)
359-
dataset.transaction { |tx| tx.delete george }
360-
sleep(0.5)
360+
sleep(0.5) # wait so the "Paul" tx happens definitely after `read_time` (for read time aggregation query)
361+
paul = Google::Cloud::Datastore::Entity.new.tap do |e|
362+
e["name"] = "Paul"
363+
e["family"] = "Martin"
364+
e["appearances"] = 3
365+
e["alive"] = true
366+
end
367+
paul.key = Google::Cloud::Datastore::Key.new "Character", "Paul"
368+
paul.key.parent = book
369+
dataset.transaction { |tx| tx.save paul }
370+
sleep(0.5) # wait for the "Paul" tx to settle (so that something is changed since `read_time`)
361371

362372
query = Google::Cloud::Datastore.new
363373
.query("Character")
@@ -371,7 +381,7 @@
371381
_(res.get).must_equal 1
372382
res = dataset.run_aggregation aggregate_query
373383
_(res.get).must_be_kind_of Integer
374-
_(res.get).must_equal 0
384+
_(res.get).must_equal 4
375385
end
376386

377387
it "returns sum on limit" do
@@ -555,10 +565,19 @@
555565
end
556566

557567
it "returns average on filter with and without read time" do
568+
sleep(0.5) # wait for `before` tx that saves all characters to settle
558569
read_time = Time.now
559-
sleep(0.5)
560-
dataset.transaction { |tx| tx.delete george }
561-
sleep(0.5)
570+
sleep(0.5) # wait so the "Paul" tx happens definitely after `read_time` (for read time aggregation query)
571+
paul = Google::Cloud::Datastore::Entity.new.tap do |e|
572+
e["name"] = "Paul"
573+
e["family"] = "Martin"
574+
e["appearances"] = 3
575+
e["alive"] = true
576+
end
577+
paul.key = Google::Cloud::Datastore::Key.new "Character", "Paul"
578+
paul.key.parent = book
579+
dataset.transaction { |tx| tx.save paul }
580+
sleep(0.5) # wait for the "Paul" tx to settle (so that something is changed since `read_time`)
562581

563582
query = Google::Cloud::Datastore.new
564583
.query("Character")
@@ -572,7 +591,7 @@
572591
_(res.get).must_equal 1.0
573592
res = dataset.run_aggregation aggregate_query
574593
_(res.get).must_be_kind_of Float
575-
_(res.get).must_equal 0.0
594+
_(res.get).must_equal 2.0
576595
end
577596

578597
it "returns average on limit" do

google-cloud-datastore/lib/google/cloud/datastore/dataset.rb

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,10 @@ def run query, namespace: nil, consistency: nil, read_time: nil, explain_options
549549
# [Eventual Consistency in Google Cloud
550550
# Datastore](https://cloud.google.com/datastore/docs/articles/balancing-strong-and-eventual-consistency-with-google-cloud-datastore/#h.tf76fya5nqk8)
551551
# for more information.
552+
# @param [Hash, Google::Cloud::Datastore::V1::ExplainOptions] explain_options The options for query explanation.
553+
# Provide this argument to enable explain metrics. If this argument is left unset,
554+
# the results will not include explain metrics.
555+
# See {Google::Cloud::Datastore::V1::ExplainOptions} for details. Optional.
552556
#
553557
# @return [Google::Cloud::Datastore::Dataset::AggregateQueryResults]
554558
#
@@ -607,15 +611,32 @@ def run query, namespace: nil, consistency: nil, read_time: nil, explain_options
607611
# done: false
608612
# res = datastore.run_aggregation gql_query, namespace: "example-ns"
609613
#
610-
def run_aggregation aggregate_query, namespace: nil, consistency: nil, read_time: nil
614+
# @example Run the aggregate query with explain options to get query plan and execution statistics.
615+
# require "google/cloud/datastore"
616+
#
617+
# datastore = Google::Cloud::Datastore.new
618+
#
619+
# query = datastore.query("Task")
620+
# aggregate_query = query.aggregate_query.add_count aggregate_alias: "total"
621+
# results = datastore.run_aggregation aggregate_query, explain_options: { analyze: true }
622+
#
623+
# if results.explain_metrics
624+
# stats = results.explain_metrics.execution_stats
625+
# puts "Read operations: #{stats.read_operations}"
626+
# end
627+
#
628+
def run_aggregation aggregate_query, namespace: nil, consistency: nil, read_time: nil, explain_options: nil
611629
ensure_service!
612630
unless aggregate_query.is_a?(AggregateQuery) || aggregate_query.is_a?(GqlQuery)
613631
raise ArgumentError, "Cannot run a #{aggregate_query.class} object."
614632
end
615633
check_consistency! consistency
616634
aggregate_query_res = service.run_aggregation_query aggregate_query.to_grpc, namespace,
617-
consistency: consistency, read_time: read_time
618-
AggregateQueryResults.from_grpc aggregate_query_res
635+
consistency: consistency,
636+
read_time: read_time,
637+
explain_options: explain_options
638+
639+
AggregateQueryResults.from_grpc aggregate_query_res, explain_options
619640
end
620641

621642
##

google-cloud-datastore/lib/google/cloud/datastore/dataset/aggregate_query_results.rb

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,24 @@ class AggregateQueryResults
5757
# @return [Google::Protobuf::Timestamp]
5858
attr_reader :read_time
5959

60+
# Query explain metrics. This is only present when the `explain_options`
61+
# are provided to {Google::Cloud::Datastore::Dataset#run_aggregation}.
62+
# It is sent only once with the response.
63+
#
64+
# @return [Google::Cloud::Datastore::V1::ExplainMetrics, nil]
65+
attr_reader :explain_metrics
66+
67+
##
68+
# The options for query explanation.
69+
#
70+
# This is a copy of the input parameter supplied to the {Dataset#run_aggregation} function.
71+
#
72+
# @return [Google::Cloud::Datastore::V1::ExplainOptions, nil]
73+
attr_reader :explain_options
74+
6075
##
6176
# @private
62-
attr_writer :aggregate_fields, :read_time
77+
attr_writer :aggregate_fields, :read_time, :explain_metrics, :explain_options
6378

6479
##
6580
# Retrieves the aggregate data.
@@ -113,22 +128,33 @@ def get aggregate_alias = nil
113128
##
114129
# @private New AggregateQueryResults from a
115130
# Google::Cloud::Datastore::V1::RunAggregationQueryResponse object.
116-
def self.from_grpc aggregate_query_response
117-
aggregate_fields = aggregate_query_response
118-
.batch
119-
.aggregation_results[0]
120-
.aggregate_properties
121-
.map do |aggregate_alias, value|
122-
if value.has_integer_value?
123-
[aggregate_alias, value.integer_value]
124-
else
125-
[aggregate_alias, value.double_value]
126-
end
127-
end
131+
def self.from_grpc aggregate_query_response, explain_options
132+
# If the aggregate query is run with explain_options and analyze = false,
133+
# the RunAggregationQueryResponse will not have batch results
134+
# only explain metrics.
135+
aggregate_fields = {}
136+
read_time = nil
137+
138+
if aggregate_query_response.batch
139+
aggregate_fields = aggregate_query_response
140+
.batch
141+
.aggregation_results[0]
142+
.aggregate_properties
143+
.map do |aggregate_alias, value|
144+
if value.has_integer_value?
145+
[aggregate_alias, value.integer_value]
146+
else
147+
[aggregate_alias, value.double_value]
148+
end
149+
end
150+
read_time = aggregate_query_response.batch.read_time
151+
end
128152

129153
new.tap do |aq_result|
154+
aq_result.explain_options = explain_options
155+
aq_result.explain_metrics = aggregate_query_response.explain_metrics
130156
aq_result.aggregate_fields = aggregate_fields.to_h
131-
aq_result.read_time = aggregate_query_response.batch.read_time
157+
aq_result.read_time = read_time
132158
end
133159
end
134160
end

google-cloud-datastore/lib/google/cloud/datastore/read_only_transaction.rb

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -236,14 +236,34 @@ def run query, namespace: nil, explain_options: nil
236236
# .add_count
237237
# res = tx.run_aggregation aggregate_query
238238
# end
239+
# @example Run the aggregate query with explain options:
240+
# require "google/cloud/datastore"
241+
#
242+
# datastore = Google::Cloud::Datastore.new
243+
#
244+
# datastore.read_only_transaction do |tx|
245+
# query = tx.query("Task")
246+
# aggregate_query = query.aggregate_query.add_count aggregate_alias: "total"
247+
# results = tx.run_aggregation aggregate_query, explain_options: { analyze: true }
248+
#
249+
# if results.explain_metrics
250+
# stats = results.explain_metrics.execution_stats
251+
# puts "Read operations: #{stats.read_operations}"
252+
# end
253+
# end
239254
#
240-
def run_aggregation aggregate_query, namespace: nil
255+
def run_aggregation aggregate_query, namespace: nil, explain_options: nil
241256
ensure_service!
242257
unless aggregate_query.is_a?(AggregateQuery) || aggregate_query.is_a?(GqlQuery)
243258
raise ArgumentError, "Cannot run a #{aggregate_query.class} object."
244259
end
245-
aggregate_query_results = service.run_aggregation_query aggregate_query.to_grpc, namespace, transaction: @id
246-
Dataset::AggregateQueryResults.from_grpc aggregate_query_results
260+
261+
aggregate_query_results = service.run_aggregation_query aggregate_query.to_grpc,
262+
namespace,
263+
transaction: @id,
264+
explain_options: explain_options
265+
266+
Dataset::AggregateQueryResults.from_grpc aggregate_query_results, explain_options
247267
end
248268

249269
##

google-cloud-datastore/lib/google/cloud/datastore/service.rb

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,15 @@ def run_query query, namespace = nil, consistency: nil, transaction: nil, read_t
9898
end
9999

100100
## Query for aggregates
101-
def run_aggregation_query query, namespace = nil, consistency: nil, transaction: nil, read_time: nil
101+
def run_aggregation_query query, namespace = nil, consistency: nil, transaction: nil, read_time: nil,
102+
explain_options: nil
103+
if explain_options
104+
explain_options = ::Gapic::Protobuf.coerce(
105+
explain_options,
106+
to: ::Google::Cloud::Datastore::V1::ExplainOptions
107+
)
108+
end
109+
102110
gql_query = nil
103111
if query.is_a? Google::Cloud::Datastore::V1::GqlQuery
104112
gql_query = query
@@ -115,7 +123,8 @@ def run_aggregation_query query, namespace = nil, consistency: nil, transaction:
115123
partition_id: partition_id,
116124
read_options: read_options,
117125
aggregation_query: query,
118-
gql_query: gql_query
126+
gql_query: gql_query,
127+
explain_options: explain_options
119128
end
120129

121130
##

0 commit comments

Comments
 (0)