Skip to content

Commit 7787966

Browse files
committed
dbsp_nexmark: Fix event rate.
Nexmark query q15 ran very slowly as the number of events increased past about 10M, causing runs of 100M to take multiple minutes when most of the other queries ran much faster. We traced this back to the `first_event_rate` option, which controlled the number of events emitted for each simulated second, with a default value of 10,000,000. Because q15 aggregates by day, this meant that all 100M generated events would be in the same aggregation group. We looked at what the Flink generator does. It also has a `first_event_rate` setting, which defaults to 10,000. If we interpret this as a rate per second (as Feldera interpreted this setting), then this would still put all of the events in one day because 100,000,000 / 10,000 = 10,000, which is less than the 86,400 seconds in one day. However, despite the name, the Flink setting is the interval between events in microseconds, which means that its default was actually 100 events per second, or 8.64 million events/day, which is significantly less than the 100 million events in the simulation. This commit changes the Feldera Nexmark generator to default to the same inter-event interval. I thought that it was too confusing to retain this setting under the same name, so I changed its name to `event_interval`, denominated in milliseconds. This speeds up q15 for me from about 180s to about 36s, a 5x speedup. Signed-off-by: Ben Pfaff <blp@feldera.com>
1 parent f2597bd commit 7787966

File tree

6 files changed

+13
-37
lines changed

6 files changed

+13
-37
lines changed

benchmark/run-nexmark.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,6 @@ case $runner:$language in
462462
esac
463463
for query in $queries; do
464464
run_log $CARGO bench --bench nexmark -- \
465-
--first-event-rate=10000000 \
466465
--max-events=$events \
467466
--cpu-cores $cores \
468467
--num-event-generators $cores \

crates/nexmark/src/config.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ pub struct GeneratorOptions {
9191
#[clap(long, default_value = "46", env = "NEXMARK_BID_PROPORTION")]
9292
pub bid_proportion: usize,
9393

94-
/// Initial overall event rate (per second).
95-
#[clap(long, default_value = "10000000", env = "NEXMARK_FIRST_EVENT_RATE")]
96-
pub first_event_rate: usize,
94+
/// Time between events, in milliseconds.
95+
#[clap(long, default_value = "10", env = "NEXMARK_EVENT_INTERVAL")]
96+
pub event_interval: usize,
9797

9898
/// Ratio of bids to 'hot' auctions compared to all other auctions.
9999
#[clap(long, default_value = "2", env = "NEXMARK_HOT_AUCTION_RATIO")]
@@ -170,7 +170,7 @@ impl Default for GeneratorOptions {
170170
avg_bid_byte_size: 100,
171171
avg_person_byte_size: 200,
172172
bid_proportion: 46,
173-
first_event_rate: 10_000_000,
173+
event_interval: 10,
174174
hot_auction_ratio: 2,
175175
hot_bidders_ratio: 4,
176176
hot_sellers_ratio: 4,

crates/nexmark/src/generator/config.rs

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,6 @@ pub struct Config {
3535
/// generating different event numbers (using `first_event_number +
3636
/// events_count_so_far*num_generators`, for example).
3737
pub first_event_number: usize,
38-
39-
/// Delay between events, in microseconds. If the array has more than one
40-
/// entry then the rate is changed every {@link #stepLengthSec}, and wraps
41-
/// around.
42-
pub inter_event_delay_us: [f64; 1],
4338
}
4439

4540
/// Implementation of config methods based on the Java implementation at
@@ -51,16 +46,6 @@ impl Config {
5146
first_event_id: u64,
5247
first_event_number: usize,
5348
) -> Config {
54-
// The inter_event_delay is calculated as the number (or fraction) of
55-
// micro seconds that pass between each event. Unlike the Java
56-
// implementation, this is not dependent on the number of generators
57-
// because each generator here returns interleaved events. For example,
58-
// with 3 generators, the first generator emits events based on the
59-
// event numbers 0, 3 and 6 etc., where as the Java implementation uses
60-
// 0, 1 and 2 locally for each generator and so adds a factor of
61-
// num_generators.
62-
let inter_event_delay = 1_000_000.0 / (options.first_event_rate as f64);
63-
6449
// Original Java implementation says:
6550
// "Scale maximum down to avoid overflow in getEstimatedSizeBytes."
6651
// but including to ensure similar behavior.
@@ -84,7 +69,6 @@ impl Config {
8469
first_event_id,
8570
max_events,
8671
first_event_number,
87-
inter_event_delay_us: [inter_event_delay],
8872
}
8973
}
9074

@@ -115,7 +99,7 @@ impl Config {
11599
// What timestamp should the event with `eventNumber` have for this
116100
// generator?
117101
pub fn timestamp_for_event(&self, event_number: u64) -> u64 {
118-
self.base_time + (self.inter_event_delay_us[0] * event_number as f64) as u64 / 1000
102+
self.base_time + self.options.event_interval as u64 * event_number
119103
}
120104
}
121105

@@ -264,13 +248,11 @@ pub mod tests {
264248
assert_eq!(config.next_adjusted_event_number(num_events), expected);
265249
}
266250

267-
// With the default first event rate of 10_000_000 events per second there
268-
// is 1_000_000 µs/s / 10_000_000 events/s = 0.1µs / event, so the timestamp
269-
// should increase by 0.1µs, or 0.0001ms for each event.
251+
// The default event interval is 10 ms.
270252
#[rstest]
271-
#[case(10_000, 1)]
272-
#[case(20_000, 2)]
273-
#[case(50_000, 5)]
253+
#[case(1, 10)]
254+
#[case(2, 20)]
255+
#[case(5, 50)]
274256
fn test_timestamp_for_event_single_generator(#[case] event_number: u64, #[case] expected: u64) {
275257
assert_eq!(
276258
Config::default().timestamp_for_event(event_number),

crates/nexmark/src/generator/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ pub mod tests {
295295
"got: {:?}, want: Event::NewAuction(_)",
296296
next_event.event
297297
);
298-
assert_eq!(next_event.event_timestamp, event_num / 10);
298+
assert_eq!(next_event.event_timestamp, event_num * 10);
299299
}
300300

301301
// And the rest of the events in the first epoch are bids.

crates/nexmark/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,6 @@ pub mod tests {
297297
#[test]
298298
fn test_source_with_multiple_generators() {
299299
let options = GeneratorOptions {
300-
first_event_rate: 1_000_000,
301300
num_event_generators: 3,
302301
max_events: 10,
303302
..GeneratorOptions::default()

scripts/bench.bash

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,16 @@ rm -rf ${RESULTS_DIR}
2121
mkdir -p ${RESULTS_DIR}
2222

2323
# Run nexmark benchmark
24-
EVENT_RATE=10000000
2524
MAX_EVENTS=100000000
2625
GENERATORS=8
2726
CORES=6
2827
if [ "$SMOKE" != "" ]; then
29-
EVENT_RATE=10000000
3028
MAX_EVENTS=1000000
3129
fi
3230

3331
FILES=( "q0" "q1" "q2" "q3" "q4" "q5" "q6" "q7" "q8" "q9" "q12" "q13" "q14" "q15" "q16" "q17" "q18" "q19" "q20" "q21" "q22" )
3432
for FILE in "${FILES[@]}"
35-
do cargo bench --bench nexmark -- --first-event-rate=${EVENT_RATE} --max-events=${MAX_EVENTS} --cpu-cores ${CORES} --num-event-generators ${GENERATORS} --source-buffer-size 10000 --input-batch-size 40000 --csv ${NEXMARK_CSV_FILE} --query $FILE
33+
do cargo bench --bench nexmark -- --max-events=${MAX_EVENTS} --cpu-cores ${CORES} --num-event-generators ${GENERATORS} --source-buffer-size 10000 --input-batch-size 40000 --csv ${NEXMARK_CSV_FILE} --query $FILE
3634
done
3735
mkdir -p ${NEXMARK_RESULTS_DIR}
3836
mv crates/nexmark/${NEXMARK_CSV_FILE} $NEXMARK_RESULTS_DIR
@@ -90,14 +88,12 @@ fi
9088
#mv crates/dbsp/${LDBC_CSV_FILE} ${LDBC_RESULTS_DIR}
9189

9290
# Run nexmark benchmark with persistence
93-
EVENT_RATE=5000000
9491
MAX_EVENTS=3000000
9592
CORES=1
9693
if [ "$SMOKE" != "" ]; then
97-
EVENT_RATE=5000000
9894
MAX_EVENTS=100000
9995
fi
100-
cargo bench --bench nexmark -- --first-event-rate=${EVENT_RATE} --max-events=${MAX_EVENTS} --cpu-cores ${CORES} --num-event-generators 6 --source-buffer-size 10000 --input-batch-size 40000 --csv ${NEXMARK_DRAM_CSV_FILE}
96+
cargo bench --bench nexmark -- --max-events=${MAX_EVENTS} --cpu-cores ${CORES} --num-event-generators 6 --source-buffer-size 10000 --input-batch-size 40000 --csv ${NEXMARK_DRAM_CSV_FILE}
10197
mv crates/nexmark/${NEXMARK_DRAM_CSV_FILE} $NEXMARK_RESULTS_DIR
102-
#cargo bench --bench nexmark --features persistence -- --first-event-rate=${EVENT_RATE} --max-events=${MAX_EVENTS} --cpu-cores ${CORES} --num-event-generators 6 --source-buffer-size 10000 --input-batch-size 40000 --csv ${NEXMARK_PERSISTENCE_CSV_FILE}
98+
#cargo bench --bench nexmark --features persistence -- --max-events=${MAX_EVENTS} --cpu-cores ${CORES} --num-event-generators 6 --source-buffer-size 10000 --input-batch-size 40000 --csv ${NEXMARK_PERSISTENCE_CSV_FILE}
10399
#mv crates/nexmark/${NEXMARK_PERSISTENCE_CSV_FILE} $NEXMARK_RESULTS_DIR

0 commit comments

Comments
 (0)