#!/bin/bash # # Usage (to test): $ SMOKE=true bash scripts/bench.bash # # - `SMOKE`: Run benchmarks quickly but not representative # set -ex RESULTS_DIR=benchmark-run-data NEXMARK_RESULTS_DIR=$RESULTS_DIR/nexmark GALEN_RESULTS_DIR=$RESULTS_DIR/galen LDBC_RESULTS_DIR=$RESULTS_DIR/ldbc NEXMARK_CSV_FILE='nexmark_results.csv' NEXMARK_DRAM_CSV_FILE='dram_nexmark_results.csv' NEXMARK_PERSISTENCE_CSV_FILE='persistence_nexmark_results.csv' GALEN_CSV_FILE='galen_results.csv' LDBC_CSV_FILE='ldbc_results.csv' rm -f crates/dbsp/${GALEN_CSV_FILE} crates/dbsp/${LDBC_CSV_FILE} crates/nexmark/${NEXMARK_DRAM_CSV_FILE} rm -rf ${RESULTS_DIR} mkdir -p ${RESULTS_DIR} # Run nexmark benchmark MAX_EVENTS=100000000 if [ "$SMOKE" != "" ]; then MAX_EVENTS=1000000 fi if [ "$CLOUD" = "" ]; then GENERATORS=8 CORES=6 FILES=( "q0" "q1" "q2" "q3" "q4" "q5" "q6" "q7" "q8" "q9" "q12" "q13" "q14" "q15" "q16" "q17" "q18" "q19" "q20" "q21" "q22" ) for FILE in "${FILES[@]}" do cargo bench --bench nexmark -- --max-events=${MAX_EVENTS} --cpu-cores ${CORES} --num-event-generators ${GENERATORS} --source-buffer-size 10000 --input-batch-size 40000 --csv ${NEXMARK_CSV_FILE} --query $FILE done mkdir -p ${NEXMARK_RESULTS_DIR} mv crates/nexmark/${NEXMARK_CSV_FILE} $NEXMARK_RESULTS_DIR fi # Run SQL benchmarks # These require a running instance of redpanda (if they don't use nexmark connector) and pipeline-manager. KAFKA_BROKER=localhost:9092 FELDERA_API=http://localhost:8080 if [ "$CLOUD" != "" ]; then FELDERA_API=$API_URL KAFKA_BROKER='${secret:demo-bootstrap-servers}' CLOUD_OPTIONS='-O security.protocol=${secret:demo-security-protocol} -O ssl.ca.pem=${secret:demo-ssl-ca-pem} -O ssl.certificate.pem=${secret:demo-ssl-certificate-pem} -O ssl.key.pem=${secret:demo-ssl-key-pem} -O ssl.key.password=${secret:demo-ssl-key-password} -O ssl.endpoint.identification.algorithm=${secret:demo-ssl-endpoint-identification-algorithm} -O sasl.mechanism=${secret:demo-sasl-mechanism} -O sasl.username=${secret:demo-sasl-username} -O sasl.password=${secret:demo-sasl-password} --api-key '${API_KEY} fi sql_benchmark() { mkdir -p $RESULTS_DIR/$name local csv=$1 metrics=$2; shift; shift python3 benchmark/feldera-sql/run.py \ --api-url $FELDERA_API \ --events $MAX_EVENTS \ -O bootstrap.servers=$KAFKA_BROKER \ --csv "$RESULTS_DIR/$name/$csv" \ --csv-metrics "$RESULTS_DIR/$name/$metrics" \ --metrics-interval 1 \ --poller-threads 10 \ "$@" } DIR="benchmark/feldera-sql/benchmarks/" if [[ -z "$CLOUD" ]]; then TESTS="nexmark" else TESTS="nexmark" fi for test in ${TESTS}; do if [[ -e ${test}/generate.bash ]]; then rpk topic -X brokers=$KAFKA_BROKER delete -r '.*' ${test}/generate.bash fi name=$(basename $test) sql_benchmark "sql_${name}_results.csv" "sql_${name}_metrics.csv" --folder benchmarks/${name} # We are currently skipping running storage benchmarks on cloud until we can get a # better disk for cloud. if [ "$CLOUD" = "" ]; then sql_benchmark "sql_storage_${name}_results.csv" "sql_storage_${name}_metrics.csv" --storage --folder benchmarks/${name} fi done if [ "$CLOUD" = "" ]; then # Run galen benchmark cargo bench --bench galen -- --workers 10 --csv ${GALEN_CSV_FILE} mkdir -p ${GALEN_RESULTS_DIR} mv crates/dbsp/${GALEN_CSV_FILE} ${GALEN_RESULTS_DIR} # Run ldbc benchmarks DATASET_SMALL='graph500-22' DATASET_MEDIUM='datagen-8_4-fb' if [ "$SMOKE" != "" ]; then DATASET_SMALL='wiki-Talk' DATASET_MEDIUM='kgs' fi # Run nexmark benchmark with persistence MAX_EVENTS=3000000 CORES=1 if [ "$SMOKE" != "" ]; then MAX_EVENTS=100000 fi cargo bench --bench nexmark -- --max-events=${MAX_EVENTS} --cpu-cores ${CORES} --num-event-generators 6 --source-buffer-size 10000 --input-batch-size 40000 --csv ${NEXMARK_DRAM_CSV_FILE} mv crates/nexmark/${NEXMARK_DRAM_CSV_FILE} $NEXMARK_RESULTS_DIR fi