diff --git a/.coveralls.yml b/.coveralls.yml deleted file mode 100644 index 91600595..00000000 --- a/.coveralls.yml +++ /dev/null @@ -1 +0,0 @@ -service_name: travis-ci diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..a9bfb308 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,47 @@ +--- +name: CI + +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + test: + name: Ruby ${{ matrix.ruby-version }} + runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + ruby-version: + - "3.2" + - "3.3" + - "3.4" + - "4.0" + - "jruby-9.4" + - "jruby-10.0.2.0" + + steps: + - name: Check out + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Set up Ruby ${{ matrix.ruby-version }} + uses: ruby/setup-ruby@afeafc3d1ab54a631816aba4c914a0081c12ff2f # v1.310.0 + with: + ruby-version: ${{ matrix.ruby-version }} + bundler-cache: true + cache-version: ${{ matrix.ruby-version }} + + - name: Run specs + run: bundle exec rake diff --git a/.rspec b/.rspec new file mode 100644 index 00000000..c99d2e73 --- /dev/null +++ b/.rspec @@ -0,0 +1 @@ +--require spec_helper diff --git a/.rubocop.yml b/.rubocop.yml deleted file mode 100644 index 19201ee3..00000000 --- a/.rubocop.yml +++ /dev/null @@ -1,15 +0,0 @@ -AllCops: - Exclude: - - lib/prometheus/client/version.rb - -AlignHash: - EnforcedHashRocketStyle: table - -Style/TrailingCommaInArguments: - EnforcedStyleForMultiline: comma - -Style/TrailingCommaInLiteral: - EnforcedStyleForMultiline: comma - -Metrics/AbcSize: - Max: 18 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9d4a92a2..00000000 --- a/.travis.yml +++ /dev/null @@ -1,11 +0,0 @@ -sudo: false -language: ruby -# Needed for rainbow 2.2.1 / rubygems 
issues. -before_install: - - | - if [[ "$(ruby -e 'puts RUBY_VERSION')" != 1.* ]]; then gem update --system; fi -rvm: - - 1.9.3 - - 2.3.3 - - 2.4.0 - - jruby-9.1.5.0 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..f2d14fe4 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,317 @@ +# CHANGELOG + +# Unreleased changes + +_None outstanding_ + +# 4.2.5 / 2025-07-05 + +_**Codename:** Surprise dependency_ + +## Small improvements + +- [#324](https://github.com/prometheus/client_ruby/pull/324) Do not use benchmark gem + in production: + Ruby 3.5 is moving `Benchmark` out of default gems, so we need to explicitly install + it from Rubygems in dev. It turned out we were also using it in prod code, so this PR + replaced that usage with a small method that makes calls to + `Process.clock_gettime(Process::CLOCK_MONOTONIC)`. + +# 4.2.4 / 2025-02-02 + +_**Codename:** FOSDEM 25th Anniversary Edition_ + +## Small improvements + +- [#316](https://github.com/prometheus/client_ruby/pull/316) Use binary search for + histogram buckets: + This change speeds up observations in histogram metrics by using a binary search + rather than a sequential search through the bucket array. This is possible because + we enforce that histogram buckets are sorted at initialization. + +# 4.2.3 / 2024-06-28 + +_**Codename:** Now with 25% fewer test dependencies!_ + +## Small improvements + +- [#308](https://github.com/prometheus/client_ruby/pull/308) Declare base64 gem + dependency, ready for Ruby 3.4: + Ruby 3.4 and above will require an explicit dependency on `base64` as it will no + longer be included with CRuby. This gets us ready for that change. 
+ +# 4.2.2 / 2023-10-31 + +_**Codename:** 🎃🦇 Spooky type conversion 🦇🎃_ + +## Bug fixes + +- [#296](https://github.com/prometheus/client_ruby/pull/296) Stringify non-string job + names in push client: + Previously, an error would be raised if you passed a symbol as the job name, which + is inconsistent with how we handle label values in the rest of the client. This + change converts the job name to a string before trying to use it. +- [#297](https://github.com/prometheus/client_ruby/pull/297) Stringify grouping key + values in push client: + Same thing as #296, but for grouping key values. + +# 4.2.1 / 2023-08-04 + +_**Codename:** If a bug falls in the forest_ + +## Bug fixes + +- [#291](https://github.com/prometheus/client_ruby/pull/291) Handle `/` in job name in + `Prometheus::Client::Push`: + Previously, if you included a `/` in your job name when using the Pushgateway client, + you'd get a `400` error back as we didn't encode it properly. We now base64 encode it + per the Pushgateway spec. + + It's possible that nobody has hit this bug (`/` is fairly unlikely to appear in a job + name) or that the error message (a `400` from Pushgateway with a complaint about an + odd number of path components) didn't make it look like a bug in the Ruby client. + Either way, this hopefully brings us fully in line with the spec! + +# 4.2.0 / 2023-07-25 + +_**Codename:** Funny number_ + +## Small improvements + +- [#287](https://github.com/prometheus/client_ruby/pull/287) Add `Gauge#set_to_current_time`: + Does what you'd expect - sets a gauge to the current unix epoch timestamp (including + fractional seconds). + + Other client libraries have this and it's about time we did! 
+ +# 4.1.0 / 2023-03-20 + +_**Codename:** They finally made a point release_ + +## Small improvements + +- [#264](https://github.com/prometheus/client_ruby/pull/264) Add JRuby 9.3 to build matrix: + JRuby 9.3 was released, and added as an officially supported version +- [#273](https://github.com/prometheus/client_ruby/pull/273) Add Ruby 3.2 to build matrix: + Ruby 3.2 was released, and added as an officially supported version +- [#280](https://github.com/prometheus/client_ruby/pull/280) Optimize incrementing values + in DirectFileStore adapter: + There were some expensive method calls being made multiple times when they didn't need + to be for simple increments. This PR introduces a specialised implementation for that + case. +- [#277](https://github.com/prometheus/client_ruby/pull/277) Allow use of `instance` and + `job` labels: + It's now possible to set the `instance` and `job` labels on metrics, where previously + they had been reserved. + + The reason we'd reserved them is that Prometheus automatically generates values + for them when it scrapes a target, and we didn't want to cause a collision. It + turns out Prometheus handles that collision just fine. + + By default, Prometheus server will prepend `exported_` to them if they're present + in the scraped data (i.e. `exported_instance` and `exported_job`). Users can set + `honor_labels` in their Prometheus server config if they prefer the labels from + the scraped metric data to take precedence over the labels generated by the + server. 
+ +## Bug fixes + +- [#268](https://github.com/prometheus/client_ruby/pull/268) Use lowercase response headers + in Rack example: + Rack 3.0.0 started requiring this for compatibility with HTTP/2 +- [#271](https://github.com/prometheus/client_ruby/pull/271) Use lowercase for HTTP headers + in middleware: + Fixes the same issue from above in our middleware +- [#270](https://github.com/prometheus/client_ruby/pull/270) Small compatibility fixes in + Rack example: + Apple have taken port 5000 for AirPlay, so we had to move away from it. Go has changed + how you install binaries, so we updated those instructions too. + +# 4.0.0 / 2022-03-27 + +_**Codename:** The "barely a release" release_ + +This version contains a single - sadly breaking - change. + +- [#251](https://github.com/prometheus/client_ruby/pull/251) Remove framework-specific + route detection from collector middleware: + In 3.0.0 [we shipped](https://github.com/prometheus/client_ruby/issues/245) a feature + that attempted to use framework-specific information to determine the path of the + request in `Prometheus::Middleware::Collector`. + + Sadly, we found out after shipping it that it was prone to multiple issues. We spent + a decent amount of time looking into them in depth, and came to the conclusion that + there wasn't any reasonable way to fix them - the issues are inherent to the feature. + + For a full, detailed write-up of our investigation, see [this + comment](https://github.com/prometheus/client_ruby/issues/249#issuecomment-1061317511). + + Almost all users will be unaffected by this change, but it is breaking per the + definition we've used for previous releases, so we've erred on the side of caution and + bumped the major version to communicate that. + + If you use Sinatra or Grape with the `Prometheus::Middleware::Collector`, you will + notice different `path` labels being generated. If not, this release will change + nothing for you. 
+ + If you want the behaviour from 3.0.0 - or any custom path label generation you'd + prefer - we've updated [our collector middleware + documentation](https://github.com/prometheus/client_ruby/blob/v4.0.0/examples/rack/README.md#collector). + + **This may be a breaking change**. Labels may change in existing metrics. + +# 3.0.0 / 2022-02-05 + +This new major version includes some breaking changes. They should be reasonably easy to +adapt to, but please read the details below: + +## Breaking changes + +Please refer to [UPGRADING.md](UPGRADING.md) for details on upgrading from versions +`< 3.0.0`. + +- [#206](https://github.com/prometheus/client_ruby/pull/206) Include `SCRIPT_NAME` when + determining path in Collector: + When determining the path for a request, `Rack::Request` prefixes the + `SCRIPT_NAME`. This was a problem with our code when using mountable engines, + where the engine part of the path gets lost. This patch fixes that to include `SCRIPT_NAME` as part of the path. + + **This may be a breaking change**. Labels may change in existing metrics. + +- [#245](https://github.com/prometheus/client_ruby/pull/245) Use framework-specific route + info and handle consecutive path segments containing IDs in Collector: + When generating the `path` label, we now use framework-specific information from the + request environment to produce better labels for apps written in the Sinatra and Grape + frameworks. Rails doesn't provide the information we need to do the same there, but we + hope to get such functionality added in a future release. + + Our framework-agnostic fallback (which Rails apps will use) has also been improved. It + now supports stripping IDs/UUIDs from consecutive path segments, where previously only + alternating segments would be correctly stripped. + + **This may be a breaking change**. Labels may change in existing metrics. + +- [#209](https://github.com/prometheus/client_ruby/pull/209) Automatically initialize metrics + without labels. 
+ Following the [Prometheus Best Practices](https://prometheus.io/docs/practices/instrumentation/#avoid-missing-metrics), + client libraries are expected to automatically export a 0 value when declaring a metric + that has no labels. + We missed this recommendation in the past, and this wasn't happening. Starting from this + version, all metrics without labels will be immediately exported with `0` value, without + need for an increment / observation. + + **This may be a breaking change**. Depending on your particular metrics, this may + result in a significant increase to the number of time series being exported. We + recommend you test this and make sure it doesn't cause problems. + +- [#220](https://github.com/prometheus/client_ruby/pull/220) and [#234](https://github.com/prometheus/client_ruby/pull/234) + Improvements to Pushgateway client: + - The `job` parameter is now mandatory when instantiating `Prometheus::Client::Push` + and will raise `ArgumentError` if not specified, or if `nil` or an empty string/object + are passed. + - The `Prometheus::Client::Push` initializer now takes keyword arguments. + - You can now pass a set of arbitrary key-value pairs (`grouping_key`) to uniquely + identify a job instance, rather than just an `instance` label. + - Fixed URI escaping of spaces in the path when pushing to Pushgateway. In the + past, spaces were being encoded as `+` instead of `%20`, which resulted in + incorrect label values in the grouping key. + - We now correctly encode special values in `job` and `grouping_key` that can't + ordinarily be represented in the URL. This means you can have a forward slash (`/`) + in a grouping key label value, or set one to the empty string. + - We validate that labels in your `grouping_key` don't clash with labels in the + metrics being submitted, and raise an error if they do. + - We raise an error on a non-2xx HTTP response from the Pushgateway. + + **This is a breaking change if you use Pushgateway**. 
You will need to update your + code to pass keyword arguments to the `Prometheus::Client::Push` initializer. + +- [#242](https://github.com/prometheus/client_ruby/pull/242) Move HTTP Basic + Authentication credentials in `Prometheus::Client::Push` to separate method call: + In earlier versions, these were provided as part of the `gateway` URL, which had some + significant downsides when it came to special characters in usernames/passwords. + + These credentials must now be passed via an explicit call to `basic_auth` on an + instance of `Prometheus::Client::Push`. + + **This is a breaking change if you use Pushgateway with HTTP Basic Authentication**. + You will need to update your code to call this method instead of including the + credentials in the URL. + +- [#236](https://github.com/prometheus/client_ruby/pull/236) Validate label names: + Previously, we didn't validate that label names match the character set required by + Prometheus (`[a-zA-Z_][a-zA-Z0-9_]*`). As of this release, we raise an error if a + metric is initialized with label names that don't match that regex. + + **This is a breaking change**. While it's likely that Prometheus server would have + been failing to scrape metrics with such labels anyway, declaring them will now cause + an error to be raised in your code. + +- [#237](https://github.com/prometheus/client_ruby/pull/237) Drop support for old Ruby versions: + Ruby versions below 2.6 are no longer supported upstream, and `client_ruby` is no + longer tested against them. + + **This may be a breaking change**. We no longer make efforts to ensure that + `client_ruby` works on older versions, and any issues filed specific to them will be + considered invalid. + +## New Features + +- [#199](https://github.com/prometheus/client_ruby/pull/199) Add `port` filtering option + to Exporter middleware. 
+ You can now specify a `port` when adding `Prometheus::Middleware::Exporter` to your + middleware chain, and metrics will only be exported if the `/metrics` request comes + through that port. + +- [#222](https://github.com/prometheus/client_ruby/pull/222) Enable configuring `Net::HTTP` + timeouts for Pushgateway calls. + You can now specify `open_timeout` and `read_timeout` when instantiating + `Prometheus::Client::Push`, to control these timeouts. + +## Code improvements and bug fixes + +- [#201](https://github.com/prometheus/client_ruby/pull/201) Make all registry methods + thread safe. + +- [#227](https://github.com/prometheus/client_ruby/pull/227) Fix `with_labels` bug that + made it completely non-functional, and occasionally resulted in `DirectFileStore` file + corruption. + + +# 2.1.0 / 2020-06-29 + +## New Features + +- [#177](https://github.com/prometheus/client_ruby/pull/177) Added Histogram helpers to + generate linear and exponential buckets, as the Client Library Guidelines recommend. +- [#172](https://github.com/prometheus/client_ruby/pull/172) Added :most_recent + aggregation for gauges on DirectFileStore. + +## Code improvements + +- Fixed several warnings that started firing in the latest versions of Ruby. + +# 2.0.0 / 2020-01-28 + +## Breaking changes + +- [#176](https://github.com/prometheus/client_ruby/pull/176) BUGFIX: Values observed at + the upper limit of a histogram bucket are now counted in that bucket, not the following + one. This is unlikely to break functionality and you probably don't need to make code + changes, but it may break tests. + +## New features + +- [#156](https://github.com/prometheus/client_ruby/pull/156) Added `init_label_set` method, + which allows declaration of time series on app startup, starting at 0. + + +# 1.0.0 / 2019-11-04 + +## Breaking changes + +- This release saw a number of breaking changes to better comply with latest best practices + for naming and client behaviour. 
Please refer to [UPGRADING.md](UPGRADING.md) for details + if upgrading from `<= 0.9`. + +- The main feature of this release was adding support for multi-process environments such + as pre-fork servers (Unicorn, Puma). diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..d325872b --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,3 @@ +# Prometheus Community Code of Conduct + +Prometheus follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md). diff --git a/COMPATIBILITY.md b/COMPATIBILITY.md new file mode 100644 index 00000000..dea01a37 --- /dev/null +++ b/COMPATIBILITY.md @@ -0,0 +1,25 @@ +# Compatibility + +We aim for the Prometheus Ruby client to be compatible with all supported +versions of Ruby, across the MRI and JRuby platforms. + +Any Ruby version that has not received an End-of-Life notice (e.g. +[this notice for Ruby 2.1](https://www.ruby-lang.org/en/news/2017/04/01/support-of-ruby-2-1-has-ended/)) +is supported. + +To ensure we're meeting these guidelines, we test the client against all +supported versions, as specified in our [build matrix](.github/workflows/ci.yml). + +# Deprecation + +Whenever a version of Ruby falls out of support we will mirror that change in +the Prometheus Ruby client by updating the build matrix and releasing a new +major version. + +At that point we will close any issues that affect only the unsupported version, +and may choose to remove any workarounds from the code that are only necessary +for the unsupported version. + +The major version bump signals the break in compatibility. If the client happens +to work on unsupported versions of Ruby this is by chance, and we wouldn't +consider that version to be officially supported. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3bbe74bc..e35531c6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,11 +2,11 @@ Prometheus uses GitHub to manage reviews of pull requests. 
-* If you have a trivial fix or improvement, go ahead and create a pull request, - addressing (with `@...`) the maintainer of this repository (see - [MAINTAINERS.md](MAINTAINERS.md)) in the description of the pull request. +* If you have a trivial fix or improvement, go ahead and create a pull request. * If you plan to do something more involved, first discuss your ideas on our [mailing list](https://groups.google.com/forum/?fromgroups#!forum/prometheus-developers). This will avoid unnecessary work and surely give you and us a good deal of inspiration. + +* Be sure to sign your commits off (per the [DCO](https://github.com/probot/dco#how-it-works)) by including `--signoff` as a parameter to your `git commit` commands. diff --git a/Gemfile b/Gemfile index fdddf886..8b6fee68 100644 --- a/Gemfile +++ b/Gemfile @@ -2,18 +2,11 @@ source 'https://rubygems.org' gemspec -def ruby_version?(constraint) - Gem::Dependency.new('', constraint).match?('', RUBY_VERSION) -end - group :test do - gem 'coveralls' - gem 'json', '< 2.0' if ruby_version?('< 2.0') - gem 'rack', '< 2.0' if ruby_version?('< 2.2.2') + gem 'simplecov' + gem 'json' + gem 'rack' gem 'rack-test' gem 'rake' gem 'rspec' - gem 'rubocop', '< 0.42' - gem 'term-ansicolor', '< 1.4' if ruby_version?('< 2.0') - gem 'tins', '< 1.7' if ruby_version?('< 2.0') end diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 35993c41..7b058214 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -1 +1,3 @@ -* Tobias Schmidt +* Ben Kochie +* Chris Sinjakli +* Daniel Magliola diff --git a/README.md b/README.md index b16f44dc..e60f7de9 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,17 @@ through a HTTP interface. Intended to be used together with a [Prometheus server][1]. 
[![Gem Version][4]](http://badge.fury.io/rb/prometheus-client) -[![Build Status][3]](http://travis-ci.org/prometheus/client_ruby) -[![Dependency Status][5]](https://gemnasium.com/prometheus/client_ruby) -[![Code Climate][6]](https://codeclimate.com/github/prometheus/client_ruby) -[![Coverage Status][7]](https://coveralls.io/r/prometheus/client_ruby) +[![Build Status][3]](https://github.com/prometheus/client_ruby/actions/workflows/ci.yml) ## Usage +### Installation + +For a global installation run `gem install prometheus-client`. + +If you're using [Bundler](https://bundler.io/) add `gem "prometheus-client"` to your `Gemfile`. +Make sure to run `bundle install` afterwards. + ### Overview ```ruby @@ -21,12 +25,12 @@ require 'prometheus/client' prometheus = Prometheus::Client.registry # create a new counter metric -http_requests = Prometheus::Client::Counter.new(:http_requests, 'A counter of HTTP requests made') +http_requests = Prometheus::Client::Counter.new(:http_requests, docstring: 'A counter of HTTP requests made') # register the metric prometheus.register(http_requests) # equivalent helper function -http_requests = prometheus.counter(:http_requests, 'A counter of HTTP requests made') +http_requests = prometheus.counter(:http_requests, docstring: 'A counter of HTTP requests made') # start using the counter http_requests.increment @@ -52,11 +56,11 @@ use Rack::Deflater use Prometheus::Middleware::Collector use Prometheus::Middleware::Exporter -run ->(_) { [200, {'Content-Type' => 'text/html'}, ['OK']] } +run ->(_) { [200, {'content-type' => 'text/html'}, ['OK']] } ``` Start the server and have a look at the metrics endpoint: -[http://localhost:5000/metrics](http://localhost:5000/metrics). +[http://localhost:5123/metrics](http://localhost:5123/metrics). For further instructions and other scripts to get started, have a look at the integrated [example application](examples/rack/README.md). 
@@ -66,29 +70,69 @@ integrated [example application](examples/rack/README.md). The Ruby client can also be used to push its collected metrics to a [Pushgateway][8]. This comes in handy with batch jobs or in other scenarios where it's not possible or feasible to let a Prometheus server scrape a Ruby -process. +process. TLS and HTTP basic authentication are supported. ```ruby require 'prometheus/client' require 'prometheus/client/push' -prometheus = Prometheus::Client.registry +registry = Prometheus::Client.registry # ... register some metrics, set/increment/observe/etc. their values # push the registry state to the default gateway -Prometheus::Client::Push.new('my-batch-job').add(prometheus) +Prometheus::Client::Push.new(job: 'my-batch-job').add(registry) -# optional: specify the instance name (instead of IP) and gateway +# optional: specify a grouping key that uniquely identifies a job instance, and gateway. +# +# Note: the labels you use in the grouping key must not conflict with labels set on the +# metrics being pushed. If they do, an error will be raised. Prometheus::Client::Push.new( - 'my-job', 'instance-name', 'http://example.domain:1234').add(prometheus) + job: 'my-batch-job', + gateway: 'https://example.domain:1234', + grouping_key: { instance: 'some-instance', extra_key: 'foobar' } +).add(registry) -# If you want to replace any previously pushed metrics for a given instance, +# If you want to replace any previously pushed metrics for a given grouping key, # use the #replace method. -Prometheus::Client::Push.new('my-batch-job', 'instance').replace(prometheus) - -# If you want to delete all previously pushed metrics for a given instance, +# +# Unlike #add, this will completely replace the metrics under the specified grouping key +# (i.e. anything currently present in the pushgateway for the specified grouping key, but +# not present in the registry for that grouping key will be removed). 
+# +# See https://github.com/prometheus/pushgateway#put-method for a full explanation. +Prometheus::Client::Push.new(job: 'my-batch-job').replace(registry) + +# If you want to delete all previously pushed metrics for a given grouping key, # use the #delete method. -Prometheus::Client::Push.new('my-batch-job', 'instance').delete +Prometheus::Client::Push.new(job: 'my-batch-job').delete +``` + +#### Basic authentication + +By design, `Prometheus::Client::Push` doesn't read credentials for HTTP basic +authentication when they are passed in via the gateway URL using the +`http://user:password@example.com:9091` syntax, and will in fact raise an error if they're +supplied that way. + +The reason for this is that when using that syntax, the username and password +have to follow the usual rules for URL encoding of characters [per RFC +3986](https://datatracker.ietf.org/doc/html/rfc3986#section-2.1). + +Rather than place the burden of correctly performing that encoding on users of this gem, +we decided to have a separate method for supplying HTTP basic authentication credentials, +with no requirement to URL encode the characters in them. + +Instead of passing credentials like this: + +```ruby +push = Prometheus::Client::Push.new(job: "my-job", gateway: "http://user:password@localhost:9091") +``` + +please pass them like this: + +```ruby +push = Prometheus::Client::Push.new(job: "my-job", gateway: "http://localhost:9091") +push.basic_auth("user", "password") ``` ## Metrics @@ -100,16 +144,16 @@ The following metric types are currently supported. Counter is a metric that exposes merely a sum or tally of things. 
```ruby -counter = Prometheus::Client::Counter.new(:service_requests_total, '...') +counter = Prometheus::Client::Counter.new(:service_requests_total, docstring: '...', labels: [:service]) # increment the counter for a given label set -counter.increment({ service: 'foo' }) +counter.increment(labels: { service: 'foo' }) # increment by a given value -counter.increment({ service: 'bar' }, 5) +counter.increment(by: 5, labels: { service: 'bar' }) # get current value for a given label set -counter.get({ service: 'bar' }) +counter.get(labels: { service: 'bar' }) # => 5 ``` @@ -119,21 +163,21 @@ Gauge is a metric that exposes merely an instantaneous value or some snapshot thereof. ```ruby -gauge = Prometheus::Client::Gauge.new(:room_temperature_celsius, '...') +gauge = Prometheus::Client::Gauge.new(:room_temperature_celsius, docstring: '...', labels: [:room]) # set a value -gauge.set({ room: 'kitchen' }, 21.534) +gauge.set(21.534, labels: { room: 'kitchen' }) # retrieve the current value for a given label set -gauge.get({ room: 'kitchen' }) +gauge.get(labels: { room: 'kitchen' }) # => 21.534 # increment the value (default is 1) -gauge.increment({ room: 'kitchen' }) +gauge.increment(labels: { room: 'kitchen' }) # => 22.534 # decrement the value by a given value -gauge.decrement({ room: 'kitchen' }, 5) +gauge.decrement(by: 5, labels: { room: 'kitchen' }) # => 17.534 ``` @@ -144,32 +188,311 @@ response sizes) and counts them in configurable buckets. It also provides a sum of all observed values. 
```ruby -histogram = Prometheus::Client::Histogram.new(:service_latency_seconds, '...') +histogram = Prometheus::Client::Histogram.new(:service_latency_seconds, docstring: '...', labels: [:service]) # record a value -histogram.observe({ service: 'users' }, Benchmark.realtime { service.call(arg) }) +histogram.observe(Benchmark.realtime { service.call(arg) }, labels: { service: 'users' }) # retrieve the current bucket values -histogram.get({ service: 'users' }) +histogram.get(labels: { service: 'users' }) # => { 0.005 => 3, 0.01 => 15, 0.025 => 18, ..., 2.5 => 42, 5 => 42, 10 = >42 } ``` +Histograms provide default buckets of `[0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]` + +You can specify your own buckets, either explicitly, or using the `Histogram.linear_buckets` +or `Histogram.exponential_buckets` methods to define regularly spaced buckets. + ### Summary Summary, similar to histograms, is an accumulator for samples. It captures Numeric data and provides an efficient percentile calculation mechanism. +For now, only `sum` and `total` (count of observations) are supported, no actual quantiles. + ```ruby -summary = Prometheus::Client::Summary.new(:service_latency_seconds, '...') +summary = Prometheus::Client::Summary.new(:service_latency_seconds, docstring: '...', labels: [:service]) # record a value -summary.observe({ service: 'database' }, Benchmark.realtime { service.call() }) +summary.observe(Benchmark.realtime { service.call() }, labels: { service: 'database' }) -# retrieve the current quantile values -summary.get({ service: 'database' }) -# => { 0.5 => 0.1233122, 0.9 => 3.4323, 0.99 => 5.3428231 } +# retrieve the current sum and total values +summary_value = summary.get(labels: { service: 'database' }) +summary_value['sum'] # => 123.45 +summary_value['count'] # => 100 ``` +## Labels + +All metrics can have labels, allowing grouping of related time series. + +Labels are an extremely powerful feature, but one that must be used with care. 
+Refer to the best practices on [naming](https://prometheus.io/docs/practices/naming/) and +[labels](https://prometheus.io/docs/practices/instrumentation/#use-labels). + +Most importantly, avoid labels that can have a large number of possible values (high +cardinality). For example, an HTTP Status Code is a good label. A User ID is **not**. + +Labels are specified optionally when updating metrics, as a hash of `label_name => value`. +Refer to [the Prometheus documentation](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels) +as to what's a valid `label_name`. + +In order for a metric to accept labels, their names must be specified when first initializing +the metric. Then, when the metric is updated, all the specified labels must be present. + +Example: + +```ruby +https_requests_total = Counter.new(:http_requests_total, docstring: '...', labels: [:service, :status_code]) + +# increment the counter for a given label set +https_requests_total.increment(labels: { service: "my_service", status_code: response.status_code }) +``` + +### Pre-set Label Values + +You can also "pre-set" some of these label values, if they'll always be the same, so you don't +need to specify them every time: + +```ruby +https_requests_total = Counter.new(:http_requests_total, + docstring: '...', + labels: [:service, :status_code], + preset_labels: { service: "my_service" }) + +# increment the counter for a given label set +https_requests_total.increment(labels: { status_code: response.status_code }) +``` + +### `with_labels` + +Similar to pre-setting labels, you can get a new instance of an existing metric object, +with a subset (or full set) of labels set, so that you can increment / observe the metric +without having to specify the labels for every call. + +Moreover, if all the labels the metric can take have been pre-set, validation of the labels +is done on the call to `with_labels`, and then skipped for each observation, which can +lead to performance improvements. 
If you are incrementing a counter in a fast loop, you +definitely want to be doing this. + + +Examples: + +**Pre-setting labels for ease of use:** + +```ruby +# in the metric definition: +records_processed_total = registry.counter(:records_processed_total, + docstring: '...', + labels: [:service, :component], + preset_labels: { service: "my_service" }) + +# in one-off calls, you'd specify the missing labels (component in this case) +records_processed_total.increment(labels: { component: 'a_component' }) + +# you can also have a "view" on this metric for a specific component where this label is +# pre-set: +class MyComponent + def metric + @metric ||= records_processed_total.with_labels(component: "my_component") + end + + def process + records.each do |record| + # process the record + metric.increment + end + end +end +``` + +### `init_label_set` + +The time series of a metric are not initialized until something happens. For counters, for example, this means that the time series do not exist until the counter is incremented for the first time. + +To get around this problem the client provides the `init_label_set` method that can be used to initialise the time series of a metric for a given label set. + +### Reserved labels + +The following labels are reserved by the client library, and attempting to use them in a +metric definition will result in a +`Prometheus::Client::LabelSetValidator::ReservedLabelError` being raised: + + - `:job` + - `:instance` + - `:pid` + +## Data Stores + +The data for all the metrics (the internal counters associated with each labelset) +is stored in a global Data Store object, rather than in the metric objects themselves. +(This "storage" is ephemeral, generally in-memory, it's not "long-term storage") + +The main reason to do this is that different applications may have different requirements +for their metrics storage. 
Applications running in pre-fork servers (like Unicorn, for +example), require a shared store between all the processes, to be able to report coherent +numbers. At the same time, other applications may not have this requirement but be very +sensitive to performance, and would prefer instead a simpler, faster store. + +By having a standardized and simple interface that metrics use to access this store, +we abstract away the details of storing the data from the specific needs of each metric. +This allows us to then simply swap around the stores based on the needs of different +applications, with no changes to the rest of the client. + +The client provides 3 built-in stores, but if none of these is ideal for your +requirements, you can easily make your own store and use that instead. More on this below. + +### Configuring which store to use. + +By default, the Client uses the `Synchronized` store, which is a simple, thread-safe Store +for single-process scenarios. + +If you need to use a different store, set it in the Client Config: + +```ruby +Prometheus::Client.config.data_store = Prometheus::Client::DataStores::DataStore.new(store_specific_params) +``` + +NOTE: You **must** make sure to set the `data_store` before initializing any metrics. +If using Rails, you probably want to set up your Data Store on `config/application.rb`, +or `config/environments/*`, both of which run before `config/initializers/*` + +Also note that `config.data_store` is set to an *instance* of a `DataStore`, not to the +class. This is so that the stores can receive parameters. Most of the built-in stores +don't require any, but `DirectFileStore` does, for example. + +When instantiating metrics, there is an optional `store_settings` attribute. This is used +to set up store-specific settings for each metric. For most stores, this is not used, but +for multi-process stores, this is used to specify how to aggregate the values of each +metric across multiple processes. 
For the most part, this is used for Gauges, to specify +whether you want to report the `SUM`, `MAX`, `MIN`, or `MOST_RECENT` value observed across +all processes. For almost all other cases, you'd leave the default (`SUM`). More on this +on the *Aggregation* section below. + +Custom stores may also accept extra parameters besides `:aggregation`. See the +documentation of each store for more details. + +### Built-in stores + +There are 3 built-in stores, with different trade-offs: + +- **Synchronized**: Default store. Thread safe, but not suitable for multi-process + scenarios (e.g. pre-fork servers, like Unicorn). Stores data in Hashes, with all accesses + protected by Mutexes. +- **SingleThreaded**: Fastest store, but only suitable for single-threaded scenarios. + This store does not make any effort to synchronize access to its internal hashes, so + it's absolutely not thread safe. +- **DirectFileStore**: Stores data in binary files, one file per process and per metric. + This is generally the recommended store to use with pre-fork servers and other + "multi-process" scenarios. There are some important caveats to using this store, so + please read on the section below. + +### `DirectFileStore` caveats and things to keep in mind + +Each metric gets a file for each process, and manages its contents by storing keys and +binary floats next to them, and updating the offsets of those Floats directly. When +exporting metrics, it will find all the files that apply to each metric, read them, +and aggregate them. + +**Aggregation of metrics**: Since there will be several files per metric (one per process), +these need to be aggregated to present a coherent view to Prometheus. Depending on your +use case, you may need to control how this works. When using this store, +each Metric allows you to specify an `:aggregation` setting, defining how +to aggregate the multiple possible values we can get for each labelset. 
By default, +Counters, Histograms and Summaries are `SUM`med, and Gauges report all their values (one +for each process), tagged with a `pid` label. You can also select `SUM`, `MAX`, `MIN`, or +`MOST_RECENT` for your gauges, depending on your use case. + +Please note that the `MOST_RECENT` aggregation only works for gauges, and it does not +allow the use of `increment` / `decrement`, you can only use `set`. + +**Memory Usage**: When scraped by Prometheus, this store will read all these files, get all +the values and aggregate them. We have noticed this can have a noticeable effect on memory +usage for your app. We recommend you test this in a realistic usage scenario to make sure +you won't hit any memory limits your app may have. + +**Resetting your metrics on each run**: You should also make sure that the directory where +you store your metric files (specified when initializing the `DirectFileStore`) is emptied +when your app starts. Otherwise, each app run will continue exporting the metrics from the +previous run. + +If you have this issue, one way to do this is to run code similar to this as part of your +initialization: + +```ruby +Dir["#{app_path}/tmp/prometheus/*.bin"].each do |file_path| + File.unlink(file_path) +end +``` + +If you are running in pre-fork servers (such as Unicorn or Puma with multiple processes), +make sure you do this **before** the server forks. Otherwise, each child process may delete +files created by other processes on *this* run, instead of deleting old files. + +**Declare metrics before fork**: As well as deleting files before your process forks, you +should make sure to declare your metrics before forking too. Because the metric registry +is held in memory, any metrics declared after forking will only be present in child +processes where the code declaring them ran, and as a result may not be consistently +exported when scraped (i.e. they will only appear when a child process that declared them +is scraped). 
+ +If you're absolutely sure that every child process will run the metric declaration code, +then you won't run into this issue, but the simplest approach is to declare the metrics +before forking. + +**Large numbers of files**: Because there is an individual file per metric and per process +(which is done to optimize for observation performance), you may end up with a large number +of files. We don't currently have a solution for this problem, but we're working on it. + +**Performance**: Even though this store saves data on disk, it's still much faster than +would probably be expected, because the files are never actually `fsync`ed, so the store +never blocks while waiting for disk. The kernel's page cache is incredibly efficient in +this regard. If in doubt, check the benchmark scripts described in the documentation for +creating your own stores and run them in your particular runtime environment to make sure +this provides adequate performance. + + +### Building your own store, and stores other than the built-in ones. + +If none of these stores is suitable for your requirements, you can easily make your own. + +The interface and requirements of Stores are specified in detail in the `README.md` +in the `client/data_stores` directory. This thoroughly documents how to make your own +store. + +There are also links there to non-built-in stores created by others that may be useful, +either as they are, or as a starting point for making your own. + +### Aggregation settings for multi-process stores + +If you are in a multi-process environment (such as pre-fork servers like Unicorn), each +process will probably keep their own counters, which need to be aggregated when receiving +a Prometheus scrape, to report coherent total numbers. + +For Counters, Histograms and quantile-less Summaries this is simply a matter of +summing the values of each process. + +For Gauges, however, this may not be the right thing to do, depending on what they're +measuring. 
You might want to take the maximum or minimum value observed in any process, +rather than the sum of all of them. By default, we export each process's individual +value, with a `pid` label identifying each one. + +If these defaults don't work for your use case, you should use the `store_settings` +parameter when registering the metric, to specify an `:aggregation` setting. + +```ruby +free_disk_space = registry.gauge(:free_disk_space_bytes, + docstring: "Free disk space, in bytes", + store_settings: { aggregation: :max }) +``` + +NOTE: This will only work if the store you're using supports the `:aggregation` setting. +Of the built-in stores, only `DirectFileStore` does. + +Also note that the `:aggregation` setting works for all metric types, not just for gauges. +It would be unusual to use it for anything other than gauges, but if your use-case +requires it, the store will respect your aggregation wishes. + ## Tests Install necessary development gems with `bundle install` and run tests with @@ -181,11 +504,8 @@ rake [1]: https://github.com/prometheus/prometheus [2]: http://rack.github.io/ -[3]: https://secure.travis-ci.org/prometheus/client_ruby.svg?branch=master +[3]: https://github.com/prometheus/client_ruby/actions/workflows/ci.yml/badge.svg [4]: https://badge.fury.io/rb/prometheus-client.svg -[5]: https://gemnasium.com/prometheus/client_ruby.svg -[6]: https://codeclimate.com/github/prometheus/client_ruby.svg -[7]: https://coveralls.io/repos/prometheus/client_ruby/badge.svg?branch=master [8]: https://github.com/prometheus/pushgateway [9]: lib/prometheus/middleware/exporter.rb [10]: lib/prometheus/middleware/collector.rb diff --git a/Rakefile b/Rakefile index 4262dd70..cf7648d2 100644 --- a/Rakefile +++ b/Rakefile @@ -2,10 +2,9 @@ require 'bundler' require 'rspec/core/rake_task' -require 'rubocop/rake_task' desc 'Default: run specs' -task default: [:spec, :rubocop] +task default: [:spec] # test alias task test: :spec @@ -15,7 +14,4 @@ RSpec::Core::RakeTask.new do 
|t| t.rspec_opts = '--require ./spec/spec_helper.rb' end -desc 'Lint code' -RuboCop::RakeTask.new - Bundler::GemHelper.install_tasks diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..5e6f976d --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,6 @@ +# Reporting a security issue + +The Prometheus security policy, including how to report vulnerabilities, can be +found here: + +[https://prometheus.io/docs/operating/security/](https://prometheus.io/docs/operating/security/) diff --git a/UPGRADING.md b/UPGRADING.md new file mode 100644 index 00000000..8d065f21 --- /dev/null +++ b/UPGRADING.md @@ -0,0 +1,370 @@ +# Upgrading from 3.x.x to 4.x.x + +## Objectives + +4.0.0 contains a single breaking change - the [removal +of](https://github.com/prometheus/client_ruby/pull/251) [framework-specific route +detection](https://github.com/prometheus/client_ruby/pull/245) from +`Prometheus::Middleware::Collector`. + +## Removal of framework-specific route detection + +In 3.0.0 we added a feature that used specific information provided by the Sinatra and +Grape web frameworks to generate the `path` label in `Prometheus::Middleware::Collector`. + +This feature turned out to be inherently flawed, due to limitations in the information we +can extract from the request environment. [This +comment](https://github.com/prometheus/client_ruby/issues/249#issuecomment-1061317511) +goes into much more depth on the investigation we did and the conclusions we came to. + +Most users will be unaffected by this change. If you use Sinatra or Grape and +`Prometheus::Middleware::Collector` you will notice that your `path` label values will be +much more similar to the ones we generated in the 2.x.x release series. + +# Upgrading from 2.x.x to 3.x.x + +## Objectives + +Most of the breaking changes in 3.0.0 are in `Prometheus::Client::Push`, which has had a +fairly major overhaul. + +As well as that, there are a handful of smaller breaking changes. 
+ +## Ruby + +The minimum supported Ruby version is now 2.6. This will change over time according to our +[compatibility policy](COMPATIBILITY.md). + +## Push client improvements + +### Keyword arguments + +In line with changes we made for the 0.10.0 release (see below), +`Prometheus::Client::Push` now favours the use of keyword arguments for improved clarity +at the callsites. Specifically, the constructor now takes several keyword arguments rather +than relying entirely on positional arguments. Where you would previously have written: + +```ruby +Prometheus::Client::Push.new('my-batch-job', 'some-instance', 'https://example.domain:1234') +``` + +you would now write: + +```ruby +Prometheus::Client::Push.new( + job: 'my-batch-job', + gateway: 'https://example.domain:1234', + grouping_key: { instance: 'some-instance', extra_key: 'foobar' } +).add(registry) +``` + +### Removal of `instance` in favour of `grouping_key` + +Previously, it was possible to specify the instance of a job for which metrics were being +pushed, like: + +```ruby +Prometheus::Client::Push.new('my-batch-job', 'some-instance').add(registry) +``` + +What this really did under-the-hood was set a grouping key with a single key-value pair in +it. The Pushgateway itself [supports arbitrary grouping +keys](https://github.com/prometheus/pushgateway#url) made up of many key-value pairs. 
We +now support submitting metrics with such grouping keys: + +```ruby +Prometheus::Client::Push.new( + job: 'my-batch-job', + grouping_key: { instance: 'some-instance', extra_key: 'foobar' } +).add(registry) +``` + +### Separate method for setting basic auth credentials + +Previously, when initializing a `Prometheus::Client::Push` instance with HTTP Basic +Authentication credentials, you would make a call like: + +```ruby +push = Prometheus::Client::Push.new("my-job", "some-instance", "http://user:password@localhost:9091") +``` + +In most cases, this was fine, but would break if the user or password contained any +non-URL-safe characters ([per RFC +3986](https://datatracker.ietf.org/doc/html/rfc3986#section-2.1)). + +While it is possible to pass those characters using percent-encoding, previous versions of +`Prometheus::Client::Push` didn't decode them before passing them into the HTTP client, +meaning that approach wouldn't work as the credentials we sent to the server would be +wrong. + +We [discussed how to fix +it](https://github.com/prometheus/client_ruby/issues/170#issuecomment-1003765815) and +decided it would be better to have a separate method for supplying HTTP Basic +Authentication credentials, with no requirement for percent-encoding, than to make users +jump through the hoops of correctly encoding the username and password in the gateway URL. + +In the 3.x.x release series, HTTP Basic Authentication credentials should be passed like +this: + +```ruby +push = Prometheus::Client::Push.new(job: "my-job", gateway: "http://localhost:9091") +push.basic_auth("user", "password") +``` + +We also explicitly reject usernames and passwords being passed in the gateway URL, and +will raise an error if they are passed that way. + +### Presence of `job` is now validated + +We now validate that the `job` passed to the `Prometheus::Client::Push` initializer is not +`nil` and isn't the empty string. 
+ +### Raising errors on non-2xx responses from Pushgateway + +Previously, if the Pushgateway (or a proxy between us and it) returned a non-2xx HTTP +response, we would silently fail to submit metrics to it. + +Now, an appropriate error is raised, indicating which class of non-2xx response was +received. If you want to `rescue` those errors and handle them explicitly, they are all +subclasses of `Prometheus::Client::Push::HttpError`. If you only want to handle some of +them, or want to handle each class of non-2xx response differently, you can `rescue` one +or more of: + + - `Prometheus::Client::Push::HttpRedirectError` + - `Prometheus::Client::Push::HttpClientError` + - `Prometheus::Client::Push::HttpServerError` + +_Note: `Prometheus::Client::Push` does not follow redirects. You should configure the +client to talk directly to an instance of the Pushgateway._ + +### Fixed encoding of spaces in `job` and `instance` + +In a [previous +commit](https://github.com/prometheus/client_ruby/pull/188/commits/f31bdcb8eda943f8ddf720e0b9d65ac22124cc93) +we addressed the deprecation (and later removal in Ruby 3.0) of `URI.escape` by switching +to `CGI.escape` for encoding the values of `job` and `instance` which would ultimately end +up in the grouping key. + +Unfortunately, this proved to be a subtly breaking change, as `CGI.escape` encodes spaces +(`" "`) as `"+"` rather than `"%20"`. This led to spaces in the values of `job` and +`instance` being turned into literal plus signs. + +In 3.x.x, [we have +switched](https://github.com/prometheus/client_ruby/pull/220/commits/ec5c5aa6979aa295d91fbc16e76e5eb09f82a256) +to `ERB::Util::url_encode`, which handles this case correctly. You may notice your metrics +being published under a different grouping key as a result of this change (if either your +`job` or `instance` values contained spaces). 
+ +## Automatic initialization of time series with no labels + +The [Prometheus documentation on best +practices](https://prometheus.io/docs/practices/instrumentation/#avoid-missing-metrics) +recommends exporting a default value for any time series you know will exist in advance. +For series with no labels, other Prometheus clients (including Go, Java, and Python) do +this automatically, so we have matched that behaviour in the 3.x.x series. + +## Added `SCRIPT_NAME` to path labels in Collector middleware + +Previously, we did not include `Rack::Request`'s `SCRIPT_NAME` when building paths in +`Prometheus::Middleware::Collector`. We have now added this, which means that any +application using the included collector middleware with a non-empty `SCRIPT_NAME` will +generate different path labels. + +This will most typically be present when mounting several Rack applications in the same +server process, such as when using [Rails +Engines](https://guides.rubyonrails.org/engines.html). + +## Improved stripping of IDs/UUIDs from paths in Collector middleware + +Where available (currently for applications written in the Sinatra and Grape frameworks), +we now use framework-specific equivalents to `PATH_INFO` in +`Prometheus::Middleware::Collector`, which means that rather than having path segments +replaced with the generic `:id` and `:uuid` placeholders, you'll see the route as you +defined it in your framework. + +For frameworks where that information isn't available to us (most notably Rails), we still +fall back to using `PATH_INFO`, though we have also improved how we strip IDs/UUIDs from +it. Previously, we would only strip them from alternating path segments due to the way we +were matching them. We have improved that matching so it works even when there are +IDs/UUIDs in consecutive path segments. + +You may notice the path label change for some of your endpoints. 
+ +## Improved validation of label names + +Earlier versions of the Ruby Prometheus client performed limited validation of label names +(e.g. ensuring that they didn't start with `__`). The validation rules for label names are +specified [in the Prometheus +documentation](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels), +and we now apply them during metric declaration. Specifically, we have added a check that +label names match the regex `[a-zA-Z_][a-zA-Z0-9_]*`. + +Any labels previously let through by the lack of validation were invalid, and likely would +have caused problems when scraped by Prometheus server. + +# Upgrading from 0.9 to 0.10.x + +## Objectives + +0.10.0 represents a big step forward for the Prometheus Ruby client, which comes with some +breaking changes. The objectives behind those changes are: + +1. Bringing the Ruby client in line with [Prometheus conventions and best + practices](https://prometheus.io/docs/instrumenting/writing_clientlibs/) +2. Adding support for multi-process web servers like Unicorn. This was done by introducing + the notion of pluggable storage backends. + + The client can now be configured with different storage backends, and we provide 3 with + the gem: thread-safe (default), thread-unsafe (best performance in single-threaded use + cases), and a multi-process backend that can be used in forking web servers like + Unicorn. + + Users of the library can build their own storage backend to support different + use cases provided they conform to the same interface. + +## Ruby + +The minimum supported Ruby version is now 2.3. This will change over time according to our +[compatibility policy](COMPATIBILITY.md). + +## Data Stores + +The single biggest feature in this release is support for multi-process web servers. + +The way this was achieved was by introducing a standard interface for metric storage +backends and providing implementations for the most common use-cases. 
+ +If you're using a multi-process web server, you'll want `DirectFileStore`, which +aggregates metrics across the processes. + +```ruby +Prometheus::Client.config.data_store = Prometheus::Client::DataStores::DirectFileStore.new(dir: '/tmp/direct_file_store') +``` + +The default store is the `Synchronized` store, which provides a threadsafe implementation, +but one which doesn't work in multi-process scenarios. + +If you're absolutely sure that you won't use multiple threads or processes, you can use the +`SingleThreaded` data store and avoid the locking overhead. Note that in almost all use +cases the performance overhead won't matter, which is why we use the `Synchronized` store +by default. + +## Keyword arguments (kwargs) + +Many multi-parameter methods have had their arguments changed to keyword arguments for +improved clarity at the callsite. + +### 0.9 +```ruby +counter = Prometheus::Client::Counter.new(:service_requests_total, '...') +``` + +### 0.10 +```ruby +counter = Prometheus::Client::Counter.new(:service_requests_total, docstring: '...') +``` + +### Labels + +Labels must now be declared at metric initialization. Observing a value with a label that +wasn't passed in at initialization will raise an error. + +### 0.9 + +```ruby +counter = Prometheus::Client::Counter.new(:service_requests_total, '...') +counter.increment({ service: 'foo' }) +``` + +### 0.10 + +```ruby +counter = Prometheus::Client::Counter.new(:service_requests_total, docstring: '...', labels: [:service]) +counter.increment(labels: { service: 'foo' }) +``` + +## Histograms + +Keys in the hash returned from the get method are now strings. + +Histograms now include a "+Inf" bucket as well as the sum of all observations. 
+ +### 0.9 + +```ruby +histogram = Prometheus::Client::Histogram.new(:service_latency_seconds, '...', {}, [0.1, 0.3, 1.2]) + +histogram.observe({ service: 'users' }, 0.1) +histogram.observe({ service: 'users' }, 0.3) +histogram.observe({ service: 'users' }, 0.4) +histogram.observe({ service: 'users' }, 1.2) +histogram.observe({ service: 'users' }, 1.5) + +histogram.get({ service: 'users' }) +# => {0.1=>1.0, 0.3=>2.0, 1.2=>4.0} +``` +### 0.10 + +```ruby +histogram = Prometheus::Client::Histogram.new(:service_latency_seconds, docstring: '...', labels: [:service], buckets: [0.1, 0.3, 1.2]) + +histogram.observe(0.1, labels: { service: 'users' }) +histogram.observe(0.3, labels: { service: 'users' }) +histogram.observe(0.4, labels: { service: 'users' }) +histogram.observe(1.2, labels: { service: 'users' }) +histogram.observe(1.5, labels: { service: 'users' }) + +histogram.get(labels: { service: 'users' }) +# => {"0.1"=>0.0, "0.3"=>1.0, "1.2"=>3.0, "+Inf"=>5.0, "sum"=>3.5} +``` + +## Summaries + +Summaries no longer include quantiles. They include the sum and the count instead. 
+ +### 0.9 + +```ruby +summary = Prometheus::Client::Histogram.new(:service_latency_seconds, '...', {}, [0.1, 0.3, 1.2]) + +summary.observe({ service: 'users' }, 0.1) +summary.observe({ service: 'users' }, 0.3) +summary.observe({ service: 'users' }, 0.4) +summary.observe({ service: 'users' }, 1.2) +summary.observe({ service: 'users' }, 1.5) + +summary.get({ service: 'users' }) +# => {0.1=>1.0, 0.3=>2.0, 1.2=>4.0} +``` +### 0.10 + +```ruby +summary = Prometheus::Client::Summary.new(:service_latency_seconds, docstring: '...', labels: [:service]) + +summary.observe(0.1, labels: { service: 'users' }) +summary.observe(0.3, labels: { service: 'users' }) +summary.observe(0.4, labels: { service: 'users' }) +summary.observe(1.2, labels: { service: 'users' }) +summary.observe(1.5, labels: { service: 'users' }) + +summary.get(labels: { service: 'users' }) +# => {"count"=>5.0, "sum"=>3.5} +``` + +## Rack middleware + +Because metric labels must be declared up front, we've removed support for customising the +labels set in the default Rack middleware we provide. + +We did make an attempt to preserve that ability, but decided that the interface was too +confusing and removed it in #121. We might revisit this and have another try at a better +interface in the future. + +## Extra reserved label: `pid` + +When adding support for multi-process web servers, we realised that aggregating gauges +reported by individual processes (e.g. by summing them) is almost never what you want to +do. + +We decided to expose each process's value individually, with a `pid` label set to +differentiate between the processes. Because of that, `pid` is now a reserved label. diff --git a/examples/rack/README.md b/examples/rack/README.md index aecdfc6c..9c6188e6 100644 --- a/examples/rack/README.md +++ b/examples/rack/README.md @@ -23,7 +23,7 @@ output of `/metrics` and terminate. 
Start a Prometheus server with the provided config: ```bash -prometheus -config.file ./prometheus.yml +prometheus --config.file ./prometheus.yml ``` In another terminal, start the application server: @@ -33,8 +33,8 @@ bundle install bundle exec unicorn -c ./unicorn.conf ``` -You can now open the [example app](http://localhost:5000/) and its [metrics -page](http://localhost:5000/metrics) to inspect the output. The running +You can now open the [example app](http://localhost:5123/) and its [metrics +page](http://localhost:5123/metrics) to inspect the output. The running Prometheus server can be used to [play around with the metrics][rate-query]. [rate-query]: http://localhost:9090/graph#%5B%7B%22range_input%22%3A%221h%22%2C%22expr%22%3A%22rate(http_server_requests_total%5B1m%5D)%22%2C%22tab%22%3A0%7D%5D @@ -44,19 +44,52 @@ Prometheus server can be used to [play around with the metrics][rate-query]. The example shown in [`config.ru`](config.ru) is a trivial rack application using the default collector and exporter middlewares. -In order to use custom label builders in the collector, change the line to -something like this: +Currently, the collector middleware doesn't offer any flexibility around label +keys or values (see #111). If you have more sophisticated requirements, we +recommend creating your own collector middleware. + +If your requirements are minimal, one option is to subclass +`Prometheus::Middleware::Collector` and override the methods you need to. For +example, if you want to [change the way IDs are stripped from the +path](https://github.com/prometheus/client_ruby/blob/982fe2e3c37e2940d281573c7689224152dd791f/lib/prometheus/middleware/collector.rb#L97-L101) +you could override the appropriate method: + +```ruby +require 'prometheus/middleware/collector' + +class MyCollector < Prometheus::Middleware::Collector + def strip_ids_from_path(path) + super(path) + .gsub(/8675309/, ':jenny\\1') + end +end +``` + +and use your class in `config.ru` instead. 
+ +If you want to completely customise how the `path` label is generated, you can +override `generate_path`. For example, to use +[Sinatra](https://github.com/sinatra/sinatra)'s framework-specific route info +from the request environment: ```ruby -use Prometheus::Middleware::Collector, counter_label_builder: ->(env, code) { - { - code: code, - method: env['REQUEST_METHOD'].downcase, - # Include the HTTP Host header as label. - host: env['HTTP_HOST'].to_s, - # Include path, but replace all numeric IDs to keep cardinality low. - # Think '/users/1234/comments' -> '/users/:id/comments' - path: env['PATH_INFO'].to_s.gsub(/\/\d+(\/|$)/, '/:id\\1'), - } -} +require 'prometheus/middleware/collector' + +class MyCollector < Prometheus::Middleware::Collector + def generate_path(env) + # `sinatra.route` contains both the request method and the route, separated + # by a space (e.g. "GET /payments/:id"). To get just the request path, you + # can partition the string on " ". + env['sinatra.route'].partition(' ').last + end +end ``` + +Just make sure that your custom path generation logic strips IDs from the path +it returns, or gets the path from a source that would never contain them in the +first place (such as `sinatra.route`), otherwise you'll generate a huge number +of label values! + +**Note:** `Prometheus::Middleware::Collector` isn't explicitly designed to be +subclassed, so the internals are liable to change at any time, including in +patch releases. Overriding its methods is done at your own risk! 
diff --git a/examples/rack/config.ru b/examples/rack/config.ru index 4b36b777..e545444f 100755 --- a/examples/rack/config.ru +++ b/examples/rack/config.ru @@ -11,9 +11,9 @@ srand app = lambda do |_| case rand when 0..0.8 - [200, { 'Content-Type' => 'text/html' }, ['OK']] + [200, { 'content-type' => 'text/html' }, ['OK']] when 0.8..0.95 - [404, { 'Content-Type' => 'text/html' }, ['Not Found']] + [404, { 'content-type' => 'text/html' }, ['Not Found']] else raise NoMethodError, 'It is a bug!' end diff --git a/examples/rack/prometheus.yml b/examples/rack/prometheus.yml index ab9c4477..2e57f88d 100644 --- a/examples/rack/prometheus.yml +++ b/examples/rack/prometheus.yml @@ -10,4 +10,4 @@ scrape_configs: - job_name: "rack-example" static_configs: - targets: - - "localhost:5000" + - "localhost:5123" diff --git a/examples/rack/run b/examples/rack/run index eaaa6929..bf89080f 100755 --- a/examples/rack/run +++ b/examples/rack/run @@ -20,11 +20,11 @@ if ! installed vegeta; then fatal "Could not find go. Either run the examples manually or install" fi - go get github.com/tsenart/vegeta - go install github.com/tsenart/vegeta + go get github.com/tsenart/vegeta # versions of Go < 1.18 + go install github.com/tsenart/vegeta@latest # versions of Go >= 1.18 fi -PORT=5000 +PORT=5123 URL=http://127.0.0.1:${PORT}/ log "starting example server" diff --git a/examples/rack/unicorn.conf b/examples/rack/unicorn.conf index 290ca789..f1ffcfdc 100644 --- a/examples/rack/unicorn.conf +++ b/examples/rack/unicorn.conf @@ -1,3 +1,3 @@ -listen 5000 +listen 5123 worker_processes 1 preload_app true diff --git a/lib/prometheus/client.rb b/lib/prometheus/client.rb index a7e9acaa..fe09d9ba 100644 --- a/lib/prometheus/client.rb +++ b/lib/prometheus/client.rb @@ -1,6 +1,7 @@ # encoding: UTF-8 require 'prometheus/client/registry' +require 'prometheus/client/config' module Prometheus # Client is a ruby implementation for a Prometheus compatible client. 
@@ -9,5 +10,9 @@ module Client def self.registry @registry ||= Registry.new end + + def self.config + @config ||= Config.new + end end end diff --git a/lib/prometheus/client/config.rb b/lib/prometheus/client/config.rb new file mode 100644 index 00000000..7f76c2a0 --- /dev/null +++ b/lib/prometheus/client/config.rb @@ -0,0 +1,15 @@ +# encoding: UTF-8 + +require 'prometheus/client/data_stores/synchronized' + +module Prometheus + module Client + class Config + attr_accessor :data_store + + def initialize + @data_store = Prometheus::Client::DataStores::Synchronized.new + end + end + end +end diff --git a/lib/prometheus/client/counter.rb b/lib/prometheus/client/counter.rb index d1d85b48..28ec2f1e 100644 --- a/lib/prometheus/client/counter.rb +++ b/lib/prometheus/client/counter.rb @@ -10,17 +10,11 @@ def type :counter end - def increment(labels = {}, by = 1) + def increment(by: 1, labels: {}) raise ArgumentError, 'increment must be a non-negative number' if by < 0 label_set = label_set_for(labels) - synchronize { @values[label_set] += by } - end - - private - - def default - 0.0 + @store.increment(labels: label_set, by: by) end end end diff --git a/lib/prometheus/client/data_stores/README.md b/lib/prometheus/client/data_stores/README.md new file mode 100644 index 00000000..3e396a77 --- /dev/null +++ b/lib/prometheus/client/data_stores/README.md @@ -0,0 +1,306 @@ +# Custom Data Stores + +Stores are basically an abstraction over a Hash, whose keys are in turn a Hash of labels +plus a metric name. The intention behind having different data stores is solving +different requirements for different production scenarios, or performance trade-offs. + +The most common of these scenarios are pre-fork servers like Unicorn, which have multiple +separate processes gathering metrics. If each of these had their own store, the metrics +reported on each Prometheus scrape would be different, depending on which process handles +the request. 
Solving this requires some sort of shared storage between these processes, +and there are many ways to solve this problem, each with their own trade-offs. + +This abstraction allows us to easily plug in the most adequate store for each scenario. + +## Interface + +`Store` exposes a `for_metric` method, which returns a store-specific and metric-specific +`MetricStore` object, which represents a "view" onto the actual internal storage for one +particular metric. Each metric / collector object will have a reference to this +`MetricStore` and interact with it directly. + +The `MetricStore` class must expose `synchronize`, `set`, `increment`, `get` and `all_values` +methods, which are explained in the code sample below. Its initializer should be called +only by `Store#for_metric`, not directly. + +All values stored are `Float`s. + +Internally, a `Store` can store the data however it needs to, based on its requirements. +For example, a store that needs to work in a multi-process environment needs to have a +shared section of memory, via either Files, an MMap, an external database, or whatever the +implementor chooses for their particular use case. + +Each `Store` / `MetricStore` will also choose how to divide responsibilities over the +storage of values. For some use cases, each `MetricStore` may have their own individual +storage, whereas for others, the `Store` may own a central storage, and `MetricStore` +objects will access it through the `Store`. This depends on the design choices of each `Store`. + +`Store` and `MetricStore` MUST be thread safe. This applies not only to operations on +stored values (`set`, `increment`), but `MetricStore` must also expose a `synchronize` +method that would allow a Metric to increment multiple values atomically (Histograms need +this, for example). + +Ideally, multiple keys should be modifiable simultaneously, but this is not a +hard requirement.
+ +This is what the interface looks like, in practice: + +```ruby +module Prometheus + module Client + module DataStores + class CustomStore + + # Return a MetricStore, which provides a view of the internal data store, + # catering specifically to that metric. + # + # - `metric_settings` specifies configuration parameters for this metric + # specifically. These may or may not be necessary, depending on each specific + # store and metric. The most obvious example of this is for gauges in + # multi-process environments, where the developer needs to choose how those + # gauges will get aggregated between all the per-process values. + # + # The settings that the store will accept, and what it will do with them, are + # 100% Store-specific. Each store should document what settings it will accept + # and how to use them, so the developer using that store can pass the appropriate + # ones when instantiating the Store itself, and the Metrics they declare. + # + # - `metric_type` is specified in case a store wants to validate that the settings + # are valid for the metric being set up. It may go unused by most Stores + # + # Even if your store doesn't need these two parameters, the Store must expose them + # to make them swappable. + def for_metric(metric_name, metric_type:, metric_settings: {}) + # Generally, here a Store would validate that the settings passed in are valid, + # and raise if they aren't. + validate_metric_settings(metric_type: metric_type, + metric_settings: metric_settings) + MetricStore.new(store: self, + metric_name: metric_name, + metric_type: metric_type, + metric_settings: metric_settings) + end + + + # MetricStore manages the data for one specific metric. It's generally a view onto + # the central store shared by all metrics, but it could also hold the data itself + # if that's better for the specific scenario + class MetricStore + # This constructor is internal to this store, so the signature doesn't need to + # be this.
No one other than the Store should be creating MetricStores + def initialize(store:, metric_name:, metric_type:, metric_settings:) + end + + # Metrics may need to modify multiple values at once (Histograms do this, for + # example). MetricStore needs to provide a way to synchronize those, in addition + # to all of the value modifications being thread-safe without a need for simple + # Metrics to call `synchronize` + def synchronize + raise NotImplementedError + end + + # Store a value for this metric and a set of labels + # Internally, may add extra "labels" to disambiguate values between, + # for example, different processes + def set(labels:, val:) + raise NotImplementedError + end + + def increment(labels:, by: 1) + raise NotImplementedError + end + + # Return a value for a set of labels + # Will return the same value stored by `set`, as opposed to `all_values`, which + # may aggregate multiple values. + # + # For example, in a multi-process scenario, `set` may add an extra internal + # label tagging the value with the process id. `get` will return the value for + # "this" process ID. `all_values` will return an aggregated value for all + # process IDs. + def get(labels:) + raise NotImplementedError + end + + # Returns all the sets of labels seen by the Store, and the aggregated value for + # each. + # + # In some cases, this is just a matter of returning the stored value. + # + # In other cases, the store may need to aggregate multiple values for the same + # set of labels. For example, in a multi-process scenario it may need to `sum` the + # values of counters from each process. Or for `gauges`, it may need to take the + # `max`. This is generally specified in `metric_settings` when calling + # `Store#for_metric`. + def all_values + raise NotImplementedError + end + end + end + end + end +end +``` + +## Conventions + +- Your store MAY require or accept extra settings for each metric on the call to `for_metric`.
+- You SHOULD validate these parameters to make sure they are correct, and raise if they aren't. +- If your store needs to aggregate multiple values for the same metric (for example, in + a multi-process scenario), you MUST accept a setting to define how values are aggregated. + - This setting MUST be called `:aggregation` + - It MUST support, at least, `:sum`, `:max` and `:min`. + - It MAY support other aggregation modes that may apply to your requirements. + - It MUST default to `:sum` + +## Testing your Store + +In order to make it easier to test your store, the basic functionality is tested using +`shared_examples`: + +`it_behaves_like Prometheus::Client::DataStores` + +Follow the simple structure in `synchronized_spec.rb` for a starting point. + +Note that if your store stores data somewhere other than in-memory (in files, Redis, +databases, etc), you will need to do cleanup between tests in a `before` block. + +The tests for `DirectFileStore` have a good example at the top of the file. This file also +has some examples on testing multi-process stores, checking that aggregation between +processes works correctly. + +## Benchmarking your custom data store + +If you are developing your own data store, you probably want to benchmark it to see how +it compares to the built-in ones, and to make sure it achieves the performance you want. + +The Prometheus Ruby Client includes some benchmarks (in the `spec/benchmarks` directory) +to help you with this, and also with validating that your store works correctly. + +The `README` in that directory contains more information on what these benchmarks are for, +and how to use them. + +## Extra Stores and Research + +In the process of abstracting stores away, and creating the built-in ones, GoCardless +has created a good amount of research, benchmarks, and experimental stores, which +weren't useful to include in this repo, but may be a useful resource or starting point +if you are building your own store.
+ +Check out the [GoCardless Data Stores Experiments](https://github.com/gocardless/prometheus-client-ruby-data-stores-experiments) +repository for these. + +## Sample, imaginary multi-process Data Store + +This is just an example of how one could implement a data store, and a clarification on +the "aggregation" point + +Important: This is a **toy example**, intended simply to show how this could work / how to +implement these interfaces. + +There are some key pieces of code missing, which are fairly uninteresting, this only shows +the parts that illustrate the idea of storing multiple different values, and aggregating +them + +```ruby +module Prometheus + module Client + module DataStores + # Stores all the data in a magic data structure that keeps cross-process data, in a + # way that all processes can read it, but each can write only to their own set of + # keys. + # It doesn't care how that works, this is not an actual solution to anything, + # just an example of how the interface would work with something like that. 
+ # + # Metric Settings have one possible key, `aggregation`, which must be one of + # `AGGREGATION_MODES` + class SampleMagicMultiprocessStore + AGGREGATION_MODES = [MAX = :max, MIN = :min, SUM = :sum] + DEFAULT_METRIC_SETTINGS = { aggregation: SUM } + + def initialize + @internal_store = MagicHashSharedBetweenProcesses.new # PStore, for example + end + + def for_metric(metric_name, metric_type:, metric_settings: {}) + settings = DEFAULT_METRIC_SETTINGS.merge(metric_settings) + validate_metric_settings(metric_settings: settings) + MetricStore.new(store: self, + metric_name: metric_name, + metric_type: metric_type, + metric_settings: settings) + end + + private + + def validate_metric_settings(metric_settings:) + raise unless metric_settings.has_key?(:aggregation) + raise unless metric_settings[:aggregation].in?(AGGREGATION_MODES) + end + + class MetricStore + def initialize(store:, metric_name:, metric_type:, metric_settings:) + @store = store + @internal_store = store.internal_store + @metric_name = metric_name + @aggregation_mode = metric_settings[:aggregation] + end + + def set(labels:, val:) + @internal_store[store_key(labels)] = val.to_f + end + + def get(labels:) + @internal_store[store_key(labels)] + end + + def all_values + non_aggregated_values = all_store_values.each_with_object({}) do |(labels, v), acc| + if labels["__metric_name"] == @metric_name + label_set = labels.reject { |k,_| k.in?("__metric_name", "__pid") } + acc[label_set] ||= [] + acc[label_set] << v + end + end + + # Aggregate all the different values for each label_set + non_aggregated_values.each_with_object({}) do |(label_set, values), acc| + acc[label_set] = aggregate(values) + end + end + + private + + def all_store_values + # This assumes there's a something common that all processes can write to, and + # it's magically synchronized (which is not true of a PStore, for example, but + # would of some sort of external data store like Redis, Memcached, SQLite) + + # This could also have 
some sort of: + # file_list = Dir.glob(File.join(path, '*.db')).sort + # which reads all the PStore files / MMapped files, etc, and returns a hash + # with all of them together, which then `values` and `label_sets` can use + end + + # This method holds most of the key to how this Store works. Adding `__pid` as + # one of the labels, we hold each process's value separately, which we can + # aggregate later + def store_key(labels) + labels.merge( + { + "__metric_name" => @metric_name, + "__pid" => Process.pid + } + ) + end + + def aggregate(values) + # This is a horrible way to do this, just illustrating the point + values.send(@aggregation_mode) + end + end + end + end + end +end +``` diff --git a/lib/prometheus/client/data_stores/direct_file_store.rb b/lib/prometheus/client/data_stores/direct_file_store.rb new file mode 100644 index 00000000..1c09dc4d --- /dev/null +++ b/lib/prometheus/client/data_stores/direct_file_store.rb @@ -0,0 +1,368 @@ +require 'fileutils' +require "cgi" + +module Prometheus + module Client + module DataStores + # Stores data in binary files, one file per process and per metric. + # This is generally the recommended store to use to deal with pre-fork servers and + # other "multi-process" scenarios. + # + # Each process will get a file for a metric, and it will manage its contents by + # storing keys next to binary-encoded Floats, and keeping track of the offsets of + # those Floats, to be able to update them directly as they increase. + # + # When exporting metrics, the process that gets scraped by Prometheus will find + # all the files that apply to a metric, read their contents, and aggregate them + # (generally that means SUMming the values for each labelset). + # + # In order to do this, each Metric needs an `:aggregation` setting, specifying how + # to aggregate the multiple possible values we can get for each labelset.
By default, + # Counters, Histograms and Summaries get `SUM`med, and Gauges will report `ALL` + # values, tagging each one with a `pid` label. + # For Gauges, it's also possible to set `SUM`, `MAX` or `MIN` as aggregation, to get + # the highest / lowest value / or the sum of all the processes / threads. + # + # Before using this Store, please read the "`DirectFileStore` caveats and things to + # keep in mind" section of the main README in this repository. It includes a number + # of important things to keep in mind. + + class DirectFileStore + class InvalidStoreSettingsError < StandardError; end + AGGREGATION_MODES = [MAX = :max, MIN = :min, SUM = :sum, ALL = :all, MOST_RECENT = :most_recent] + DEFAULT_METRIC_SETTINGS = { aggregation: SUM } + DEFAULT_GAUGE_SETTINGS = { aggregation: ALL } + + def initialize(dir:) + @store_settings = { dir: dir } + FileUtils.mkdir_p(dir) + end + + def for_metric(metric_name, metric_type:, metric_settings: {}) + default_settings = DEFAULT_METRIC_SETTINGS + if metric_type == :gauge + default_settings = DEFAULT_GAUGE_SETTINGS + end + + settings = default_settings.merge(metric_settings) + validate_metric_settings(metric_type, settings) + + MetricStore.new(metric_name: metric_name, + store_settings: @store_settings, + metric_settings: settings) + end + + private + + def validate_metric_settings(metric_type, metric_settings) + unless metric_settings.has_key?(:aggregation) && + AGGREGATION_MODES.include?(metric_settings[:aggregation]) + raise InvalidStoreSettingsError, + "Metrics need a valid :aggregation key" + end + + unless (metric_settings.keys - [:aggregation]).empty?
+ raise InvalidStoreSettingsError, + "Only :aggregation setting can be specified" + end + + if metric_settings[:aggregation] == MOST_RECENT && metric_type != :gauge + raise InvalidStoreSettingsError, + "Only :gauge metrics support :most_recent aggregation" + end + end + + class MetricStore + attr_reader :metric_name, :store_settings + + def initialize(metric_name:, store_settings:, metric_settings:) + @metric_name = metric_name + @store_settings = store_settings + @values_aggregation_mode = metric_settings[:aggregation] + @store_opened_by_pid = nil + + @lock = Monitor.new + end + + # Synchronize is used to do a multi-process Mutex, when incrementing multiple + # values at once, so that the other process, reading the file for export, doesn't + # get incomplete increments. + # + # `in_process_sync`, instead, is just used so that two threads don't increment + # the same value and get a context switch between read and write leading to an + # inconsistency + def synchronize + in_process_sync do + internal_store.with_file_lock do + yield + end + end + end + + def set(labels:, val:) + in_process_sync do + internal_store.write_value(store_key(labels), val.to_f) + end + end + + def increment(labels:, by: 1) + if @values_aggregation_mode == DirectFileStore::MOST_RECENT + raise InvalidStoreSettingsError, + "The :most_recent aggregation does not support the use of increment"\ + "/decrement" + end + + key = store_key(labels) + in_process_sync do + internal_store.increment_value(key, by.to_f) + end + end + + def get(labels:) + in_process_sync do + internal_store.read_value(store_key(labels)) + end + end + + def all_values + stores_data = Hash.new{ |hash, key| hash[key] = [] } + + # There's no need to call `synchronize` here. 
We're opening a second handle to + # the file, and `flock`ing it, which prevents inconsistent reads + stores_for_metric.each do |file_path| + begin + store = FileMappedDict.new(file_path, true) + store.all_values.each do |(labelset_qs, v, ts)| + # Labels come as a query string, and CGI::parse returns arrays for each key + # "foo=bar&x=y" => { "foo" => ["bar"], "x" => ["y"] } + # Turn the keys back into symbols, and remove the arrays + label_set = CGI::parse(labelset_qs).map do |k, vs| + [k.to_sym, vs.first] + end.to_h + + stores_data[label_set] << [v, ts] + end + ensure + store.close if store + end + end + + # Aggregate all the different values for each label_set + aggregate_hash = Hash.new { |hash, key| hash[key] = 0.0 } + stores_data.each_with_object(aggregate_hash) do |(label_set, values), acc| + acc[label_set] = aggregate_values(values) + end + end + + private + + def in_process_sync + @lock.synchronize { yield } + end + + def store_key(labels) + if @values_aggregation_mode == ALL + labels[:pid] = process_id + end + + labels.to_a.sort.map{|k,v| "#{CGI::escape(k.to_s)}=#{CGI::escape(v.to_s)}"}.join('&') + end + + def internal_store + if @store_opened_by_pid != process_id + @store_opened_by_pid = process_id + @internal_store = FileMappedDict.new(filemap_filename) + else + @internal_store + end + end + + # Filename for this metric's PStore (one per process) + def filemap_filename + filename = "metric_#{ metric_name }___#{ process_id }.bin" + File.join(@store_settings[:dir], filename) + end + + def stores_for_metric + Dir.glob(File.join(@store_settings[:dir], "metric_#{ metric_name }___*")) + end + + def process_id + Process.pid + end + + def aggregate_values(values) + # Each entry in the `values` array is a tuple of `value` and `timestamp`, + # so for all aggregations except `MOST_RECENT`, we need to only take the + # first value in each entry and ignore the second. 
+ if @values_aggregation_mode == MOST_RECENT + latest_tuple = values.max { |a,b| a[1] <=> b[1] } + latest_tuple.first # return the value without the timestamp + else + values = values.map(&:first) # Discard timestamps + + if @values_aggregation_mode == SUM + values.inject { |sum, element| sum + element } + elsif @values_aggregation_mode == MAX + values.max + elsif @values_aggregation_mode == MIN + values.min + elsif @values_aggregation_mode == ALL + values.first + else + raise InvalidStoreSettingsError, + "Invalid Aggregation Mode: #{ @values_aggregation_mode }" + end + end + end + end + + private_constant :MetricStore + + # A dict of doubles, backed by a file we access directly as a byte array. + # + # The file starts with a 4 byte int, indicating how much of it is used. + # Then 4 bytes of padding. + # There's then a number of entries, consisting of a 4 byte int which is the + # size of the next field, a utf-8 encoded string key, padding to an 8 byte + # alignment, and then a 8 byte float which is the value, and then a 8 byte + # float which is the unix timestamp when the value was set. + class FileMappedDict + INITIAL_FILE_SIZE = 1024*1024 + + attr_reader :capacity, :used, :positions + + def initialize(filename, readonly = false) + @positions = {} + @used = 0 + + open_file(filename, readonly) + @used = @f.read(4).unpack('l')[0] if @capacity > 0 + + if @used > 0 + # File already has data. Read the existing values + with_file_lock { populate_positions } + else + # File is empty.
Init the `used` counter, if we're in write mode + if !readonly + @used = 8 + @f.seek(0) + @f.write([@used].pack('l')) + end + end + end + + # Return a list of key-value pairs + def all_values + with_file_lock do + @positions.map do |key, pos| + @f.seek(pos) + value, timestamp = @f.read(16).unpack('dd') + [key, value, timestamp] + end + end + end + + def read_value(key) + if !@positions.has_key?(key) + init_value(key) + end + + pos = @positions[key] + @f.seek(pos) + @f.read(8).unpack('d')[0] + end + + def write_value(key, value) + if !@positions.has_key?(key) + init_value(key) + end + + now = Process.clock_gettime(Process::CLOCK_MONOTONIC) + pos = @positions[key] + @f.seek(pos) + @f.write([value, now].pack('dd')) + @f.flush + end + + def increment_value(key, by) + if !@positions.has_key?(key) + init_value(key) + end + + pos = @positions[key] + @f.seek(pos) + value = @f.read(8).unpack('d')[0] + + now = Process.clock_gettime(Process::CLOCK_MONOTONIC) + @f.seek(-8, :CUR) + @f.write([value + by, now].pack('dd')) + @f.flush + end + + def close + @f.close + end + + def with_file_lock + @f.flock(File::LOCK_EX) + yield + ensure + @f.flock(File::LOCK_UN) + end + + private + + def open_file(filename, readonly) + mode = if readonly + "r" + elsif File.exist?(filename) + "r+b" + else + "w+b" + end + + @f = File.open(filename, mode) + if @f.size == 0 && !readonly + resize_file(INITIAL_FILE_SIZE) + end + @capacity = @f.size + end + + def resize_file(new_capacity) + @f.truncate(new_capacity) + end + + # Initialize a value. Lock must be held by caller. + def init_value(key) + # Pad to be 8-byte aligned. 
+ padded = key + (' ' * (8 - (key.length + 4) % 8)) + value = [padded.length, padded, 0.0, 0.0].pack("lA#{padded.length}dd") + while @used + value.length > @capacity + @capacity *= 2 + resize_file(@capacity) + end + @f.seek(@used) + @f.write(value) + @used += value.length + @f.seek(0) + @f.write([@used].pack('l')) + @f.flush + @positions[key] = @used - 16 + end + + # Read position of all keys. No locking is performed. + def populate_positions + @f.seek(8) + while @f.pos < @used + padded_len = @f.read(4).unpack('l')[0] + key = @f.read(padded_len).unpack("A#{padded_len}")[0].strip + @positions[key] = @f.pos + @f.seek(16, :CUR) + end + end + end + end + end + end +end diff --git a/lib/prometheus/client/data_stores/single_threaded.rb b/lib/prometheus/client/data_stores/single_threaded.rb new file mode 100644 index 00000000..f05cf813 --- /dev/null +++ b/lib/prometheus/client/data_stores/single_threaded.rb @@ -0,0 +1,56 @@ +module Prometheus + module Client + module DataStores + # Stores all the data in a simple Hash for each Metric + # + # Has *no* synchronization primitives, making it the fastest store for single-threaded + # scenarios, but must absolutely not be used in multi-threaded scenarios. + class SingleThreaded + class InvalidStoreSettingsError < StandardError; end + + def for_metric(metric_name, metric_type:, metric_settings: {}) + # We don't need `metric_type` or `metric_settings` for this particular store + validate_metric_settings(metric_settings: metric_settings) + MetricStore.new + end + + private + + def validate_metric_settings(metric_settings:) + unless metric_settings.empty? 
+ raise InvalidStoreSettingsError, + "SingleThreaded doesn't allow any metric_settings" + end + end + + class MetricStore + def initialize + @internal_store = Hash.new { |hash, key| hash[key] = 0.0 } + end + + def synchronize + yield + end + + def set(labels:, val:) + @internal_store[labels] = val.to_f + end + + def increment(labels:, by: 1) + @internal_store[labels] += by + end + + def get(labels:) + @internal_store[labels] + end + + def all_values + @internal_store.dup + end + end + + private_constant :MetricStore + end + end + end +end diff --git a/lib/prometheus/client/data_stores/synchronized.rb b/lib/prometheus/client/data_stores/synchronized.rb new file mode 100644 index 00000000..d0a74608 --- /dev/null +++ b/lib/prometheus/client/data_stores/synchronized.rb @@ -0,0 +1,62 @@ +module Prometheus + module Client + module DataStores + # Stores all the data in simple hashes, one per metric. Each of these metrics + # synchronizes access to their hash, but multiple metrics can run observations + # concurrently. + class Synchronized + class InvalidStoreSettingsError < StandardError; end + + def for_metric(metric_name, metric_type:, metric_settings: {}) + # We don't need `metric_type` or `metric_settings` for this particular store + validate_metric_settings(metric_settings: metric_settings) + MetricStore.new + end + + private + + def validate_metric_settings(metric_settings:) + unless metric_settings.empty? 
+ raise InvalidStoreSettingsError, + "Synchronized doesn't allow any metric_settings" + end + end + + class MetricStore + def initialize + @internal_store = Hash.new { |hash, key| hash[key] = 0.0 } + @lock = Monitor.new + end + + def synchronize + @lock.synchronize { yield } + end + + def set(labels:, val:) + synchronize do + @internal_store[labels] = val.to_f + end + end + + def increment(labels:, by: 1) + synchronize do + @internal_store[labels] += by + end + end + + def get(labels:) + synchronize do + @internal_store[labels] + end + end + + def all_values + synchronize { @internal_store.dup } + end + end + + private_constant :MetricStore + end + end + end +end diff --git a/lib/prometheus/client/formats/text.rb b/lib/prometheus/client/formats/text.rb index 040435fa..b735389c 100644 --- a/lib/prometheus/client/formats/text.rb +++ b/lib/prometheus/client/formats/text.rb @@ -40,37 +40,31 @@ class << self private def representation(metric, label_set, value, &block) - set = metric.base_labels.merge(label_set) - if metric.type == :summary - summary(metric.name, set, value, &block) + summary(metric.name, label_set, value, &block) elsif metric.type == :histogram - histogram(metric.name, set, value, &block) + histogram(metric.name, label_set, value, &block) else - yield metric(metric.name, labels(set), value) + yield metric(metric.name, labels(label_set), value) end end def summary(name, set, value) - value.each do |q, v| - yield metric(name, labels(set.merge(quantile: q)), v) - end - l = labels(set) - yield metric("#{name}_sum", l, value.sum) - yield metric("#{name}_count", l, value.total) + yield metric("#{name}_sum", l, value["sum"]) + yield metric("#{name}_count", l, value["count"]) end def histogram(name, set, value) bucket = "#{name}_bucket" value.each do |q, v| + next if q == "sum" yield metric(bucket, labels(set.merge(le: q)), v) end - yield metric(bucket, labels(set.merge(le: '+Inf')), value.total) l = labels(set) - yield metric("#{name}_sum", l, value.sum) - 
yield metric("#{name}_count", l, value.total) + yield metric("#{name}_sum", l, value["sum"]) + yield metric("#{name}_count", l, value["+Inf"]) end def metric(name, labels, value) diff --git a/lib/prometheus/client/gauge.rb b/lib/prometheus/client/gauge.rb index ee4c0c50..fbcfdd4a 100644 --- a/lib/prometheus/client/gauge.rb +++ b/lib/prometheus/client/gauge.rb @@ -12,32 +12,30 @@ def type end # Sets the value for the given label set - def set(labels, value) + def set(value, labels: {}) unless value.is_a?(Numeric) raise ArgumentError, 'value must be a number' end - @values[label_set_for(labels)] = value.to_f + @store.set(labels: label_set_for(labels), val: value) + end + + def set_to_current_time(labels: {}) + @store.set(labels: label_set_for(labels), val: Time.now.to_f) end # Increments Gauge value by 1 or adds the given value to the Gauge. # (The value can be negative, resulting in a decrease of the Gauge.) - def increment(labels = {}, by = 1) + def increment(by: 1, labels: {}) label_set = label_set_for(labels) - synchronize do - @values[label_set] ||= 0 - @values[label_set] += by - end + @store.increment(labels: label_set, by: by) end # Decrements Gauge value by 1 or subtracts the given value from the Gauge. # (The value can be negative, resulting in a increase of the Gauge.) - def decrement(labels = {}, by = 1) + def decrement(by: 1, labels: {}) label_set = label_set_for(labels) - synchronize do - @values[label_set] ||= 0 - @values[label_set] -= by - end + @store.increment(labels: label_set, by: -by) end end end diff --git a/lib/prometheus/client/histogram.rb b/lib/prometheus/client/histogram.rb index 23c5899f..8b02f007 100644 --- a/lib/prometheus/client/histogram.rb +++ b/lib/prometheus/client/histogram.rb @@ -6,63 +6,141 @@ module Prometheus module Client # A histogram samples observations (usually things like request durations # or response sizes) and counts them in configurable buckets. It also - # provides a sum of all observed values. 
+ # provides a total count and sum of all observed values. class Histogram < Metric - # Value represents the state of a Histogram at a given point. - class Value < Hash - attr_accessor :sum, :total - - def initialize(buckets) - @sum = 0.0 - @total = 0.0 - - buckets.each do |bucket| - self[bucket] = 0.0 - end - end - - def observe(value) - @sum += value - @total += 1 - - each_key do |bucket| - self[bucket] += 1 if value <= bucket - end - end - end - # DEFAULT_BUCKETS are the default Histogram buckets. The default buckets # are tailored to broadly measure the response time (in seconds) of a # network service. (From DefBuckets client_golang) DEFAULT_BUCKETS = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10].freeze + attr_reader :buckets + # Offer a way to manually specify buckets - def initialize(name, docstring, base_labels = {}, - buckets = DEFAULT_BUCKETS) - raise ArgumentError, 'Unsorted buckets, typo?' unless sorted? buckets + def initialize(name, + docstring:, + labels: [], + preset_labels: {}, + buckets: DEFAULT_BUCKETS, + store_settings: {}) + raise ArgumentError, 'Unsorted buckets, typo?' unless sorted?(buckets) @buckets = buckets - super(name, docstring, base_labels) + super(name, + docstring: docstring, + labels: labels, + preset_labels: preset_labels, + store_settings: store_settings) + end + + def self.linear_buckets(start:, width:, count:) + count.times.map { |idx| start.to_f + idx * width } + end + + def self.exponential_buckets(start:, factor: 2, count:) + count.times.map { |idx| start.to_f * factor ** idx } + end + + def with_labels(labels) + new_metric = self.class.new(name, + docstring: docstring, + labels: @labels, + preset_labels: preset_labels.merge(labels), + buckets: @buckets, + store_settings: @store_settings) + + # The new metric needs to use the same store as the "main" declared one, otherwise + # any observations on that copy with the pre-set labels won't actually be exported. 
+ new_metric.replace_internal_store(@store) + + new_metric end def type :histogram end - def observe(labels, value) - if labels[:le] - raise ArgumentError, 'Label with name "le" is not permitted' + # Records a given value. The recorded value is usually positive + # or zero. A negative value is accepted but prevents current + # versions of Prometheus from properly detecting counter resets + # in the sum of observations. See + # https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations + # for details. + def observe(value, labels: {}) + bucket = buckets.bsearch { |upper_limit| upper_limit >= value } + bucket = "+Inf" if bucket.nil? + + base_label_set = label_set_for(labels) + + # This is basically faster than doing `.merge` + bucket_label_set = base_label_set.dup + bucket_label_set[:le] = bucket.to_s + sum_label_set = base_label_set.dup + sum_label_set[:le] = "sum" + + @store.synchronize do + @store.increment(labels: bucket_label_set, by: 1) + @store.increment(labels: sum_label_set, by: value) + end + end + + # Returns a hash with all the buckets plus +Inf (count) plus Sum for the given label set + def get(labels: {}) + base_label_set = label_set_for(labels) + + all_buckets = buckets + ["+Inf", "sum"] + + @store.synchronize do + all_buckets.each_with_object({}) do |upper_limit, acc| + acc[upper_limit.to_s] = @store.get(labels: base_label_set.merge(le: upper_limit.to_s)) + end.tap do |acc| + accumulate_buckets(acc) + end + end + end + + # Returns all label sets with their values expressed as hashes with their buckets + def values + values = @store.all_values + + result = values.each_with_object({}) do |(label_set, v), acc| + actual_label_set = label_set.reject{|l| l == :le } + acc[actual_label_set] ||= @buckets.map{|b| [b.to_s, 0.0]}.to_h + acc[actual_label_set][label_set[:le].to_s] = v end - label_set = label_set_for(labels) - synchronize { @values[label_set].observe(value) } + result.each do |(_label_set, v)| + accumulate_buckets(v) + end + end 
+ + def init_label_set(labels) + base_label_set = label_set_for(labels) + + @store.synchronize do + (buckets + ["+Inf", "sum"]).each do |bucket| + @store.set(labels: base_label_set.merge(le: bucket.to_s), val: 0) + end + end end private - def default - Value.new(@buckets) + # Modifies the passed in parameter + def accumulate_buckets(h) + bucket_acc = 0 + buckets.each do |upper_limit| + bucket_value = h[upper_limit.to_s] + h[upper_limit.to_s] += bucket_acc + bucket_acc += bucket_value + end + + inf_value = h["+Inf"] || 0.0 + h["+Inf"] = inf_value + bucket_acc + end + + def reserved_labels + [:le] end def sorted?(bucket) diff --git a/lib/prometheus/client/label_set_validator.rb b/lib/prometheus/client/label_set_validator.rb index b5fb64c4..e67dde54 100644 --- a/lib/prometheus/client/label_set_validator.rb +++ b/lib/prometheus/client/label_set_validator.rb @@ -5,19 +5,22 @@ module Client # LabelSetValidator ensures that all used label sets comply with the # Prometheus specification. class LabelSetValidator - # TODO: we might allow setting :instance in the future - RESERVED_LABELS = [:job, :instance].freeze + BASE_RESERVED_LABELS = [:pid].freeze + LABEL_NAME_REGEX = /\A[a-zA-Z_][a-zA-Z0-9_]*\Z/ class LabelSetError < StandardError; end class InvalidLabelSetError < LabelSetError; end class InvalidLabelError < LabelSetError; end class ReservedLabelError < LabelSetError; end - def initialize - @validated = {} + attr_reader :expected_labels, :reserved_labels + + def initialize(expected_labels:, reserved_labels: []) + @expected_labels = expected_labels.sort + @reserved_labels = BASE_RESERVED_LABELS + reserved_labels end - def valid?(labels) + def validate_symbols!(labels) unless labels.respond_to?(:all?) raise InvalidLabelSetError, "#{labels} is not a valid label set" end @@ -29,22 +32,24 @@ def valid?(labels) end end - def validate(labels) - return labels if @validated.key?(labels.hash) - - valid?(labels) - - unless @validated.empty? 
|| match?(labels, @validated.first.last) - raise InvalidLabelSetError, 'labels must have the same signature' + def validate_labelset!(labelset) + begin + return labelset if keys_match?(labelset) + rescue ArgumentError + # If labelset contains keys that are a mixture of strings and symbols, this will + # raise when trying to sort them, but the error should be the same: + # InvalidLabelSetError end - @validated[labels.hash] = labels + raise InvalidLabelSetError, "labels must have the same signature " \ + "(keys given: #{labelset.keys} vs." \ + " keys expected: #{expected_labels}" end private - def match?(a, b) - a.keys.sort == b.keys.sort + def keys_match?(labelset) + labelset.keys.sort == expected_labels end def validate_symbol(key) @@ -54,13 +59,19 @@ def validate_symbol(key) end def validate_name(key) - return true unless key.to_s.start_with?('__') + if key.to_s.start_with?('__') + raise ReservedLabelError, "label #{key} must not start with __" + end + + unless key.to_s =~ LABEL_NAME_REGEX + raise InvalidLabelError, "label name must match /#{LABEL_NAME_REGEX}/" + end - raise ReservedLabelError, "label #{key} must not start with __" + true end def validate_reserved_key(key) - return true unless RESERVED_LABELS.include?(key) + return true unless reserved_labels.include?(key) raise ReservedLabelError, "#{key} is reserved" end diff --git a/lib/prometheus/client/metric.rb b/lib/prometheus/client/metric.rb index 7be679df..2094ed53 100644 --- a/lib/prometheus/client/metric.rb +++ b/lib/prometheus/client/metric.rb @@ -7,42 +7,83 @@ module Prometheus module Client # Metric class Metric - attr_reader :name, :docstring, :base_labels + attr_reader :name, :docstring, :labels, :preset_labels - def initialize(name, docstring, base_labels = {}) - @mutex = Mutex.new - @validator = LabelSetValidator.new - @values = Hash.new { |hash, key| hash[key] = default } + def initialize(name, + docstring:, + labels: [], + preset_labels: {}, + store_settings: {}) validate_name(name) 
validate_docstring(docstring) - @validator.valid?(base_labels) + @validator = LabelSetValidator.new(expected_labels: labels, + reserved_labels: reserved_labels) + @validator.validate_symbols!(labels) + @validator.validate_symbols!(preset_labels) + + @labels = labels + @store_settings = store_settings @name = name @docstring = docstring - @base_labels = base_labels + @preset_labels = stringify_values(preset_labels) + + @all_labels_preset = false + if preset_labels.keys.length == labels.length + @validator.validate_labelset!(preset_labels) + @all_labels_preset = true + end + + @store = Prometheus::Client.config.data_store.for_metric( + name, + metric_type: type, + metric_settings: store_settings + ) + + # WARNING: Our internal store can be replaced later by `with_labels` + # Everything we do after this point needs to still work if @store gets replaced + init_label_set({}) if labels.empty? + end + + protected def replace_internal_store(new_store) + @store = new_store end + # Returns the value for the given label set - def get(labels = {}) - @validator.valid?(labels) + def get(labels: {}) + label_set = label_set_for(labels) + @store.get(labels: label_set) + end + + def with_labels(labels) + new_metric = self.class.new(name, + docstring: docstring, + labels: @labels, + preset_labels: preset_labels.merge(labels), + store_settings: @store_settings) - @values[labels] + # The new metric needs to use the same store as the "main" declared one, otherwise + # any observations on that copy with the pre-set labels won't actually be exported. 
+ new_metric.replace_internal_store(@store) + + new_metric + end + + def init_label_set(labels) + @store.set(labels: label_set_for(labels), val: 0) end # Returns all label sets with their values def values - synchronize do - @values.each_with_object({}) do |(labels, value), memo| - memo[labels] = value - end - end + @store.all_values end private - def default - nil + def reserved_labels + [] end def validate_name(name) @@ -62,11 +103,17 @@ def validate_docstring(docstring) end def label_set_for(labels) - @validator.validate(labels) + # We've already validated, and there's nothing to merge. Save some cycles + return preset_labels if @all_labels_preset && labels.empty? + labels = stringify_values(labels) + @validator.validate_labelset!(preset_labels.merge(labels)) end - def synchronize - @mutex.synchronize { yield } + def stringify_values(labels) + stringified = {} + labels.each { |k,v| stringified[k] = v.to_s } + + stringified end end end diff --git a/lib/prometheus/client/push.rb b/lib/prometheus/client/push.rb index b9990efe..ca556ed1 100644 --- a/lib/prometheus/client/push.rb +++ b/lib/prometheus/client/push.rb @@ -1,10 +1,15 @@ # encoding: UTF-8 +require 'base64' +require 'thread' require 'net/http' require 'uri' +require 'erb' +require 'set' require 'prometheus/client' require 'prometheus/client/formats/text' +require 'prometheus/client/label_set_validator' module Prometheus # Client is a ruby implementation for a Prometheus compatible client. @@ -12,34 +17,59 @@ module Client # Push implements a simple way to transmit a given registry to a given # Pushgateway. 
class Push + class HttpError < StandardError; end + class HttpRedirectError < HttpError; end + class HttpClientError < HttpError; end + class HttpServerError < HttpError; end + DEFAULT_GATEWAY = 'http://localhost:9091'.freeze - PATH = '/metrics/jobs/%s'.freeze - INSTANCE_PATH = '/metrics/jobs/%s/instances/%s'.freeze - HEADER = { 'Content-Type' => Formats::Text::CONTENT_TYPE }.freeze + PATH = '/metrics'.freeze SUPPORTED_SCHEMES = %w(http https).freeze - attr_reader :job, :instance, :gateway, :path + attr_reader :job, :gateway, :path + + def initialize(job:, gateway: DEFAULT_GATEWAY, grouping_key: {}, **kwargs) + raise ArgumentError, "job cannot be nil" if job.nil? + raise ArgumentError, "job cannot be empty" if job.empty? + @validator = LabelSetValidator.new(expected_labels: grouping_key.keys) + @validator.validate_symbols!(grouping_key) - def initialize(job, instance = nil, gateway = nil) + @mutex = Mutex.new @job = job - @instance = instance @gateway = gateway || DEFAULT_GATEWAY - @uri = parse(@gateway) - @path = build_path(job, instance) + @grouping_key = grouping_key + @path = build_path(job, grouping_key) + + @uri = parse("#{@gateway}#{@path}") + validate_no_basic_auth!(@uri) + @http = Net::HTTP.new(@uri.host, @uri.port) - @http.use_ssl = @uri.scheme == 'https' + @http.use_ssl = (@uri.scheme == 'https') + @http.open_timeout = kwargs[:open_timeout] if kwargs[:open_timeout] + @http.read_timeout = kwargs[:read_timeout] if kwargs[:read_timeout] + end + + def basic_auth(user, password) + @user = user + @password = password end def add(registry) - request('POST', registry) + synchronize do + request(Net::HTTP::Post, registry) + end end def replace(registry) - request('PUT', registry) + synchronize do + request(Net::HTTP::Put, registry) + end end def delete - @http.send_request('DELETE', path) + synchronize do + request(Net::HTTP::Delete) + end end private @@ -56,18 +86,128 @@ def parse(url) raise ArgumentError, "#{url} is not a valid URL: #{e}" end - def 
build_path(job, instance) - if instance - format(INSTANCE_PATH, URI.escape(job), URI.escape(instance)) + def build_path(job, grouping_key) + job = job.to_s + + # Job can't be empty, but it can contain `/`, so we need to base64 + # encode it in that case + if job.include?('/') + encoded_job = Base64.urlsafe_encode64(job) + path = "#{PATH}/job@base64/#{encoded_job}" else - format(PATH, URI.escape(job)) + path = "#{PATH}/job/#{ERB::Util::url_encode(job)}" end + + grouping_key.each do |label, value| + value = value.to_s + + if value.include?('/') + encoded_value = Base64.urlsafe_encode64(value) + path += "/#{label}@base64/#{encoded_value}" + # While it's valid for the urlsafe_encode64 function to return an + # empty string when the input string is empty, it doesn't work for + # our specific use case as we're putting the result into a URL path + # segment. A double slash (`//`) can be normalised away by HTTP + # libraries, proxies, and web servers. + # + # For empty strings, we use a single padding character (`=`) as the + # value. + # + # See the pushgateway docs for more details: + # + # https://github.com/prometheus/pushgateway/blob/6393a901f56d4dda62cd0f6ab1f1f07c495b6354/README.md#url + elsif value.empty? 
+ path += "/#{label}@base64/=" + else + path += "/#{label}/#{ERB::Util::url_encode(value)}" + end + end + + path end - def request(method, registry) - data = Formats::Text.marshal(registry) + def request(req_class, registry = nil) + validate_no_label_clashes!(registry) if registry + + req = req_class.new(@uri) + req.content_type = Formats::Text::CONTENT_TYPE + req.basic_auth(@user, @password) if @user + req.body = Formats::Text.marshal(registry) if registry + + response = @http.request(req) + validate_response!(response) + + response + end - @http.send_request(method, path, data, HEADER) + def synchronize + @mutex.synchronize { yield } + end + + def validate_no_basic_auth!(uri) + if uri.user || uri.password + raise ArgumentError, <<~EOF + Setting Basic Auth credentials in the gateway URL is not supported, please call the `basic_auth` method. + + Received username `#{uri.user}` in gateway URL. Instead of passing + Basic Auth credentials like this: + + ``` + push = Prometheus::Client::Push.new(job: "my-job", gateway: "http://user:password@localhost:9091") + ``` + + please pass them like this: + + ``` + push = Prometheus::Client::Push.new(job: "my-job", gateway: "http://localhost:9091") + push.basic_auth("user", "password") + ``` + + While URLs do support passing Basic Auth credentials using the + `http://user:password@example.com/` syntax, the username and + password in that syntax have to follow the usual rules for URL + encoding of characters per RFC 3986 + (https://datatracker.ietf.org/doc/html/rfc3986#section-2.1). + + Rather than place the burden of correctly performing that encoding + on users of this gem, we decided to have a separate method for + supplying Basic Auth credentials, with no requirement to URL encode + the characters in them. + EOF + end + end + + def validate_no_label_clashes!(registry) + # There's nothing to check if we don't have a grouping key + return if @grouping_key.empty? 
+ + # We could be doing a lot of comparisons, so let's do them against a + # set rather than an array + grouping_key_labels = @grouping_key.keys.to_set + + registry.metrics.each do |metric| + metric.labels.each do |label| + if grouping_key_labels.include?(label) + raise LabelSetValidator::InvalidLabelSetError, + "label :#{label} from grouping key collides with label of the " \ + "same name from metric :#{metric.name} and would overwrite it" + end + end + end + end + + def validate_response!(response) + status = Integer(response.code) + if status >= 300 + message = "status: #{response.code}, message: #{response.message}, body: #{response.body}" + if status <= 399 + raise HttpRedirectError, message + elsif status <= 499 + raise HttpClientError, message + else + raise HttpServerError, message + end + end end end end diff --git a/lib/prometheus/client/registry.rb b/lib/prometheus/client/registry.rb index d508d156..0b2f6e9a 100644 --- a/lib/prometheus/client/registry.rb +++ b/lib/prometheus/client/registry.rb @@ -22,7 +22,7 @@ def register(metric) name = metric.name @mutex.synchronize do - if exist?(name.to_sym) + if @metrics.key?(name.to_sym) raise AlreadyRegisteredError, "#{name} has already been registered" end @metrics[name.to_sym] = metric @@ -31,33 +31,57 @@ def register(metric) metric end - def counter(name, docstring, base_labels = {}) - register(Counter.new(name, docstring, base_labels)) + def unregister(name) + @mutex.synchronize do + @metrics.delete(name.to_sym) + end + end + + def counter(name, docstring:, labels: [], preset_labels: {}, store_settings: {}) + register(Counter.new(name, + docstring: docstring, + labels: labels, + preset_labels: preset_labels, + store_settings: store_settings)) end - def summary(name, docstring, base_labels = {}) - register(Summary.new(name, docstring, base_labels)) + def summary(name, docstring:, labels: [], preset_labels: {}, store_settings: {}) + register(Summary.new(name, + docstring: docstring, + labels: labels, + 
preset_labels: preset_labels, + store_settings: store_settings)) end - def gauge(name, docstring, base_labels = {}) - register(Gauge.new(name, docstring, base_labels)) + def gauge(name, docstring:, labels: [], preset_labels: {}, store_settings: {}) + register(Gauge.new(name, + docstring: docstring, + labels: labels, + preset_labels: preset_labels, + store_settings: store_settings)) end - def histogram(name, docstring, base_labels = {}, - buckets = Histogram::DEFAULT_BUCKETS) - register(Histogram.new(name, docstring, base_labels, buckets)) + def histogram(name, docstring:, labels: [], preset_labels: {}, + buckets: Histogram::DEFAULT_BUCKETS, + store_settings: {}) + register(Histogram.new(name, + docstring: docstring, + labels: labels, + preset_labels: preset_labels, + buckets: buckets, + store_settings: store_settings)) end def exist?(name) - @metrics.key?(name) + @mutex.synchronize { @metrics.key?(name) } end def get(name) - @metrics[name.to_sym] + @mutex.synchronize { @metrics[name.to_sym] } end def metrics - @metrics.values + @mutex.synchronize { @metrics.values } end end end diff --git a/lib/prometheus/client/summary.rb b/lib/prometheus/client/summary.rb index 7b78610e..dff2f360 100644 --- a/lib/prometheus/client/summary.rb +++ b/lib/prometheus/client/summary.rb @@ -1,63 +1,68 @@ # encoding: UTF-8 -require 'quantile' require 'prometheus/client/metric' module Prometheus module Client # Summary is an accumulator for samples. It captures Numeric data and - # provides an efficient quantile calculation mechanism. + # provides the total count and sum of observations. class Summary < Metric - extend Gem::Deprecate - - # Value represents the state of a Summary at a given point. - class Value < Hash - attr_accessor :sum, :total + def type + :summary + end - def initialize(estimator) - @sum = estimator.sum - @total = estimator.observations + # Records a given value. The recorded value is usually positive + # or zero. 
A negative value is accepted but prevents current + # versions of Prometheus from properly detecting counter resets + # in the sum of observations. See + # https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations + # for details. + def observe(value, labels: {}) + base_label_set = label_set_for(labels) - estimator.invariants.each do |invariant| - self[invariant.quantile] = estimator.query(invariant.quantile) - end + @store.synchronize do + @store.increment(labels: base_label_set.merge(quantile: "count"), by: 1) + @store.increment(labels: base_label_set.merge(quantile: "sum"), by: value) end end - def type - :summary - end + # Returns a hash with "sum" and "count" as keys + def get(labels: {}) + base_label_set = label_set_for(labels) - # Records a given value. - def observe(labels, value) - label_set = label_set_for(labels) - synchronize { @values[label_set].observe(value) } + internal_counters = ["count", "sum"] + + @store.synchronize do + internal_counters.each_with_object({}) do |counter, acc| + acc[counter] = @store.get(labels: base_label_set.merge(quantile: counter)) + end + end end - alias add observe - deprecate :add, :observe, 2016, 10 - # Returns the value for the given label set - def get(labels = {}) - @validator.valid?(labels) + # Returns all label sets with their values expressed as hashes with their sum/count + def values + values = @store.all_values - synchronize do - Value.new(@values[labels]) + values.each_with_object({}) do |(label_set, v), acc| + actual_label_set = label_set.reject{|l| l == :quantile } + acc[actual_label_set] ||= { "count" => 0.0, "sum" => 0.0 } + acc[actual_label_set][label_set[:quantile]] = v end end - # Returns all label sets with their values - def values - synchronize do - @values.each_with_object({}) do |(labels, value), memo| - memo[labels] = Value.new(value) - end + def init_label_set(labels) + base_label_set = label_set_for(labels) + + @store.synchronize do + @store.set(labels: 
base_label_set.merge(quantile: "count"), val: 0) + @store.set(labels: base_label_set.merge(quantile: "sum"), val: 0) end end private - def default - Quantile::Estimator.new + def reserved_labels + [:quantile] end end end diff --git a/lib/prometheus/client/version.rb b/lib/prometheus/client/version.rb index e8546c71..150eacca 100644 --- a/lib/prometheus/client/version.rb +++ b/lib/prometheus/client/version.rb @@ -2,6 +2,6 @@ module Prometheus module Client - VERSION = '0.7.1' + VERSION = '4.2.5' end end diff --git a/lib/prometheus/middleware/collector.rb b/lib/prometheus/middleware/collector.rb index 4cee038c..66635a4e 100644 --- a/lib/prometheus/middleware/collector.rb +++ b/lib/prometheus/middleware/collector.rb @@ -1,6 +1,5 @@ # encoding: UTF-8 -require 'benchmark' require 'prometheus/client' module Prometheus @@ -11,15 +10,11 @@ module Middleware # By default metrics are registered on the global registry. Set the # `:registry` option to use a custom registry. # - # By default metrics all have the prefix "http_server". Set to something - # else if you like. + # By default metrics all have the prefix "http_server". Set + # `:metrics_prefix` to something else if you like. # - # The request counter metric is broken down by code, method and path by - # default. Set the `:counter_label_builder` option to use a custom label - # builder. - # - # The request duration metric is broken down by method and path by default. - # Set the `:duration_label_builder` option to use a custom label builder. + # The request counter metric is broken down by code, method and path. + # The request duration metric is broken down by method and path. 
class Collector attr_reader :app, :registry @@ -27,8 +22,6 @@ def initialize(app, options = {}) @app = app @registry = options[:registry] || Client.registry @metrics_prefix = options[:metrics_prefix] || 'http_server' - @counter_lb = options[:counter_label_builder] || COUNTER_LB - @duration_lb = options[:duration_label_builder] || DURATION_LB init_request_metrics init_exception_metrics @@ -40,64 +33,76 @@ def call(env) # :nodoc: protected - # rubocop:disable Metrics/LineLength - aggregation = lambda do |str| - str - .gsub(%r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(/|$)}, '/:uuid\\1') - .gsub(%r{/\d+(/|$)}, '/:id\\1') - end - # rubocop:enable Metrics/LineLength - - COUNTER_LB = proc do |env, code| - { - code: code, - method: env['REQUEST_METHOD'].downcase, - path: aggregation.call(env['PATH_INFO']), - } - end - - DURATION_LB = proc do |env, _| - { - method: env['REQUEST_METHOD'].downcase, - path: aggregation.call(env['PATH_INFO']), - } + def realtime + start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) + yield + Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time end def init_request_metrics @requests = @registry.counter( :"#{@metrics_prefix}_requests_total", - 'The total number of HTTP requests handled by the Rack application.', + docstring: + 'The total number of HTTP requests handled by the Rack application.', + labels: %i[code method path] ) @durations = @registry.histogram( :"#{@metrics_prefix}_request_duration_seconds", - 'The HTTP response duration of the Rack application.', + docstring: 'The HTTP response duration of the Rack application.', + labels: %i[method path] ) end def init_exception_metrics @exceptions = @registry.counter( :"#{@metrics_prefix}_exceptions_total", - 'The total number of exceptions raised by the Rack application.', + docstring: 'The total number of exceptions raised by the Rack application.', + labels: [:exception] ) end def trace(env) response = nil - duration = Benchmark.realtime { response = 
yield } + duration = realtime { response = yield } record(env, response.first.to_s, duration) return response rescue => exception - @exceptions.increment(exception: exception.class.name) + @exceptions.increment(labels: { exception: exception.class.name }) raise end def record(env, code, duration) - @requests.increment(@counter_lb.call(env, code)) - @durations.observe(@duration_lb.call(env, code), duration) + path = generate_path(env) + + counter_labels = { + code: code, + method: env['REQUEST_METHOD'].downcase, + path: path, + } + + duration_labels = { + method: env['REQUEST_METHOD'].downcase, + path: path, + } + + @requests.increment(labels: counter_labels) + @durations.observe(duration, labels: duration_labels) rescue # TODO: log unexpected exception during request recording nil end + + def generate_path(env) + full_path = [env['SCRIPT_NAME'], env['PATH_INFO']].join + + strip_ids_from_path(full_path) + end + + def strip_ids_from_path(path) + path + .gsub(%r{/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(?=/|$)}, '/:uuid\\1') + .gsub(%r{/\d+(?=/|$)}, '/:id\\1') + end end end end diff --git a/lib/prometheus/middleware/exporter.rb b/lib/prometheus/middleware/exporter.rb index 5a74d8e9..a377525c 100644 --- a/lib/prometheus/middleware/exporter.rb +++ b/lib/prometheus/middleware/exporter.rb @@ -21,11 +21,12 @@ def initialize(app, options = {}) @app = app @registry = options[:registry] || Client.registry @path = options[:path] || '/metrics' + @port = options[:port] @acceptable = build_dictionary(FORMATS, FALLBACK) end def call(env) - if env['PATH_INFO'] == @path + if metrics_port?(env['SERVER_PORT']) && env['PATH_INFO'] == @path format = negotiate(env, @acceptable) format ? 
respond_with(format) : not_acceptable(FORMATS) else @@ -65,7 +66,7 @@ def extract_quality(attributes, default = 1.0) def respond_with(format) [ 200, - { 'Content-Type' => format::CONTENT_TYPE }, + { 'content-type' => format::CONTENT_TYPE }, [format.marshal(@registry)], ] end @@ -75,7 +76,7 @@ def not_acceptable(formats) [ 406, - { 'Content-Type' => 'text/plain' }, + { 'content-type' => 'text/plain' }, ["Supported media types: #{types.join(', ')}"], ] end @@ -86,6 +87,10 @@ def build_dictionary(formats, fallback) memo[format::MEDIA_TYPE] = format end end + + def metrics_port?(request_port) + @port.nil? || @port.to_s == request_port + end end end end diff --git a/prometheus-client.gemspec b/prometheus-client.gemspec index cbe86a72..29aed5cf 100644 --- a/prometheus-client.gemspec +++ b/prometheus-client.gemspec @@ -7,13 +7,19 @@ Gem::Specification.new do |s| s.version = Prometheus::Client::VERSION s.summary = 'A suite of instrumentation metric primitives' \ 'that can be exposed through a web services interface.' 
- s.authors = ['Tobias Schmidt'] - s.email = ['ts@soundcloud.com'] + s.authors = ['Ben Kochie', 'Chris Sinjakli', 'Daniel Magliola'] + s.email = ['superq@gmail.com', 'chris@sinjakli.co.uk', 'dmagliola@crystalgears.com'] s.homepage = 'https://github.com/prometheus/client_ruby' - s.license = 'Apache 2.0' + s.license = 'Apache-2.0' - s.files = %w(README.md) + Dir.glob('{lib/**/*}') + s.files = %w(README.md LICENSE) + Dir.glob('{lib/**/*}') s.require_paths = ['lib'] - s.add_dependency 'quantile', '~> 0.2.0' + s.add_dependency 'base64' + s.add_dependency 'cgi' + + s.add_development_dependency 'benchmark' + s.add_development_dependency 'benchmark-ips' + s.add_development_dependency 'concurrent-ruby' + s.add_development_dependency 'timecop' end diff --git a/spec/benchmarks/README.md b/spec/benchmarks/README.md new file mode 100644 index 00000000..5d9d96fc --- /dev/null +++ b/spec/benchmarks/README.md @@ -0,0 +1,67 @@ +# Performance Benchmarks + +The intention behind these benchmarks is twofold: + +- On the one hand, if you have performance concerns for your counters, they'll allow you + to simulate a reasonably realistic scenario, with your particular runtime characteristics, + so you can know what kind of performance to expect under different circumstances, and pick + settings accordingly. + +- On the other hand, if you are developing your own Custom Data Store (more on this in + `/lib/prometheus/client/data_stores/README.md`), this will allow you to test how it + performs compared to the built-in ones, and also "system test" it to validate that it + behaves appropriately. + +## Benchmarks included + +### Data Stores Performance + +The Prometheus Ruby Client ships with different built-in data stores, optimized for +different common scenarios (more on this on the repo's main README, under `Data Stores`). + +This benchmark can show you, for your particular runtime environment, what kind of +performance you can expect from each, to pick the one that's best for you.
+ +More importantly, in a case where the built-in stores may not be useful for your +particular circumstances, you might want to make your own Data Store. If that is the case, +this benchmark will help you compare its performance characteristics to the built-in +stores, and will also run an export after the observations are made, and compare it with +the built-in ones, helping you catch potential bugs in your store, if the output doesn't +match. + +The benchmark was made to try and simulate a somewhat realistic scenario, with plenty of +high-cardinality metrics, which is what you should be aiming for. It has a balance of +counters and histograms, different label counts for different metrics, different thread +counts, etc. All this should be easy to customize to your particular needs by modifying +the constants in the benchmark to tailor to what you need to measure. + +In particular, if going for the goal of "how long it should take to increment a counter", +you probably want to have no labels and no histograms, since that's the reference +performance measurement we use. + +### Labels Performance + +Adding labels to your metrics can have significant performance impact, on two fronts: + +- Labels passed in on every observation need to be validated. This may be alleviated by + using `with_labels`. If used to pre-set *all* labels, you can save a good + amount of processing time, by skipping validation on each observation. This may be + important if you're incrementing metrics on a tight loop, and this benchmark can help + with establishing what's to be expected. + +- Even when caching them, these labels are keys to Hashes, they need to sometimes be + serialized into strings, sometimes merged into other hashes. All this incurs performance + costs. This benchmark will allow you to estimate how much impact they can have. Again, + if incrementing metrics on a tight loop, this will let you estimate whether you might + want to have fewer labels instead. 
+ +It should be easy to modify the constants in this benchmark to your particular situation, +if necessary. + +## Running the benchmarks + +Simply run, from the repo's root directory: + +`bundle exec ruby spec/benchmarks/labels.rb` +`bundle exec ruby spec/benchmarks/data_stores.rb` + diff --git a/spec/benchmarks/data_stores.rb b/spec/benchmarks/data_stores.rb new file mode 100644 index 00000000..773604df --- /dev/null +++ b/spec/benchmarks/data_stores.rb @@ -0,0 +1,330 @@ +require 'benchmark' +require 'concurrent' +require 'prometheus/client' +require 'prometheus/client/counter' +require 'prometheus/client/histogram' +require 'prometheus/client/formats/text' +require 'prometheus/client/data_stores/single_threaded' +require 'prometheus/client/data_stores/synchronized' +require 'prometheus/client/data_stores/direct_file_store' + +# Compare the time it takes different stores to observe a large number of data points, in +# a multi-threaded environment. +# +# If you create a new store and want to benchmark it, add it to the `STORES` array, +# and run the benchmark to see how it compares to the other options. +# +# Each test instantiates a number of Histograms and Counters, with a random number of +# labels, instantiates a number of threads, and then prepares a large number of +# observations, which it distributes randomly between the different metrics and threads +# created. +# +# It does this for each of the STORES specified and different THREAD_COUNTS, then once +# all that is ready, it starts the benchmark test and lets the threads run to observe +# those data points. +# +# In addition to timing the observation of data points, the benchmark also runs the Text +# Exporter on the results, and compares them between stores to make sure all stores +# result in the same output being generated. If this output doesn't match exactly, +# something is going wrong, and it probably indicates a bug in the store, so this +# benchmark also acts as a sort of system test for stores.
If a mismatch is found, a +# WARNING will show up in the output, and both the expected and actual results will be +# dumped to text files, for help in debugging. +# +# Data generation involves randomness, but the RNG is seeded so that different stores are +# exposed to the same pattern of access (as long as two test cases have the same number +# of threads), reducing the effects on the result of randomness in lock contention. +# +# NOTE: If you leave the default of 1_000_000 DATA_POINTS, then the timing result is +# showing "microseconds per observation", which is the unit we care about. +# We're aiming for 1 microsecond per observation, which is not quite achievable in Ruby, +# but that's what we're trying to approach. If you're trying to compare against this +# goal, set NUM_HISTOGRAMS and MAX_LABELS to 0, for a fair comparison, as both labels +# and histograms are much slower than label-less counters. +#----------------------------------------------------------------------------------- + +# Store class that follows the required interface but does nothing. Used as a baseline +# of how much time is spent outside the store. 
+class NoopStore + def for_metric(metric_name, metric_type:, metric_settings: {}) + MetricStore.new + end + + class MetricStore + def synchronize + yield + end + + def set(labels:, val:); end + def increment(labels:, by: 1); end + def get(labels:); end + def all_values; {}; end + end +end + +#----------------------------------------------------------------------------------- + +RANDOM_SEED = 12345678 +NUM_COUNTERS = 80 +NUM_HISTOGRAMS = 20 +DATA_POINTS = 1_000_000 +MIN_LABELS = 0 +MAX_LABELS = 4 +THREAD_COUNTS = [1, 2, 4, 8, 12, 16, 20] + +TMP_DIR = "/tmp/prometheus_benchmark" + +STORES = [ + { store: NoopStore.new }, + { store: Prometheus::Client::DataStores::SingleThreaded.new, max_threads: 1 }, + { store: Prometheus::Client::DataStores::Synchronized.new }, + { + store: Prometheus::Client::DataStores::DirectFileStore.new(dir: TMP_DIR), + before: -> () { cleanup_dir(TMP_DIR) }, + } +] + +#----------------------------------------------------------------------------------- + +class TestSetup + attr_reader :random, :num_threads, :registry + attr_reader :metrics, :threads # Simple arrays + attr_reader :data_points # Hash, indexed by Thread ID, with an array of points to observe + attr_reader :start_event + + def initialize(store, num_threads) + Prometheus::Client.config.data_store = @store = store + + @random = Random.new(RANDOM_SEED) # Repeatable random numbers for each test + @start_event = Concurrent::Event.new # Event all threads wait on to start, once set up + @num_threads = num_threads + @threads = [] + @metrics = [] + @data_points = {} + @registry = Prometheus::Client::Registry.new + + setup_threads + setup_metrics + create_datapoints + end + + def observe! 
+ start_event.set # Release the threads to process their events + threads.each { |thr| thr.join } # Wait for all threads to finish and die + end + + def export!(expected_output) + output = Prometheus::Client::Formats::Text.marshal(registry) + + # Output validation doesn't work for NoopStore + return nil if @store.is_a?(NoopStore) + + puts "\nWARNING: Empty output" if !output || output.empty? + + # If this is the first store to run for this number of threads, store expected_output + return output if expected_output.nil? + + # Otherwise, make sure this store's output was the same as the previous one. + # If it isn't, there's probably a bug in the store + return output if output == expected_output + + # Outputs don't match. Report + expected_filename = "data_mismatch_#{ @store.class.name }_#{ num_threads }thr_expected.txt" + actual_filename = "data_mismatch_#{ @store.class.name }_#{ num_threads }thr_actual.txt" + puts "\nWARNING: Output Mismatch.\nSee #{ expected_filename }\nand #{ actual_filename }" + + File.open(expected_filename, 'w') {|f| f.write(expected_output) } + File.open(actual_filename, 'w') {|f| f.write(output) } + + return expected_output + end + + private + + def setup_threads + latch = Concurrent::CountDownLatch.new(num_threads) + + num_threads.times do |i| + threads << Thread.new(i) do |thread_id| + latch.count_down + start_event.wait # Wait for the test to start + thread_run(thread_id) # Process this thread's events + end + end + + latch.wait # Wait for all threads to have started + end + + def setup_metrics + NUM_COUNTERS.times do |i| + labelset = generate_labelset + counter = Prometheus::Client::Counter.new( + "counter#{ i }".to_sym, + docstring: "Counter #{ i }", + labels: labelset.keys, + preset_labels: labelset + ) + + metrics << counter + end + + NUM_HISTOGRAMS.times do |i| + labelset = generate_labelset + histogram = Prometheus::Client::Histogram.new( + "histogram#{ i }".to_sym, + docstring: "Histogram #{ i }", + labels: labelset.keys, + 
preset_labels: labelset + ) + + metrics << histogram + end + + metrics.each { |metric| registry.register(metric) } + end + + def create_datapoints + num_threads.times do |i| + data_points[i] = [] + end + + thread_id = 0 + DATA_POINTS.times do |i| + thread_id = (thread_id + 1) % num_threads + metric = random_metric + + if metric.type == :counter + data_points[thread_id] << [metric] + else + data_points[thread_id] << [metric, random.rand * 10] + end + end + end + + def thread_run(thread_id) + thread_points = data_points[thread_id] + thread_points.each do |point| + metric = point[0] + if metric.type == :counter + metric.increment + else + metric.observe(point[1]) + end + end + end + + def generate_labelset + num_labels = random.rand(MAX_LABELS - MIN_LABELS + 1) + MIN_LABELS + (1..num_labels).map {|j| ["label#{ j }".to_sym, "foo"] }.to_h + end + + def random_metric + metrics[random.rand(metrics.count)] + end +end + +def cleanup_dir(dir) + Dir.glob("#{ dir }/*").each { |file| File.delete(file) } +end + +#----------------------------------------------------------------------------------- + +# Monkey-patch the exporter to round Float numbers +# This is necessary in order to compare outputs from different stores, and make sure +# the user-built stores are working correctly. +# +# In multi-threaded scenarios, adding up a large amount of floats in different orders +# results in small rounding errors when adding the same numbers. This is not a bug +# in the store, or anywhere, it's the nature of Floats. +# E.g.: 4909.026018536727 +# vs 4909.026018536722 +# +# In the real exporter, this is not a problem, because the exported numbers are still +# correct, but when comparing one to the other, these tiny deltas result in false +# alarms for *all* stores under multiple threads. +# +# Monkey-patching the output line to round the number allows us to compare these outputs +# without any noticeable downside. 
+module Prometheus + module Client + module Formats + module Text + def self.metric(name, labels, value) + format(METRIC_LINE, name, labels, value.round(6)) + end + end + end + end +end + +#----------------------------------------------------------------------------------- + +Benchmark.bm(45) do |bm| + THREAD_COUNTS.each do |num_threads| + expected_exporter_output = nil + + STORES.each do |store_test| + # Single Threaded stores can't run in multiple threads + next if store_test[:max_threads] && num_threads > store_test[:max_threads] + + # Cleanup before test + store_test[:before].call if store_test[:before] + + test_setup = TestSetup.new(store_test[:store], num_threads) + store_name = store_test[:store].class.name.split('::').last + test_name ="#{ (store_test[:name] || store_name).ljust(25) } x#{ num_threads }" + + bm.report("Observe #{test_name}") { test_setup.observe! } + bm.report("Export #{test_name}") do + expected_exporter_output = test_setup.export!(expected_exporter_output) + end + end + + puts "-" * 80 + end +end + + +#-------------------------------------------------------------------------------------- +# Sample Results: +# +# Only counters, no labels, DirectFileStore stored in TMPFS, Ruby 2.5.1 +# ---------------------------------------------------------------- +# user system total real +# Observe NoopStore x1 0.390845 0.019915 0.410760 ( 0.413240) +# Export NoopStore x1 0.000462 0.000029 0.000491 ( 0.000489) +# Observe SingleThreaded x1 0.946516 0.044122 0.990638 ( 0.990801) +# Export SingleThreaded x1 0.000837 0.000000 0.000837 ( 0.000838) +# Observe Synchronized x1 4.038891 0.000000 4.038891 ( 4.039304) +# Export Synchronized x1 0.001227 0.000000 0.001227 ( 0.001229) +# Observe DirectFileStore x1 7.414242 1.732539 9.146781 ( 9.147389) +# Export DirectFileStore x1 0.009920 0.000243 0.010163 ( 0.010170) +# -------------------------------------------------------------------------------- +# Observe NoopStore x2 0.337919 0.000000 0.337919 ( 0.337575) +# 
Export NoopStore x2 0.000404 0.000000 0.000404 ( 0.000379) +# Observe Synchronized x2 4.313595 0.008714 4.322309 ( 4.314901) +# Export Synchronized x2 0.001649 0.000155 0.001804 ( 0.001809) +# Observe DirectFileStore x2 22.193105 12.739370 34.932475 ( 21.503215) +# Export DirectFileStore x2 0.005982 0.008480 0.014462 ( 0.014471) +# +# +# +# Default benchmark (Mix of Counters and Histograms, and up to 4 labels), +# DirectFileStore stored in TMPFS, Ruby 2.5.1 +# ------------------------------------------ +# user system total real +# Observe NoopStore x1 0.994314 0.027816 1.022130 ( 1.025121) +# Export NoopStore x1 0.000537 0.000032 0.000569 ( 0.000574) +# Observe SingleThreaded x1 4.439427 0.027929 4.467356 ( 4.470777) +# Export SingleThreaded x1 0.006244 0.000000 0.006244 ( 0.006250) +# Observe Synchronized x1 8.292962 0.000000 8.292962 ( 8.293737) +# Export Synchronized x1 0.006698 0.000000 0.006698 ( 0.006706) +# Observe DirectFileStore x1 13.448161 2.517563 15.965724 ( 15.967281) +# Export DirectFileStore x1 0.020115 0.004012 0.024127 ( 0.024135) +# -------------------------------------------------------------------------------- +# Observe NoopStore x2 1.342963 0.020541 1.363504 ( 1.354383) +# Export NoopStore x2 0.002923 0.000000 0.002923 ( 0.002927) +# Observe Synchronized x2 8.810914 0.029352 8.840266 ( 8.828600) +# Export Synchronized x2 0.007535 0.000000 0.007535 ( 0.007540) +# Observe DirectFileStore x2 41.483649 19.362639 60.846288 ( 39.026703) +# Export DirectFileStore x2 0.010133 0.013159 0.023292 ( 0.023302) diff --git a/spec/benchmarks/labels.rb b/spec/benchmarks/labels.rb new file mode 100644 index 00000000..42f8ebda --- /dev/null +++ b/spec/benchmarks/labels.rb @@ -0,0 +1,127 @@ +require 'benchmark/ips' +require 'prometheus/client' +require 'prometheus/client/counter' +require 'prometheus/client/data_stores/single_threaded' + +# Compare the time it takes to observe metrics that have labels (disregarding the actual +# data store) +# +# This benchmark 
compares 3 different metrics, with 0, 2 and 100 labels respectively, +# and how using `with_labels` for some, or all their label values affects performance. +# +# The hypothesis here is that, once labels are introduced, we're validating those labels +# in every observation, but if those labels are "cached" using `with_labels`, we skip that +# validation which should be *considerably* faster. +# +# This completely disregards the storage of this data in memory, and it's highly likely +# that more labels will make things slower in the data store, even if the metrics themselves +# don't add overhead. So the fact that using `with_labels` with all labels adds no overhead +# to the metric itself doesn't mean labels have no overhead. +# +# To see what it looks like with the best-case scenario data store, uncomment the line +# that sets the `data_store` to `SingleThreaded` +#------------------------------------------------------------------------------------- +# Store that doesn't do anything, so we can focus as much as possible on the timings of +# the Metric itself +class NoopStore + def for_metric(metric_name, metric_type:, metric_settings: {}) + MetricStore.new + end + + class MetricStore + def synchronize + yield + end + + def set(labels:, val:); end + def increment(labels:, by: 1); end + def get(labels:); end + def all_values; end + end +end + +Prometheus::Client.config.data_store = NoopStore.new # No data storage +# Prometheus::Client.config.data_store = Prometheus::Client::DataStores::SingleThreaded.new # Simple data storage + +#------------------------------------------------------------------------------------- +# Set up of the 3 metrics, plus their half-cached and full-cached versions +NO_LABELS_COUNTER = Prometheus::Client::Counter.new( + :no_labels, + docstring: "Counter with no labels" +) + +TWO_LABELSET = { label1: "a", label2: "b"} +LAST_ONE_LABELSET = { label2: "b"} +TWO_LABELS_COUNTER = Prometheus::Client::Counter.new( + :two_labels, + docstring: "Counter 
with 2 labels", + labels: [:label1, :label2] +) +TWO_LABELS_ONE_CACHED = TWO_LABELS_COUNTER.with_labels(label1: "a") +TWO_LABELS_ALL_CACHED = TWO_LABELS_COUNTER.with_labels(label1: "a", label2: "b") + + +HUNDRED_LABELS = (1..100).map{|i| "label#{ i }".to_sym } +HUNDRED_LABELSET = (1..100).map{|i| ["label#{ i }".to_sym, i.to_s] }.to_h +FIRST_FIFTY_LABELSET = (1..50).map{|i| ["label#{ i }".to_sym, i.to_s] }.to_h +LAST_FIFTY_LABELSET = (51..100).map{|i| ["label#{ i }".to_sym, i.to_s] }.to_h + +HUNDRED_LABELS_COUNTER = Prometheus::Client::Counter.new( + :hundred_labels, + docstring: "Counter with 100 labels", + labels: HUNDRED_LABELS +) +HUNDRED_LABELS_HALF_CACHED = HUNDRED_LABELS_COUNTER.with_labels(FIRST_FIFTY_LABELSET) +HUNDRED_LABELS_ALL_CACHED = HUNDRED_LABELS_COUNTER.with_labels(HUNDRED_LABELSET) + +#------------------------------------------------------------------------------------- +# Actual Benchmark + +Benchmark.ips do |x| + x.config(:time => 5, :warmup => 2) + + x.report("0 labels") { NO_LABELS_COUNTER.increment } + x.report("2 labels") { TWO_LABELS_COUNTER.increment(labels: TWO_LABELSET) } + x.report("100 labels") { HUNDRED_LABELS_COUNTER.increment(labels: HUNDRED_LABELSET) } + + x.report("2 lab, half cached") { TWO_LABELS_ONE_CACHED.increment(labels: LAST_ONE_LABELSET) } + x.report("100 lab, half cached") { HUNDRED_LABELS_HALF_CACHED.increment(labels: LAST_FIFTY_LABELSET) } + + x.report("2 lab, all cached") { TWO_LABELS_ALL_CACHED.increment } + x.report("100 lab, all cached") { HUNDRED_LABELS_ALL_CACHED.increment } +end + +#------------------------------------------------------------------------------------- +# Conclusion: +# +# Without a data store: +# +# 0 labels 3.592M (± 3.7%) i/s - 18.081M in 5.039832s +# 2 labels 502.898k (± 3.2%) i/s - 2.536M in 5.048618s +# 100 labels 19.467k (± 4.8%) i/s - 98.280k in 5.061444s +# 2 lab, half cached 432.844k (± 3.0%) i/s - 2.180M in 5.041123s +# 100 lab, half cached 20.444k (± 3.4%) i/s - 103.636k in 5.075070s +# 
2 lab, all cached 3.668M (± 3.3%) i/s - 18.338M in 5.004442s +# 100 lab, all cached 3.711M (± 4.0%) i/s - 18.544M in 5.005362s +# +# As we expected, labels introduce a significant overhead, even in small numbers, but +# if they are all pre-set, the effect is negligible. +# Pre-setting *some* labels, however, does not improve performance. It may still be desirable +# to avoid repetition, though. +# +# So, if observing measurements in a tight loop, it's highly recommended to use `with_labels` +# and pre-set all labels. +# +# +# With the simplest possible data store: +# +# 0 labels 1.275M (± 3.1%) i/s - 6.419M in 5.038946s +# 2 labels 195.293k (± 4.3%) i/s - 974.600k in 5.000375s +# 100 labels 6.410k (± 7.5%) i/s - 32.022k in 5.028417s +# 2 lab, half cached 187.255k (± 3.5%) i/s - 948.618k in 5.072189s +# 100 lab, half cached 6.846k (± 2.7%) i/s - 34.424k in 5.031776s +# 2 lab, all cached 376.353k (± 3.3%) i/s - 1.890M in 5.025963s +# 100 lab, all cached 11.669k (± 3.0%) i/s - 58.752k in 5.039468s +# +# As mentioned above, once we're storing the data, labels *can* have a serious impact, +# and that impact will be highly store dependent. \ No newline at end of file diff --git a/spec/examples/data_store_example.rb b/spec/examples/data_store_example.rb new file mode 100644 index 00000000..c60d7928 --- /dev/null +++ b/spec/examples/data_store_example.rb @@ -0,0 +1,76 @@ +# encoding: UTF-8 + +# NOTE: Do not change instances of `eql` to `eq` in this file. +# +# The interface of a store is a labelset (hash of hashes) to a double. It's important +# that we check the values are doubles rather than integers. `==`, which is what `eq` +# calls, allows conversion between floats and integers (i.e. `5 == 5.0`). `eql` enforces +# that the two numbers are of the same type. 
+shared_examples_for Prometheus::Client::DataStores do + describe "MetricStore#set and #get" do + it "returns the value set for each labelset" do + expect(metric_store.get(labels: { foo: "bar" })).to eql(0.0) + end + end + + describe "MetricStore#set and #get" do + it "returns the value set for each labelset" do + metric_store.set(labels: { foo: "bar" }, val: 5) + metric_store.set(labels: { foo: "baz" }, val: 2) + expect(metric_store.get(labels: { foo: "bar" })).to eql(5.0) + expect(metric_store.get(labels: { foo: "baz" })).to eql(2.0) + expect(metric_store.get(labels: { foo: "bat" })).to eql(0.0) + end + end + + describe "MetricStore#increment" do + it "returns the value set for each labelset" do + metric_store.set(labels: { foo: "bar" }, val: 5) + metric_store.set(labels: { foo: "baz" }, val: 2) + + metric_store.increment(labels: { foo: "bar" }) + metric_store.increment(labels: { foo: "baz" }, by: 7) + metric_store.increment(labels: { foo: "zzz" }, by: 3) + + expect(metric_store.get(labels: { foo: "bar" })).to eql(6.0) + expect(metric_store.get(labels: { foo: "baz" })).to eql(9.0) + expect(metric_store.get(labels: { foo: "zzz" })).to eql(3.0) + end + end + + describe "MetricStore#synchronize" do + # I'm not sure it's possible to actually test that this synchronizes, but at least + # it should run the passed block + it "accepts a block and runs it" do + a = 0 + metric_store.synchronize{ a += 1 } + expect(a).to eq(1) + end + + # This is just a safety check that we're not getting "nested transaction" issues + it "allows modifying the store while in synchronized block" do + metric_store.synchronize do + metric_store.increment(labels: { foo: "bar" }) + metric_store.increment(labels: { foo: "baz" }) + end + end + end + + describe "MetricStore#all_values" do + it "returns all specified labelsets, with their associated value" do + metric_store.set(labels: { foo: "bar" }, val: 5) + metric_store.set(labels: { foo: "baz" }, val: 2) + + expect(metric_store.all_values).to 
eql( + { foo: "bar" } => 5.0, + { foo: "baz" } => 2.0, + ) + end + + context "for a combination of labels that hasn't had a value set" do + it "returns 0.0" do + expect(metric_store.all_values[{ foo: "bar" }]).to eql(0.0) + end + end + end +end diff --git a/spec/examples/metric_example.rb b/spec/examples/metric_example.rb index c577332e..bbe5a454 100644 --- a/spec/examples/metric_example.rb +++ b/spec/examples/metric_example.rb @@ -1,30 +1,32 @@ # encoding: UTF-8 shared_examples_for Prometheus::Client::Metric do - subject { described_class.new(:foo, 'foo description') } + subject { described_class.new(:foo, docstring: 'foo description') } describe '.new' do it 'returns a new metric' do - expect(subject).to be + expect(subject).to be_a(Prometheus::Client::Metric) end it 'raises an exception if a reserved base label is used' do exception = Prometheus::Client::LabelSetValidator::ReservedLabelError expect do - described_class.new(:foo, 'foo docstring', __name__: 'reserved') + described_class.new(:foo, + docstring: 'foo docstring', + preset_labels: { __name__: 'reserved' }) end.to raise_exception exception end it 'raises an exception if the given name is blank' do expect do - described_class.new(nil, 'foo') + described_class.new(nil, docstring: 'foo') end.to raise_exception ArgumentError end it 'raises an exception if docstring is missing' do expect do - described_class.new(:foo, '') + described_class.new(:foo, docstring: '') end.to raise_exception ArgumentError end @@ -37,7 +39,7 @@ "abc\ndef".to_sym, ].each do |name| expect do - described_class.new(name, 'foo') + described_class.new(name, docstring: 'foo') end.to raise_exception(ArgumentError) end end @@ -48,14 +50,4 @@ expect(subject.type).to be_a(Symbol) end end - - describe '#get' do - it 'returns the current metric value' do - expect(subject.get).to be_a(type) - end - - it 'returns the current metric value for a given label set' do - expect(subject.get(test: 'label')).to be_a(type) - end - end end diff --git 
a/spec/prometheus/client/counter_spec.rb b/spec/prometheus/client/counter_spec.rb index 69bf02a7..51ee15e8 100644 --- a/spec/prometheus/client/counter_spec.rb +++ b/spec/prometheus/client/counter_spec.rb @@ -1,15 +1,26 @@ # encoding: UTF-8 +require 'prometheus/client' require 'prometheus/client/counter' require 'examples/metric_example' +require 'prometheus/client/data_stores/direct_file_store' describe Prometheus::Client::Counter do - let(:counter) { Prometheus::Client::Counter.new(:foo, 'foo description') } + # Reset the data store + before do + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new + end + + let(:expected_labels) { [] } - it_behaves_like Prometheus::Client::Metric do - let(:type) { Float } + let(:counter) do + Prometheus::Client::Counter.new(:foo, + docstring: 'foo description', + labels: expected_labels) end + it_behaves_like Prometheus::Client::Metric + describe '#increment' do it 'increments the counter' do expect do @@ -17,23 +28,27 @@ end.to change { counter.get }.by(1.0) end - it 'increments the counter for a given label set' do - expect do + context "with a an expected label set" do + let(:expected_labels) { [:test] } + + it 'increments the counter for a given label set' do expect do - counter.increment(test: 'label') - end.to change { counter.get(test: 'label') }.by(1.0) - end.to_not change { counter.get } + expect do + counter.increment(labels: { test: 'label' }) + end.to change { counter.get(labels: { test: 'label' }) }.by(1.0) + end.to_not change { counter.get(labels: { test: 'other' }) } + end end it 'increments the counter by a given value' do expect do - counter.increment({}, 5) + counter.increment(by: 5) end.to change { counter.get }.by(5.0) end it 'raises an ArgumentError on negative increments' do expect do - counter.increment({}, -1) + counter.increment(by: -1) end.to raise_error ArgumentError end diff --git a/spec/prometheus/client/data_stores/direct_file_store_spec.rb 
b/spec/prometheus/client/data_stores/direct_file_store_spec.rb new file mode 100644 index 00000000..c140546e --- /dev/null +++ b/spec/prometheus/client/data_stores/direct_file_store_spec.rb @@ -0,0 +1,362 @@ +# encoding: UTF-8 + +require 'prometheus/client/data_stores/direct_file_store' +require 'examples/data_store_example' + +describe Prometheus::Client::DataStores::DirectFileStore do + subject { described_class.new(dir: "/tmp/prometheus_test") } + let(:metric_store) { subject.for_metric(:metric_name, metric_type: :counter) } + + # Reset the PStores + before do + Dir.glob('/tmp/prometheus_test/*').each { |file| File.delete(file) } + end + + it_behaves_like Prometheus::Client::DataStores + + it "only accepts valid :aggregation values as Metric Settings" do + expect do + subject.for_metric(:metric_name, + metric_type: :counter, + metric_settings: { aggregation: Prometheus::Client::DataStores::DirectFileStore::SUM }) + end.not_to raise_error + + expect do + subject.for_metric(:metric_name, + metric_type: :counter, + metric_settings: { aggregation: :invalid }) + end.to raise_error(Prometheus::Client::DataStores::DirectFileStore::InvalidStoreSettingsError) + end + + it "only accepts valid keys as Metric Settings" do + # the only valid key at the moment is :aggregation + expect do + subject.for_metric(:metric_name, + metric_type: :counter, + metric_settings: { some_setting: true }) + end.to raise_error(Prometheus::Client::DataStores::DirectFileStore::InvalidStoreSettingsError) + end + + it "only accepts :most_recent aggregation for gauges" do + expect do + subject.for_metric(:metric_name, + metric_type: :gauge, + metric_settings: { aggregation: Prometheus::Client::DataStores::DirectFileStore::MOST_RECENT }) + end.not_to raise_error + + expect do + subject.for_metric(:metric_name, + metric_type: :counter, + metric_settings: { aggregation: Prometheus::Client::DataStores::DirectFileStore::MOST_RECENT }) + end.to 
raise_error(Prometheus::Client::DataStores::DirectFileStore::InvalidStoreSettingsError) + + expect do + subject.for_metric(:metric_name, + metric_type: :histogram, + metric_settings: { aggregation: Prometheus::Client::DataStores::DirectFileStore::MOST_RECENT }) + end.to raise_error(Prometheus::Client::DataStores::DirectFileStore::InvalidStoreSettingsError) + + expect do + subject.for_metric(:metric_name, + metric_type: :summary, + metric_settings: { aggregation: Prometheus::Client::DataStores::DirectFileStore::MOST_RECENT }) + end.to raise_error(Prometheus::Client::DataStores::DirectFileStore::InvalidStoreSettingsError) + end + + it "raises when aggregating if we get to that that point with an invalid aggregation mode" do + # This is basically just for coverage of a safety clause that can never be reached + allow(subject).to receive(:validate_metric_settings) # turn off validation + + metric = subject.for_metric(:metric_name, + metric_type: :counter, + metric_settings: { aggregation: :invalid }) + metric.increment(labels: {}, by: 1) + + expect do + metric.all_values + end.to raise_error(Prometheus::Client::DataStores::DirectFileStore::InvalidStoreSettingsError) + end + + it "opens the same file twice, if it already exists" do + # Testing this simply for coverage + ms = metric_store + ms.increment(labels: {}, by: 1) + + ms2 = subject.for_metric(:metric_name, metric_type: :counter) + ms2.increment(labels: {}, by: 1) + end + + context "when process is forked" do + it "opens a new internal store to avoid two processes using the same file" do + allow(Process).to receive(:pid).and_return(12345) + metric_store = subject.for_metric(:metric_name, metric_type: :counter) + metric_store.increment(labels: {}, by: 1) + + allow(Process).to receive(:pid).and_return(23456) + metric_store.increment(labels: {}, by: 1) + expect(Dir.glob('/tmp/prometheus_test/*').size).to eq(2) + expect(metric_store.all_values).to eq({} => 2.0) + end + end + + it "coalesces values irrespective of the 
order of labels" do + metric_store1 = subject.for_metric(:metric_name, metric_type: :counter) + metric_store1.increment(labels: { foo: "1", bar: "1" }, by: 1) + metric_store1.increment(labels: { foo: "1", bar: "2" }, by: 7) + metric_store1.increment(labels: { foo: "2", bar: "1" }, by: 3) + + metric_store1.increment(labels: { foo: "1", bar: "1" }, by: 10) + metric_store1.increment(labels: { bar: "1", foo: "1" }, by: 10) + + expect(metric_store1.all_values).to eq( + { foo: "1", bar: "1" } => 21.0, + { foo: "1", bar: "2" } => 7.0, + { foo: "2", bar: "1" } => 3.0, + ) + end + + context "for a non-gauge metric" do + it "sums values from different processes by default" do + allow(Process).to receive(:pid).and_return(12345) + metric_store1 = subject.for_metric(:metric_name, metric_type: :counter) + metric_store1.set(labels: { foo: "bar" }, val: 1) + metric_store1.set(labels: { foo: "baz" }, val: 7) + metric_store1.set(labels: { foo: "yyy" }, val: 3) + + allow(Process).to receive(:pid).and_return(23456) + metric_store2 = subject.for_metric(:metric_name, metric_type: :counter) + metric_store2.set(labels: { foo: "bar" }, val: 3) + metric_store2.set(labels: { foo: "baz" }, val: 2) + metric_store2.set(labels: { foo: "zzz" }, val: 1) + + expect(metric_store2.all_values).to eq( + { foo: "bar" } => 4.0, + { foo: "baz" } => 9.0, + { foo: "yyy" } => 3.0, + { foo: "zzz" } => 1.0, + ) + + # Both processes should return the same value + expect(metric_store1.all_values).to eq(metric_store2.all_values) + end + end + + context "for a gauge metric" do + it "exposes each process's individual value by default" do + allow(Process).to receive(:pid).and_return(12345) + metric_store1 = subject.for_metric( + :metric_name, + metric_type: :gauge, + ) + metric_store1.set(labels: { foo: "bar" }, val: 1) + metric_store1.set(labels: { foo: "baz" }, val: 7) + metric_store1.set(labels: { foo: "yyy" }, val: 3) + + allow(Process).to receive(:pid).and_return(23456) + metric_store2 = subject.for_metric( + 
:metric_name, + metric_type: :gauge, + ) + metric_store2.set(labels: { foo: "bar" }, val: 3) + metric_store2.set(labels: { foo: "baz" }, val: 2) + metric_store2.set(labels: { foo: "zzz" }, val: 1) + + expect(metric_store1.all_values).to eq( + { foo: "bar", pid: "12345" } => 1.0, + { foo: "bar", pid: "23456" } => 3.0, + { foo: "baz", pid: "12345" } => 7.0, + { foo: "baz", pid: "23456" } => 2.0, + { foo: "yyy", pid: "12345" } => 3.0, + { foo: "zzz", pid: "23456" } => 1.0, + ) + + # Both processes should return the same value + expect(metric_store1.all_values).to eq(metric_store2.all_values) + end + + it "coalesces values irrespective of the order of labels" do + allow(Process).to receive(:pid).and_return(12345) + metric_store1 = subject.for_metric(:metric_name, metric_type: :gauge) + metric_store1.set(labels: { foo: "1", bar: "1" }, val: 1) + metric_store1.set(labels: { foo: "1", bar: "2" }, val: 7) + metric_store1.set(labels: { foo: "2", bar: "1" }, val: 3) + + metric_store1.set(labels: { bar: "1", foo: "1" }, val: 10) + + expect(metric_store1.all_values).to eq( + { foo: "1", bar: "1", pid: "12345" } => 10.0, + { foo: "1", bar: "2", pid: "12345" } => 7.0, + { foo: "2", bar: "1", pid: "12345" } => 3.0, + ) + + end + end + + context "with a metric that takes MAX instead of SUM" do + it "reports the maximum values from different processes" do + allow(Process).to receive(:pid).and_return(12345) + metric_store1 = subject.for_metric( + :metric_name, + metric_type: :gauge, + metric_settings: { aggregation: :max } + ) + metric_store1.set(labels: { foo: "bar" }, val: 1) + metric_store1.set(labels: { foo: "baz" }, val: 7) + metric_store1.set(labels: { foo: "yyy" }, val: 3) + + allow(Process).to receive(:pid).and_return(23456) + metric_store2 = subject.for_metric( + :metric_name, + metric_type: :gauge, + metric_settings: { aggregation: :max } + ) + metric_store2.set(labels: { foo: "bar" }, val: 3) + metric_store2.set(labels: { foo: "baz" }, val: 2) + metric_store2.set(labels: 
{ foo: "zzz" }, val: 1) + + expect(metric_store1.all_values).to eq( + { foo: "bar" } => 3.0, + { foo: "baz" } => 7.0, + { foo: "yyy" } => 3.0, + { foo: "zzz" } => 1.0, + ) + + # Both processes should return the same value + expect(metric_store1.all_values).to eq(metric_store2.all_values) + end + end + + context "with a metric that takes MIN instead of SUM" do + it "reports the minimum values from different processes" do + allow(Process).to receive(:pid).and_return(12345) + metric_store1 = subject.for_metric( + :metric_name, + metric_type: :gauge, + metric_settings: { aggregation: :min } + ) + metric_store1.set(labels: { foo: "bar" }, val: 1) + metric_store1.set(labels: { foo: "baz" }, val: 7) + metric_store1.set(labels: { foo: "yyy" }, val: 3) + + allow(Process).to receive(:pid).and_return(23456) + metric_store2 = subject.for_metric( + :metric_name, + metric_type: :gauge, + metric_settings: { aggregation: :min } + ) + metric_store2.set(labels: { foo: "bar" }, val: 3) + metric_store2.set(labels: { foo: "baz" }, val: 2) + metric_store2.set(labels: { foo: "zzz" }, val: 1) + + expect(metric_store1.all_values).to eq( + { foo: "bar" } => 1.0, + { foo: "baz" } => 2.0, + { foo: "yyy" } => 3.0, + { foo: "zzz" } => 1.0, + ) + + # Both processes should return the same value + expect(metric_store1.all_values).to eq(metric_store2.all_values) + end + end + + context "with a metric that takes ALL instead of SUM" do + it "reports all the values from different processes" do + allow(Process).to receive(:pid).and_return(12345) + metric_store1 = subject.for_metric( + :metric_name, + metric_type: :counter, + metric_settings: { aggregation: :all } + ) + metric_store1.set(labels: { foo: "bar" }, val: 1) + metric_store1.set(labels: { foo: "baz" }, val: 7) + metric_store1.set(labels: { foo: "yyy" }, val: 3) + + allow(Process).to receive(:pid).and_return(23456) + metric_store2 = subject.for_metric( + :metric_name, + metric_type: :counter, + metric_settings: { aggregation: :all } + ) + 
metric_store2.set(labels: { foo: "bar" }, val: 3) + metric_store2.set(labels: { foo: "baz" }, val: 2) + metric_store2.set(labels: { foo: "zzz" }, val: 1) + + expect(metric_store1.all_values).to eq( + { foo: "bar", pid: "12345" } => 1.0, + { foo: "bar", pid: "23456" } => 3.0, + { foo: "baz", pid: "12345" } => 7.0, + { foo: "baz", pid: "23456" } => 2.0, + { foo: "yyy", pid: "12345" } => 3.0, + { foo: "zzz", pid: "23456" } => 1.0, + ) + + # Both processes should return the same value + expect(metric_store1.all_values).to eq(metric_store2.all_values) + end + end + + context "with a metric that takes MOST_RECENT instead of SUM" do + it "reports the most recently written value from different processes" do + metric_store1 = subject.for_metric( + :metric_name, + metric_type: :gauge, + metric_settings: { aggregation: :most_recent } + ) + metric_store2 = subject.for_metric( + :metric_name, + metric_type: :gauge, + metric_settings: { aggregation: :most_recent } + ) + + allow(Process).to receive(:pid).and_return(12345) + metric_store1.set(labels: { foo: "bar" }, val: 1) + + allow(Process).to receive(:pid).and_return(23456) + metric_store2.set(labels: { foo: "bar" }, val: 3) # Supercedes 'bar' in PID 12345 + metric_store2.set(labels: { foo: "baz" }, val: 2) + metric_store2.set(labels: { foo: "zzz" }, val: 1) + + allow(Process).to receive(:pid).and_return(12345) + metric_store1.set(labels: { foo: "baz" }, val: 4) # Supercedes 'baz' in PID 23456 + + expect(metric_store1.all_values).to eq( + { foo: "bar" } => 3.0, + { foo: "baz" } => 4.0, + { foo: "zzz" } => 1.0, + ) + + # Both processes should return the same value + expect(metric_store1.all_values).to eq(metric_store2.all_values) + end + + it "does now allow `increment`, only `set`" do + metric_store1 = subject.for_metric( + :metric_name, + metric_type: :gauge, + metric_settings: { aggregation: :most_recent } + ) + + expect do + metric_store1.increment(labels: {}) + end.to 
raise_error(Prometheus::Client::DataStores::DirectFileStore::InvalidStoreSettingsError) + end + end + + it "resizes the File if metrics get too big" do + truncate_calls_count = 0 + allow_any_instance_of(Prometheus::Client::DataStores::DirectFileStore::FileMappedDict). + to receive(:resize_file).and_wrap_original do |original_method, *args, &block| + + truncate_calls_count += 1 + original_method.call(*args, &block) + end + + really_long_string = "a" * 500_000 + 10.times do |i| + metric_store.set(labels: { foo: "#{ really_long_string }#{ i }" }, val: 1) + end + + expect(truncate_calls_count).to be >= 3 + end +end diff --git a/spec/prometheus/client/data_stores/single_threaded_spec.rb b/spec/prometheus/client/data_stores/single_threaded_spec.rb new file mode 100644 index 00000000..681eeb1e --- /dev/null +++ b/spec/prometheus/client/data_stores/single_threaded_spec.rb @@ -0,0 +1,19 @@ +# encoding: UTF-8 + +require 'prometheus/client/data_stores/single_threaded' +require 'examples/data_store_example' + +describe Prometheus::Client::DataStores::SingleThreaded do + subject { described_class.new } + let(:metric_store) { subject.for_metric(:metric_name, metric_type: :counter) } + + it_behaves_like Prometheus::Client::DataStores + + it "does not accept Metric Settings" do + expect do + subject.for_metric(:metric_name, + metric_type: :counter, + metric_settings: { some_setting: true }) + end.to raise_error(Prometheus::Client::DataStores::SingleThreaded::InvalidStoreSettingsError) + end +end diff --git a/spec/prometheus/client/data_stores/synchronized_spec.rb b/spec/prometheus/client/data_stores/synchronized_spec.rb new file mode 100644 index 00000000..b69bdfa2 --- /dev/null +++ b/spec/prometheus/client/data_stores/synchronized_spec.rb @@ -0,0 +1,19 @@ +# encoding: UTF-8 + +require 'prometheus/client/data_stores/synchronized' +require 'examples/data_store_example' + +describe Prometheus::Client::DataStores::Synchronized do + subject { described_class.new } + let(:metric_store) 
{ subject.for_metric(:metric_name, metric_type: :counter) } + + it_behaves_like Prometheus::Client::DataStores + + it "does not accept Metric Settings" do + expect do + subject.for_metric(:metric_name, + metric_type: :counter, + metric_settings: { some_setting: true }) + end.to raise_error(Prometheus::Client::DataStores::Synchronized::InvalidStoreSettingsError) + end +end diff --git a/spec/prometheus/client/formats/text_spec.rb b/spec/prometheus/client/formats/text_spec.rb index 60aa0c60..d200ec1c 100644 --- a/spec/prometheus/client/formats/text_spec.rb +++ b/spec/prometheus/client/formats/text_spec.rb @@ -1,71 +1,54 @@ # encoding: UTF-8 +require 'prometheus/client' +require 'prometheus/client/registry' require 'prometheus/client/formats/text' describe Prometheus::Client::Formats::Text do - let(:summary_value) do - { 0.5 => 4.2, 0.9 => 8.32, 0.99 => 15.3 }.tap do |value| - allow(value).to receive_messages(sum: 1243.21, total: 93) - end + # Reset the data store + before do + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new end - let(:histogram_value) do - { 10 => 1.0, 20 => 2.0, 30 => 2.0 }.tap do |value| - allow(value).to receive_messages(sum: 15.2, total: 2.0) - end - end + let(:registry) { Prometheus::Client::Registry.new } + + before do + foo = registry.counter(:foo, + docstring: 'foo description', + labels: [:umlauts, :utf, :code], + preset_labels: {umlauts: 'Björn', utf: '佖佥'}) + foo.increment(labels: { code: 'red'}, by: 42) + foo.increment(labels: { code: 'green'}, by: 3.14E42) + foo.increment(labels: { code: 'blue'}, by: 1.23e-45) + + + bar = registry.gauge(:bar, + docstring: "bar description\nwith newline", + labels: [:status, :code]) + bar.set(15, labels: { status: 'success', code: 'pink'}) + + + baz = registry.counter(:baz, + docstring: 'baz "description" \\escaping', + labels: [:text]) + baz.increment(labels: { text: "with \"quotes\", \\escape \n and newline" }, by: 15.0) + + + qux = registry.summary(:qux, + 
docstring: 'qux description', + labels: [:for, :code], + preset_labels: { for: 'sake', code: '1' }) + 92.times { qux.observe(0) } + qux.observe(1243.21) + - let(:registry) do - metrics = [ - double( - name: :foo, - docstring: 'foo description', - base_labels: { umlauts: 'Björn', utf: '佖佥' }, - type: :counter, - values: { - { code: 'red' } => 42.0, - { code: 'green' } => 3.14E42, - { code: 'blue' } => -1.23e-45, - }, - ), - double( - name: :bar, - docstring: "bar description\nwith newline", - base_labels: { status: 'success' }, - type: :gauge, - values: { - { code: 'pink' } => 15.0, - }, - ), - double( - name: :baz, - docstring: 'baz "description" \\escaping', - base_labels: {}, - type: :counter, - values: { - { text: "with \"quotes\", \\escape \n and newline" } => 15.0, - }, - ), - double( - name: :qux, - docstring: 'qux description', - base_labels: { for: 'sake' }, - type: :summary, - values: { - { code: '1' } => summary_value, - }, - ), - double( - name: :xuq, - docstring: 'xuq description', - base_labels: {}, - type: :histogram, - values: { - { code: 'ah' } => histogram_value, - }, - ), - ] - double(metrics: metrics) + xuq = registry.histogram(:xuq, + docstring: 'xuq description', + labels: [:code], + preset_labels: {code: 'ah'}, + buckets: [10, 20, 30]) + xuq.observe(12) + xuq.observe(3.2) end describe '.marshal' do @@ -75,7 +58,7 @@ # HELP foo foo description foo{umlauts="Björn",utf="佖佥",code="red"} 42.0 foo{umlauts="Björn",utf="佖佥",code="green"} 3.14e+42 -foo{umlauts="Björn",utf="佖佥",code="blue"} -1.23e-45 +foo{umlauts="Björn",utf="佖佥",code="blue"} 1.23e-45 # TYPE bar gauge # HELP bar bar description\nwith newline bar{status="success",code="pink"} 15.0 @@ -84,11 +67,8 @@ baz{text="with \"quotes\", \\escape \n and newline"} 15.0 # TYPE qux summary # HELP qux qux description -qux{for="sake",code="1",quantile="0.5"} 4.2 -qux{for="sake",code="1",quantile="0.9"} 8.32 -qux{for="sake",code="1",quantile="0.99"} 15.3 qux_sum{for="sake",code="1"} 1243.21 
-qux_count{for="sake",code="1"} 93 +qux_count{for="sake",code="1"} 93.0 # TYPE xuq histogram # HELP xuq xuq description xuq_bucket{code="ah",le="10"} 1.0 diff --git a/spec/prometheus/client/gauge_spec.rb b/spec/prometheus/client/gauge_spec.rb index d3e092bb..417daad2 100644 --- a/spec/prometheus/client/gauge_spec.rb +++ b/spec/prometheus/client/gauge_spec.rb @@ -1,42 +1,80 @@ # encoding: UTF-8 +require 'prometheus/client' require 'prometheus/client/gauge' require 'examples/metric_example' describe Prometheus::Client::Gauge do - let(:gauge) { Prometheus::Client::Gauge.new(:foo, 'foo description') } + # Reset the data store + before do + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new + end + + let(:expected_labels) { [] } - it_behaves_like Prometheus::Client::Metric do - let(:type) { NilClass } + let(:gauge) do + Prometheus::Client::Gauge.new(:foo, + docstring: 'foo description', + labels: expected_labels) end + it_behaves_like Prometheus::Client::Metric + describe '#set' do it 'sets a metric value' do expect do - gauge.set({}, 42) - end.to change { gauge.get }.from(nil).to(42) + gauge.set(42) + end.to change { gauge.get }.from(0).to(42) end - it 'sets a metric value for a given label set' do - expect do + context "with a an expected label set" do + let(:expected_labels) { [:test] } + + it 'sets a metric value for a given label set' do expect do - gauge.set({ test: 'value' }, 42) - end.to change { gauge.get(test: 'value') }.from(nil).to(42) - end.to_not change { gauge.get } + expect do + gauge.set(42, labels: { test: 'value' }) + end.to change { gauge.get(labels: { test: 'value' }) }.from(0).to(42) + end.to_not change { gauge.get(labels: { test: 'other' }) } + end end context 'given an invalid value' do it 'raises an ArgumentError' do expect do - gauge.set({}, nil) + gauge.set(nil) end.to raise_exception(ArgumentError) end end end + describe '#set_to_current_time' do + it 'it sets the gauge to the current Unix epoch time' do + 
Timecop.freeze(Time.at(12345.1)) do + expect do + gauge.set_to_current_time + end.to change { gauge.get }.from(0).to(12345.1) + end + end + + context "with a an expected label set" do + let(:expected_labels) { [:test] } + + it 'sets a metric value for a given label set' do + Timecop.freeze(Time.at(12345.1)) do + expect do + expect do + gauge.set_to_current_time(labels: { test: 'value' }) + end.to change { gauge.get(labels: { test: 'value' }) }.from(0).to(12345.1) + end.to_not change { gauge.get(labels: { test: 'other' }) } + end + end + end + end + describe '#increment' do before do - gauge.set(RSpec.current_example.metadata[:labels] || {}, 0) + gauge.set(0, labels: RSpec.current_example.metadata[:labels] || {}) end it 'increments the gauge' do @@ -45,17 +83,21 @@ end.to change { gauge.get }.by(1.0) end - it 'increments the gauge for a given label set', labels: { test: 'one' } do - expect do + context "with a an expected label set" do + let(:expected_labels) { [:test] } + + it 'increments the gauge for a given label set', labels: { test: 'one' } do expect do - gauge.increment(test: 'one') - end.to change { gauge.get(test: 'one') }.by(1.0) - end.to_not change { gauge.get(test: 'another') } + expect do + gauge.increment(labels: { test: 'one' }) + end.to change { gauge.get(labels: { test: 'one' }) }.by(1.0) + end.to_not change { gauge.get(labels: { test: 'another' }) } + end end it 'increments the gauge by a given value' do expect do - gauge.increment({}, 5) + gauge.increment(by: 5) end.to change { gauge.get }.by(5.0) end @@ -76,26 +118,30 @@ describe '#decrement' do before do - gauge.set(RSpec.current_example.metadata[:labels] || {}, 0) + gauge.set(0, labels: RSpec.current_example.metadata[:labels] || {}) end - it 'increments the gauge' do + it 'decrements the gauge' do expect do gauge.decrement end.to change { gauge.get }.by(-1.0) end - it 'decrements the gauge for a given label set', labels: { test: 'one' } do - expect do + context "with a an expected label set" do 
+ let(:expected_labels) { [:test] } + + it 'decrements the gauge for a given label set', labels: { test: 'one' } do expect do - gauge.decrement(test: 'one') - end.to change { gauge.get(test: 'one') }.by(-1.0) - end.to_not change { gauge.get(test: 'another') } + expect do + gauge.decrement(labels: { test: 'one' }) + end.to change { gauge.get(labels: { test: 'one' }) }.by(-1.0) + end.to_not change { gauge.get(labels: { test: 'another' }) } + end end it 'decrements the gauge by a given value' do expect do - gauge.decrement({}, 5) + gauge.decrement(by: 5) end.to change { gauge.get }.by(-5.0) end diff --git a/spec/prometheus/client/histogram_spec.rb b/spec/prometheus/client/histogram_spec.rb index cee7dbae..5335e8be 100644 --- a/spec/prometheus/client/histogram_spec.rb +++ b/spec/prometheus/client/histogram_spec.rb @@ -1,72 +1,221 @@ # encoding: UTF-8 +require 'prometheus/client' require 'prometheus/client/histogram' require 'examples/metric_example' describe Prometheus::Client::Histogram do - let(:histogram) do - described_class.new(:bar, 'bar description', {}, [2.5, 5, 10]) + # Reset the data store + before do + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new end - it_behaves_like Prometheus::Client::Metric do - let(:type) { Hash } + let(:expected_labels) { [] } + + let(:histogram) do + described_class.new(:bar, + docstring: 'bar description', + labels: expected_labels, + buckets: [2.5, 5, 10]) end + it_behaves_like Prometheus::Client::Metric + describe '#initialization' do it 'raise error for unsorted buckets' do expect do - described_class.new(:bar, 'bar description', {}, [5, 2.5, 10]) + described_class.new(:bar, docstring: 'bar description', buckets: [5, 2.5, 10]) end.to raise_error ArgumentError end + + it 'raise error for `le` label' do + expect do + described_class.new(:bar, docstring: 'bar description', labels: [:le]) + end.to raise_error Prometheus::Client::LabelSetValidator::ReservedLabelError + end + end + + describe 
".linear_buckets" do + it "generates buckets" do + expect(described_class.linear_buckets(start: 1, width: 2, count: 5)). + to eql([1.0, 3.0, 5.0, 7.0, 9.0]) + end + end + + describe ".exponential_buckets" do + it "generates buckets" do + expect(described_class.exponential_buckets(start: 1, factor: 2, count: 5)). + to eql([1.0, 2.0, 4.0, 8.0, 16.0]) + end end describe '#observe' do it 'records the given value' do expect do - histogram.observe({}, 5) + histogram.observe(5) end.to change { histogram.get } end it 'raise error for le labels' do expect do - histogram.observe({ le: 1 }, 5) - end.to raise_error ArgumentError + histogram.observe(5, labels: { le: 1 }) + end.to raise_error Prometheus::Client::LabelSetValidator::InvalidLabelSetError + end + + context "with a an expected label set" do + let(:expected_labels) { [:test] } + + it 'observes a value for a given label set' do + expect do + expect do + histogram.observe(5, labels: { test: 'value' }) + end.to change { histogram.get(labels: { test: 'value' }) } + end.to_not change { histogram.get(labels: { test: 'other' }) } + end end end describe '#get' do + let(:expected_labels) { [:foo] } + before do - histogram.observe({ foo: 'bar' }, 3) - histogram.observe({ foo: 'bar' }, 5.2) - histogram.observe({ foo: 'bar' }, 13) - histogram.observe({ foo: 'bar' }, 4) + histogram.observe(3, labels: { foo: 'bar' }) + histogram.observe(5.2, labels: { foo: 'bar' }) + histogram.observe(13, labels: { foo: 'bar' }) + histogram.observe(4, labels: { foo: 'bar' }) end it 'returns a set of buckets values' do - expect(histogram.get(foo: 'bar')).to eql(2.5 => 0.0, 5 => 2.0, 10 => 3.0) + expect(histogram.get(labels: { foo: 'bar' })) + .to eql( + "2.5" => 0.0, "5" => 2.0, "10" => 3.0, "+Inf" => 4.0, "sum" => 25.2 + ) end - it 'returns a value which responds to #sum and #total' do - value = histogram.get(foo: 'bar') + it 'returns a value which includes sum' do + value = histogram.get(labels: { foo: 'bar' }) - expect(value.sum).to eql(25.2) - 
expect(value.total).to eql(4.0) + expect(value["sum"]).to eql(25.2) end it 'uses zero as default value' do - expect(histogram.get({})).to eql(2.5 => 0.0, 5 => 0.0, 10 => 0.0) + expect(histogram.get(labels: { foo: '' })).to eql( + "2.5" => 0.0, "5" => 0.0, "10" => 0.0, "+Inf" => 0.0, "sum" => 0.0 + ) end end describe '#values' do + let(:expected_labels) { [:status] } + it 'returns a hash of all recorded summaries' do - histogram.observe({ status: 'bar' }, 3) - histogram.observe({ status: 'foo' }, 6) + histogram.observe(3, labels: { status: 'bar' }) + histogram.observe(6, labels: { status: 'foo' }) + histogram.observe(10, labels: { status: 'baz' }) expect(histogram.values).to eql( - { status: 'bar' } => { 2.5 => 0.0, 5 => 1.0, 10 => 1.0 }, - { status: 'foo' } => { 2.5 => 0.0, 5 => 0.0, 10 => 1.0 }, + { status: 'bar' } => { "2.5" => 0.0, "5" => 1.0, "10" => 1.0, "+Inf" => 1.0, "sum" => 3.0 }, + { status: 'foo' } => { "2.5" => 0.0, "5" => 0.0, "10" => 1.0, "+Inf" => 1.0, "sum" => 6.0 }, + { status: 'baz' } => { "2.5" => 0.0, "5" => 0.0, "10" => 1.0, "+Inf" => 1.0, "sum" => 10.0 }, ) end end + + describe '#init_label_set' do + context "with labels" do + let(:expected_labels) { [:status] } + + it 'initializes the metric for a given label set' do + expect(histogram.values).to eql({}) + + histogram.init_label_set(status: 'bar') + histogram.init_label_set(status: 'foo') + + expect(histogram.values).to eql( + { status: 'bar' } => { "2.5" => 0.0, "5" => 0.0, "10" => 0.0, "+Inf" => 0.0, "sum" => 0.0 }, + { status: 'foo' } => { "2.5" => 0.0, "5" => 0.0, "10" => 0.0, "+Inf" => 0.0, "sum" => 0.0 }, + ) + end + end + + context "without labels" do + it 'automatically initializes the metric' do + expect(histogram.values).to eql( + {} => { "2.5" => 0.0, "5" => 0.0, "10" => 0.0, "+Inf" => 0.0, "sum" => 0.0 }, + ) + end + end + end + + describe '#with_labels' do + let(:expected_labels) { [:foo] } + + it 'pre-sets labels for observations' do + expect { histogram.observe(2) } + .to 
raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError) + expect { histogram.with_labels(foo: 'value').observe(2) }.not_to raise_error + end + + it 'registers `with_labels` observations in the original metric store' do + histogram.observe(7, labels: { foo: 'value1'}) + histogram_with_labels = histogram.with_labels({ foo: 'value2'}) + histogram_with_labels.observe(20) + + expected_values = { + {foo: 'value1'} => {'2.5' => 0.0, '5' => 0.0, '10' => 1.0, '+Inf' => 1.0, 'sum' => 7.0}, + {foo: 'value2'} => {'2.5' => 0.0, '5' => 0.0, '10' => 0.0, '+Inf' => 1.0, 'sum' => 20.0} + } + expect(histogram_with_labels.values).to eql(expected_values) + expect(histogram.values).to eql(expected_values) + end + + context 'when using DirectFileStore' do + before do + Dir.glob('/tmp/prometheus_test/*').each { |file| File.delete(file) } + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::DirectFileStore.new(dir: '/tmp/prometheus_test') + end + + after do + Dir.glob('/tmp/prometheus_test/*').each { |file| File.delete(file) } + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new + end + + let(:expected_labels) { [:foo, :bar] } + + # This is a slightly weird test, and largely a duplicate of one in + # spec/prometheus/client/metric_spec.rb. + # + # The reason we have this copy of the test is because histogram.rb + # implements its own fix for the issue this test guards against due to + # having slightly different constructor signature (which gets called in + # `with_labels`). + # + # See the comment in spec/prometheus/client/metric_spec.rb for an + # explanation of what this test is doing and why. 
+ it "doesn't corrupt the data files" do + histogram_with_labels = histogram.with_labels({ foo: 'longervalue'}) + + # Initialize / read the files for both views of the metric + histogram.observe(1, labels: { foo: 'value1', bar: 'zzz'}) + histogram_with_labels.observe(1, labels: {bar: 'zzz'}) + + # After both MetricStores have their files, add a new entry to both + histogram.observe(1, labels: { foo: 'value1', bar: 'aaa'}) # If there's a bug, we partially overwrite { foo: 'longervalue', bar: 'zzz'} + histogram_with_labels.observe(1, labels: {bar: 'aaa'}) # Extend the file so we read past that overwrite + + expect { histogram.values }.not_to raise_error # Check it hasn't corrupted our files + expect { histogram_with_labels.values }.not_to raise_error # Check it hasn't corrupted our files + + expected_values = { + {foo: 'value1', bar: 'zzz'} => {"2.5" => 1.0, "5"=>1.0, "10" => 1.0, "+Inf" => 1.0, "sum"=>1.0}, + {foo: 'value1', bar: 'aaa'} => {"2.5" => 1.0, "5"=>1.0, "10" => 1.0, "+Inf" => 1.0, "sum"=>1.0}, + {foo: 'longervalue', bar: 'zzz'} => {"2.5" => 1.0, "5"=>1.0, "10" => 1.0, "+Inf" => 1.0, "sum"=>1.0}, + {foo: 'longervalue', bar: 'aaa'} => {"2.5" => 1.0, "5"=>1.0, "10" => 1.0, "+Inf" => 1.0, "sum"=>1.0}, + } + + expect(histogram.values).to eql(expected_values) + expect(histogram_with_labels.values).to eql(expected_values) + end + end + end end diff --git a/spec/prometheus/client/label_set_validator_spec.rb b/spec/prometheus/client/label_set_validator_spec.rb index 6098a10d..ffd1387c 100644 --- a/spec/prometheus/client/label_set_validator_spec.rb +++ b/spec/prometheus/client/label_set_validator_spec.rb @@ -3,7 +3,11 @@ require 'prometheus/client/label_set_validator' describe Prometheus::Client::LabelSetValidator do - let(:validator) { Prometheus::Client::LabelSetValidator.new } + let(:expected_labels) { [] } + let(:additional_reserved_labels) { [] } + let(:validator) do + Prometheus::Client::LabelSetValidator.new(expected_labels: expected_labels, reserved_labels: 
additional_reserved_labels) + end let(:invalid) { Prometheus::Client::LabelSetValidator::InvalidLabelSetError } describe '.new' do @@ -12,58 +16,90 @@ end end - describe '#valid?' do + describe '#validate_symbols!' do it 'returns true for a valid label check' do - expect(validator.valid?(version: 'alpha')).to eql(true) + expect(validator.validate_symbols!(version: 'alpha')).to eql(true) end - it 'raises Invaliddescribed_classError if a label set is not a hash' do + it 'raises InvalidLabelSetError if a label set is not a hash' do expect do - validator.valid?('invalid') + validator.validate_symbols!('invalid') end.to raise_exception invalid end it 'raises InvalidLabelError if a label key is not a symbol' do expect do - validator.valid?('key' => 'value') + validator.validate_symbols!('key' => 'value') end.to raise_exception(described_class::InvalidLabelError) end it 'raises InvalidLabelError if a label key starts with __' do expect do - validator.valid?(__reserved__: 'key') + validator.validate_symbols!(__reserved__: 'key') end.to raise_exception(described_class::ReservedLabelError) end - it 'raises ReservedLabelError if a label key is reserved' do - [:job, :instance].each do |label| - expect do - validator.valid?(label => 'value') - end.to raise_exception(described_class::ReservedLabelError) + it 'raises InvalidLabelError if a label key contains invalid characters' do + expect do + validator.validate_symbols!(:@foo => 'key') + end.to raise_exception(described_class::InvalidLabelError) + end + + context "with only the base set of reserved labels" do + it "doesn't raise ReservedLabelError for the additional reserved label" do + expect { validator.validate_symbols!(additional: 'value') }. + to_not raise_exception + end + + it 'raises ReservedLabelError if a label key is reserved' do + expect { validator.validate_symbols!(pid: 'value') }. 
+ to raise_exception(described_class::ReservedLabelError) + end + end + + context "with an additional reserved label" do + let(:additional_reserved_labels) { [:additional] } + + it 'raises ReservedLabelError if a label key is reserved' do + [:additional, :pid].each do |label| + expect do + validator.validate_symbols!(label => 'value') + end.to raise_exception(described_class::ReservedLabelError) + end end end end - describe '#validate' do + describe '#validate_labelset!' do + let(:expected_labels) { [:method, :code] } + it 'returns a given valid label set' do - hash = { version: 'alpha' } + hash = { method: 'get', code: '200' } - expect(validator.validate(hash)).to eql(hash) + expect(validator.validate_labelset!(hash)).to eql(hash) end - it 'raises an exception if a given label set is not valid' do - input = 'broken' - expect(validator).to receive(:valid?).with(input).and_raise(invalid) + it 'returns an exception if there are malformed labels' do + expect do + validator.validate_labelset!('method' => 'get', :code => '200') + end.to raise_exception(invalid, /keys given: \["method", :code\] vs. keys expected: \[:code, :method\]/) + + end - expect { validator.validate(input) }.to raise_exception(invalid) + it 'raises an exception if there are unexpected labels' do + expect do + validator.validate_labelset!(method: 'get', code: '200', exception: 'NoMethodError') + end.to raise_exception(invalid, /keys given: \[:method, :code, :exception\] vs. keys expected: \[:code, :method\]/) end - it 'raises InvalidLabelSetError for varying label sets' do - validator.validate(method: 'get', code: '200') + it 'raises an exception if there are missing labels' do + expect do + validator.validate_labelset!(method: 'get') + end.to raise_exception(invalid, /keys given: \[:method\] vs. 
keys expected: \[:code, :method\]/) expect do - validator.validate(method: 'get', exception: 'NoMethodError') - end.to raise_exception(invalid) + validator.validate_labelset!(code: '200') + end.to raise_exception(invalid, /keys given: \[:code\] vs. keys expected: \[:code, :method\]/) end end end diff --git a/spec/prometheus/client/metric_spec.rb b/spec/prometheus/client/metric_spec.rb new file mode 100644 index 00000000..7b84c67d --- /dev/null +++ b/spec/prometheus/client/metric_spec.rb @@ -0,0 +1,244 @@ +# encoding: UTF-8 + +require 'prometheus/client' +require 'prometheus/client/metric' +require 'prometheus/client/data_stores/direct_file_store' + +describe Prometheus::Client::Metric do + let(:test_counter) do + Class.new(Prometheus::Client::Metric) do + def type + :counter + end + + def increment(by: 1, labels: {}) + raise ArgumentError, 'increment must be a non-negative number' if by < 0 + + label_set = label_set_for(labels) + @store.increment(labels: label_set, by: by) + end + end + end + + let(:expected_labels) { [] } + + subject(:counter) do + test_counter.new(:foo, + docstring: 'foo description', + labels: expected_labels) + end + + describe '#get' do + it 'returns the current metric value' do + subject.increment + + expect(subject.get).to eql(1.0) + end + + context "with a subject that expects labels" do + subject { test_counter.new(:foo, docstring: 'Labels', labels: [:test]) } + + it 'returns the current metric value for a given label set' do + subject.increment(labels: { test: 'label' }) + + expect(subject.get(labels: { test: 'label' })).to eql(1.0) + end + end + end + + describe '#increment' do + it 'raises an InvalidLabelSetError if sending unexpected labels' do + expect do + counter.increment(labels: { test: 'label' }) + end.to raise_error Prometheus::Client::LabelSetValidator::InvalidLabelSetError + end + + context "with non-string label values" do + subject { test_counter.new(:foo, docstring: 'Labels', labels: [:foo]) } + + it "converts labels to 
strings for consistent storage" do + subject.increment(labels: { foo: :label }) + expect(subject.get(labels: { foo: 'label' })).to eq(1.0) + end + + context "and some labels preset" do + subject do + test_counter.new(:foo, + docstring: 'Labels', + labels: [:foo, :bar], + preset_labels: { foo: :label }) + end + + it "converts labels to strings for consistent storage" do + subject.increment(labels: { bar: :label }) + expect(subject.get(labels: { foo: 'label', bar: 'label' })).to eq(1.0) + end + end + end + end + + describe '#init_label_set' do + context "with labels" do + let(:expected_labels) { [:test] } + + it 'initializes the metric for a given label set' do + expect(counter.values).to eql({}) + + counter.init_label_set(test: 'value') + + expect(counter.values).to eql({test: 'value'} => 0.0) + end + end + + context "without labels" do + it 'automatically initializes the metric' do + expect(counter.values).to eql({} => 0.0) + end + end + end + + describe '#with_labels' do + let(:expected_labels) { [:foo] } + + it 'pre-sets labels for observations' do + expect { counter.increment } + .to raise_error(Prometheus::Client::LabelSetValidator::InvalidLabelSetError) + expect { counter.with_labels(foo: 'label').increment }.not_to raise_error + end + + it 'registers `with_labels` observations in the original metric store' do + counter.increment(labels: { foo: 'value1'}) + counter_with_labels = counter.with_labels({ foo: 'value2'}) + counter_with_labels.increment(by: 2) + + expect(counter_with_labels.values).to eql({foo: 'value1'} => 1.0, {foo: 'value2'} => 2.0) + expect(counter.values).to eql({foo: 'value1'} => 1.0, {foo: 'value2'} => 2.0) + end + + context 'when using DirectFileStore' do + before do + Dir.glob('/tmp/prometheus_test/*').each { |file| File.delete(file) } + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::DirectFileStore.new(dir: '/tmp/prometheus_test') + end + + let(:expected_labels) { [:foo, :bar] } + + # Testing for file corruption: 
this is weird and complicated, so it needs explaining + # + # Files get corrupted when we have two different instances of `FileMappedDict` + # reading and writing the same file. This corruption is expected; we should never have + # two instances of `FileMappedDict` for the same file. If we do, it's a bug in our client. + # + # To clarify, the bug is that *we ended up with two instances for the same file*, not + # that the instances are now corrupting the file. + # + # This is why we're testing this in `with_labels`. It's the only use case we've found + # where we ended up with two instances (before we fixed that bug). `with_labels` is + # incidental; if we find another way to get "duplicate" instances, we should add this + # same exact test, except for the first line, where we need to instead reproduce + # whatever bug gets us that second instance. + # + # The first thing we need to understand is why having two instances of `FileMappedDict` + # corrupts the files: + # + # `FileMappedDict` keeps track, in an internal variable, of how many bytes in the file + # have been used. When adding a new "entry" (observing a new labelset), it serializes + # it and adds it at "the end" (according to its internal byte counter), and it also updates + # the counter at the beginning of the file. However, it never re-reads that counter + # from the file, because there shouldn't be any reason for it to have changed. + # + # If there are two instances pointing to the same file, initially they will both + # share that internal counter, as they do the first read of the file, but if then + # each of them adds an entry, their internal "length" counters will disagree, and + # they'll start overwriting each other's entries. + # + # Importantly, if all of the entries happen to have the same length, it will be "fine". + # Some of the labelsets will effectively disappear, but there will be no corruption, + # because all the important things will fall in the right offsets by pure chance.
This + # would be very rare in production, but in a test, it's what normally happens because + # we set all labels to "foo", "bar", etc. This is the reason for "longervalue" below, + # we need to have different labelset lengths to reproduce the corruption. + # + # With this background about the internals, we can now get to why the specific sequence of + # steps below ends up in corrupted files. + # + # For this to make sense, I'll need to describe the contents of the file at each step. + # I'll represent it like this: `27|labelset1,value1|labelset2,value2|labelset3,value3|` + # + # These are not the bytes we store in the file, but conceptually it's equivalent, + # with two caveats: + # - The counter at the beginning (27 == 3 * 9) here shows the combined length of labelsets. + # It'd normally also include the length of values, but doing that makes this explanation + # much harder to follow. + # - Each entry also starts with a 4-byte int specifying the length of its labelset, so + # we know how much to read. Again, I'm omitting that for readability. + # + # + # Steps to reproduce: + # - We declare `counter` and `counter_with_labels` as a clone. Neither has read the file. + # - We increment `counter`, which creates the file and adds the entry ("labelset1") + # - File: `9|labelset1,value1|` + # - We increment `counter_with_labels`, which reads the file, and adds the new entry + # to it ("muchlongerlabelset2"). + # - File: `28|labelset1,value1|muchlongerlabelset2, value2|` + # - `counter` and `counter_with_labels` now disagree about the length of this file + # (`counter` doesn't know the file has grown). + # - We now add a new entry to `counter` ("labelset3"), which thinks the file is shorter + # than it actually is.
+ # - File: `18|labelset1,value1|labelset3,value3|et2, value2|` + # - The initial counter reflects both labelsets for `counter`; then we have those + # labelsets; and finally some "garbage" after the "end" (the garbage is the + # last few bytes of the much longer entry added before by `counter_with_labels`) + # - so far, though, we're still good. If you read the file, all entries are "fine", + # because you're only reading up to the "18" length specified at the beginning. + # - for the problem to manifest itself, we need to increment that counter at the + # beginning, so we'll read the garbage. **BUT**, if we add a new labelset to + # `counter`, it'll overwrite the "garbage" with good data, and the file will + # continue to be fine. + # - We add a new entry to `counter_with_labels`. This updates the length counter at + # the beginning of the file. + # - File: `47|labelset1,value1|labelset3,value3|et2, value2|muchlongerlabelset4, value4|` + # + # - Now the file is properly corrupted. When reading it, `FileMappedDict` sees: + # - labelset1,value1 (cool) + # - labelset3,value3 (cool) + # - et2, value2 (boom) + # |-> the beginning of this entry is garbage because we're actually at the middle + # of an entry, not a beginning. + # + # What actually breaks is that each of these entries is expected to have, at their + # beginning, the length in bytes of its labelset, so we know how much to read. + # Now we have garbage in that position, and `FileMappedDict` will either: + # - Try to interpret those four bytes as a long, get an invalid result. + # - Try to read an invalid amount of data (maybe a negative amount). + # - After reading the labelset, try to read the float and go past the end of the file + # - Actually read what it thinks is a float, try to `unpack` it, and fail because + # it's actually garbage. + # - I'm sure there are other fun ways for it to fail.
+ it "doesn't corrupt the data files" do + counter_with_labels = counter.with_labels({ foo: 'longervalue'}) + + # Initialize / read the files for both views of the metric + counter.increment(labels: { foo: 'value1', bar: 'zzz'}) + counter_with_labels.increment(by: 2, labels: {bar: 'zzz'}) + + # After both MetricStores have their files, add a new entry to both + counter.increment(labels: { foo: 'value1', bar: 'aaa'}) # If there's a bug, we partially overwrite { foo: 'longervalue', bar: 'zzz'} + counter_with_labels.increment(by: 2, labels: {bar: 'aaa'}) # Extend the file so we read past that overwrite + + expect { counter.values }.not_to raise_error # Check it hasn't corrupted our files + expect { counter_with_labels.values }.not_to raise_error # Check it hasn't corrupted our files + + expected_values = { + {foo: 'value1', bar: 'zzz'} => 1.0, + {foo: 'value1', bar: 'aaa'} => 1.0, + {foo: 'longervalue', bar: 'zzz'} => 2.0, + {foo: 'longervalue', bar: 'aaa'} => 2.0, + } + + expect(counter.values).to eql(expected_values) + expect(counter_with_labels.values).to eql(expected_values) + end + end + end +end diff --git a/spec/prometheus/client/push_spec.rb b/spec/prometheus/client/push_spec.rb index cffe6a6b..2f0b70a5 100644 --- a/spec/prometheus/client/push_spec.rb +++ b/spec/prometheus/client/push_spec.rb @@ -1,10 +1,13 @@ # encoding: UTF-8 +require 'prometheus/client/gauge' require 'prometheus/client/push' describe Prometheus::Client::Push do - let(:registry) { Prometheus::Client.registry } - let(:push) { Prometheus::Client::Push.new('test-job') } + let(:gateway) { 'http://localhost:9091' } + let(:registry) { Prometheus::Client::Registry.new } + let(:grouping_key) { {} } + let(:push) { Prometheus::Client::Push.new(job: 'test-job', gateway: gateway, grouping_key: grouping_key, open_timeout: 5, read_timeout: 30) } describe '.new' do it 'returns a new push instance' do @@ -12,102 +15,337 @@ end it 'uses localhost as default Pushgateway' do + push = 
Prometheus::Client::Push.new(job: 'test-job') + expect(push.gateway).to eql('http://localhost:9091') end it 'allows to specify a custom Pushgateway' do - push = Prometheus::Client::Push.new('test-job', nil, 'http://pu.sh:1234') + push = Prometheus::Client::Push.new(job: 'test-job', gateway: 'http://pu.sh:1234') expect(push.gateway).to eql('http://pu.sh:1234') end + it 'raises an ArgumentError if the job is nil' do + expect do + Prometheus::Client::Push.new(job: nil) + end.to raise_error ArgumentError + end + + it 'raises an ArgumentError if the job is empty' do + expect do + Prometheus::Client::Push.new(job: "") + end.to raise_error ArgumentError + end + it 'raises an ArgumentError if the given gateway URL is invalid' do ['inva.lid:1233', 'http://[invalid]'].each do |url| expect do - Prometheus::Client::Push.new('test-job', nil, url) + Prometheus::Client::Push.new(job: 'test-job', gateway: url) end.to raise_error ArgumentError end end + + it 'raises InvalidLabelError if a grouping key label has an invalid name' do + expect do + Prometheus::Client::Push.new(job: "test-job", grouping_key: { "not_a_symbol" => "foo" }) + end.to raise_error Prometheus::Client::LabelSetValidator::InvalidLabelError + end end - describe '#path' do - it 'uses the default metrics path if no instance value given' do - push = Prometheus::Client::Push.new('test-job') + describe '#add' do + it 'sends a given registry to via HTTP POST' do + expect(push).to receive(:request).with(Net::HTTP::Post, registry) - expect(push.path).to eql('/metrics/jobs/test-job') + push.add(registry) end + end - it 'uses the full metrics path if an instance value is given' do - push = Prometheus::Client::Push.new('bar-job', 'foo') + describe '#replace' do + it 'sends a given registry to via HTTP PUT' do + expect(push).to receive(:request).with(Net::HTTP::Put, registry) - expect(push.path).to eql('/metrics/jobs/bar-job/instances/foo') + push.replace(registry) end + end - it 'escapes non-URL characters' do - push = 
Prometheus::Client::Push.new('bar job', 'foo ') + describe '#delete' do + it 'deletes existing metrics with HTTP DELETE' do + expect(push).to receive(:request).with(Net::HTTP::Delete) - expected = '/metrics/jobs/bar%20job/instances/foo%20%3Cmy%20instance%3E' - expect(push.path).to eql(expected) + push.delete end end - describe '#add' do - it 'pushes a given registry to the configured Pushgateway via HTTP' do - http = double(:http) - expect(http).to receive(:send_request).with( - 'POST', - '/metrics/jobs/foo/instances/bar', - Prometheus::Client::Formats::Text.marshal(registry), - 'Content-Type' => Prometheus::Client::Formats::Text::CONTENT_TYPE, + describe '#path' do + it 'uses the default metrics path if no grouping key given' do + push = Prometheus::Client::Push.new(job: 'test-job') + + expect(push.path).to eql('/metrics/job/test-job') + end + + it 'appends additional grouping labels to the path if specified' do + push = Prometheus::Client::Push.new( + job: 'test-job', + grouping_key: { foo: "bar", baz: "qux"}, ) - expect(http).to receive(:use_ssl=).with(false) - expect(Net::HTTP).to receive(:new).with('pu.sh', 9091).and_return(http) - described_class.new('foo', 'bar', 'http://pu.sh:9091').add(registry) + expect(push.path).to eql('/metrics/job/test-job/foo/bar/baz/qux') end - it 'pushes a given registry to the configured Pushgateway via HTTPS' do - http = double(:http) - expect(http).to receive(:send_request).with( - 'POST', - '/metrics/jobs/foo/instances/bar', - Prometheus::Client::Formats::Text.marshal(registry), - 'Content-Type' => Prometheus::Client::Formats::Text::CONTENT_TYPE, + it 'converts non-string job names to strings' do + push = Prometheus::Client::Push.new( + job: :foo, + ) + + expect(push.path).to eql('/metrics/job/foo') + end + + it 'converts non-string grouping labels to strings' do + push = Prometheus::Client::Push.new( + job: 'test-job', + grouping_key: { foo: :bar, baz: :qux}, + ) + + expect(push.path).to 
eql('/metrics/job/test-job/foo/bar/baz/qux') + end + + it 'encodes the job name in url-safe base64 if it contains `/`' do + push = Prometheus::Client::Push.new( + job: 'foo/test-job', + ) + + expect(push.path).to eql('/metrics/job@base64/Zm9vL3Rlc3Qtam9i') + end + + it 'encodes grouping key label values containing `/` in url-safe base64' do + push = Prometheus::Client::Push.new( + job: 'test-job', + grouping_key: { foo: "bar/baz"}, + ) + + expect(push.path).to eql('/metrics/job/test-job/foo@base64/YmFyL2Jheg==') + end + + it 'encodes empty grouping key label values as a single base64 padding character' do + push = Prometheus::Client::Push.new( + job: 'test-job', + grouping_key: { foo: ""}, ) - expect(http).to receive(:use_ssl=).with(true) - expect(Net::HTTP).to receive(:new).with('pu.sh', 9091).and_return(http) - described_class.new('foo', 'bar', 'https://pu.sh:9091').add(registry) + expect(push.path).to eql('/metrics/job/test-job/foo@base64/=') + end + + it 'URL-encodes all other non-URL-safe characters' do + push = Prometheus::Client::Push.new(job: '', grouping_key: { foo_label: '' }) + + expected = '/metrics/job/%3Cbar%20job%3E/foo_label/%3Cbar%20value%3E' + expect(push.path).to eql(expected) end end - describe '#replace' do - it 'replaces any existing metrics with registry' do - http = double(:http) - expect(http).to receive(:send_request).with( - 'PUT', - '/metrics/jobs/foo/instances/bar', - Prometheus::Client::Formats::Text.marshal(registry), - 'Content-Type' => Prometheus::Client::Formats::Text::CONTENT_TYPE, + describe '#request' do + let(:content_type) { Prometheus::Client::Formats::Text::CONTENT_TYPE } + let(:data) { Prometheus::Client::Formats::Text.marshal(registry) } + let(:uri) { URI.parse("#{gateway}/metrics/job/test-job") } + let(:response) do + double( + :response, + code: '200', + message: 'OK', + body: 'Everything worked' ) + end + + it 'sends marshalled registry to the specified gateway' do + request = double(:request) + expect(request).to 
receive(:content_type=).with(content_type) + expect(request).to receive(:body=).with(data) + expect(Net::HTTP::Post).to receive(:new).with(uri).and_return(request) + + http = double(:http) expect(http).to receive(:use_ssl=).with(false) - expect(Net::HTTP).to receive(:new).with('pu.sh', 9091).and_return(http) + expect(http).to receive(:open_timeout=).with(5) + expect(http).to receive(:read_timeout=).with(30) + expect(http).to receive(:request).with(request).and_return(response) + expect(Net::HTTP).to receive(:new).with('localhost', 9091).and_return(http) - described_class.new('foo', 'bar', 'http://pu.sh:9091').replace(registry) + push.send(:request, Net::HTTP::Post, registry) end - end - describe '#delete' do - it 'deletes existing metrics from the configured Pushgateway' do + context 'for a 3xx response' do + let(:response) do + double( + :response, + code: '301', + message: 'Moved Permanently', + body: 'Probably no body, but technically you can return one' + ) + end + + it 'raises a redirect error' do + request = double(:request) + allow(request).to receive(:content_type=) + allow(request).to receive(:body=) + allow(Net::HTTP::Post).to receive(:new).with(uri).and_return(request) + + http = double(:http) + allow(http).to receive(:use_ssl=) + allow(http).to receive(:open_timeout=) + allow(http).to receive(:read_timeout=) + allow(http).to receive(:request).with(request).and_return(response) + allow(Net::HTTP).to receive(:new).with('localhost', 9091).and_return(http) + + expect { push.send(:request, Net::HTTP::Post, registry) }.to raise_error( + Prometheus::Client::Push::HttpRedirectError + ) + end + end + + context 'for a 4xx response' do + let(:response) do + double( + :response, + code: '400', + message: 'Bad Request', + body: 'Info on why the request was bad' + ) + end + + it 'raises a client error' do + request = double(:request) + allow(request).to receive(:content_type=) + allow(request).to receive(:body=) + allow(Net::HTTP::Post).to 
receive(:new).with(uri).and_return(request) + + http = double(:http) + allow(http).to receive(:use_ssl=) + allow(http).to receive(:open_timeout=) + allow(http).to receive(:read_timeout=) + allow(http).to receive(:request).with(request).and_return(response) + allow(Net::HTTP).to receive(:new).with('localhost', 9091).and_return(http) + + expect { push.send(:request, Net::HTTP::Post, registry) }.to raise_error( + Prometheus::Client::Push::HttpClientError + ) + end + end + + context 'for a 5xx response' do + let(:response) do + double( + :response, + code: '500', + message: 'Internal Server Error', + body: 'Apology for the server code being broken' + ) + end + + it 'raises a server error' do + request = double(:request) + allow(request).to receive(:content_type=) + allow(request).to receive(:body=) + allow(Net::HTTP::Post).to receive(:new).with(uri).and_return(request) + + http = double(:http) + allow(http).to receive(:use_ssl=) + allow(http).to receive(:open_timeout=) + allow(http).to receive(:read_timeout=) + allow(http).to receive(:request).with(request).and_return(response) + allow(Net::HTTP).to receive(:new).with('localhost', 9091).and_return(http) + + expect { push.send(:request, Net::HTTP::Post, registry) }.to raise_error( + Prometheus::Client::Push::HttpServerError + ) + end + end + + it 'deletes data from the registry' do + request = double(:request) + expect(request).to receive(:content_type=).with(content_type) + expect(Net::HTTP::Delete).to receive(:new).with(uri).and_return(request) + http = double(:http) - expect(http).to receive(:send_request).with( - 'DELETE', - '/metrics/jobs/foo/instances/bar', - ) expect(http).to receive(:use_ssl=).with(false) - expect(Net::HTTP).to receive(:new).with('pu.sh', 9091).and_return(http) + expect(http).to receive(:open_timeout=).with(5) + expect(http).to receive(:read_timeout=).with(30) + expect(http).to receive(:request).with(request).and_return(response) + expect(Net::HTTP).to receive(:new).with('localhost', 
9091).and_return(http) + + push.send(:request, Net::HTTP::Delete) + end + + context 'HTTPS support' do + let(:gateway) { 'https://localhost:9091' } + + it 'uses HTTPS when requested' do + request = double(:request) + expect(request).to receive(:content_type=).with(content_type) + expect(request).to receive(:body=).with(data) + expect(Net::HTTP::Post).to receive(:new).with(uri).and_return(request) + + http = double(:http) + expect(http).to receive(:use_ssl=).with(true) + expect(http).to receive(:open_timeout=).with(5) + expect(http).to receive(:read_timeout=).with(30) + expect(http).to receive(:request).with(request).and_return(response) + expect(Net::HTTP).to receive(:new).with('localhost', 9091).and_return(http) + + push.send(:request, Net::HTTP::Post, registry) + end + end + + context 'Basic Auth support' do + context 'when credentials are passed in the gateway URL' do + let(:gateway) { 'https://super:secret@localhost:9091' } + + it "raises an ArgumentError explaining why we don't support that mechanism" do + expect { push }.to raise_error ArgumentError, /in the gateway URL.*username `super`/m + end + end - described_class.new('foo', 'bar', 'http://pu.sh:9091').delete + context 'when credentials are passed to the separate `basic_auth` method' do + let(:gateway) { 'https://localhost:9091' } + + it 'passes the credentials on to the HTTP client' do + request = double(:request) + expect(request).to receive(:content_type=).with(content_type) + expect(request).to receive(:basic_auth).with('super', 'secret') + expect(request).to receive(:body=).with(data) + expect(Net::HTTP::Put).to receive(:new).with(uri).and_return(request) + + http = double(:http) + expect(http).to receive(:use_ssl=).with(true) + expect(http).to receive(:open_timeout=).with(5) + expect(http).to receive(:read_timeout=).with(30) + expect(http).to receive(:request).with(request).and_return(response) + expect(Net::HTTP).to receive(:new).with('localhost', 9091).and_return(http) + + 
push.basic_auth("super", "secret") + + push.send(:request, Net::HTTP::Put, registry) + end + end + end + + context 'with a grouping key that clashes with a metric label' do + let(:grouping_key) { { foo: "bar"} } + + before do + gauge = Prometheus::Client::Gauge.new( + :test_gauge, + labels: [:foo], + docstring: "test docstring" + ) + registry.register(gauge) + gauge.set(42, labels: { foo: "bar" }) + end + + it 'raises an error when grouping key labels conflict with metric labels' do + expect { push.send(:request, Net::HTTP::Post, registry) }.to raise_error( + Prometheus::Client::LabelSetValidator::InvalidLabelSetError + ) + end end end end diff --git a/spec/prometheus/client/registry_spec.rb b/spec/prometheus/client/registry_spec.rb index 9b589c38..3c4da190 100644 --- a/spec/prometheus/client/registry_spec.rb +++ b/spec/prometheus/client/registry_spec.rb @@ -33,10 +33,6 @@ mutex = Mutex.new containers = [] - def registry.exist?(*args) - super.tap { sleep(0.01) } - end - Array.new(5) do Thread.new do result = begin @@ -52,9 +48,23 @@ def registry.exist?(*args) end end + describe '#unregister' do + it 'unregister a registered metric' do + registry.register(double(name: :test)) + registry.unregister(:test) + expect(registry.exist?(:test)).to eql(false) + end + + it "doesn't raise when unregistering a not registered metrics" do + expect do + registry.unregister(:test) + end.not_to raise_error + end + end + describe '#counter' do it 'registers a new counter metric container and returns the counter' do - metric = registry.counter(:test, 'test docstring') + metric = registry.counter(:test, docstring: 'test docstring') expect(metric).to be_a(Prometheus::Client::Counter) end @@ -62,7 +72,7 @@ def registry.exist?(*args) describe '#gauge' do it 'registers a new gauge metric container and returns the gauge' do - metric = registry.gauge(:test, 'test docstring') + metric = registry.gauge(:test, docstring: 'test docstring') expect(metric).to be_a(Prometheus::Client::Gauge) end @@ 
-70,7 +80,7 @@ def registry.exist?(*args) describe '#summary' do it 'registers a new summary metric container and returns the summary' do - metric = registry.summary(:test, 'test docstring') + metric = registry.summary(:test, docstring: 'test docstring') expect(metric).to be_a(Prometheus::Client::Summary) end @@ -78,7 +88,7 @@ def registry.exist?(*args) describe '#histogram' do it 'registers a new histogram metric container and returns the histogram' do - metric = registry.histogram(:test, 'test docstring') + metric = registry.histogram(:test, docstring: 'test docstring') expect(metric).to be_a(Prometheus::Client::Histogram) end diff --git a/spec/prometheus/client/summary_spec.rb b/spec/prometheus/client/summary_spec.rb index 627039a5..ba02ad36 100644 --- a/spec/prometheus/client/summary_spec.rb +++ b/spec/prometheus/client/summary_spec.rb @@ -1,56 +1,114 @@ # encoding: UTF-8 +require 'prometheus/client' require 'prometheus/client/summary' require 'examples/metric_example' describe Prometheus::Client::Summary do - let(:summary) { Prometheus::Client::Summary.new(:bar, 'bar description') } + # Reset the data store + before do + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new + end + + let(:expected_labels) { [] } + + let(:summary) do + Prometheus::Client::Summary.new(:bar, + docstring: 'bar description', + labels: expected_labels) + end - it_behaves_like Prometheus::Client::Metric do - let(:type) { Hash } + it_behaves_like Prometheus::Client::Metric + + describe '#initialization' do + it 'raise error for `quantile` label' do + expect do + described_class.new(:bar, docstring: 'bar description', labels: [:quantile]) + end.to raise_error Prometheus::Client::LabelSetValidator::ReservedLabelError + end end describe '#observe' do it 'records the given value' do expect do - summary.observe({}, 5) - end.to change { summary.get } + summary.observe(5) + end.to change { summary.get }. + from({ "count" => 0.0, "sum" => 0.0 }). 
+ to({ "count" => 1.0, "sum" => 5.0 }) end - end - describe '#get' do - before do - summary.observe({ foo: 'bar' }, 3) - summary.observe({ foo: 'bar' }, 5.2) - summary.observe({ foo: 'bar' }, 13) - summary.observe({ foo: 'bar' }, 4) + it 'raise error for quantile labels' do + expect do + summary.observe(5, labels: { quantile: 1 }) + end.to raise_error Prometheus::Client::LabelSetValidator::InvalidLabelSetError end - it 'returns a set of quantile values' do - expect(summary.get(foo: 'bar')).to eql(0.5 => 4, 0.9 => 5.2, 0.99 => 5.2) + context "with a an expected label set" do + let(:expected_labels) { [:test] } + + it 'observes a value for a given label set' do + expect do + expect do + summary.observe(5, labels: { test: 'value' }) + end.to change { summary.get(labels: { test: 'value' })["count"] } + end.to_not change { summary.get(labels: { test: 'other' })["count"] } + end end + end - it 'returns a value which responds to #sum and #total' do - value = summary.get(foo: 'bar') + describe '#get' do + let(:expected_labels) { [:foo] } - expect(value.sum).to eql(25.2) - expect(value.total).to eql(4) + before do + summary.observe(3, labels: { foo: 'bar' }) + summary.observe(5.2, labels: { foo: 'bar' }) + summary.observe(13, labels: { foo: 'bar' }) + summary.observe(4, labels: { foo: 'bar' }) end - it 'uses nil as default value' do - expect(summary.get({})).to eql(0.5 => nil, 0.9 => nil, 0.99 => nil) + it 'returns a value which responds to #sum and #total' do + expect(summary.get(labels: { foo: 'bar' })). 
+ to eql({ "count" => 4.0, "sum" => 25.2 }) end end describe '#values' do + let(:expected_labels) { [:status] } + it 'returns a hash of all recorded summaries' do - summary.observe({ status: 'bar' }, 3) - summary.observe({ status: 'foo' }, 5) + summary.observe(3, labels: { status: 'bar' }) + summary.observe(5, labels: { status: 'foo' }) expect(summary.values).to eql( - { status: 'bar' } => { 0.5 => 3, 0.9 => 3, 0.99 => 3 }, - { status: 'foo' } => { 0.5 => 5, 0.9 => 5, 0.99 => 5 }, + { status: 'bar' } => { "count" => 1.0, "sum" => 3.0 }, + { status: 'foo' } => { "count" => 1.0, "sum" => 5.0 }, ) end end + + describe '#init_label_set' do + context "with labels" do + let(:expected_labels) { [:status] } + + it 'initializes the metric for a given label set' do + expect(summary.values).to eql({}) + + summary.init_label_set(status: 'bar') + summary.init_label_set(status: 'foo') + + expect(summary.values).to eql( + { status: 'bar' } => { "count" => 0.0, "sum" => 0.0 }, + { status: 'foo' } => { "count" => 0.0, "sum" => 0.0 }, + ) + end + end + + context "without labels" do + it 'automatically initializes the metric' do + expect(summary.values).to eql( + {} => { "count" => 0.0, "sum" => 0.0 }, + ) + end + end + end end diff --git a/spec/prometheus/middleware/collector_spec.rb b/spec/prometheus/middleware/collector_spec.rb index 5dd38708..9d65e158 100644 --- a/spec/prometheus/middleware/collector_spec.rb +++ b/spec/prometheus/middleware/collector_spec.rb @@ -6,18 +6,25 @@ describe Prometheus::Middleware::Collector do include Rack::Test::Methods + # Reset the data store + before do + Prometheus::Client.config.data_store = Prometheus::Client::DataStores::Synchronized.new + end + let(:registry) do Prometheus::Client::Registry.new end let(:original_app) do - ->(_) { [200, { 'Content-Type' => 'text/html' }, ['OK']] } + ->(_) { [200, { 'content-type' => 'text/html' }, ['OK']] } end let!(:app) do described_class.new(original_app, registry: registry) end + let(:dummy_error) { 
RuntimeError.new("Dummy error from tests") } + it 'returns the app response' do get '/foo' @@ -27,7 +34,7 @@ it 'handles errors in the registry gracefully' do counter = registry.get(:http_server_requests_total) - expect(counter).to receive(:increment).and_raise(NoMethodError) + expect(counter).to receive(:increment).and_raise(dummy_error) get '/foo' @@ -35,53 +42,93 @@ end it 'traces request information' do - expect(Benchmark).to receive(:realtime).and_yield.and_return(0.2) + expect(app).to receive(:realtime).and_yield.and_return(0.2) get '/foo' metric = :http_server_requests_total labels = { method: 'get', path: '/foo', code: '200' } - expect(registry.get(metric).get(labels)).to eql(1.0) + expect(registry.get(metric).get(labels: labels)).to eql(1.0) metric = :http_server_request_duration_seconds labels = { method: 'get', path: '/foo' } - expect(registry.get(metric).get(labels)).to include(0.1 => 0, 0.25 => 1) + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.25" => 1) + end + + it 'includes SCRIPT_NAME in the path if provided' do + metric = :http_server_requests_total + + get '/foo' + expect(registry.get(metric).values.keys.last[:path]).to eql("/foo") + + env('SCRIPT_NAME', '/engine') + get '/foo' + env('SCRIPT_NAME', nil) + expect(registry.get(metric).values.keys.last[:path]).to eql("/engine/foo") end it 'normalizes paths containing numeric IDs by default' do - expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + expect(app).to receive(:realtime).and_yield.and_return(0.3) get '/foo/42/bars' metric = :http_server_requests_total labels = { method: 'get', path: '/foo/:id/bars', code: '200' } - expect(registry.get(metric).get(labels)).to eql(1.0) + expect(registry.get(metric).get(labels: labels)).to eql(1.0) metric = :http_server_request_duration_seconds labels = { method: 'get', path: '/foo/:id/bars' } - expect(registry.get(metric).get(labels)).to include(0.1 => 0, 0.5 => 1) + expect(registry.get(metric).get(labels: 
labels)).to include("0.1" => 0, "0.5" => 1) end it 'normalizes paths containing UUIDs by default' do - expect(Benchmark).to receive(:realtime).and_yield.and_return(0.3) + expect(app).to receive(:realtime).and_yield.and_return(0.3) get '/foo/5180349d-a491-4d73-af30-4194a46bdff3/bars' metric = :http_server_requests_total labels = { method: 'get', path: '/foo/:uuid/bars', code: '200' } - expect(registry.get(metric).get(labels)).to eql(1.0) + expect(registry.get(metric).get(labels: labels)).to eql(1.0) metric = :http_server_request_duration_seconds labels = { method: 'get', path: '/foo/:uuid/bars' } - expect(registry.get(metric).get(labels)).to include(0.1 => 0, 0.5 => 1) + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + + it 'handles consecutive path segments containing IDs' do + expect(app).to receive(:realtime).and_yield.and_return(0.3) + + get '/foo/42/24' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/foo/:id/:id', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/foo/:id/:id' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) + end + + it 'handles consecutive path segments containing UUIDs' do + expect(app).to receive(:realtime).and_yield.and_return(0.3) + + get '/foo/5180349d-a491-4d73-af30-4194a46bdff3/5180349d-a491-4d73-af30-4194a46bdff2' + + metric = :http_server_requests_total + labels = { method: 'get', path: '/foo/:uuid/:uuid', code: '200' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) + + metric = :http_server_request_duration_seconds + labels = { method: 'get', path: '/foo/:uuid/:uuid' } + expect(registry.get(metric).get(labels: labels)).to include("0.1" => 0, "0.5" => 1) end context 'when the app raises an exception' do let(:original_app) do lambda do |env| - raise NoMethodError if env['PATH_INFO'] == '/broken' + 
raise dummy_error if env['PATH_INFO'] == '/broken' - [200, { 'Content-Type' => 'text/html' }, ['OK']] + [200, { 'content-type' => 'text/html' }, ['OK']] end end @@ -90,34 +137,11 @@ end it 'traces exceptions' do - expect { get '/broken' }.to raise_error NoMethodError + expect { get '/broken' }.to raise_error RuntimeError metric = :http_server_exceptions_total - labels = { exception: 'NoMethodError' } - expect(registry.get(metric).get(labels)).to eql(1.0) - end - end - - context 'when using a custom counter label builder' do - let(:app) do - described_class.new( - original_app, - registry: registry, - counter_label_builder: lambda do |env, code| - { - code: code, - method: env['REQUEST_METHOD'].downcase, - } - end, - ) - end - - it 'allows labels configuration' do - get '/foo/bar' - - metric = :http_server_requests_total - labels = { method: 'get', code: '200' } - expect(registry.get(metric).get(labels)).to eql(1.0) + labels = { exception: 'RuntimeError' } + expect(registry.get(metric).get(labels: labels)).to eql(1.0) end end diff --git a/spec/prometheus/middleware/exporter_spec.rb b/spec/prometheus/middleware/exporter_spec.rb index b1a1a791..e8232fc5 100644 --- a/spec/prometheus/middleware/exporter_spec.rb +++ b/spec/prometheus/middleware/exporter_spec.rb @@ -6,13 +6,14 @@ describe Prometheus::Middleware::Exporter do include Rack::Test::Methods + let(:options) { { registry: registry } } let(:registry) do Prometheus::Client::Registry.new end let(:app) do - app = ->(_) { [200, { 'Content-Type' => 'text/html' }, ['OK']] } - described_class.new(app, registry: registry) + app = ->(_) { [200, { 'content-type' => 'text/html' }, ['OK']] } + described_class.new(app, **options) end context 'when requesting app endpoints' do @@ -28,13 +29,13 @@ text = Prometheus::Client::Formats::Text shared_examples 'ok' do |headers, fmt| - it "responds with 200 OK and Content-Type #{fmt::CONTENT_TYPE}" do - registry.counter(:foo, 'foo counter').increment({}, 9) + it "responds with 200 OK 
and content-type #{fmt::CONTENT_TYPE}" do + registry.counter(:foo, docstring: 'foo counter').increment(by: 9) get '/metrics', nil, headers expect(last_response.status).to eql(200) - expect(last_response.header['Content-Type']).to eql(fmt::CONTENT_TYPE) + expect(last_response.headers['content-type']).to eql(fmt::CONTENT_TYPE) expect(last_response.body).to eql(fmt.marshal(registry)) end end @@ -46,7 +47,7 @@ get '/metrics', nil, headers expect(last_response.status).to eql(406) - expect(last_response.header['Content-Type']).to eql('text/plain') + expect(last_response.headers['content-type']).to eql('text/plain') expect(last_response.body).to eql(message) end end @@ -96,5 +97,30 @@ include_examples 'ok', { 'HTTP_ACCEPT' => accept }, text end + + context 'when a port is specified' do + let(:options) { { registry: registry, port: 9999 } } + + context 'when a request is on the specified port' do + it 'responds with 200 OK' do + registry.counter(:foo, docstring: 'foo counter').increment(by: 9) + + get 'http://example.org:9999/metrics', nil, {} + + expect(last_response.status).to eql(200) + expect(last_response.headers['content-type']).to eql(text::CONTENT_TYPE) + expect(last_response.body).to eql(text.marshal(registry)) + end + end + + context 'when a request is not on the specified port' do + it 'returns the app response' do + get 'http://example.org:8888/metrics', nil, {} + + expect(last_response).to be_ok + expect(last_response.body).to eql('OK') + end + end + end end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 8cf81717..b6f707b1 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,13 +1,14 @@ # encoding: UTF-8 require 'simplecov' -require 'coveralls' +require 'timecop' -SimpleCov.formatter = - if ENV['CI'] - Coveralls::SimpleCov::Formatter - else - SimpleCov::Formatter::HTMLFormatter - end +RSpec.configure do |c| + c.warnings = true +end + +SimpleCov.formatter = SimpleCov::Formatter::HTMLFormatter SimpleCov.start + +Timecop.safe_mode = 
true