| title: | max.nn.kv_cache |
| type: | module |
| lang: | python |
| wrapper_class: | rst-module-autosummary |
.. automodule:: max.nn.kv_cache
   :no-members:
.. currentmodule:: max.nn.kv_cache
.. autosummary::
   :nosignatures:
   :toctree: generated
   :template: autosummary/class.rst

   KVCacheBuffer
   KVCacheParamInterface
   KVCacheParams
   KVCacheQuantizationConfig
   KVConnectorType
   KVCacheMemory
   MultiKVCacheParams
   ReplicatedKVCacheMemory
.. autosummary::
   :nosignatures:
   :toctree: generated
   :template: autosummary/class.rst

   KVCacheInputs
   KVCacheInputsPerDevice
   BatchCharacteristics
   PagedCacheValues
.. autosummary::
   :nosignatures:
   :toctree: generated
   :template: autosummary/class.rst

   AttentionDispatchResolver
   AttnKey
   MHAAttnKey
   MLAAttnKey
.. autosummary::
   :nosignatures:
   :toctree: generated
   :template: autosummary/class.rst

   KVCacheMetrics
.. autosummary::
   :nosignatures:
   :toctree: generated
   :template: autosummary/function.rst

   build_max_lengths_tensor
   compute_max_seq_len_fitting_in_cache
   compute_num_device_blocks
   compute_num_host_blocks
   estimated_memory_size